diff options
author | Luke Chen <luke.chen@mongodb.com> | 2020-11-13 15:57:42 +1100 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-11-13 05:15:33 +0000 |
commit | 8bb9ed7824835cdfd146d642468508c01ea087ad (patch) | |
tree | 25118dd8dabdc8e892057067583845f47547aaad /src/third_party | |
parent | 6dc4a4eaf4acc7472590040fa5795901b5140fc5 (diff) | |
download | mongo-8bb9ed7824835cdfd146d642468508c01ea087ad.tar.gz |
Import wiredtiger: 0ab3e59875ecfabbe3cd9d19c0c3e05b72bad1cf from branch mongodb-5.0
ref: 5f9a0178f2..0ab3e59875
for: 4.9.0
WT-6449 Hang analyzer for WT Evergreen tests
WT-6706 Add table import repair functionality
WT-6816 Design write gen scheme to allow dhandles with active history to get closed/re-opened
WT-6857 Define a new cursor for the history store access.
Diffstat (limited to 'src/third_party')
21 files changed, 1237 insertions, 107 deletions
diff --git a/src/third_party/wiredtiger/dist/s_funcs.list b/src/third_party/wiredtiger/dist/s_funcs.list index fe19e596bf4..0b7db52d26c 100644 --- a/src/third_party/wiredtiger/dist/s_funcs.list +++ b/src/third_party/wiredtiger/dist/s_funcs.list @@ -13,6 +13,7 @@ __wt_bulk_insert_fix __wt_bulk_insert_row __wt_bulk_insert_var __wt_config_getone +__wt_curhs_open __wt_cursor_get_raw_value __wt_debug_addr __wt_debug_addr_print diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 0e4185705d9..70e3738e022 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -346,6 +346,7 @@ Redistributions Refactor Resize RocksDB +Runtime SIMD SLIST SLVG @@ -634,6 +635,7 @@ curconfig curdump curextract curfile +curhs curindex curjoin curlog diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 59b9bb25f00..518309b2758 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-5.0", - "commit": "5f9a0178f292c964ef5b40c4e639e3e0a438e5e5" + "commit": "0ab3e59875ecfabbe3cd9d19c0c3e05b72bad1cf" } diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 42139969802..60cde6ce8b5 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -522,16 +522,45 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) btree->checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT); /* Checkpoint generation */ /* - * In the regular case, we'll be initializing to the connection-wide base write generation since - * this is the largest of all btree write generations from the previous run. This has the nice - * property of ensuring that the range of write generations used by consecutive runs do not - * overlap which aids with debugging. + * The first time we open a btree, we'll be initializing the write gen to the connection-wide + * base write generation since this is the largest of all btree write generations from the + * previous run. This has the nice property of ensuring that the range of write generations used + * by consecutive runs do not overlap which aids with debugging. * - * In the import case, the btree write generation from the last run may actually be ahead of the - * connection-wide base write generation. In that case, we should initialize our write gen just - * ahead of our btree specific write generation. + * If we're reopening a btree or importing a new one to a running system, the btree write + * generation from the last run may actually be ahead of the connection-wide base write + * generation. In that case, we should initialize our write gen just ahead of our btree specific + * write generation. + * + * The runtime write generation is important since it's going to determine what we're going to + * use as the base write generation (and thus what pages to wipe transaction ids from). The idea + * is that we want to initialize it once the first time we open the btree during a run and then + * for every subsequent open, we want to reuse it. This so that we're still able to read + * transaction ids from the previous time a btree was open in the same run. + * + * FIXME-WT-6819: When we begin discarding dhandles more aggressively, we need to check that + * updates aren't having their transaction ids wiped after reopening the dhandle. The runtime + * write generation is relevant here since it should remain static across the entire run. + */ + btree->write_gen = WT_MAX(ckpt->write_gen + 1, conn->base_write_gen); + WT_ASSERT(session, ckpt->write_gen >= ckpt->run_write_gen); + + /* If this is the first time opening the tree this run. */ + if (F_ISSET(session, WT_SESSION_IMPORT) || ckpt->run_write_gen < conn->base_write_gen) + btree->base_write_gen = btree->run_write_gen = btree->write_gen; + else + btree->base_write_gen = btree->run_write_gen = ckpt->run_write_gen; + + /* + * We've just overwritten the runtime write generation based off the fact that know that we're + * importing and therefore, the checkpoint data's runtime write generation is meaningless. We + * need to ensure that the underlying dhandle doesn't get discarded without being included in a + * subsequent checkpoint including the new overwritten runtime write generation. Otherwise, + * we'll reopen, won't know that we're in the import case and will incorrectly use the old + * system's runtime write generation. */ - btree->write_gen = btree->base_write_gen = WT_MAX(ckpt->write_gen + 1, conn->base_write_gen); + if (F_ISSET(session, WT_SESSION_IMPORT)) + btree->modified = true; return (0); } diff --git a/src/third_party/wiredtiger/src/btree/bt_import.c b/src/third_party/wiredtiger/src/btree/bt_import.c index 39d0dda368a..e779d90fc66 100644 --- a/src/third_party/wiredtiger/src/btree/bt_import.c +++ b/src/third_party/wiredtiger/src/btree/bt_import.c @@ -13,7 +13,7 @@ * Import a WiredTiger file into the database and reconstruct its metadata. */ int -__wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp) +__wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **configp) { WT_BM *bm; WT_CKPT *ckpt, *ckptbase; @@ -24,13 +24,12 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp) WT_DECL_RET; WT_KEYED_ENCRYPTOR *kencryptor; uint32_t allocsize; - char *checkpoint_list, *fileconf, *metadata, fileid[64]; - const char *filecfg[] = { - WT_CONFIG_BASE(session, file_meta), NULL, NULL, NULL, NULL, NULL, NULL}; + char *checkpoint_list, *config, *config_tmp, *metadata, fileid[64]; + const char *cfg[] = {WT_CONFIG_BASE(session, file_meta), NULL, NULL, NULL, NULL, NULL, NULL}; const char *filename; ckptbase = NULL; - checkpoint_list = fileconf = metadata = NULL; + checkpoint_list = config = config_tmp = metadata = NULL; WT_ERR(__wt_scr_alloc(session, 0, &a)); WT_ERR(__wt_scr_alloc(session, 0, &b)); @@ -45,13 +44,11 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp) * size, but 512B allows us to read the descriptor block and that's all we care about. */ F_SET(session, WT_SESSION_IMPORT_REPAIR); - WT_ERR(__wt_block_manager_open(session, filename, filecfg, false, true, 512, &bm)); + WT_ERR(__wt_block_manager_open(session, filename, cfg, false, true, 512, &bm)); ret = bm->checkpoint_last(bm, session, &metadata, &checkpoint_list, checkpoint); WT_TRET(bm->close(bm, session)); F_CLR(session, WT_SESSION_IMPORT_REPAIR); WT_ERR(ret); - __wt_verbose(session, WT_VERB_CHECKPOINT, "import metadata: %s", metadata); - __wt_verbose(session, WT_VERB_CHECKPOINT, "import checkpoint-list: %s", checkpoint_list); /* * The metadata may have been encrypted, in which case it's also hexadecimal encoded. The @@ -59,7 +56,7 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp) * diagnosis. */ WT_ERR(__wt_config_getones(session, metadata, "block_metadata_encrypted", &v)); - WT_ERR(__wt_btree_config_encryptor(session, filecfg, &kencryptor)); + WT_ERR(__wt_btree_config_encryptor(session, cfg, &kencryptor)); if ((kencryptor == NULL && v.val != 0) || (kencryptor != NULL && v.val == 0)) WT_ERR_MSG(session, EINVAL, "%s: loaded object's encryption configuration doesn't match the database's encryption " @@ -99,20 +96,18 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp) * Strip out the checkpoint LSN, an imported file isn't associated with any log files. Assign a * unique file ID. */ - filecfg[1] = a->data; - filecfg[2] = checkpoint_list; - filecfg[3] = "checkpoint_backup_info="; - filecfg[4] = "checkpoint_lsn="; + cfg[1] = a->data; + cfg[2] = checkpoint_list; + cfg[3] = "checkpoint_backup_info="; + cfg[4] = "checkpoint_lsn="; WT_WITH_SCHEMA_LOCK(session, ret = __wt_snprintf(fileid, sizeof(fileid), "id=%" PRIu32, ++S2C(session)->next_file_id)); WT_ERR(ret); - filecfg[5] = fileid; - WT_ERR(__wt_config_collapse(session, filecfg, &fileconf)); - WT_ERR(__wt_metadata_insert(session, uri, fileconf)); - __wt_verbose(session, WT_VERB_CHECKPOINT, "import configuration: %s/%s", uri, fileconf); + cfg[5] = fileid; + WT_ERR(__wt_config_collapse(session, cfg, &config_tmp)); /* Now that we've retrieved the configuration, let's get the real allocation size. */ - WT_ERR(__wt_config_getones(session, fileconf, "allocation_size", &v)); + WT_ERR(__wt_config_getones(session, config_tmp, "allocation_size", &v)); allocsize = (uint32_t)v.val; /* @@ -120,32 +115,21 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp) * size. When we did this earlier, we were able to read the descriptor block properly but the * checkpoint's byte representation was wrong because it was using the wrong allocation size. */ - WT_ERR(__wt_block_manager_open(session, filename, filecfg, false, true, allocsize, &bm)); + WT_ERR(__wt_block_manager_open(session, filename, cfg, false, true, allocsize, &bm)); + __wt_free(session, checkpoint_list); + __wt_free(session, metadata); ret = bm->checkpoint_last(bm, session, &metadata, &checkpoint_list, checkpoint); WT_TRET(bm->close(bm, session)); /* - * The just inserted metadata was correct as of immediately before the final checkpoint, but - * it's not quite right. The block manager returned the corrected final checkpoint, put it all - * together. + * The metadata was correct as of immediately before the final checkpoint, but it's not quite + * right. The block manager returned the corrected final checkpoint, put it all together. * * Get the checkpoint information from the file's metadata as an array of WT_CKPT structures. - * - * XXX There's a problem here. If a file is imported from our future (leaf pages with unstable - * entries that have write-generations ahead of the current database's base write generation), - * we'll read the values and treat them as stable. A restart will fix this: when we added the - * imported file to our metadata, the write generation in the imported file's checkpoints - * updated our database's maximum write generation, and so a restart will have a maximum - * generation newer than the imported file's write generation. An alternative solution is to add - * a "base write generation" value to the imported file's metadata, and use that value instead - * of the connection's base write generation when deciding what page items should be read. Since - * all future writes to the imported file would be ahead of that write generation, it would have - * the effect we want. - * * Update the last checkpoint with the corrected information. Update the file's metadata with * the new checkpoint information. */ - WT_ERR(__wt_meta_ckptlist_get(session, uri, false, &ckptbase)); + WT_ERR(__wt_meta_ckptlist_get_from_config(session, false, &ckptbase, config_tmp)); WT_CKPT_FOREACH (ckptbase, ckpt) if (ckpt->name == NULL || (ckpt + 1)->name == NULL) break; @@ -153,20 +137,20 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp) WT_ERR_MSG(session, EINVAL, "no checkpoint information available to import"); F_SET(ckpt, WT_CKPT_UPDATE); WT_ERR(__wt_buf_set(session, &ckpt->raw, checkpoint->data, checkpoint->size)); - WT_ERR(__wt_meta_ckptlist_set(session, uri, ckptbase, NULL)); - - WT_ASSERT(session, fileconfp != NULL); - *fileconfp = fileconf; + WT_ERR(__wt_meta_ckptlist_update_config(session, ckptbase, config_tmp, &config)); + __wt_verbose(session, WT_VERB_CHECKPOINT, "import metadata: %s", config); + *configp = config; err: F_CLR(session, WT_SESSION_IMPORT_REPAIR); __wt_meta_ckptlist_free(session, &ckptbase); + __wt_free(session, checkpoint_list); if (ret != 0) - __wt_free(session, fileconf); + __wt_free(session, config); + __wt_free(session, config_tmp); __wt_free(session, metadata); - __wt_free(session, checkpoint_list); __wt_scr_free(session, &a); __wt_scr_free(session, &b); diff --git a/src/third_party/wiredtiger/src/cursor/cur_hs.c b/src/third_party/wiredtiger/src/cursor/cur_hs.c index ba2799e2127..d46429fac23 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_hs.c +++ b/src/third_party/wiredtiger/src/cursor/cur_hs.c @@ -141,3 +141,102 @@ __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exa session, WT_ISO_READ_UNCOMMITTED, ret = cursor->search_near(cursor, exactp)); return (ret); } + +/* + * __curhs_close -- + * WT_CURSOR->close method for the hs cursor type. + */ +static int +__curhs_close(WT_CURSOR *cursor) +{ + WT_CURSOR *file_cursor; + WT_CURSOR_HS *hs_cursor; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + hs_cursor = (WT_CURSOR_HS *)cursor; + file_cursor = hs_cursor->file_cursor; + CURSOR_API_CALL_PREPARE_ALLOWED( + cursor, session, close, file_cursor == NULL ? NULL : CUR2BT(file_cursor)); +err: + + if (file_cursor != NULL) + WT_TRET(file_cursor->close(file_cursor)); + __wt_cursor_close(cursor); + + API_END_RET(session, ret); +} + +/* + * __curhs_reset -- + * Reset a history store cursor. + */ +static int +__curhs_reset(WT_CURSOR *cursor) +{ + WT_CURSOR *file_cursor; + WT_CURSOR_HS *hs_cursor; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + hs_cursor = (WT_CURSOR_HS *)cursor; + file_cursor = hs_cursor->file_cursor; + CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, CUR2BT(file_cursor)); + + ret = file_cursor->reset(file_cursor); + F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); + +err: + API_END_RET(session, ret); +} + +/* + * __wt_curhs_open -- + * Initialize a history store cursor. + */ +int +__wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp) +{ + WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */ + __wt_cursor_get_value, /* get-value */ + __wt_cursor_set_key, /* set-key */ + __wt_cursor_set_value, /* set-value */ + __wt_cursor_compare_notsup, /* compare */ + __wt_cursor_equals_notsup, /* equals */ + __wt_cursor_notsup, /* next */ + __wt_cursor_notsup, /* prev */ + __curhs_reset, /* reset */ + __wt_cursor_notsup, /* search */ + __wt_cursor_search_near_notsup, /* search-near */ + __wt_cursor_notsup, /* insert */ + __wt_cursor_modify_value_format_notsup, /* modify */ + __wt_cursor_notsup, /* update */ + __wt_cursor_notsup, /* remove */ + __wt_cursor_notsup, /* reserve */ + __wt_cursor_reconfigure_notsup, /* reconfigure */ + __wt_cursor_notsup, /* cache */ + __wt_cursor_reopen_notsup, /* reopen */ + __curhs_close); /* close */ + WT_CURSOR *cursor; + WT_CURSOR_HS *hs_cursor; + WT_DECL_RET; + + WT_RET(__wt_calloc_one(session, &hs_cursor)); + cursor = (WT_CURSOR *)hs_cursor; + *cursor = iface; + cursor->session = (WT_SESSION *)session; + cursor->key_format = WT_HS_KEY_FORMAT; + cursor->value_format = WT_HS_VALUE_FORMAT; + + /* Open the file cursor for operations on the regular history store .*/ + WT_ERR(__hs_cursor_open_int(session, &hs_cursor->file_cursor)); + + WT_ERR(__wt_cursor_init(cursor, WT_HS_URI, owner, NULL, cursorp)); + + if (0) { +err: + WT_TRET(__curhs_close(cursor)); + *cursorp = NULL; + } + return (ret); +} diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h index b066bcde18f..3cc12d78e74 100644 --- a/src/third_party/wiredtiger/src/include/btmem.h +++ b/src/third_party/wiredtiger/src/include/btmem.h @@ -235,11 +235,13 @@ struct __wt_ovfl_reuse { #else #define WT_HS_COMPRESSOR "none" #endif -#define WT_HS_CONFIG \ - "key_format=" WT_UNCHECKED_STRING(IuQQ) ",value_format=" WT_UNCHECKED_STRING( \ - QQQu) ",block_compressor=" WT_HS_COMPRESSOR \ - ",leaf_value_max=64MB" \ - ",prefix_compression=false" +#define WT_HS_KEY_FORMAT WT_UNCHECKED_STRING(IuQQ) +#define WT_HS_VALUE_FORMAT WT_UNCHECKED_STRING(QQQu) +#define WT_HS_CONFIG \ + "key_format=" WT_HS_KEY_FORMAT ",value_format=" WT_HS_VALUE_FORMAT \ + ",block_compressor=" WT_HS_COMPRESSOR \ + ",leaf_value_max=64MB" \ + ",prefix_compression=false" /* * WT_PAGE_MODIFY -- diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index 19eea8088ae..4af6ea90f67 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -167,6 +167,7 @@ struct __wt_btree { uint64_t write_gen; /* Write generation */ uint64_t base_write_gen; /* Write generation on startup. */ + uint64_t run_write_gen; /* Runtime write generation. */ uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ wt_timestamp_t rec_max_timestamp; diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index 5460978d09f..1c5cd5dcb6b 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -282,6 +282,12 @@ struct __wt_cursor_dump { WT_CURSOR *child; }; +struct __wt_cursor_hs { + WT_CURSOR iface; + + WT_CURSOR *file_cursor; /* Queries of regular history store data */ +}; + struct __wt_cursor_index { WT_CURSOR iface; diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 0a6f3d79a9b..d902d72ff01 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -486,6 +486,8 @@ extern int __wt_curfile_next_random(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx, @@ -775,7 +777,7 @@ extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, cons size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_huffman_open(WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt, u_int numbytes, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp) +extern int __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **configp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -1036,12 +1038,14 @@ extern int __wt_meta_checkpoint_last_name(WT_SESSION_IMPL *session, const char * const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, const char *fname, bool update, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_meta_ckptlist_get_with_config(WT_SESSION_IMPL *session, bool update, +extern int __wt_meta_ckptlist_get_from_config(WT_SESSION_IMPL *session, bool update, WT_CKPT **ckptbasep, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_meta_ckptlist_update_config(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, + const char *oldcfg, char **newcfgp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_meta_sysinfo_set(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session) diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h index 9e274e9f3cc..b4902740837 100644 --- a/src/third_party/wiredtiger/src/include/meta.h +++ b/src/third_party/wiredtiger/src/include/meta.h @@ -137,7 +137,8 @@ struct __wt_ckpt { uint64_t size; /* Checkpoint size */ - uint64_t write_gen; /* Write generation */ + uint64_t write_gen; /* Write generation */ + uint64_t run_write_gen; /* Runtime write generation. */ char *block_metadata; /* Block-stored metadata */ char *block_checkpoint; /* Block-stored checkpoint */ diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h index 3492905ec0b..0186b98f4cc 100644 --- a/src/third_party/wiredtiger/src/include/session.h +++ b/src/third_party/wiredtiger/src/include/session.h @@ -170,31 +170,32 @@ struct __wt_session_impl { #define WT_SESSION_CACHE_CURSORS 0x00000004u #define WT_SESSION_CAN_WAIT 0x00000008u #define WT_SESSION_IGNORE_CACHE_SIZE 0x00000010u -#define WT_SESSION_IMPORT_REPAIR 0x00000020u -#define WT_SESSION_INSTANTIATE_PREPARE 0x00000040u -#define WT_SESSION_INTERNAL 0x00000080u -#define WT_SESSION_LOCKED_CHECKPOINT 0x00000100u -#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000200u -#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000400u -#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00000800u -#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00001000u -#define WT_SESSION_LOCKED_METADATA 0x00002000u -#define WT_SESSION_LOCKED_PASS 0x00004000u -#define WT_SESSION_LOCKED_SCHEMA 0x00008000u -#define WT_SESSION_LOCKED_SLOT 0x00010000u -#define WT_SESSION_LOCKED_TABLE_READ 0x00020000u -#define WT_SESSION_LOCKED_TABLE_WRITE 0x00040000u -#define WT_SESSION_LOCKED_TURTLE 0x00080000u -#define WT_SESSION_LOGGING_INMEM 0x00100000u -#define WT_SESSION_NO_DATA_HANDLES 0x00200000u -#define WT_SESSION_NO_LOGGING 0x00400000u -#define WT_SESSION_NO_RECONCILE 0x00800000u -#define WT_SESSION_NO_SCHEMA_LOCK 0x01000000u -#define WT_SESSION_QUIET_CORRUPT_FILE 0x02000000u -#define WT_SESSION_READ_WONT_NEED 0x04000000u -#define WT_SESSION_RESOLVING_TXN 0x08000000u -#define WT_SESSION_ROLLBACK_TO_STABLE 0x10000000u -#define WT_SESSION_SCHEMA_TXN 0x20000000u +#define WT_SESSION_IMPORT 0x00000020u +#define WT_SESSION_IMPORT_REPAIR 0x00000040u +#define WT_SESSION_INSTANTIATE_PREPARE 0x00000080u +#define WT_SESSION_INTERNAL 0x00000100u +#define WT_SESSION_LOCKED_CHECKPOINT 0x00000200u +#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000400u +#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000800u +#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00001000u +#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00002000u +#define WT_SESSION_LOCKED_METADATA 0x00004000u +#define WT_SESSION_LOCKED_PASS 0x00008000u +#define WT_SESSION_LOCKED_SCHEMA 0x00010000u +#define WT_SESSION_LOCKED_SLOT 0x00020000u +#define WT_SESSION_LOCKED_TABLE_READ 0x00040000u +#define WT_SESSION_LOCKED_TABLE_WRITE 0x00080000u +#define WT_SESSION_LOCKED_TURTLE 0x00100000u +#define WT_SESSION_LOGGING_INMEM 0x00200000u +#define WT_SESSION_NO_DATA_HANDLES 0x00400000u +#define WT_SESSION_NO_LOGGING 0x00800000u +#define WT_SESSION_NO_RECONCILE 0x01000000u +#define WT_SESSION_NO_SCHEMA_LOCK 0x02000000u +#define WT_SESSION_QUIET_CORRUPT_FILE 0x04000000u +#define WT_SESSION_READ_WONT_NEED 0x08000000u +#define WT_SESSION_RESOLVING_TXN 0x10000000u +#define WT_SESSION_ROLLBACK_TO_STABLE 0x20000000u +#define WT_SESSION_SCHEMA_TXN 0x40000000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h index b0e0b4c4585..a64fb5acdfe 100644 --- a/src/third_party/wiredtiger/src/include/wt_internal.h +++ b/src/third_party/wiredtiger/src/include/wt_internal.h @@ -141,6 +141,8 @@ struct __wt_cursor_data_source; typedef struct __wt_cursor_data_source WT_CURSOR_DATA_SOURCE; struct __wt_cursor_dump; typedef struct __wt_cursor_dump WT_CURSOR_DUMP; +struct __wt_cursor_hs; +typedef struct __wt_cursor_hs WT_CURSOR_HS; struct __wt_cursor_index; typedef struct __wt_cursor_index WT_CURSOR_INDEX; struct __wt_cursor_join; diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c index aa9191bdc6a..150880625a6 100644 --- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c +++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c @@ -501,7 +501,7 @@ __wt_meta_ckptlist_get( config = NULL; WT_ERR(__wt_metadata_search(session, fname, &config)); - WT_ERR(__wt_meta_ckptlist_get_with_config(session, update, ckptbasep, config)); + WT_ERR(__wt_meta_ckptlist_get_from_config(session, update, ckptbasep, config)); err: __wt_free(session, config); @@ -509,11 +509,11 @@ err: } /* - * __wt_meta_ckptlist_get_with_config -- + * __wt_meta_ckptlist_get_from_config -- * Provided a metadata config, load all available checkpoint information for a file. */ int -__wt_meta_ckptlist_get_with_config( +__wt_meta_ckptlist_get_from_config( WT_SESSION_IMPL *session, bool update, WT_CKPT **ckptbasep, const char *config) { WT_CKPT *ckpt, *ckptbase; @@ -697,6 +697,16 @@ __ckpt_load(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v, WT_C goto format; ckpt->write_gen = (uint64_t)a.val; + /* + * If runtime write generation isn't supplied, this means that we're doing an upgrade and that + * we're opening the tree for the first time. We should just leave it as 0 so it is recognized + * as part of a previous run. + */ + ret = __wt_config_subgets(session, v, "run_write_gen", &a); + WT_RET_NOTFOUND_OK(ret); + if (ret != WT_NOTFOUND && a.len != 0) + ckpt->run_write_gen = (uint64_t)a.val; + return (0); format: @@ -789,12 +799,13 @@ __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM "=(addr=\"%.*s\",order=%" PRId64 ",time=%" PRIu64 ",size=%" PRId64 ",newest_start_durable_ts=%" PRId64 ",oldest_start_ts=%" PRId64 ",newest_txn=%" PRId64 ",newest_stop_durable_ts=%" PRId64 ",newest_stop_ts=%" PRId64 ",newest_stop_txn=%" PRId64 - ",prepare=%d,write_gen=%" PRId64 ")", + ",prepare=%d,write_gen=%" PRId64 ",run_write_gen=%" PRId64 ")", (int)ckpt->addr.size, (char *)ckpt->addr.data, ckpt->order, ckpt->sec, (int64_t)ckpt->size, (int64_t)ckpt->ta.newest_start_durable_ts, (int64_t)ckpt->ta.oldest_start_ts, (int64_t)ckpt->ta.newest_txn, (int64_t)ckpt->ta.newest_stop_durable_ts, (int64_t)ckpt->ta.newest_stop_ts, - (int64_t)ckpt->ta.newest_stop_txn, (int)ckpt->ta.prepare, (int64_t)ckpt->write_gen)); + (int64_t)ckpt->ta.newest_stop_txn, (int)ckpt->ta.prepare, (int64_t)ckpt->write_gen, + (int64_t)ckpt->run_write_gen)); } WT_RET(__wt_buf_catfmt(session, buf, ")")); @@ -860,6 +871,44 @@ __wt_ckpt_blkmod_to_meta(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckpt) } /* + * __wt_meta_ckptlist_update_config -- + * Provided a metadata config and list of checkpoints, set a file's checkpoint value. + */ +int +__wt_meta_ckptlist_update_config( + WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *oldcfg, char **newcfgp) +{ + WT_CKPT *ckpt; + WT_DECL_ITEM(buf); + WT_DECL_RET; + char *newcfg; + const char *cfg[3]; + + newcfg = NULL; + WT_RET(__wt_scr_alloc(session, 1024, &buf)); + WT_ERR(__wt_meta_ckptlist_to_meta(session, ckptbase, buf)); + + /* Add backup block modifications for any added checkpoint. */ + WT_CKPT_FOREACH (ckptbase, ckpt) + if (F_ISSET(ckpt, WT_CKPT_ADD)) + WT_ERR(__wt_ckpt_blkmod_to_meta(session, buf, ckpt)); + + /* Replace the checkpoint entry. */ + cfg[0] = oldcfg; + cfg[1] = buf->mem; + cfg[2] = NULL; + WT_ERR(__wt_config_collapse(session, cfg, &newcfg)); + + *newcfgp = newcfg; + +err: + if (ret != 0) + __wt_free(session, newcfg); + __wt_scr_free(session, &buf); + return (ret); +} + +/* * __wt_meta_ckptlist_set -- * Set a file's checkpoint value from the WT_CKPT list. */ diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c index 84af4dffa0a..f49bea7250e 100644 --- a/src/third_party/wiredtiger/src/schema/schema_create.c +++ b/src/third_party/wiredtiger/src/schema/schema_create.c @@ -62,7 +62,7 @@ __check_imported_ts(WT_SESSION_IMPL *session, const char *uri, const char *confi ckptbase = NULL; txn_global = &S2C(session)->txn_global; - WT_ERR_NOTFOUND_OK(__wt_meta_ckptlist_get_with_config(session, false, &ckptbase, config), true); + WT_ERR_NOTFOUND_OK(__wt_meta_ckptlist_get_from_config(session, false, &ckptbase, config), true); if (ret == WT_NOTFOUND) WT_ERR_MSG(session, EINVAL, "%s: import could not find any checkpoint information in supplied metadata", uri); @@ -218,18 +218,18 @@ __create_file( ; *p = val->data; WT_ERR(__wt_config_collapse(session, filecfg, &fileconf)); - WT_ERR(__wt_metadata_insert(session, uri, fileconf)); } else { - /* Read the data file's descriptor block and try to recreate the associated metadata. */ + /* Try to recreate the associated metadata from the imported data source. */ WT_ERR(__wt_import_repair(session, uri, &fileconf)); } + WT_ERR(__wt_metadata_insert(session, uri, fileconf)); /* * Ensure that the timestamps in the imported data file are not in the future relative to * our oldest timestamp. */ if (import) - WT_ERR(__check_imported_ts(session, filename, fileconf)); + WT_ERR(__check_imported_ts(session, uri, fileconf)); } /* @@ -663,16 +663,18 @@ __create_table( WT_CONFIG_ITEM cgkey, cgval, ckey, cval; WT_DECL_RET; WT_TABLE *table; - size_t cgsize; + size_t len; int ncolgroups, nkeys; - char *tableconf, *cgname; + char *cgcfg, *cgname, *filecfg, *filename, *importcfg, *tablecfg; const char *cfg[4] = {WT_CONFIG_BASE(session, table_meta), config, NULL, NULL}; const char *tablename; bool import_repair; - cgname = NULL; + import_repair = false; + + cgcfg = filecfg = importcfg = tablecfg = NULL; + cgname = filename = NULL; table = NULL; - tableconf = NULL; WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE)); @@ -680,7 +682,7 @@ __create_table( WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:"); /* Check if the table already exists. */ - if ((ret = __wt_metadata_search(session, uri, &tableconf)) != WT_NOTFOUND) { + if ((ret = __wt_metadata_search(session, uri, &tablecfg)) != WT_NOTFOUND) { /* * Regardless of the 'exclusive' flag, we should raise an error if we try to import an * existing URI rather than just silently returning. @@ -707,6 +709,13 @@ __create_table( "'repair' option is provided", uri); WT_ERR_NOTFOUND_OK(ret, false); + } else { + /* Try to recreate the associated metadata from the imported data source. */ + len = strlen("file:") + strlen(tablename) + strlen(".wt") + 1; + WT_ERR(__wt_calloc_def(session, len, &filename)); + WT_ERR(__wt_snprintf(filename, len, "file:%s.wt", tablename)); + WT_ERR(__wt_import_repair(session, filename, &filecfg)); + cfg[2] = filecfg; } } @@ -716,14 +725,24 @@ __create_table( ; WT_ERR_NOTFOUND_OK(ret, false); - WT_ERR(__wt_config_collapse(session, cfg, &tableconf)); - WT_ERR(__wt_metadata_insert(session, uri, tableconf)); + WT_ERR(__wt_config_collapse(session, cfg, &tablecfg)); + WT_ERR(__wt_metadata_insert(session, uri, tablecfg)); if (ncolgroups == 0) { - cgsize = strlen("colgroup:") + strlen(tablename) + 1; - WT_ERR(__wt_calloc_def(session, cgsize, &cgname)); - WT_ERR(__wt_snprintf(cgname, cgsize, "colgroup:%s", tablename)); - WT_ERR(__create_colgroup(session, cgname, exclusive, config)); + len = strlen("colgroup:") + strlen(tablename) + 1; + WT_ERR(__wt_calloc_def(session, len, &cgname)); + WT_ERR(__wt_snprintf(cgname, len, "colgroup:%s", tablename)); + if (import_repair) { + len = + strlen(tablecfg) + strlen(",import=(enabled,file_metadata=())") + strlen(filecfg) + 1; + WT_ERR(__wt_calloc_def(session, len, &importcfg)); + WT_ERR(__wt_snprintf( + importcfg, len, "%s,import=(enabled,file_metadata=(%s))", tablecfg, filecfg)); + cfg[2] = importcfg; + WT_ERR(__wt_config_collapse(session, &cfg[1], &cgcfg)); + WT_ERR(__create_colgroup(session, cgname, exclusive, cgcfg)); + } else + WT_ERR(__create_colgroup(session, cgname, exclusive, config)); } /* @@ -739,8 +758,12 @@ __create_table( err: WT_TRET(__wt_schema_release_table(session, &table)); + __wt_free(session, cgcfg); __wt_free(session, cgname); - __wt_free(session, tableconf); + __wt_free(session, filecfg); + __wt_free(session, filename); + __wt_free(session, importcfg); + __wt_free(session, tablecfg); return (ret); } @@ -798,6 +821,8 @@ __schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config) * back it all out. */ WT_RET(__wt_meta_track_on(session)); + if (import) + F_SET(session, WT_SESSION_IMPORT); if (WT_PREFIX_MATCH(uri, "colgroup:")) ret = __create_colgroup(session, uri, exclusive, config); @@ -816,6 +841,7 @@ __schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config) ret = __wt_bad_object_type(session, uri); session->dhandle = NULL; + F_CLR(session, WT_SESSION_IMPORT); WT_TRET(__wt_meta_track_off(session, true, ret != 0)); return (ret); diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 54a13cedff0..8178e593f6b 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -1580,6 +1580,7 @@ __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, WT_TIME_AGGREGAT WT_CKPT_FOREACH (ckptbase, ckpt) if (F_ISSET(ckpt, WT_CKPT_ADD)) { ckpt->write_gen = btree->write_gen; + ckpt->run_write_gen = btree->run_write_gen; WT_TIME_AGGREGATE_COPY(&ckpt->ta, ta); } } diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c index 0b5b7da92c0..6e042fc9b33 100644 --- a/src/third_party/wiredtiger/src/txn/txn_recover.c +++ b/src/third_party/wiredtiger/src/txn/txn_recover.c @@ -841,11 +841,19 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(ret); } - /* Check whether the history store exists. */ - WT_ERR(__hs_exists(session, metac, cfg, &hs_exists)); - /* Scan the metadata to find the live files and their IDs. */ WT_ERR(__recovery_file_scan(&r)); + + /* + * Check whether the history store exists. + * + * This will open a dhandle on the history store and initialize its write gen so we must ensure + * that the connection-wide base write generation is stable at this point. Performing a recovery + * file scan will involve updating the connection-wide base write generation so we MUST do this + * before checking for the existence of a history store file. + */ + WT_ERR(__hs_exists(session, metac, cfg, &hs_exists)); + /* * Clear this out. We no longer need it and it could have been re-allocated when scanning the * files. diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml index 1827a2aec58..491d4b24cb3 100755 --- a/src/third_party/wiredtiger/test/evergreen.yml +++ b/src/third_party/wiredtiger/test/evergreen.yml @@ -284,6 +284,38 @@ functions: rm -rf "wiredtiger" rm -rf "wiredtiger.tgz" + "run wt hang analyzer": + command: shell.exec + params: + working_dir: "wiredtiger/build_posix" + script: | + set -o verbose + + # Dump core (-c) and debugger outputs (-o) + wt_hang_analyzer_option="-c -o file -o stdout" + + echo "Calling the wt hang analyzer ..." + PATH="/opt/mongodbtoolchain/gdb/bin:$PATH" ${python_binary|python3} ../test/wt_hang_analyzer/wt_hang_analyzer.py $wt_hang_analyzer_option + + "save wt hang analyzer core/debugger files": + - command: archive.targz_pack + params: + target: "wt-hang-analyzer.tgz" + source_dir: "wiredtiger/build_posix" + include: + - "./*core*" + - "./debugger*.*" + - command: s3.put + params: + aws_secret: ${aws_secret} + aws_key: ${aws_key} + local_file: wt-hang-analyzer.tgz + bucket: build_external + permissions: public-read + content_type: application/tar + display_name: WT Hang Analyzer Output - Execution ${execution} + remote_file: wiredtiger/${build_variant}/${revision}/wt_hang_analyzer/wt-hang-analyzer_${task_name}_${build_id}${postfix|}.tgz + "dump stderr/stdout": command: shell.exec params: @@ -423,8 +455,11 @@ post: - func: "upload artifact" vars: postfix: -${execution} + - func: "save wt hang analyzer core/debugger files" - func: "dump stderr/stdout" - func: "cleanup" +timeout: + - func: "run wt hang analyzer" tasks: # Base compile task on posix flavours diff --git a/src/third_party/wiredtiger/test/suite/test_import09.py b/src/third_party/wiredtiger/test/suite/test_import09.py new file mode 100644 index 00000000000..b6b747b9005 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_import09.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2020 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_import09.py +# Import a table with the repair option (no exported metadata). + +import os, random, shutil +from test_import01 import test_import_base +from wtscenario import make_scenarios + +class test_import09(test_import_base): + nrows = 100 + ntables = 1 + session_config = 'isolation=snapshot' + + allocsizes = [ + ('512', dict(allocsize='512')), + ('1024', dict(allocsize='1024')), + ('2048', dict(allocsize='2048')), + ('4096', dict(allocsize='4096')), + ] + compressors = [ + ('none', dict(compressor='none')), + ('nop', dict(compressor='nop')), + ('lz4', dict(compressor='lz4')), + ('snappy', dict(compressor='snappy')), + ('zlib', dict(compressor='zlib')), + ('zstd', dict(compressor='zstd')), + ] + encryptors = [ + ('none', dict(encryptor='none')), + ('nop', dict(encryptor='nop')), + ('rotn', dict(encryptor='rotn')), + ] + tables = [ + ('simple_table', dict( + is_simple = True, + keys = [k for k in range(1, nrows+1)], + values = random.sample(range(1000000), k=nrows), + config = 'key_format=r,value_format=i')), + ('table_with_named_columns', dict( + is_simple = False, + keys = [k for k in range(1, 7)], + values = [('Australia', 'Canberra', 1),('Japan', 'Tokyo', 2),('Italy', 'Rome', 3), + ('China', 'Beijing', 4),('Germany', 'Berlin', 5),('South Korea', 'Seoul', 6)], + config = 'columns=(id,country,capital,population),key_format=r,value_format=SSi')), + ] + scenarios = make_scenarios(tables, allocsizes, compressors, encryptors) + + # Check to verify table projections. + def check_projections(self, uri, keys, values): + for i in range(0, len(keys)): + self.check_record(uri + '(country,capital)', + keys[i], [values[i][0], values[i][1]]) + self.check_record(uri + '(country,population)', + keys[i], [values[i][0], values[i][2]]) + self.check_record(uri + '(capital,population)', + keys[i], [values[i][1], values[i][2]]) + + # Load the compressor extension, skip the test if missing. + def conn_extensions(self, extlist): + extlist.skip_if_missing = True + extlist.extension('compressors', self.compressor) + extlist.extension('encryptors', self.encryptor) + + def conn_config(self): + return 'cache_size=50MB,log=(enabled),statistics=(all),encryption=(name={})'.format( + self.encryptor) + + def test_import_table_repair(self): + # Add some tables & data and checkpoint. + self.populate(self.ntables, self.nrows) + self.session.checkpoint() + + # Create the table targeted for import. + original_db_table = 'original_db_table' + uri = 'table:' + original_db_table + create_config = ('allocation_size={},log=(enabled=true),block_compressor={},' + 'encryption=(name={}),') + self.config + self.session.create(uri, + create_config.format(self.allocsize, self.compressor, self.encryptor)) + + keys = self.keys + values = self.values + ts = [10*k for k in range(1, len(keys)+1)] + + # Add data to our target table and perform a checkpoint. + min_idx = 0 + max_idx = len(keys) // 3 + for i in range(min_idx, max_idx): + self.update(uri, keys[i], values[i], ts[i]) + self.session.checkpoint() + + # Add more data and checkpoint again. + min_idx = max_idx + max_idx = 2*len(keys) // 3 + for i in range(min_idx, max_idx): + self.update(uri, keys[i], values[i], ts[i]) + self.session.checkpoint() + + # Export the file and table metadata so we can verify our repair later. + original_db_file_uri = 'file:' + original_db_table + '.wt' + c = self.session.open_cursor('metadata:', None, None) + original_db_table_config = c[uri] + original_db_file_config = c[original_db_file_uri] + c.close() + + self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config) + self.printVerbose(3, '\nTable configuration:\n' + original_db_table_config) + + # Close the connection. + self.close_conn() + + # Create a new database and connect to it. + newdir = 'IMPORT_DB' + shutil.rmtree(newdir, ignore_errors=True) + os.mkdir(newdir) + self.conn = self.setUpConnectionOpen(newdir) + self.session = self.setUpSessionOpen(self.conn) + + # Make a bunch of files and fill them with data. + self.populate(self.ntables, self.nrows) + self.session.checkpoint() + + # Bring forward the oldest to be past or equal to the timestamps we'll be importing. + self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(ts[max_idx])) + + # Copy over the datafile for the table we want to import. + self.copy_file(original_db_table + '.wt', '.', newdir) + + # Construct the config string. + import_config = 'log=(enabled=true),import=(enabled,repair=true)' + + # Import the file. + self.session.create(uri, import_config) + + # Verify object. + self.session.verify(uri) + + # Check that the previously inserted values survived the import. + self.check(uri, keys[:max_idx], values[:max_idx]) + + # Check against projections when the table is not simple. + if not self.is_simple: + self.check_projections(uri, keys[:max_idx], values[:max_idx]) + + # Compare configuration metadata. + c = self.session.open_cursor('metadata:', None, None) + new_db_file_config = c[original_db_file_uri] + new_db_table_config = c[uri] + c.close() + self.config_compare(original_db_file_config, new_db_file_config) + self.config_compare(original_db_table_config, new_db_table_config) + + # Add some data and check that the table operates as usual after importing. + min_idx = max_idx + max_idx = len(keys) + for i in range(min_idx, max_idx): + self.update(uri, keys[i], values[i], ts[i]) + self.check(uri, keys, values) + if not self.is_simple: + self.check_projections(uri, keys, values) + + # Perform a checkpoint. + self.session.checkpoint() diff --git a/src/third_party/wiredtiger/test/suite/test_txn25.py b/src/third_party/wiredtiger/test/suite/test_txn25.py new file mode 100644 index 00000000000..befdd6cdf24 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_txn25.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2020 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_txn24.py +# Test the write generation mechanism to ensure that transaction ids get wiped between runs. +# + +import wiredtiger, wttest + +class test_txn25(wttest.WiredTigerTestCase): + conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)' + session_config = 'isolation=snapshot' + + def test_txn25(self): + uri = 'file:test_txn25' + create_config = 'allocation_size=512,key_format=S,value_format=S' + self.session.create(uri, create_config) + + # Populate the file and ensure that we start seeing some high transaction IDs in the system. + value1 = 'aaaaa' * 100 + value2 = 'bbbbb' * 100 + value3 = 'ccccc' * 100 + + # Keep transaction ids around. + session2 = self.conn.open_session() + session2.begin_transaction() + + cursor = self.session.open_cursor(uri) + for i in range(1, 1000): + self.session.begin_transaction() + cursor[str(i)] = value1 + self.session.commit_transaction() + + for i in range(1, 1000): + self.session.begin_transaction() + cursor[str(i)] = value2 + self.session.commit_transaction() + + for i in range(1, 1000): + self.session.begin_transaction() + cursor[str(i)] = value3 + self.session.commit_transaction() + + session2.rollback_transaction() + session2.close() + + # Close and re-open the connection. + cursor.close() + self.conn.close() + self.conn = wiredtiger.wiredtiger_open(self.home, self.conn_config) + self.session = self.conn.open_session(self.session_config) + + # Now that we've reopened, check that we can view the latest data from the previous run. + # + # Since we've restarted the system, our transaction IDs are going to begin from 1 again + # so we have to wipe the cell's transaction IDs in order to see them. + cursor = self.session.open_cursor(uri) + self.session.begin_transaction() + for i in range(1, 1000): + self.assertEqual(cursor[str(i)], value3) + self.session.rollback_transaction() diff --git a/src/third_party/wiredtiger/test/wt_hang_analyzer/wt_hang_analyzer.py b/src/third_party/wiredtiger/test/wt_hang_analyzer/wt_hang_analyzer.py new file mode 100644 index 00000000000..6c5b7832cb4 --- /dev/null +++ b/src/third_party/wiredtiger/test/wt_hang_analyzer/wt_hang_analyzer.py @@ -0,0 +1,603 @@ +#!/usr/bin/env python +"""Hang Analyzer module. + +A prototype hang analyzer for Evergreen integration to help investigate test timeouts. + +1. Script supports taking dumps, and/or dumping a summary of useful information about a process. +2. Script will iterate through a list of interesting processes, + and run the tools from step 1. The list of processes can be provided as an option. + +Currently only supports Linux. There are two issues with the MacOS and Windows implementations: +1. WT-6918 - lldb cannot attach to processes in MacOS. +2. WT-6919 - Windows cannot find the debug symbols. +""" + +import csv, glob, itertools, logging, re, tempfile, traceback +import os, sys, platform, signal, subprocess, threading, time +from distutils import spawn +from io import BytesIO, TextIOWrapper +from optparse import OptionParser +_IS_WINDOWS = (sys.platform == "win32") + +if _IS_WINDOWS: + import win32event + import win32api + +""" +Helper class to read output of a subprocess. + +Used to avoid deadlocks from the pipe buffer filling up and blocking the subprocess while it's +being waited on. +""" +class LoggerPipe(threading.Thread): + """Asynchronously reads the output of a subprocess and sends it to a logger.""" + + # The start() and join() methods are not intended to be called directly on the LoggerPipe + # instance. Since we override them for that effect, the super's version are preserved here. + __start = threading.Thread.start + __join = threading.Thread.join + + def __init__(self, logger, level, pipe_out): + """Initialize the LoggerPipe with the specified arguments.""" + + threading.Thread.__init__(self) + # Main thread should not call join() when exiting. + self.daemon = True + + self.__logger = logger + self.__level = level + self.__pipe_out = pipe_out + + self.__lock = threading.Lock() + self.__condition = threading.Condition(self.__lock) + + self.__started = False + self.__finished = False + + LoggerPipe.__start(self) + + def start(self): + """Start not implemented.""" + raise NotImplementedError("start should not be called directly") + + def run(self): + """Read the output from 'pipe_out' and logs each line to 'logger'.""" + + with self.__lock: + self.__started = True + self.__condition.notify_all() + + # Close the pipe when all of the output has been read. + with self.__pipe_out: + # Avoid buffering the output from the pipe. + for line in iter(self.__pipe_out.readline, b""): + # Convert the output of the process from a bytestring to a UTF-8 string, and replace + # any characters that cannot be decoded with the official Unicode replacement + # character, U+FFFD. + line = line.decode("utf-8", "replace") + self.__logger.log(self.__level, line.rstrip()) + + with self.__lock: + self.__finished = True + self.__condition.notify_all() + + def join(self, timeout=None): + """Join not implemented.""" + raise NotImplementedError("join should not be called directly") + + def wait_until_started(self): + """Wait until started.""" + with self.__lock: + while not self.__started: + self.__condition.wait() + + def wait_until_finished(self): + """Wait until finished.""" + with self.__lock: + while not self.__finished: + self.__condition.wait() + + # No need to pass a timeout to join() because the thread should already be done after + # notifying us it has finished reading output from the pipe. + LoggerPipe.__join(self) + +def call(args, logger): + """Call subprocess on args list.""" + logger.info(str(args)) + + # Use a common pipe for stdout & stderr for logging. + process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + logger_pipe = LoggerPipe(logger, logging.INFO, process.stdout) + logger_pipe.wait_until_started() + + ret = process.wait() + logger_pipe.wait_until_finished() + + if ret != 0: + logger.error("Bad exit code %d", ret) + raise Exception("Bad exit code %d from %s" % (ret, " ".join(args))) + +def callo(args, logger): + """Call subprocess on args string.""" + logger.info("%s", str(args)) + + return subprocess.check_output(args) + +def find_program(prog, paths): + """Find the specified program in env PATH, or tries a set of paths.""" + loc = spawn.find_executable(prog) + + if loc is not None: + return loc + + for loc in paths: + full_prog = os.path.join(loc, prog) + if os.path.exists(full_prog): + return full_prog + + return None + +def get_process_logger(debugger_output, pid, process_name): + """Return the process logger from options specified.""" + process_logger = logging.Logger("process", level=logging.DEBUG) + process_logger.mongo_process_filename = None + + if 'stdout' in debugger_output: + s_handler = logging.StreamHandler(sys.stdout) + s_handler.setFormatter(logging.Formatter(fmt="%(message)s")) + process_logger.addHandler(s_handler) + + if 'file' in debugger_output: + filename = "debugger_%s_%d.log" % (os.path.splitext(process_name)[0], pid) + process_logger.mongo_process_filename = filename + f_handler = logging.FileHandler(filename=filename, mode="w") + f_handler.setFormatter(logging.Formatter(fmt="%(message)s")) + process_logger.addHandler(f_handler) + + return process_logger + +class WindowsDumper(object): + """WindowsDumper class.""" + + @staticmethod + def __find_debugger(logger, debugger): + """Find the installed debugger.""" + # We are looking for c:\Program Files (x86)\Windows Kits\8.1\Debuggers\x64. + cdb = spawn.find_executable(debugger) + if cdb is not None: + return cdb + from win32com.shell import shell, shellcon + + # Cygwin via sshd does not expose the normal environment variables. + # Use the shell api to get the variable instead. + root_dir = shell.SHGetFolderPath(0, shellcon.CSIDL_PROGRAM_FILESX86, None, 0) + + # Construct the debugger search paths in most-recent order. + debugger_paths = [os.path.join(root_dir, "Windows Kits", "10", "Debuggers", "x64")] + for idx in reversed(range(0, 2)): + debugger_paths.append( + os.path.join(root_dir, "Windows Kits", "8." + str(idx), "Debuggers", "x64")) + + for dbg_path in debugger_paths: + logger.info("Checking for debugger in %s", dbg_path) + if os.path.exists(dbg_path): + return os.path.join(dbg_path, debugger) + + return None + + def dump_info(self, root_logger, logger, pid, process_name, take_dump): + """Dump useful information to the console.""" + debugger = "cdb.exe" + dbg = self.__find_debugger(root_logger, debugger) + + if dbg is None: + root_logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid) + return + + root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, process_name, pid) + + dump_command = "" + if take_dump: + # Dump to file, dump_<process name>.<pid>.mdmp. + dump_file = "dump_%s.%d.%s" % (os.path.splitext(process_name)[0], pid, + self.get_dump_ext()) + dump_command = ".dump /ma %s" % dump_file + root_logger.info("Dumping core to %s", dump_file) + + cmds = [ + ".symfix", # Fixup symbol path. + "!sym noisy", # Enable noisy symbol loading. + ".symopt +0x10", # Enable line loading (off by default in CDB, on by default in WinDBG). + ".reload", # Reload symbols. + "!peb", # Dump current exe & environment variables. + "lm", # Dump loaded modules. + dump_command, + "!uniqstack -pn", # Dump all unique threads with function arguments. + "!cs -l", # Dump all locked critical sections. + ".detach", # Detach. + "q" # Quit. + ] + + call([dbg, '-c', ";".join(cmds), '-p', str(pid)], logger) + + root_logger.info("Done analyzing %s process with PID %d", process_name, pid) + + @staticmethod + def get_dump_ext(): + """Return the dump file extension.""" + return "mdmp" + +class WindowsProcessList(object): + """WindowsProcessList class.""" + + @staticmethod + def __find_ps(): + """Find tasklist.""" + return os.path.join(os.environ["WINDIR"], "system32", "tasklist.exe") + + def dump_processes(self, logger): + """Get list of [Pid, Process Name].""" + ps = self.__find_ps() + + logger.info("Getting list of processes using %s", ps) + + ret = callo([ps, "/FO", "CSV"], logger) + + buff = TextIOWrapper(BytesIO(ret)) + csv_reader = csv.reader(buff) + + return [[int(row[1]), row[0]] for row in csv_reader if row[1] != "PID"] + +# LLDB dumper is for MacOS X. +class LLDBDumper(object): + """LLDBDumper class.""" + + @staticmethod + def __find_debugger(debugger): + """Find the installed debugger.""" + return find_program(debugger, ['/usr/bin']) + + def dump_info(self, root_logger, logger, pid, process_name, take_dump): + """Dump info.""" + debugger = "lldb" + dbg = self.__find_debugger(debugger) + + if dbg is None: + root_logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid) + return + + root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, process_name, pid) + + lldb_version = callo([dbg, "--version"], logger) + + logger.info(lldb_version) + + # Do we have the XCode or LLVM version of lldb? + # Old versions of lldb do not work well when taking commands via a file. + # XCode (7.2): lldb-340.4.119. + # LLVM - lldb version 3.7.0 ( revision ). + + lldb_version = str(lldb_version) + if 'version' not in lldb_version: + # We have XCode's lldb. + lldb_version = lldb_version[lldb_version.index("lldb-"):] + lldb_version = lldb_version.replace('lldb-', '') + lldb_major_version = int(lldb_version[:lldb_version.index('.')]) + if lldb_major_version < 340: + logger.warning("Debugger lldb is too old, please upgrade to XCode 7.2") + return + + dump_command = "" + if take_dump: + # Dump to file, dump_<process name>.<pid>.core. + dump_file = "dump_%s.%d.%s" % (process_name, pid, self.get_dump_ext()) + dump_command = "process save-core %s" % dump_file + root_logger.info("Dumping core to %s", dump_file) + + cmds = [ + "attach -p %d" % pid, + "target modules list", + "thread backtrace all", + dump_command, + "settings set interpreter.prompt-on-quit false", + "quit", + ] + + tf = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8') + + for cmd in cmds: + tf.write(cmd + "\n") + + tf.flush() + + # Works on in MacOS 10.9 & later. + #call([dbg] + list( itertools.chain.from_iterable([['-o', b] for b in cmds])), logger) + call(['cat', tf.name], logger) + call([dbg, '--source', tf.name], logger) + + root_logger.info("Done analyzing %s process with PID %d", process_name, pid) + + @staticmethod + def get_dump_ext(): + """Return the dump file extension.""" + return "core" + +class DarwinProcessList(object): + """DarwinProcessList class.""" + + @staticmethod + def __find_ps(): + """Find ps.""" + return find_program('ps', ['/bin']) + + def dump_processes(self, logger): + """Get list of [Pid, Process Name].""" + ps = self.__find_ps() + + logger.info("Getting list of processes using %s", ps) + + ret = callo([ps, "-axco", "pid,comm"], logger) + + buff = TextIOWrapper(BytesIO(ret)) + csv_reader = csv.reader(buff, delimiter=' ', quoting=csv.QUOTE_NONE, skipinitialspace=True) + + return [[int(row[0]), row[1]] for row in csv_reader if row[0] != "PID"] + +# GDB dumper is for Linux. +class GDBDumper(object): + """GDBDumper class.""" + + @staticmethod + def __find_debugger(debugger): + """Find the installed debugger.""" + return find_program(debugger, ['/opt/mongodbtoolchain/v3/bin/gdb', '/usr/bin']) + + def dump_info(self, root_logger, logger, pid, process_name, take_dump): + """Dump info.""" + debugger = "gdb" + dbg = self.__find_debugger(debugger) + + if dbg is None: + logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid) + return + + root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, process_name, pid) + + dump_command = "" + if take_dump: + # Dump to file, dump_<process name>.<pid>.core. + dump_file = "dump_%s.%d.%s" % (process_name, pid, self.get_dump_ext()) + dump_command = "gcore %s" % dump_file + root_logger.info("Dumping core to %s", dump_file) + + call([dbg, "--version"], logger) + + cmds = [ + "set interactive-mode off", + "set print thread-events off", # Suppress GDB messages of threads starting/finishing. + "file %s" % process_name, + "attach %d" % pid, + "info sharedlibrary", + "info threads", # Dump a simple list of commands to get the thread name. + "thread apply all bt", + "set python print-stack full", + # Lock the scheduler, before running commands, which execute code in the attached process. + "set scheduler-locking on", + dump_command, + "set confirm off", + "quit", + ] + + call([dbg, "--quiet", "--nx"] + + list(itertools.chain.from_iterable([['-ex', b] for b in cmds])), logger) + + root_logger.info("Done analyzing %s process with PID %d", process_name, pid) + + @staticmethod + def get_dump_ext(): + """Return the dump file extension.""" + return "core" + + @staticmethod + def _find_gcore(): + """Find the installed gcore.""" + dbg = "/usr/bin/gcore" + if os.path.exists(dbg): + return dbg + + return None + +class LinuxProcessList(object): + """LinuxProcessList class.""" + + @staticmethod + def __find_ps(): + """Find ps.""" + return find_program('ps', ['/bin', '/usr/bin']) + + def dump_processes(self, logger): + """Get list of [Pid, Process Name].""" + ps = self.__find_ps() + + logger.info("Getting list of processes using %s", ps) + + call([ps, "--version"], logger) + + ret = callo([ps, "-eo", "pid,args"], logger) + + buff = TextIOWrapper(BytesIO(ret)) + csv_reader = csv.reader(buff, delimiter=' ', quoting=csv.QUOTE_NONE, skipinitialspace=True) + + return [[int(row[0]), os.path.split(row[1])[1]] for row in csv_reader if row[0] != "PID"] + +def get_hang_analyzers(): + """Return hang analyzers.""" + + dbg = None + ps = None + + # Skip taking the dump in Mac OS and result in an error. + # FIXME : WT-6918 - Remove the skip block of code after fixing the issues. + if sys.platform == "darwin": + return [ps, dbg] + + if sys.platform.startswith("linux"): + dbg = GDBDumper() + ps = LinuxProcessList() + elif _IS_WINDOWS or sys.platform == "cygwin": + dbg = WindowsDumper() + ps = WindowsProcessList() + elif sys.platform == "darwin": + dbg = LLDBDumper() + ps = DarwinProcessList() + + return [ps, dbg] + +def check_dump_quota(quota, ext): + """Check if sum of the files with ext is within the specified quota in megabytes.""" + + files = glob.glob("*." + ext) + + size_sum = 0 + for file_name in files: + size_sum += os.path.getsize(file_name) + + return size_sum <= quota + +def pname_match(exact_match, pname, processes): + """Return True if the pname matches in processes.""" + pname = os.path.splitext(pname)[0] + for ip in processes: + if exact_match and pname == ip or not exact_match and ip in pname: + return True + return False + +# Basic procedure +# +# 1. Get a list of interesting processes. +# 2. Dump useful information or take dumps. +def main(): + """Execute Main program.""" + root_logger = logging.Logger("hang_analyzer", level=logging.DEBUG) + + handler = logging.StreamHandler(sys.stdout) + handler.setFormatter(logging.Formatter(fmt="%(message)s")) + root_logger.addHandler(handler) + + root_logger.info("Python Version: %s", sys.version) + root_logger.info("OS: %s", platform.platform()) + + try: + if _IS_WINDOWS or sys.platform == "cygwin": + distro = platform.win32_ver() + root_logger.info("Windows Distribution: %s", distro) + else: + distro = platform.linux_distribution() + root_logger.info("Linux Distribution: %s", distro) + + except AttributeError: + root_logger.warning("Cannot determine Linux distro since Python is too old") + + try: + uid = os.getuid() + root_logger.info("Current User: %s", uid) + current_login = os.getlogin() + root_logger.info("Current Login: %s", current_login) + except OSError: + root_logger.warning("Cannot determine Unix Current Login") + except AttributeError: + root_logger.warning("Cannot determine Unix Current Login, not supported on Windows") + + contain_processes = ["ex_", "intpack-test", "python", "test_"] + exact_processes = ["cursor_order", "packing-test", "t"] + process_ids = [] + + parser = OptionParser(description=__doc__) + parser.add_option('-p', '--process-contains-names', dest='process_contains_names', + help='Comma separated list of process patterns to analyze') + parser.add_option('-e', '--process-names', dest='process_exact_names', + help='Comma separated list of exact process names to analyze') + parser.add_option('-d', '--process-ids', dest='process_ids', default=None, + help='Comma separated list of process ids (PID) to analyze, overrides -p & e') + parser.add_option('-c', '--dump-core', dest='dump_core', action="store_true", default=False, + help='Dump core file for each analyzed process') + parser.add_option('-s', '--max-core-dumps-size', dest='max_core_dumps_size', default=10000, + help='Maximum total size of core dumps to keep in megabytes') + parser.add_option('-o', '--debugger-output', dest='debugger_output', action="append", + choices=['file', 'stdout'], default=None, + help="If 'stdout', then the debugger's output is written to the Python" + " process's stdout. If 'file', then the debugger's output is written" + " to a file named debugger_<process>_<pid>.log for each process it" + " attaches to. This option can be specified multiple times on the" + " command line to have the debugger's output written to multiple" + " locations. By default, the debugger's output is written only to the" + " Python process's stdout.") + + (options, _) = parser.parse_args() + + if options.debugger_output is None: + options.debugger_output = ['stdout'] + + if options.process_ids is not None: + # process_ids is an int list of PIDs. + process_ids = [int(pid) for pid in options.process_ids.split(',')] + + if options.process_exact_names is not None: + exact_processes = options.process_exact_names.split(',') + + if options.process_contains_names is not None: + contain_processes = options.process_contains_names.split(',') + + [ps, dbg] = get_hang_analyzers() + + if ps is None or dbg is None: + root_logger.warning("hang_analyzer.py: Unsupported platform: %s", sys.platform) + exit(1) + + all_processes = ps.dump_processes(root_logger) + + # Canonicalize the process names to lowercase to handle cases where the name of the Python + # process is /System/Library/.../Python on OS X and -p python is specified. + all_processes = [(pid, process_name.lower()) for (pid, process_name) in all_processes] + + # Find all running interesting processes: + # If a list of process_ids is supplied, match on that. + # Otherwise, do a substring match on interesting_processes. + if process_ids: + processes = [(pid, pname) for (pid, pname) in all_processes + if pid in process_ids and pid != os.getpid()] + + running_pids = set([pid for (pid, pname) in all_processes]) + missing_pids = set(process_ids) - running_pids + if missing_pids: + root_logger.warning("The following requested process ids are not running %s", + list(missing_pids)) + else: + processes = [(pid, pname) for (pid, pname) in all_processes + if (pname_match(True, pname, exact_processes) or pname_match(False, pname, contain_processes)) and pid != os.getpid()] + + root_logger.info("Found %d interesting processes %s", len(processes), processes) + + max_dump_size_bytes = int(options.max_core_dumps_size) * 1024 * 1024 + + trapped_exceptions = [] + + # Dump all processes. + for (pid, process_name) in processes: + process_logger = get_process_logger(options.debugger_output, pid, process_name) + try: + dbg.dump_info(root_logger, process_logger, pid, process_name, options.dump_core + and check_dump_quota(max_dump_size_bytes, dbg.get_dump_ext())) + except Exception as err: + root_logger.info("Error encountered when invoking debugger %s", err) + trapped_exceptions.append(traceback.format_exc()) + + root_logger.info("Done analyzing all processes for hangs") + + for exception in trapped_exceptions: + root_logger.info(exception) + if trapped_exceptions: + sys.exit(1) + +if __name__ == "__main__": + main() |