Import wiredtiger: 0ab3e59875ecfabbe3cd9d19c0c3e05b72bad1cf from branch mongodb-5.0

ref: 5f9a0178f2..0ab3e59875 for: 4.9.0 WT-6449 Hang analyzer for WT Evergreen tests WT-6706 Add table import repair functionality WT-6816 Design write gen scheme to allow dhandles with active history to get closed/re-opened WT-6857 Define a new cursor for the history store access.
author: Luke Chen <luke.chen@mongodb.com> 2020-11-13 15:57:42 +1100
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2020-11-13 05:15:33 +0000
commit: 8bb9ed7824835cdfd146d642468508c01ea087ad (patch)
tree: 25118dd8dabdc8e892057067583845f47547aaad /src/third_party
parent: 6dc4a4eaf4acc7472590040fa5795901b5140fc5 (diff)
download: mongo-8bb9ed7824835cdfd146d642468508c01ea087ad.tar.gz
21 files changed, 1237 insertions, 107 deletions
diff --git a/src/third_party/wiredtiger/dist/s_funcs.list b/src/third_party/wiredtiger/dist/s_funcs.list
index fe19e596bf4..0b7db52d26c 100644
--- a/src/third_party/wiredtiger/dist/s_funcs.list
+++ b/src/third_party/wiredtiger/dist/s_funcs.list
@@ -13,6 +13,7 @@ __wt_bulk_insert_fix
 __wt_bulk_insert_row
 __wt_bulk_insert_var
 __wt_config_getone
+__wt_curhs_open
 __wt_cursor_get_raw_value
 __wt_debug_addr
 __wt_debug_addr_print
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 0e4185705d9..70e3738e022 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -346,6 +346,7 @@ Redistributions
 Refactor
 Resize
 RocksDB
+Runtime
 SIMD
 SLIST
 SLVG
@@ -634,6 +635,7 @@ curconfig
 curdump
 curextract
 curfile
+curhs
 curindex
 curjoin
 curlog
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 59b9bb25f00..518309b2758 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
     "vendor": "wiredtiger",
     "github": "wiredtiger/wiredtiger.git",
     "branch": "mongodb-5.0",
-    "commit": "5f9a0178f292c964ef5b40c4e639e3e0a438e5e5"
+    "commit": "0ab3e59875ecfabbe3cd9d19c0c3e05b72bad1cf"
 }
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 42139969802..60cde6ce8b5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -522,16 +522,45 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
     btree->checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT); /* Checkpoint generation */
 
     /*
-     * In the regular case, we'll be initializing to the connection-wide base write generation since
-     * this is the largest of all btree write generations from the previous run. This has the nice
-     * property of ensuring that the range of write generations used by consecutive runs do not
-     * overlap which aids with debugging.
+     * The first time we open a btree, we'll be initializing the write gen to the connection-wide
+     * base write generation since this is the largest of all btree write generations from the
+     * previous run. This has the nice property of ensuring that the range of write generations used
+     * by consecutive runs do not overlap which aids with debugging.
      *
-     * In the import case, the btree write generation from the last run may actually be ahead of the
-     * connection-wide base write generation. In that case, we should initialize our write gen just
-     * ahead of our btree specific write generation.
+     * If we're reopening a btree or importing a new one to a running system, the btree write
+     * generation from the last run may actually be ahead of the connection-wide base write
+     * generation. In that case, we should initialize our write gen just ahead of our btree specific
+     * write generation.
+     *
+     * The runtime write generation is important since it's going to determine what we're going to
+     * use as the base write generation (and thus what pages to wipe transaction ids from). The idea
+     * is that we want to initialize it once the first time we open the btree during a run and then
+     * for every subsequent open, we want to reuse it. This so that we're still able to read
+     * transaction ids from the previous time a btree was open in the same run.
+     *
+     * FIXME-WT-6819: When we begin discarding dhandles more aggressively, we need to check that
+     * updates aren't having their transaction ids wiped after reopening the dhandle. The runtime
+     * write generation is relevant here since it should remain static across the entire run.
+     */
+    btree->write_gen = WT_MAX(ckpt->write_gen + 1, conn->base_write_gen);
+    WT_ASSERT(session, ckpt->write_gen >= ckpt->run_write_gen);
+
+    /* If this is the first time opening the tree this run. */
+    if (F_ISSET(session, WT_SESSION_IMPORT) || ckpt->run_write_gen < conn->base_write_gen)
+        btree->base_write_gen = btree->run_write_gen = btree->write_gen;
+    else
+        btree->base_write_gen = btree->run_write_gen = ckpt->run_write_gen;
+
+    /*
+     * We've just overwritten the runtime write generation based off the fact that know that we're
+     * importing and therefore, the checkpoint data's runtime write generation is meaningless. We
+     * need to ensure that the underlying dhandle doesn't get discarded without being included in a
+     * subsequent checkpoint including the new overwritten runtime write generation. Otherwise,
+     * we'll reopen, won't know that we're in the import case and will incorrectly use the old
+     * system's runtime write generation.
      */
-    btree->write_gen = btree->base_write_gen = WT_MAX(ckpt->write_gen + 1, conn->base_write_gen);
+    if (F_ISSET(session, WT_SESSION_IMPORT))
+        btree->modified = true;
 
     return (0);
 }
diff --git a/src/third_party/wiredtiger/src/btree/bt_import.c b/src/third_party/wiredtiger/src/btree/bt_import.c
index 39d0dda368a..e779d90fc66 100644
--- a/src/third_party/wiredtiger/src/btree/bt_import.c
+++ b/src/third_party/wiredtiger/src/btree/bt_import.c
@@ -13,7 +13,7 @@
  *     Import a WiredTiger file into the database and reconstruct its metadata.
  */
 int
-__wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
+__wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **configp)
 {
     WT_BM *bm;
     WT_CKPT *ckpt, *ckptbase;
@@ -24,13 +24,12 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
     WT_DECL_RET;
     WT_KEYED_ENCRYPTOR *kencryptor;
     uint32_t allocsize;
-    char *checkpoint_list, *fileconf, *metadata, fileid[64];
-    const char *filecfg[] = {
-      WT_CONFIG_BASE(session, file_meta), NULL, NULL, NULL, NULL, NULL, NULL};
+    char *checkpoint_list, *config, *config_tmp, *metadata, fileid[64];
+    const char *cfg[] = {WT_CONFIG_BASE(session, file_meta), NULL, NULL, NULL, NULL, NULL, NULL};
     const char *filename;
 
     ckptbase = NULL;
-    checkpoint_list = fileconf = metadata = NULL;
+    checkpoint_list = config = config_tmp = metadata = NULL;
 
     WT_ERR(__wt_scr_alloc(session, 0, &a));
     WT_ERR(__wt_scr_alloc(session, 0, &b));
@@ -45,13 +44,11 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
      * size, but 512B allows us to read the descriptor block and that's all we care about.
      */
     F_SET(session, WT_SESSION_IMPORT_REPAIR);
-    WT_ERR(__wt_block_manager_open(session, filename, filecfg, false, true, 512, &bm));
+    WT_ERR(__wt_block_manager_open(session, filename, cfg, false, true, 512, &bm));
     ret = bm->checkpoint_last(bm, session, &metadata, &checkpoint_list, checkpoint);
     WT_TRET(bm->close(bm, session));
     F_CLR(session, WT_SESSION_IMPORT_REPAIR);
     WT_ERR(ret);
-    __wt_verbose(session, WT_VERB_CHECKPOINT, "import metadata: %s", metadata);
-    __wt_verbose(session, WT_VERB_CHECKPOINT, "import checkpoint-list: %s", checkpoint_list);
 
     /*
      * The metadata may have been encrypted, in which case it's also hexadecimal encoded. The
@@ -59,7 +56,7 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
      * diagnosis.
      */
     WT_ERR(__wt_config_getones(session, metadata, "block_metadata_encrypted", &v));
-    WT_ERR(__wt_btree_config_encryptor(session, filecfg, &kencryptor));
+    WT_ERR(__wt_btree_config_encryptor(session, cfg, &kencryptor));
     if ((kencryptor == NULL && v.val != 0) || (kencryptor != NULL && v.val == 0))
         WT_ERR_MSG(session, EINVAL,
           "%s: loaded object's encryption configuration doesn't match the database's encryption "
@@ -99,20 +96,18 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
      * Strip out the checkpoint LSN, an imported file isn't associated with any log files. Assign a
      * unique file ID.
      */
-    filecfg[1] = a->data;
-    filecfg[2] = checkpoint_list;
-    filecfg[3] = "checkpoint_backup_info=";
-    filecfg[4] = "checkpoint_lsn=";
+    cfg[1] = a->data;
+    cfg[2] = checkpoint_list;
+    cfg[3] = "checkpoint_backup_info=";
+    cfg[4] = "checkpoint_lsn=";
     WT_WITH_SCHEMA_LOCK(session,
       ret = __wt_snprintf(fileid, sizeof(fileid), "id=%" PRIu32, ++S2C(session)->next_file_id));
     WT_ERR(ret);
-    filecfg[5] = fileid;
-    WT_ERR(__wt_config_collapse(session, filecfg, &fileconf));
-    WT_ERR(__wt_metadata_insert(session, uri, fileconf));
-    __wt_verbose(session, WT_VERB_CHECKPOINT, "import configuration: %s/%s", uri, fileconf);
+    cfg[5] = fileid;
+    WT_ERR(__wt_config_collapse(session, cfg, &config_tmp));
 
     /* Now that we've retrieved the configuration, let's get the real allocation size. */
-    WT_ERR(__wt_config_getones(session, fileconf, "allocation_size", &v));
+    WT_ERR(__wt_config_getones(session, config_tmp, "allocation_size", &v));
     allocsize = (uint32_t)v.val;
 
     /*
@@ -120,32 +115,21 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
      * size. When we did this earlier, we were able to read the descriptor block properly but the
      * checkpoint's byte representation was wrong because it was using the wrong allocation size.
      */
-    WT_ERR(__wt_block_manager_open(session, filename, filecfg, false, true, allocsize, &bm));
+    WT_ERR(__wt_block_manager_open(session, filename, cfg, false, true, allocsize, &bm));
+    __wt_free(session, checkpoint_list);
+    __wt_free(session, metadata);
     ret = bm->checkpoint_last(bm, session, &metadata, &checkpoint_list, checkpoint);
     WT_TRET(bm->close(bm, session));
 
     /*
-     * The just inserted metadata was correct as of immediately before the final checkpoint, but
-     * it's not quite right. The block manager returned the corrected final checkpoint, put it all
-     * together.
+     * The metadata was correct as of immediately before the final checkpoint, but it's not quite
+     * right. The block manager returned the corrected final checkpoint, put it all together.
      *
      * Get the checkpoint information from the file's metadata as an array of WT_CKPT structures.
-     *
-     * XXX There's a problem here. If a file is imported from our future (leaf pages with unstable
-     * entries that have write-generations ahead of the current database's base write generation),
-     * we'll read the values and treat them as stable. A restart will fix this: when we added the
-     * imported file to our metadata, the write generation in the imported file's checkpoints
-     * updated our database's maximum write generation, and so a restart will have a maximum
-     * generation newer than the imported file's write generation. An alternative solution is to add
-     * a "base write generation" value to the imported file's metadata, and use that value instead
-     * of the connection's base write generation when deciding what page items should be read. Since
-     * all future writes to the imported file would be ahead of that write generation, it would have
-     * the effect we want.
-     *
      * Update the last checkpoint with the corrected information. Update the file's metadata with
      * the new checkpoint information.
      */
-    WT_ERR(__wt_meta_ckptlist_get(session, uri, false, &ckptbase));
+    WT_ERR(__wt_meta_ckptlist_get_from_config(session, false, &ckptbase, config_tmp));
     WT_CKPT_FOREACH (ckptbase, ckpt)
         if (ckpt->name == NULL || (ckpt + 1)->name == NULL)
             break;
@@ -153,20 +137,20 @@ __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
         WT_ERR_MSG(session, EINVAL, "no checkpoint information available to import");
     F_SET(ckpt, WT_CKPT_UPDATE);
     WT_ERR(__wt_buf_set(session, &ckpt->raw, checkpoint->data, checkpoint->size));
-    WT_ERR(__wt_meta_ckptlist_set(session, uri, ckptbase, NULL));
-
-    WT_ASSERT(session, fileconfp != NULL);
-    *fileconfp = fileconf;
+    WT_ERR(__wt_meta_ckptlist_update_config(session, ckptbase, config_tmp, &config));
+    __wt_verbose(session, WT_VERB_CHECKPOINT, "import metadata: %s", config);
+    *configp = config;
 
 err:
     F_CLR(session, WT_SESSION_IMPORT_REPAIR);
 
     __wt_meta_ckptlist_free(session, &ckptbase);
 
+    __wt_free(session, checkpoint_list);
     if (ret != 0)
-        __wt_free(session, fileconf);
+        __wt_free(session, config);
+    __wt_free(session, config_tmp);
     __wt_free(session, metadata);
-    __wt_free(session, checkpoint_list);
 
     __wt_scr_free(session, &a);
     __wt_scr_free(session, &b);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_hs.c b/src/third_party/wiredtiger/src/cursor/cur_hs.c
index ba2799e2127..d46429fac23 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_hs.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_hs.c
@@ -141,3 +141,102 @@ __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exa
       session, WT_ISO_READ_UNCOMMITTED, ret = cursor->search_near(cursor, exactp));
     return (ret);
 }
+
+/*
+ * __curhs_close --
+ *     WT_CURSOR->close method for the hs cursor type.
+ */
+static int
+__curhs_close(WT_CURSOR *cursor)
+{
+    WT_CURSOR *file_cursor;
+    WT_CURSOR_HS *hs_cursor;
+    WT_DECL_RET;
+    WT_SESSION_IMPL *session;
+
+    hs_cursor = (WT_CURSOR_HS *)cursor;
+    file_cursor = hs_cursor->file_cursor;
+    CURSOR_API_CALL_PREPARE_ALLOWED(
+      cursor, session, close, file_cursor == NULL ? NULL : CUR2BT(file_cursor));
+err:
+
+    if (file_cursor != NULL)
+        WT_TRET(file_cursor->close(file_cursor));
+    __wt_cursor_close(cursor);
+
+    API_END_RET(session, ret);
+}
+
+/*
+ * __curhs_reset --
+ *     Reset a history store cursor.
+ */
+static int
+__curhs_reset(WT_CURSOR *cursor)
+{
+    WT_CURSOR *file_cursor;
+    WT_CURSOR_HS *hs_cursor;
+    WT_DECL_RET;
+    WT_SESSION_IMPL *session;
+
+    hs_cursor = (WT_CURSOR_HS *)cursor;
+    file_cursor = hs_cursor->file_cursor;
+    CURSOR_API_CALL_PREPARE_ALLOWED(cursor, session, reset, CUR2BT(file_cursor));
+
+    ret = file_cursor->reset(file_cursor);
+    F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+
+err:
+    API_END_RET(session, ret);
+}
+
+/*
+ * __wt_curhs_open --
+ *     Initialize a history store cursor.
+ */
+int
+__wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp)
+{
+    WT_CURSOR_STATIC_INIT(iface, __wt_cursor_get_key, /* get-key */
+      __wt_cursor_get_value,                          /* get-value */
+      __wt_cursor_set_key,                            /* set-key */
+      __wt_cursor_set_value,                          /* set-value */
+      __wt_cursor_compare_notsup,                     /* compare */
+      __wt_cursor_equals_notsup,                      /* equals */
+      __wt_cursor_notsup,                             /* next */
+      __wt_cursor_notsup,                             /* prev */
+      __curhs_reset,                                  /* reset */
+      __wt_cursor_notsup,                             /* search */
+      __wt_cursor_search_near_notsup,                 /* search-near */
+      __wt_cursor_notsup,                             /* insert */
+      __wt_cursor_modify_value_format_notsup,         /* modify */
+      __wt_cursor_notsup,                             /* update */
+      __wt_cursor_notsup,                             /* remove */
+      __wt_cursor_notsup,                             /* reserve */
+      __wt_cursor_reconfigure_notsup,                 /* reconfigure */
+      __wt_cursor_notsup,                             /* cache */
+      __wt_cursor_reopen_notsup,                      /* reopen */
+      __curhs_close);                                 /* close */
+    WT_CURSOR *cursor;
+    WT_CURSOR_HS *hs_cursor;
+    WT_DECL_RET;
+
+    WT_RET(__wt_calloc_one(session, &hs_cursor));
+    cursor = (WT_CURSOR *)hs_cursor;
+    *cursor = iface;
+    cursor->session = (WT_SESSION *)session;
+    cursor->key_format = WT_HS_KEY_FORMAT;
+    cursor->value_format = WT_HS_VALUE_FORMAT;
+
+    /* Open the file cursor for operations on the regular history store .*/
+    WT_ERR(__hs_cursor_open_int(session, &hs_cursor->file_cursor));
+
+    WT_ERR(__wt_cursor_init(cursor, WT_HS_URI, owner, NULL, cursorp));
+
+    if (0) {
+err:
+        WT_TRET(__curhs_close(cursor));
+        *cursorp = NULL;
+    }
+    return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index b066bcde18f..3cc12d78e74 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -235,11 +235,13 @@ struct __wt_ovfl_reuse {
 #else
 #define WT_HS_COMPRESSOR "none"
 #endif
-#define WT_HS_CONFIG                                                              \
-    "key_format=" WT_UNCHECKED_STRING(IuQQ) ",value_format=" WT_UNCHECKED_STRING( \
-      QQQu) ",block_compressor=" WT_HS_COMPRESSOR                                 \
-            ",leaf_value_max=64MB"                                                \
-            ",prefix_compression=false"
+#define WT_HS_KEY_FORMAT WT_UNCHECKED_STRING(IuQQ)
+#define WT_HS_VALUE_FORMAT WT_UNCHECKED_STRING(QQQu)
+#define WT_HS_CONFIG                                                   \
+    "key_format=" WT_HS_KEY_FORMAT ",value_format=" WT_HS_VALUE_FORMAT \
+    ",block_compressor=" WT_HS_COMPRESSOR                              \
+    ",leaf_value_max=64MB"                                             \
+    ",prefix_compression=false"
 
 /*
  * WT_PAGE_MODIFY --
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index 19eea8088ae..4af6ea90f67 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -167,6 +167,7 @@ struct __wt_btree {
 
     uint64_t write_gen;      /* Write generation */
     uint64_t base_write_gen; /* Write generation on startup. */
+    uint64_t run_write_gen;  /* Runtime write generation. */
     uint64_t rec_max_txn;    /* Maximum txn seen (clean trees) */
     wt_timestamp_t rec_max_timestamp;
 
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 5460978d09f..1c5cd5dcb6b 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -282,6 +282,12 @@ struct __wt_cursor_dump {
     WT_CURSOR *child;
 };
 
+struct __wt_cursor_hs {
+    WT_CURSOR iface;
+
+    WT_CURSOR *file_cursor; /* Queries of regular history store data */
+};
+
 struct __wt_cursor_index {
     WT_CURSOR iface;
 
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 0a6f3d79a9b..d902d72ff01 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -486,6 +486,8 @@ extern int __wt_curfile_next_random(WT_CURSOR *cursor)
   WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
   const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_curhs_open(WT_SESSION_IMPL *session, WT_CURSOR *owner, WT_CURSOR **cursorp)
+  WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner,
   const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT_INDEX *idx,
@@ -775,7 +777,7 @@ extern int __wt_huffman_encode(WT_SESSION_IMPL *session, void *huffman_arg, cons
   size_t from_len, WT_ITEM *to_buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_huffman_open(WT_SESSION_IMPL *session, void *symbol_frequency_array, u_int symcnt,
   u_int numbytes, void *retp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **fileconfp)
+extern int __wt_import_repair(WT_SESSION_IMPL *session, const char *uri, char **configp)
   WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag)
   WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -1036,12 +1038,14 @@ extern int __wt_meta_checkpoint_last_name(WT_SESSION_IMPL *session, const char *
   const char **namep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_meta_ckptlist_get(WT_SESSION_IMPL *session, const char *fname, bool update,
   WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_meta_ckptlist_get_with_config(WT_SESSION_IMPL *session, bool update,
+extern int __wt_meta_ckptlist_get_from_config(WT_SESSION_IMPL *session, bool update,
   WT_CKPT **ckptbasep, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase,
   WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM *buf)
   WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_meta_ckptlist_update_config(WT_SESSION_IMPL *session, WT_CKPT *ckptbase,
+  const char *oldcfg, char **newcfgp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_meta_sysinfo_set(WT_SESSION_IMPL *session)
   WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session)
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index 9e274e9f3cc..b4902740837 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -137,7 +137,8 @@ struct __wt_ckpt {
 
     uint64_t size; /* Checkpoint size */
 
-    uint64_t write_gen; /* Write generation */
+    uint64_t write_gen;     /* Write generation */
+    uint64_t run_write_gen; /* Runtime write generation. */
 
     char *block_metadata;   /* Block-stored metadata */
     char *block_checkpoint; /* Block-stored checkpoint */
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index 3492905ec0b..0186b98f4cc 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -170,31 +170,32 @@ struct __wt_session_impl {
 #define WT_SESSION_CACHE_CURSORS 0x00000004u
 #define WT_SESSION_CAN_WAIT 0x00000008u
 #define WT_SESSION_IGNORE_CACHE_SIZE 0x00000010u
-#define WT_SESSION_IMPORT_REPAIR 0x00000020u
-#define WT_SESSION_INSTANTIATE_PREPARE 0x00000040u
-#define WT_SESSION_INTERNAL 0x00000080u
-#define WT_SESSION_LOCKED_CHECKPOINT 0x00000100u
-#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000200u
-#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000400u
-#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00000800u
-#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00001000u
-#define WT_SESSION_LOCKED_METADATA 0x00002000u
-#define WT_SESSION_LOCKED_PASS 0x00004000u
-#define WT_SESSION_LOCKED_SCHEMA 0x00008000u
-#define WT_SESSION_LOCKED_SLOT 0x00010000u
-#define WT_SESSION_LOCKED_TABLE_READ 0x00020000u
-#define WT_SESSION_LOCKED_TABLE_WRITE 0x00040000u
-#define WT_SESSION_LOCKED_TURTLE 0x00080000u
-#define WT_SESSION_LOGGING_INMEM 0x00100000u
-#define WT_SESSION_NO_DATA_HANDLES 0x00200000u
-#define WT_SESSION_NO_LOGGING 0x00400000u
-#define WT_SESSION_NO_RECONCILE 0x00800000u
-#define WT_SESSION_NO_SCHEMA_LOCK 0x01000000u
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x02000000u
-#define WT_SESSION_READ_WONT_NEED 0x04000000u
-#define WT_SESSION_RESOLVING_TXN 0x08000000u
-#define WT_SESSION_ROLLBACK_TO_STABLE 0x10000000u
-#define WT_SESSION_SCHEMA_TXN 0x20000000u
+#define WT_SESSION_IMPORT 0x00000020u
+#define WT_SESSION_IMPORT_REPAIR 0x00000040u
+#define WT_SESSION_INSTANTIATE_PREPARE 0x00000080u
+#define WT_SESSION_INTERNAL 0x00000100u
+#define WT_SESSION_LOCKED_CHECKPOINT 0x00000200u
+#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000400u
+#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000800u
+#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x00001000u
+#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x00002000u
+#define WT_SESSION_LOCKED_METADATA 0x00004000u
+#define WT_SESSION_LOCKED_PASS 0x00008000u
+#define WT_SESSION_LOCKED_SCHEMA 0x00010000u
+#define WT_SESSION_LOCKED_SLOT 0x00020000u
+#define WT_SESSION_LOCKED_TABLE_READ 0x00040000u
+#define WT_SESSION_LOCKED_TABLE_WRITE 0x00080000u
+#define WT_SESSION_LOCKED_TURTLE 0x00100000u
+#define WT_SESSION_LOGGING_INMEM 0x00200000u
+#define WT_SESSION_NO_DATA_HANDLES 0x00400000u
+#define WT_SESSION_NO_LOGGING 0x00800000u
+#define WT_SESSION_NO_RECONCILE 0x01000000u
+#define WT_SESSION_NO_SCHEMA_LOCK 0x02000000u
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x04000000u
+#define WT_SESSION_READ_WONT_NEED 0x08000000u
+#define WT_SESSION_RESOLVING_TXN 0x10000000u
+#define WT_SESSION_ROLLBACK_TO_STABLE 0x20000000u
+#define WT_SESSION_SCHEMA_TXN 0x40000000u
     /* AUTOMATIC FLAG VALUE GENERATION STOP */
     uint32_t flags;
 
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index b0e0b4c4585..a64fb5acdfe 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -141,6 +141,8 @@ struct __wt_cursor_data_source;
 typedef struct __wt_cursor_data_source WT_CURSOR_DATA_SOURCE;
 struct __wt_cursor_dump;
 typedef struct __wt_cursor_dump WT_CURSOR_DUMP;
+struct __wt_cursor_hs;
+typedef struct __wt_cursor_hs WT_CURSOR_HS;
 struct __wt_cursor_index;
 typedef struct __wt_cursor_index WT_CURSOR_INDEX;
 struct __wt_cursor_join;
diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
index aa9191bdc6a..150880625a6 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
@@ -501,7 +501,7 @@ __wt_meta_ckptlist_get(
     config = NULL;
 
     WT_ERR(__wt_metadata_search(session, fname, &config));
-    WT_ERR(__wt_meta_ckptlist_get_with_config(session, update, ckptbasep, config));
+    WT_ERR(__wt_meta_ckptlist_get_from_config(session, update, ckptbasep, config));
 
 err:
     __wt_free(session, config);
@@ -509,11 +509,11 @@ err:
 }
 
 /*
- * __wt_meta_ckptlist_get_with_config --
+ * __wt_meta_ckptlist_get_from_config --
  *     Provided a metadata config, load all available checkpoint information for a file.
  */
 int
-__wt_meta_ckptlist_get_with_config(
+__wt_meta_ckptlist_get_from_config(
   WT_SESSION_IMPL *session, bool update, WT_CKPT **ckptbasep, const char *config)
 {
     WT_CKPT *ckpt, *ckptbase;
@@ -697,6 +697,16 @@ __ckpt_load(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *k, WT_CONFIG_ITEM *v, WT_C
         goto format;
     ckpt->write_gen = (uint64_t)a.val;
 
+    /*
+     * If runtime write generation isn't supplied, this means that we're doing an upgrade and that
+     * we're opening the tree for the first time. We should just leave it as 0 so it is recognized
+     * as part of a previous run.
+     */
+    ret = __wt_config_subgets(session, v, "run_write_gen", &a);
+    WT_RET_NOTFOUND_OK(ret);
+    if (ret != WT_NOTFOUND && a.len != 0)
+        ckpt->run_write_gen = (uint64_t)a.val;
+
     return (0);
 
 format:
@@ -789,12 +799,13 @@ __wt_meta_ckptlist_to_meta(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, WT_ITEM
           "=(addr=\"%.*s\",order=%" PRId64 ",time=%" PRIu64 ",size=%" PRId64
           ",newest_start_durable_ts=%" PRId64 ",oldest_start_ts=%" PRId64 ",newest_txn=%" PRId64
           ",newest_stop_durable_ts=%" PRId64 ",newest_stop_ts=%" PRId64 ",newest_stop_txn=%" PRId64
-          ",prepare=%d,write_gen=%" PRId64 ")",
+          ",prepare=%d,write_gen=%" PRId64 ",run_write_gen=%" PRId64 ")",
           (int)ckpt->addr.size, (char *)ckpt->addr.data, ckpt->order, ckpt->sec,
           (int64_t)ckpt->size, (int64_t)ckpt->ta.newest_start_durable_ts,
           (int64_t)ckpt->ta.oldest_start_ts, (int64_t)ckpt->ta.newest_txn,
           (int64_t)ckpt->ta.newest_stop_durable_ts, (int64_t)ckpt->ta.newest_stop_ts,
-          (int64_t)ckpt->ta.newest_stop_txn, (int)ckpt->ta.prepare, (int64_t)ckpt->write_gen));
+          (int64_t)ckpt->ta.newest_stop_txn, (int)ckpt->ta.prepare, (int64_t)ckpt->write_gen,
+          (int64_t)ckpt->run_write_gen));
     }
     WT_RET(__wt_buf_catfmt(session, buf, ")"));
 
@@ -860,6 +871,44 @@ __wt_ckpt_blkmod_to_meta(WT_SESSION_IMPL *session, WT_ITEM *buf, WT_CKPT *ckpt)
 }
 
 /*
+ * __wt_meta_ckptlist_update_config --
+ *     Provided a metadata config and list of checkpoints, set a file's checkpoint value.
+ */
+int
+__wt_meta_ckptlist_update_config(
+  WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *oldcfg, char **newcfgp)
+{
+    WT_CKPT *ckpt;
+    WT_DECL_ITEM(buf);
+    WT_DECL_RET;
+    char *newcfg;
+    const char *cfg[3];
+
+    newcfg = NULL;
+    WT_RET(__wt_scr_alloc(session, 1024, &buf));
+    WT_ERR(__wt_meta_ckptlist_to_meta(session, ckptbase, buf));
+
+    /* Add backup block modifications for any added checkpoint. */
+    WT_CKPT_FOREACH (ckptbase, ckpt)
+        if (F_ISSET(ckpt, WT_CKPT_ADD))
+            WT_ERR(__wt_ckpt_blkmod_to_meta(session, buf, ckpt));
+
+    /* Replace the checkpoint entry. */
+    cfg[0] = oldcfg;
+    cfg[1] = buf->mem;
+    cfg[2] = NULL;
+    WT_ERR(__wt_config_collapse(session, cfg, &newcfg));
+
+    *newcfgp = newcfg;
+
+err:
+    if (ret != 0)
+        __wt_free(session, newcfg);
+    __wt_scr_free(session, &buf);
+    return (ret);
+}
+
+/*
  * __wt_meta_ckptlist_set --
  *     Set a file's checkpoint value from the WT_CKPT list.
  */
diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c
index 84af4dffa0a..f49bea7250e 100644
--- a/src/third_party/wiredtiger/src/schema/schema_create.c
+++ b/src/third_party/wiredtiger/src/schema/schema_create.c
@@ -62,7 +62,7 @@ __check_imported_ts(WT_SESSION_IMPL *session, const char *uri, const char *confi
     ckptbase = NULL;
     txn_global = &S2C(session)->txn_global;
 
-    WT_ERR_NOTFOUND_OK(__wt_meta_ckptlist_get_with_config(session, false, &ckptbase, config), true);
+    WT_ERR_NOTFOUND_OK(__wt_meta_ckptlist_get_from_config(session, false, &ckptbase, config), true);
     if (ret == WT_NOTFOUND)
         WT_ERR_MSG(session, EINVAL,
           "%s: import could not find any checkpoint information in supplied metadata", uri);
@@ -218,18 +218,18 @@ __create_file(
                 ;
             *p = val->data;
             WT_ERR(__wt_config_collapse(session, filecfg, &fileconf));
-            WT_ERR(__wt_metadata_insert(session, uri, fileconf));
         } else {
-            /* Read the data file's descriptor block and try to recreate the associated metadata. */
+            /* Try to recreate the associated metadata from the imported data source. */
             WT_ERR(__wt_import_repair(session, uri, &fileconf));
         }
+        WT_ERR(__wt_metadata_insert(session, uri, fileconf));
 
         /*
          * Ensure that the timestamps in the imported data file are not in the future relative to
          * our oldest timestamp.
          */
         if (import)
-            WT_ERR(__check_imported_ts(session, filename, fileconf));
+            WT_ERR(__check_imported_ts(session, uri, fileconf));
     }
 
     /*
@@ -663,16 +663,18 @@ __create_table(
     WT_CONFIG_ITEM cgkey, cgval, ckey, cval;
     WT_DECL_RET;
     WT_TABLE *table;
-    size_t cgsize;
+    size_t len;
     int ncolgroups, nkeys;
-    char *tableconf, *cgname;
+    char *cgcfg, *cgname, *filecfg, *filename, *importcfg, *tablecfg;
     const char *cfg[4] = {WT_CONFIG_BASE(session, table_meta), config, NULL, NULL};
     const char *tablename;
     bool import_repair;
 
-    cgname = NULL;
+    import_repair = false;
+
+    cgcfg = filecfg = importcfg = tablecfg = NULL;
+    cgname = filename = NULL;
     table = NULL;
-    tableconf = NULL;
 
     WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_TABLE_WRITE));
 
@@ -680,7 +682,7 @@ __create_table(
     WT_PREFIX_SKIP_REQUIRED(session, tablename, "table:");
 
     /* Check if the table already exists. */
-    if ((ret = __wt_metadata_search(session, uri, &tableconf)) != WT_NOTFOUND) {
+    if ((ret = __wt_metadata_search(session, uri, &tablecfg)) != WT_NOTFOUND) {
         /*
          * Regardless of the 'exclusive' flag, we should raise an error if we try to import an
          * existing URI rather than just silently returning.
@@ -707,6 +709,13 @@ __create_table(
                   "'repair' option is provided",
                   uri);
             WT_ERR_NOTFOUND_OK(ret, false);
+        } else {
+            /* Try to recreate the associated metadata from the imported data source. */
+            len = strlen("file:") + strlen(tablename) + strlen(".wt") + 1;
+            WT_ERR(__wt_calloc_def(session, len, &filename));
+            WT_ERR(__wt_snprintf(filename, len, "file:%s.wt", tablename));
+            WT_ERR(__wt_import_repair(session, filename, &filecfg));
+            cfg[2] = filecfg;
         }
     }
 
@@ -716,14 +725,24 @@ __create_table(
         ;
     WT_ERR_NOTFOUND_OK(ret, false);
 
-    WT_ERR(__wt_config_collapse(session, cfg, &tableconf));
-    WT_ERR(__wt_metadata_insert(session, uri, tableconf));
+    WT_ERR(__wt_config_collapse(session, cfg, &tablecfg));
+    WT_ERR(__wt_metadata_insert(session, uri, tablecfg));
 
     if (ncolgroups == 0) {
-        cgsize = strlen("colgroup:") + strlen(tablename) + 1;
-        WT_ERR(__wt_calloc_def(session, cgsize, &cgname));
-        WT_ERR(__wt_snprintf(cgname, cgsize, "colgroup:%s", tablename));
-        WT_ERR(__create_colgroup(session, cgname, exclusive, config));
+        len = strlen("colgroup:") + strlen(tablename) + 1;
+        WT_ERR(__wt_calloc_def(session, len, &cgname));
+        WT_ERR(__wt_snprintf(cgname, len, "colgroup:%s", tablename));
+        if (import_repair) {
+            len =
+              strlen(tablecfg) + strlen(",import=(enabled,file_metadata=())") + strlen(filecfg) + 1;
+            WT_ERR(__wt_calloc_def(session, len, &importcfg));
+            WT_ERR(__wt_snprintf(
+              importcfg, len, "%s,import=(enabled,file_metadata=(%s))", tablecfg, filecfg));
+            cfg[2] = importcfg;
+            WT_ERR(__wt_config_collapse(session, &cfg[1], &cgcfg));
+            WT_ERR(__create_colgroup(session, cgname, exclusive, cgcfg));
+        } else
+            WT_ERR(__create_colgroup(session, cgname, exclusive, config));
     }
 
     /*
@@ -739,8 +758,12 @@ __create_table(
 
 err:
     WT_TRET(__wt_schema_release_table(session, &table));
+    __wt_free(session, cgcfg);
     __wt_free(session, cgname);
-    __wt_free(session, tableconf);
+    __wt_free(session, filecfg);
+    __wt_free(session, filename);
+    __wt_free(session, importcfg);
+    __wt_free(session, tablecfg);
     return (ret);
 }
 
@@ -798,6 +821,8 @@ __schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
      * back it all out.
      */
     WT_RET(__wt_meta_track_on(session));
+    if (import)
+        F_SET(session, WT_SESSION_IMPORT);
 
     if (WT_PREFIX_MATCH(uri, "colgroup:"))
         ret = __create_colgroup(session, uri, exclusive, config);
@@ -816,6 +841,7 @@ __schema_create(WT_SESSION_IMPL *session, const char *uri, const char *config)
         ret = __wt_bad_object_type(session, uri);
 
     session->dhandle = NULL;
+    F_CLR(session, WT_SESSION_IMPORT);
     WT_TRET(__wt_meta_track_off(session, true, ret != 0));
 
     return (ret);
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 54a13cedff0..8178e593f6b 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -1580,6 +1580,7 @@ __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, WT_TIME_AGGREGAT
     WT_CKPT_FOREACH (ckptbase, ckpt)
         if (F_ISSET(ckpt, WT_CKPT_ADD)) {
             ckpt->write_gen = btree->write_gen;
+            ckpt->run_write_gen = btree->run_write_gen;
             WT_TIME_AGGREGATE_COPY(&ckpt->ta, ta);
         }
 }
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 0b5b7da92c0..6e042fc9b33 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -841,11 +841,19 @@ __wt_txn_recover(WT_SESSION_IMPL *session, const char *cfg[])
         WT_ERR(ret);
     }
 
-    /* Check whether the history store exists. */
-    WT_ERR(__hs_exists(session, metac, cfg, &hs_exists));
-
     /* Scan the metadata to find the live files and their IDs. */
     WT_ERR(__recovery_file_scan(&r));
+
+    /*
+     * Check whether the history store exists.
+     *
+     * This will open a dhandle on the history store and initialize its write gen so we must ensure
+     * that the connection-wide base write generation is stable at this point. Performing a recovery
+     * file scan will involve updating the connection-wide base write generation so we MUST do this
+     * before checking for the existence of a history store file.
+     */
+    WT_ERR(__hs_exists(session, metac, cfg, &hs_exists));
+
     /*
      * Clear this out. We no longer need it and it could have been re-allocated when scanning the
      * files.
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index 1827a2aec58..491d4b24cb3 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -284,6 +284,38 @@ functions:
         rm -rf "wiredtiger"
         rm -rf "wiredtiger.tgz"
 
+  "run wt hang analyzer":
+    command: shell.exec
+    params:
+      working_dir: "wiredtiger/build_posix"
+      script: |
+        set -o verbose
+
+        # Dump core (-c) and debugger outputs (-o)
+        wt_hang_analyzer_option="-c -o file -o stdout"
+
+        echo "Calling the wt hang analyzer ..."
+        PATH="/opt/mongodbtoolchain/gdb/bin:$PATH" ${python_binary|python3} ../test/wt_hang_analyzer/wt_hang_analyzer.py $wt_hang_analyzer_option
+
+  "save wt hang analyzer core/debugger files":
+    - command: archive.targz_pack
+      params:
+        target: "wt-hang-analyzer.tgz"
+        source_dir: "wiredtiger/build_posix"
+        include:
+          - "./*core*"
+          - "./debugger*.*"
+    - command: s3.put
+      params:
+        aws_secret: ${aws_secret}
+        aws_key: ${aws_key}
+        local_file: wt-hang-analyzer.tgz
+        bucket: build_external
+        permissions: public-read
+        content_type: application/tar
+        display_name: WT Hang Analyzer Output - Execution ${execution}
+        remote_file: wiredtiger/${build_variant}/${revision}/wt_hang_analyzer/wt-hang-analyzer_${task_name}_${build_id}${postfix|}.tgz
+
   "dump stderr/stdout":
     command: shell.exec
     params:
@@ -423,8 +455,11 @@ post:
   - func: "upload artifact"
     vars:
       postfix: -${execution}
+  - func: "save wt hang analyzer core/debugger files"
   - func: "dump stderr/stdout"
   - func: "cleanup"
+timeout:
+  - func: "run wt hang analyzer"
 
 tasks:
   # Base compile task on posix flavours
diff --git a/src/third_party/wiredtiger/test/suite/test_import09.py b/src/third_party/wiredtiger/test/suite/test_import09.py
new file mode 100644
index 00000000000..b6b747b9005
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_import09.py
@@ -0,0 +1,190 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_import09.py
+# Import a table with the repair option (no exported metadata).
+
+import os, random, shutil
+from test_import01 import test_import_base
+from wtscenario import make_scenarios
+
+class test_import09(test_import_base):
+    nrows = 100
+    ntables = 1
+    session_config = 'isolation=snapshot'
+
+    allocsizes = [
+        ('512', dict(allocsize='512')),
+        ('1024', dict(allocsize='1024')),
+        ('2048', dict(allocsize='2048')),
+        ('4096', dict(allocsize='4096')),
+    ]
+    compressors = [
+       ('none', dict(compressor='none')),
+       ('nop', dict(compressor='nop')),
+       ('lz4', dict(compressor='lz4')),
+       ('snappy', dict(compressor='snappy')),
+       ('zlib', dict(compressor='zlib')),
+       ('zstd', dict(compressor='zstd')),
+    ]
+    encryptors = [
+       ('none', dict(encryptor='none')),
+       ('nop', dict(encryptor='nop')),
+       ('rotn', dict(encryptor='rotn')),
+    ]
+    tables = [
+        ('simple_table', dict(
+            is_simple = True,
+            keys = [k for k in range(1, nrows+1)],
+            values = random.sample(range(1000000), k=nrows),
+            config = 'key_format=r,value_format=i')),
+       ('table_with_named_columns', dict(
+           is_simple = False,
+           keys = [k for k in range(1, 7)],
+           values = [('Australia', 'Canberra', 1),('Japan', 'Tokyo', 2),('Italy', 'Rome', 3),
+             ('China', 'Beijing', 4),('Germany', 'Berlin', 5),('South Korea', 'Seoul', 6)],
+           config = 'columns=(id,country,capital,population),key_format=r,value_format=SSi')),
+    ]
+    scenarios = make_scenarios(tables, allocsizes, compressors, encryptors)
+
+    # Check to verify table projections.
+    def check_projections(self, uri, keys, values):
+        for i in range(0, len(keys)):
+            self.check_record(uri + '(country,capital)',
+                              keys[i], [values[i][0], values[i][1]])
+            self.check_record(uri + '(country,population)',
+                              keys[i], [values[i][0], values[i][2]])
+            self.check_record(uri + '(capital,population)',
+                              keys[i], [values[i][1], values[i][2]])
+
+    # Load the compressor extension, skip the test if missing.
+    def conn_extensions(self, extlist):
+        extlist.skip_if_missing = True
+        extlist.extension('compressors', self.compressor)
+        extlist.extension('encryptors', self.encryptor)
+
+    def conn_config(self):
+        return 'cache_size=50MB,log=(enabled),statistics=(all),encryption=(name={})'.format(
+            self.encryptor)
+
+    def test_import_table_repair(self):
+        # Add some tables & data and checkpoint.
+        self.populate(self.ntables, self.nrows)
+        self.session.checkpoint()
+
+        # Create the table targeted for import.
+        original_db_table = 'original_db_table'
+        uri = 'table:' + original_db_table
+        create_config = ('allocation_size={},log=(enabled=true),block_compressor={},'
+            'encryption=(name={}),') + self.config
+        self.session.create(uri,
+            create_config.format(self.allocsize, self.compressor, self.encryptor))
+
+        keys = self.keys
+        values = self.values
+        ts = [10*k for k in range(1, len(keys)+1)]
+
+        # Add data to our target table and perform a checkpoint.
+        min_idx = 0
+        max_idx = len(keys) // 3
+        for i in range(min_idx, max_idx):
+            self.update(uri, keys[i], values[i], ts[i])
+        self.session.checkpoint()
+
+        # Add more data and checkpoint again.
+        min_idx = max_idx
+        max_idx = 2*len(keys) // 3
+        for i in range(min_idx, max_idx):
+            self.update(uri, keys[i], values[i], ts[i])
+        self.session.checkpoint()
+
+        # Export the file and table metadata so we can verify our repair later.
+        original_db_file_uri = 'file:' + original_db_table + '.wt'
+        c = self.session.open_cursor('metadata:', None, None)
+        original_db_table_config = c[uri]
+        original_db_file_config = c[original_db_file_uri]
+        c.close()
+
+        self.printVerbose(3, '\nFile configuration:\n' + original_db_file_config)
+        self.printVerbose(3, '\nTable configuration:\n' + original_db_table_config)
+
+        # Close the connection.
+        self.close_conn()
+
+        # Create a new database and connect to it.
+        newdir = 'IMPORT_DB'
+        shutil.rmtree(newdir, ignore_errors=True)
+        os.mkdir(newdir)
+        self.conn = self.setUpConnectionOpen(newdir)
+        self.session = self.setUpSessionOpen(self.conn)
+
+        # Make a bunch of files and fill them with data.
+        self.populate(self.ntables, self.nrows)
+        self.session.checkpoint()
+
+        # Bring forward the oldest to be past or equal to the timestamps we'll be importing.
+        self.conn.set_timestamp('oldest_timestamp=' + self.timestamp_str(ts[max_idx]))
+
+        # Copy over the datafile for the table we want to import.
+        self.copy_file(original_db_table + '.wt', '.', newdir)
+
+        # Construct the config string.
+        import_config = 'log=(enabled=true),import=(enabled,repair=true)'
+
+        # Import the file.
+        self.session.create(uri, import_config)
+
+        # Verify object.
+        self.session.verify(uri)
+
+        # Check that the previously inserted values survived the import.
+        self.check(uri, keys[:max_idx], values[:max_idx])
+
+        # Check against projections when the table is not simple.
+        if not self.is_simple:
+            self.check_projections(uri, keys[:max_idx], values[:max_idx])
+
+        # Compare configuration metadata.
+        c = self.session.open_cursor('metadata:', None, None)
+        new_db_file_config = c[original_db_file_uri]
+        new_db_table_config = c[uri]
+        c.close()
+        self.config_compare(original_db_file_config, new_db_file_config)
+        self.config_compare(original_db_table_config, new_db_table_config)
+
+        # Add some data and check that the table operates as usual after importing.
+        min_idx = max_idx
+        max_idx = len(keys)
+        for i in range(min_idx, max_idx):
+            self.update(uri, keys[i], values[i], ts[i])
+        self.check(uri, keys, values)
+        if not self.is_simple:
+            self.check_projections(uri, keys, values)
+
+        # Perform a checkpoint.
+        self.session.checkpoint()
diff --git a/src/third_party/wiredtiger/test/suite/test_txn25.py b/src/third_party/wiredtiger/test/suite/test_txn25.py
new file mode 100644
index 00000000000..befdd6cdf24
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_txn25.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2020 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_txn24.py
+#   Test the write generation mechanism to ensure that transaction ids get wiped between runs.
+#
+
+import wiredtiger, wttest
+
+class test_txn25(wttest.WiredTigerTestCase):
+    conn_config = 'cache_size=50MB,log=(enabled),statistics=(all)'
+    session_config = 'isolation=snapshot'
+
+    def test_txn25(self):
+        uri = 'file:test_txn25'
+        create_config = 'allocation_size=512,key_format=S,value_format=S'
+        self.session.create(uri, create_config)
+
+        # Populate the file and ensure that we start seeing some high transaction IDs in the system.
+        value1 = 'aaaaa' * 100
+        value2 = 'bbbbb' * 100
+        value3 = 'ccccc' * 100
+
+        # Keep transaction ids around.
+        session2 = self.conn.open_session()
+        session2.begin_transaction()
+
+        cursor = self.session.open_cursor(uri)
+        for i in range(1, 1000):
+            self.session.begin_transaction()
+            cursor[str(i)] = value1
+            self.session.commit_transaction()
+
+        for i in range(1, 1000):
+            self.session.begin_transaction()
+            cursor[str(i)] = value2
+            self.session.commit_transaction()
+
+        for i in range(1, 1000):
+            self.session.begin_transaction()
+            cursor[str(i)] = value3
+            self.session.commit_transaction()
+
+        session2.rollback_transaction()
+        session2.close()
+
+        # Close and re-open the connection.
+        cursor.close()
+        self.conn.close()
+        self.conn = wiredtiger.wiredtiger_open(self.home, self.conn_config)
+        self.session = self.conn.open_session(self.session_config)
+
+        # Now that we've reopened, check that we can view the latest data from the previous run.
+        #
+        # Since we've restarted the system, our transaction IDs are going to begin from 1 again
+        # so we have to wipe the cell's transaction IDs in order to see them.
+        cursor = self.session.open_cursor(uri)
+        self.session.begin_transaction()
+        for i in range(1, 1000):
+            self.assertEqual(cursor[str(i)], value3)
+        self.session.rollback_transaction()
diff --git a/src/third_party/wiredtiger/test/wt_hang_analyzer/wt_hang_analyzer.py b/src/third_party/wiredtiger/test/wt_hang_analyzer/wt_hang_analyzer.py
new file mode 100644
index 00000000000..6c5b7832cb4
--- /dev/null
+++ b/src/third_party/wiredtiger/test/wt_hang_analyzer/wt_hang_analyzer.py
@@ -0,0 +1,603 @@
+#!/usr/bin/env python
+"""Hang Analyzer module.
+
+A prototype hang analyzer for Evergreen integration to help investigate test timeouts.
+
+1. Script supports taking dumps, and/or dumping a summary of useful information about a process.
+2. Script will iterate through a list of interesting processes,
+    and run the tools from step 1. The list of processes can be provided as an option.
+
+Currently only supports Linux. There are two issues with the MacOS and Windows implementations:
+1. WT-6918 - lldb cannot attach to processes in MacOS.
+2. WT-6919 - Windows cannot find the debug symbols.
+"""
+
+import csv, glob, itertools, logging, re, tempfile, traceback
+import os, sys, platform, signal, subprocess, threading, time
+from distutils import spawn
+from io import BytesIO, TextIOWrapper
+from optparse import OptionParser
+_IS_WINDOWS = (sys.platform == "win32")
+
+if _IS_WINDOWS:
+    import win32event
+    import win32api
+
+"""
+Helper class to read output of a subprocess.
+
+Used to avoid deadlocks from the pipe buffer filling up and blocking the subprocess while it's
+being waited on.
+"""
+class LoggerPipe(threading.Thread):
+    """Asynchronously reads the output of a subprocess and sends it to a logger."""
+
+    # The start() and join() methods are not intended to be called directly on the LoggerPipe
+    # instance. Since we override them for that effect, the super's version are preserved here.
+    __start = threading.Thread.start
+    __join = threading.Thread.join
+
+    def __init__(self, logger, level, pipe_out):
+        """Initialize the LoggerPipe with the specified arguments."""
+
+        threading.Thread.__init__(self)
+        # Main thread should not call join() when exiting.
+        self.daemon = True
+
+        self.__logger = logger
+        self.__level = level
+        self.__pipe_out = pipe_out
+
+        self.__lock = threading.Lock()
+        self.__condition = threading.Condition(self.__lock)
+
+        self.__started = False
+        self.__finished = False
+
+        LoggerPipe.__start(self)
+
+    def start(self):
+        """Start not implemented."""
+        raise NotImplementedError("start should not be called directly")
+
+    def run(self):
+        """Read the output from 'pipe_out' and logs each line to 'logger'."""
+
+        with self.__lock:
+            self.__started = True
+            self.__condition.notify_all()
+
+        # Close the pipe when all of the output has been read.
+        with self.__pipe_out:
+            # Avoid buffering the output from the pipe.
+            for line in iter(self.__pipe_out.readline, b""):
+                # Convert the output of the process from a bytestring to a UTF-8 string, and replace
+                # any characters that cannot be decoded with the official Unicode replacement
+                # character, U+FFFD.
+                line = line.decode("utf-8", "replace")
+                self.__logger.log(self.__level, line.rstrip())
+
+        with self.__lock:
+            self.__finished = True
+            self.__condition.notify_all()
+
+    def join(self, timeout=None):
+        """Join not implemented."""
+        raise NotImplementedError("join should not be called directly")
+
+    def wait_until_started(self):
+        """Wait until started."""
+        with self.__lock:
+            while not self.__started:
+                self.__condition.wait()
+
+    def wait_until_finished(self):
+        """Wait until finished."""
+        with self.__lock:
+            while not self.__finished:
+                self.__condition.wait()
+
+        # No need to pass a timeout to join() because the thread should already be done after
+        # notifying us it has finished reading output from the pipe.
+        LoggerPipe.__join(self)
+
+def call(args, logger):
+    """Call subprocess on args list."""
+    logger.info(str(args))
+
+    # Use a common pipe for stdout & stderr for logging.
+    process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    logger_pipe = LoggerPipe(logger, logging.INFO, process.stdout)
+    logger_pipe.wait_until_started()
+
+    ret = process.wait()
+    logger_pipe.wait_until_finished()
+
+    if ret != 0:
+        logger.error("Bad exit code %d", ret)
+        raise Exception("Bad exit code %d from %s" % (ret, " ".join(args)))
+
+def callo(args, logger):
+    """Call subprocess on args string."""
+    logger.info("%s", str(args))
+
+    return subprocess.check_output(args)
+
+def find_program(prog, paths):
+    """Find the specified program in env PATH, or tries a set of paths."""
+    loc = spawn.find_executable(prog)
+
+    if loc is not None:
+        return loc
+
+    for loc in paths:
+        full_prog = os.path.join(loc, prog)
+        if os.path.exists(full_prog):
+            return full_prog
+
+    return None
+
+def get_process_logger(debugger_output, pid, process_name):
+    """Return the process logger from options specified."""
+    process_logger = logging.Logger("process", level=logging.DEBUG)
+    process_logger.mongo_process_filename = None
+
+    if 'stdout' in debugger_output:
+        s_handler = logging.StreamHandler(sys.stdout)
+        s_handler.setFormatter(logging.Formatter(fmt="%(message)s"))
+        process_logger.addHandler(s_handler)
+
+    if 'file' in debugger_output:
+        filename = "debugger_%s_%d.log" % (os.path.splitext(process_name)[0], pid)
+        process_logger.mongo_process_filename = filename
+        f_handler = logging.FileHandler(filename=filename, mode="w")
+        f_handler.setFormatter(logging.Formatter(fmt="%(message)s"))
+        process_logger.addHandler(f_handler)
+
+    return process_logger
+
+class WindowsDumper(object):
+    """WindowsDumper class."""
+
+    @staticmethod
+    def __find_debugger(logger, debugger):
+        """Find the installed debugger."""
+        # We are looking for c:\Program Files (x86)\Windows Kits\8.1\Debuggers\x64.
+        cdb = spawn.find_executable(debugger)
+        if cdb is not None:
+            return cdb
+        from win32com.shell import shell, shellcon
+
+        # Cygwin via sshd does not expose the normal environment variables.
+        # Use the shell api to get the variable instead.
+        root_dir = shell.SHGetFolderPath(0, shellcon.CSIDL_PROGRAM_FILESX86, None, 0)
+
+        # Construct the debugger search paths in most-recent order.
+        debugger_paths = [os.path.join(root_dir, "Windows Kits", "10", "Debuggers", "x64")]
+        for idx in reversed(range(0, 2)):
+            debugger_paths.append(
+                os.path.join(root_dir, "Windows Kits", "8." + str(idx), "Debuggers", "x64"))
+
+        for dbg_path in debugger_paths:
+            logger.info("Checking for debugger in %s", dbg_path)
+            if os.path.exists(dbg_path):
+                return os.path.join(dbg_path, debugger)
+
+        return None
+
+    def dump_info(self, root_logger, logger, pid, process_name, take_dump):
+        """Dump useful information to the console."""
+        debugger = "cdb.exe"
+        dbg = self.__find_debugger(root_logger, debugger)
+
+        if dbg is None:
+            root_logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid)
+            return
+
+        root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, process_name, pid)
+
+        dump_command = ""
+        if take_dump:
+            # Dump to file, dump_<process name>.<pid>.mdmp.
+            dump_file = "dump_%s.%d.%s" % (os.path.splitext(process_name)[0], pid,
+                                           self.get_dump_ext())
+            dump_command = ".dump /ma %s" % dump_file
+            root_logger.info("Dumping core to %s", dump_file)
+
+        cmds = [
+            ".symfix",  # Fixup symbol path.
+            "!sym noisy",  # Enable noisy symbol loading.
+            ".symopt +0x10",  # Enable line loading (off by default in CDB, on by default in WinDBG).
+            ".reload",  # Reload symbols.
+            "!peb",  # Dump current exe & environment variables.
+            "lm",  # Dump loaded modules.
+            dump_command,
+            "!uniqstack -pn",  # Dump all unique threads with function arguments.
+            "!cs -l",  # Dump all locked critical sections.
+            ".detach",  # Detach.
+            "q"  # Quit.
+        ]
+
+        call([dbg, '-c', ";".join(cmds), '-p', str(pid)], logger)
+
+        root_logger.info("Done analyzing %s process with PID %d", process_name, pid)
+
+    @staticmethod
+    def get_dump_ext():
+        """Return the dump file extension."""
+        return "mdmp"
+
+class WindowsProcessList(object):
+    """WindowsProcessList class."""
+
+    @staticmethod
+    def __find_ps():
+        """Find tasklist."""
+        return os.path.join(os.environ["WINDIR"], "system32", "tasklist.exe")
+
+    def dump_processes(self, logger):
+        """Get list of [Pid, Process Name]."""
+        ps = self.__find_ps()
+
+        logger.info("Getting list of processes using %s", ps)
+
+        ret = callo([ps, "/FO", "CSV"], logger)
+
+        buff = TextIOWrapper(BytesIO(ret))
+        csv_reader = csv.reader(buff)
+
+        return [[int(row[1]), row[0]] for row in csv_reader if row[1] != "PID"]
+
+# LLDB dumper is for MacOS X.
+class LLDBDumper(object):
+    """LLDBDumper class."""
+
+    @staticmethod
+    def __find_debugger(debugger):
+        """Find the installed debugger."""
+        return find_program(debugger, ['/usr/bin'])
+
+    def dump_info(self, root_logger, logger, pid, process_name, take_dump):
+        """Dump info."""
+        debugger = "lldb"
+        dbg = self.__find_debugger(debugger)
+
+        if dbg is None:
+            root_logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid)
+            return
+
+        root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, process_name, pid)
+
+        lldb_version = callo([dbg, "--version"], logger)
+
+        logger.info(lldb_version)
+
+        # Do we have the XCode or LLVM version of lldb?
+        # Old versions of lldb do not work well when taking commands via a file.
+        # XCode (7.2): lldb-340.4.119.
+        # LLVM - lldb version 3.7.0 ( revision ).
+
+        lldb_version = str(lldb_version)
+        if 'version' not in lldb_version:
+            # We have XCode's lldb.
+            lldb_version = lldb_version[lldb_version.index("lldb-"):]
+            lldb_version = lldb_version.replace('lldb-', '')
+            lldb_major_version = int(lldb_version[:lldb_version.index('.')])
+            if lldb_major_version < 340:
+                logger.warning("Debugger lldb is too old, please upgrade to XCode 7.2")
+                return
+
+        dump_command = ""
+        if take_dump:
+            # Dump to file, dump_<process name>.<pid>.core.
+            dump_file = "dump_%s.%d.%s" % (process_name, pid, self.get_dump_ext())
+            dump_command = "process save-core %s" % dump_file
+            root_logger.info("Dumping core to %s", dump_file)
+
+        cmds = [
+            "attach -p %d" % pid,
+            "target modules list",
+            "thread backtrace all",
+            dump_command,
+            "settings set interpreter.prompt-on-quit false",
+            "quit",
+        ]
+
+        tf = tempfile.NamedTemporaryFile(mode='w', encoding='utf-8')
+
+        for cmd in cmds:
+            tf.write(cmd + "\n")
+
+        tf.flush()
+
+        # Works on in MacOS 10.9 & later.
+        #call([dbg] +  list( itertools.chain.from_iterable([['-o', b] for b in cmds])), logger)
+        call(['cat', tf.name], logger)
+        call([dbg, '--source', tf.name], logger)
+
+        root_logger.info("Done analyzing %s process with PID %d", process_name, pid)
+
+    @staticmethod
+    def get_dump_ext():
+        """Return the dump file extension."""
+        return "core"
+
+class DarwinProcessList(object):
+    """DarwinProcessList class."""
+
+    @staticmethod
+    def __find_ps():
+        """Find ps."""
+        return find_program('ps', ['/bin'])
+
+    def dump_processes(self, logger):
+        """Get list of [Pid, Process Name]."""
+        ps = self.__find_ps()
+
+        logger.info("Getting list of processes using %s", ps)
+
+        ret = callo([ps, "-axco", "pid,comm"], logger)
+
+        buff = TextIOWrapper(BytesIO(ret))
+        csv_reader = csv.reader(buff, delimiter=' ', quoting=csv.QUOTE_NONE, skipinitialspace=True)
+
+        return [[int(row[0]), row[1]] for row in csv_reader if row[0] != "PID"]
+
+# GDB dumper is for Linux.
+class GDBDumper(object):
+    """GDBDumper class."""
+
+    @staticmethod
+    def __find_debugger(debugger):
+        """Find the installed debugger."""
+        return find_program(debugger, ['/opt/mongodbtoolchain/v3/bin/gdb', '/usr/bin'])
+
+    def dump_info(self, root_logger, logger, pid, process_name, take_dump):
+        """Dump info."""
+        debugger = "gdb"
+        dbg = self.__find_debugger(debugger)
+
+        if dbg is None:
+            logger.warning("Debugger %s not found, skipping dumping of %d", debugger, pid)
+            return
+
+        root_logger.info("Debugger %s, analyzing %s process with PID %d", dbg, process_name, pid)
+
+        dump_command = ""
+        if take_dump:
+            # Dump to file, dump_<process name>.<pid>.core.
+            dump_file = "dump_%s.%d.%s" % (process_name, pid, self.get_dump_ext())
+            dump_command = "gcore %s" % dump_file
+            root_logger.info("Dumping core to %s", dump_file)
+
+        call([dbg, "--version"], logger)
+
+        cmds = [
+            "set interactive-mode off",
+            "set print thread-events off",  # Suppress GDB messages of threads starting/finishing.
+            "file %s" % process_name,
+            "attach %d" % pid,
+            "info sharedlibrary",
+            "info threads",  # Dump a simple list of commands to get the thread name.
+            "thread apply all bt",
+            "set python print-stack full",
+            # Lock the scheduler, before running commands, which execute code in the attached process.
+            "set scheduler-locking on",
+            dump_command,
+            "set confirm off",
+            "quit",
+        ]
+
+        call([dbg, "--quiet", "--nx"] +
+             list(itertools.chain.from_iterable([['-ex', b] for b in cmds])), logger)
+
+        root_logger.info("Done analyzing %s process with PID %d", process_name, pid)
+
+    @staticmethod
+    def get_dump_ext():
+        """Return the dump file extension."""
+        return "core"
+
+    @staticmethod
+    def _find_gcore():
+        """Find the installed gcore."""
+        dbg = "/usr/bin/gcore"
+        if os.path.exists(dbg):
+            return dbg
+
+        return None
+
+class LinuxProcessList(object):
+    """LinuxProcessList class."""
+
+    @staticmethod
+    def __find_ps():
+        """Find ps."""
+        return find_program('ps', ['/bin', '/usr/bin'])
+
+    def dump_processes(self, logger):
+        """Get list of [Pid, Process Name]."""
+        ps = self.__find_ps()
+
+        logger.info("Getting list of processes using %s", ps)
+
+        call([ps, "--version"], logger)
+
+        ret = callo([ps, "-eo", "pid,args"], logger)
+
+        buff = TextIOWrapper(BytesIO(ret))
+        csv_reader = csv.reader(buff, delimiter=' ', quoting=csv.QUOTE_NONE, skipinitialspace=True)
+
+        return [[int(row[0]), os.path.split(row[1])[1]] for row in csv_reader if row[0] != "PID"]
+
+def get_hang_analyzers():
+    """Return hang analyzers."""
+
+    dbg = None
+    ps = None
+
+    # Skip taking the dump in Mac OS and result in an error.
+    # FIXME : WT-6918 - Remove the skip block of code after fixing the issues.
+    if sys.platform == "darwin":
+        return [ps, dbg]
+
+    if sys.platform.startswith("linux"):
+        dbg = GDBDumper()
+        ps = LinuxProcessList()
+    elif _IS_WINDOWS or sys.platform == "cygwin":
+        dbg = WindowsDumper()
+        ps = WindowsProcessList()
+    elif sys.platform == "darwin":
+        dbg = LLDBDumper()
+        ps = DarwinProcessList()
+
+    return [ps, dbg]
+
+def check_dump_quota(quota, ext):
+    """Check if sum of the files with ext is within the specified quota in megabytes."""
+
+    files = glob.glob("*." + ext)
+
+    size_sum = 0
+    for file_name in files:
+        size_sum += os.path.getsize(file_name)
+
+    return size_sum <= quota
+
+def pname_match(exact_match, pname, processes):
+    """Return True if the pname matches in processes."""
+    pname = os.path.splitext(pname)[0]
+    for ip in processes:
+        if exact_match and pname == ip or not exact_match and ip in pname:
+            return True
+    return False
+
+# Basic procedure
+#
+# 1. Get a list of interesting processes.
+# 2. Dump useful information or take dumps.
+def main():
+    """Execute Main program."""
+    root_logger = logging.Logger("hang_analyzer", level=logging.DEBUG)
+
+    handler = logging.StreamHandler(sys.stdout)
+    handler.setFormatter(logging.Formatter(fmt="%(message)s"))
+    root_logger.addHandler(handler)
+
+    root_logger.info("Python Version: %s", sys.version)
+    root_logger.info("OS: %s", platform.platform())
+
+    try:
+        if _IS_WINDOWS or sys.platform == "cygwin":
+            distro = platform.win32_ver()
+            root_logger.info("Windows Distribution: %s", distro)
+        else:
+            distro = platform.linux_distribution()
+            root_logger.info("Linux Distribution: %s", distro)
+
+    except AttributeError:
+        root_logger.warning("Cannot determine Linux distro since Python is too old")
+
+    try:
+        uid = os.getuid()
+        root_logger.info("Current User: %s", uid)
+        current_login = os.getlogin()
+        root_logger.info("Current Login: %s", current_login)
+    except OSError:
+        root_logger.warning("Cannot determine Unix Current Login")
+    except AttributeError:
+        root_logger.warning("Cannot determine Unix Current Login, not supported on Windows")
+
+    contain_processes = ["ex_", "intpack-test", "python", "test_"]
+    exact_processes = ["cursor_order", "packing-test", "t"]
+    process_ids = []
+
+    parser = OptionParser(description=__doc__)
+    parser.add_option('-p', '--process-contains-names', dest='process_contains_names',
+                      help='Comma separated list of process patterns to analyze')
+    parser.add_option('-e', '--process-names', dest='process_exact_names',
+                      help='Comma separated list of exact process names to analyze')
+    parser.add_option('-d', '--process-ids', dest='process_ids', default=None,
+                      help='Comma separated list of process ids (PID) to analyze, overrides -p & e')
+    parser.add_option('-c', '--dump-core', dest='dump_core', action="store_true", default=False,
+                      help='Dump core file for each analyzed process')
+    parser.add_option('-s', '--max-core-dumps-size', dest='max_core_dumps_size', default=10000,
+                      help='Maximum total size of core dumps to keep in megabytes')
+    parser.add_option('-o', '--debugger-output', dest='debugger_output', action="append",
+                      choices=['file', 'stdout'], default=None,
+                      help="If 'stdout', then the debugger's output is written to the Python"
+                      " process's stdout. If 'file', then the debugger's output is written"
+                      " to a file named debugger_<process>_<pid>.log for each process it"
+                      " attaches to. This option can be specified multiple times on the"
+                      " command line to have the debugger's output written to multiple"
+                      " locations. By default, the debugger's output is written only to the"
+                      " Python process's stdout.")
+
+    (options, _) = parser.parse_args()
+
+    if options.debugger_output is None:
+        options.debugger_output = ['stdout']
+
+    if options.process_ids is not None:
+        # process_ids is an int list of PIDs.
+        process_ids = [int(pid) for pid in options.process_ids.split(',')]
+
+    if options.process_exact_names is not None:
+        exact_processes = options.process_exact_names.split(',')
+
+    if options.process_contains_names is not None:
+        contain_processes = options.process_contains_names.split(',')
+
+    [ps, dbg] = get_hang_analyzers()
+
+    if ps is None or dbg is None:
+        root_logger.warning("hang_analyzer.py: Unsupported platform: %s", sys.platform)
+        exit(1)
+
+    all_processes = ps.dump_processes(root_logger)
+
+    # Canonicalize the process names to lowercase to handle cases where the name of the Python
+    # process is /System/Library/.../Python on OS X and -p python is specified.
+    all_processes = [(pid, process_name.lower()) for (pid, process_name) in all_processes]
+
+    # Find all running interesting processes:
+    #   If a list of process_ids is supplied, match on that.
+    #   Otherwise, do a substring match on interesting_processes.
+    if process_ids:
+        processes = [(pid, pname) for (pid, pname) in all_processes
+                     if pid in process_ids and pid != os.getpid()]
+
+        running_pids = set([pid for (pid, pname) in all_processes])
+        missing_pids = set(process_ids) - running_pids
+        if missing_pids:
+            root_logger.warning("The following requested process ids are not running %s",
+                                list(missing_pids))
+    else:
+        processes = [(pid, pname) for (pid, pname) in all_processes
+                     if (pname_match(True, pname, exact_processes) or pname_match(False, pname, contain_processes)) and pid != os.getpid()]
+
+    root_logger.info("Found %d interesting processes %s", len(processes), processes)
+
+    max_dump_size_bytes = int(options.max_core_dumps_size) * 1024 * 1024
+
+    trapped_exceptions = []
+
+    # Dump all processes.
+    for (pid, process_name) in processes:
+        process_logger = get_process_logger(options.debugger_output, pid, process_name)
+        try:
+            dbg.dump_info(root_logger, process_logger, pid, process_name, options.dump_core
+                          and check_dump_quota(max_dump_size_bytes, dbg.get_dump_ext()))
+        except Exception as err:
+            root_logger.info("Error encountered when invoking debugger %s", err)
+            trapped_exceptions.append(traceback.format_exc())
+
+    root_logger.info("Done analyzing all processes for hangs")
+
+    for exception in trapped_exceptions:
+        root_logger.info(exception)
+    if trapped_exceptions:
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
author	Luke Chen <luke.chen@mongodb.com>	2020-11-13 15:57:42 +1100
committer	Evergreen Agent <no-reply@evergreen.mongodb.com>	2020-11-13 05:15:33 +0000
commit	8bb9ed7824835cdfd146d642468508c01ea087ad (patch)
tree	25118dd8dabdc8e892057067583845f47547aaad /src/third_party
parent	6dc4a4eaf4acc7472590040fa5795901b5140fc5 (diff)
download	mongo-8bb9ed7824835cdfd146d642468508c01ea087ad.tar.gz