diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2016-12-23 15:03:07 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2016-12-23 15:03:07 +1100 |
commit | 3eaa4ea8d458f1a57d3aac916e2bc8a59450af97 (patch) | |
tree | 1d2130efdc63b40cb8d14b8bd87fbd5799b24985 | |
parent | 0605d628342faaaeb5fea7c6f816dda151c412f4 (diff) | |
download | mongo-3eaa4ea8d458f1a57d3aac916e2bc8a59450af97.tar.gz |
WT-3086 Add information about transaction state to cache stuck diagnostics (#3214)
-rw-r--r-- | src/evict/evict_lru.c | 188 | ||||
-rw-r--r-- | src/include/extern.h | 2 |
2 files changed, 159 insertions, 31 deletions
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 6fa728916de..0a2a9d28402 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -365,7 +365,7 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work) ret = ETIMEDOUT; __wt_err(session, ret, "Cache stuck for too long, giving up"); - WT_TRET(__wt_cache_dump(session, NULL)); + WT_TRET(__wt_dump_stuck_info(session, NULL)); return (ret); } #endif @@ -1974,15 +1974,116 @@ __wt_evict_priority_clear(WT_SESSION_IMPL *session) } #ifdef HAVE_DIAGNOSTIC +static int __dump_txn_state(WT_SESSION_IMPL *, FILE *fp); +static int __dump_cache(WT_SESSION_IMPL *, FILE *fp); /* - * __wt_cache_dump -- - * Dump debugging information to a file (default stderr) about the size of - * the files in the cache. + * __dump_txn_state -- + * Output debugging information about the global transaction state. */ int -__wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) +__dump_txn_state(WT_SESSION_IMPL *session, FILE *fp) +{ + WT_CONNECTION_IMPL *conn; + WT_TXN_GLOBAL *txn_global; + WT_TXN *txn; + WT_TXN_STATE *s; + const char *iso_tag; + uint64_t id; + uint32_t i, session_cnt; + + conn = S2C(session); + txn_global = &conn->txn_global; + WT_ORDERED_READ(session_cnt, conn->session_cnt); + + /* Note: odd string concatenation avoids spelling errors. */ + if (fprintf(fp, "==========\n" "transaction state dump\n") < 0) + return (EIO); + + if (fprintf(fp, + "current ID: %" PRIu64 "\n" + "last running ID: %" PRIu64 "\n" + "oldest ID: %" PRIu64 "\n" + "oldest named snapshot ID: %" PRIu64 "\n", + txn_global->current, txn_global->last_running, + txn_global->oldest_id, txn_global->nsnap_oldest_id) < 0) + return (EIO); + + if (fprintf(fp, + "checkpoint running? %s\n" + "checkpoint generation: %" PRIu64 "\n" + "checkpoint pinned ID: %" PRIu64 "\n" + "checkpoint txn ID: %" PRIu64 "\n" + "session count: %" PRIu32 "\n", + txn_global->checkpoint_running ? "yes" : "no", + txn_global->checkpoint_gen, + txn_global->checkpoint_pinned, + txn_global->checkpoint_txnid, + session_cnt) < 0) + return (EIO); + + if (fprintf(fp, "Dumping transaction state of active sessions\n") < 0) + return (EIO); + + /* + * Walk each session transaction state and dump information. Accessing + * the content of session handles is not thread safe, so some + * information may change while traversing if other threads are active + * at the same time, which is OK since this is diagnostic code. + */ + for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { + /* Skip sessions with no active transaction */ + if ((id = s->id) == WT_TXN_NONE && s->pinned_id == WT_TXN_NONE) + continue; + + txn = &conn->sessions[i].txn; + switch (txn->isolation) { + case WT_ISO_READ_COMMITTED: + iso_tag = "WT_ISO_READ_COMMITTED"; + break; + case WT_ISO_READ_UNCOMMITTED: + iso_tag = "WT_ISO_READ_UNCOMMITTED"; + break; + case WT_ISO_SNAPSHOT: + iso_tag = "WT_ISO_SNAPSHOT"; + break; + default: + iso_tag = "INVALID"; + break; + } + + if (fprintf(fp, + "ID: %6" PRIu64 + ", mod count: %u" + ", pinned ID: %" PRIu64 + ", snap min: %" PRIu64 + ", snap max: %" PRIu64 + ", metadata pinned ID: %" PRIu64 + ", flags: 0x%08" PRIx32 + ", name: %s" + ", isolation: %s" "\n", + id, + txn->mod_count, + s->pinned_id, + txn->snap_min, + txn->snap_max, + s->metadata_pinned, + txn->flags, + conn->sessions[i].name == NULL ? + "EMPTY" : conn->sessions[i].name, + iso_tag) < 0) + return (EIO); + } + + return (0); +} + +/* + * __dump_cache -- + * Output debugging information about the size of the files in cache. + */ +int +__dump_cache(WT_SESSION_IMPL *session, FILE *fp) { - FILE *fp; WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle, *saved_dhandle; WT_PAGE *page; @@ -1997,13 +2098,9 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) conn = S2C(session); total_bytes = total_dirty_bytes = 0; - if (ofile == NULL) - fp = stderr; - else if ((fp = fopen(ofile, "w")) == NULL) - return (EIO); - /* Note: odd string concatenation avoids spelling errors. */ - (void)fprintf(fp, "==========\n" "cache dump\n"); + if (fprintf(fp, "==========\n" "cache dump\n") < 0) + return (EIO); saved_dhandle = session->dhandle; TAILQ_FOREACH(dhandle, &conn->dhqh, q) { @@ -2048,13 +2145,17 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) } session->dhandle = NULL; - if (dhandle->checkpoint == NULL) - (void)fprintf(fp, "%s(<live>): \n", dhandle->name); - else - (void)fprintf(fp, "%s(checkpoint=%s): \n", - dhandle->name, dhandle->checkpoint); - if (intl_pages != 0) - (void)fprintf(fp, + if (dhandle->checkpoint == NULL) { + if (fprintf(fp, + "%s(<live>): \n", dhandle->name) < 0) + return (EIO); + } else { + if (fprintf(fp, "%s(checkpoint=%s): \n", + dhandle->name, dhandle->checkpoint) < 0) + return (EIO); + } + if (intl_pages != 0) { + if (fprintf(fp, "\t" "internal: " "%" PRIu64 " pages, " "%" PRIu64 "MB, " @@ -2069,9 +2170,11 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) (intl_bytes - intl_dirty_bytes) >> 20, intl_dirty_bytes >> 20, intl_bytes_max >> 20, - intl_dirty_bytes_max >> 20); - if (leaf_pages != 0) - (void)fprintf(fp, + intl_dirty_bytes_max >> 20) < 0) + return (EIO); + } + if (leaf_pages != 0) { + if (fprintf(fp, "\t" "leaf: " "%" PRIu64 " pages, " "%" PRIu64 "MB, " @@ -2086,7 +2189,9 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) (leaf_bytes - leaf_dirty_bytes) >> 20, leaf_dirty_bytes >> 20, leaf_bytes_max >> 20, - leaf_dirty_bytes_max >> 20); + leaf_dirty_bytes_max >> 20) < 0) + return (EIO); + } total_bytes += intl_bytes + leaf_bytes; total_dirty_bytes += intl_dirty_bytes + leaf_dirty_bytes; @@ -2099,16 +2204,39 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) */ total_bytes = __wt_cache_bytes_plus_overhead(conn->cache, total_bytes); - (void)fprintf(fp, + if (fprintf(fp, "cache dump: " - "total found = %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB\n" - "total dirty bytes = %" PRIu64 "MB\n", + "total found: %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB\n" + "total dirty bytes: %" PRIu64 "MB\n", total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20, - total_dirty_bytes >> 20); - (void)fprintf(fp, "==========\n"); - - if (ofile != NULL && fclose(fp) != 0) + total_dirty_bytes >> 20) < 0) return (EIO); + if (fprintf(fp, "==========\n") < 0) + return (EIO); + return (0); } + +/* + * __wt_dump_stuck_info -- + * Dump debugging information to a file (default stderr) about the state + * of WiredTiger when we have determined that the cache is stuck full. + */ +int +__wt_dump_stuck_info(WT_SESSION_IMPL *session, const char *ofile) +{ + FILE *fp; + WT_DECL_RET; + + if (ofile == NULL) + fp = stderr; + else if ((fp = fopen(ofile, "w")) == NULL) + return (EIO); + + WT_ERR(__dump_txn_state(session, fp)); + WT_ERR(__dump_cache(session, fp)); +err: if (ofile != NULL && fclose(fp) != 0) + return (EIO); + return (ret); +} #endif diff --git a/src/include/extern.h b/src/include/extern.h index 2fb92c5faf0..bb7fbddcae5 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -352,7 +352,7 @@ extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern int __wt_dump_stuck_info(WT_SESSION_IMPL *session, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); |