summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Juan Gu <juan.gu@mongodb.com> 2022-12-20 23:53:28 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-12-21 01:00:24 +0000
commit: 6cd0e5c322cf8cc1b24722543a0f7e5604f85ed8 (patch)
tree: 4413b687864f6b4e573a101eb4eb47b3fbd14948
parent: 877a8295ec3ccd9df1e4b3b843b33bbdbb761e71 (diff)
download: mongo-6cd0e5c322cf8cc1b24722543a0f7e5604f85ed8.tar.gz
SERVER-70802 Ensure data files are uploaded on Evergreen timeout
-rw-r--r-- buildscripts/resmokelib/hang_analyzer/dumper.py 28
-rw-r--r-- buildscripts/resmokelib/hang_analyzer/process.py 13
2 files changed, 38 insertions, 3 deletions
diff --git a/buildscripts/resmokelib/hang_analyzer/dumper.py b/buildscripts/resmokelib/hang_analyzer/dumper.py
index 2b31514a007..92d58d7d190 100644
--- a/buildscripts/resmokelib/hang_analyzer/dumper.py
+++ b/buildscripts/resmokelib/hang_analyzer/dumper.py
@@ -5,12 +5,14 @@ import logging
import os
import sys
import tempfile
+from datetime import datetime
from abc import ABCMeta, abstractmethod
from collections import namedtuple
from distutils import spawn
from buildscripts.resmokelib.hang_analyzer.process import call, callo, find_program
from buildscripts.resmokelib.hang_analyzer.process_list import Pinfo
+from buildscripts.resmokelib import config as resmoke_config
Dumpers = namedtuple('Dumpers', ['dbg', 'jstack'])
@@ -329,6 +331,20 @@ class LLDBDumper(Dumper):
class GDBDumper(Dumper):
"""GDBDumper class."""
+ def __init__(self, root_logger: logging.Logger, dbg_output: str,
+ timeout_seconds_for_gdb_process=720):
+ """Initialize GDBDumper."""
+ if resmoke_config.EVERGREEN_TASK_ID is None:
+ # Use a 24-hour timeout when the hang analyzer is run locally (outside Evergreen).
+ timeout_seconds_for_gdb_process = 86400
+ # Timeout for the hang analyzer; in Evergreen the default is 12 minutes (out of a total of 15 minutes).
+ self._timeout_seconds_for_gdb_process = timeout_seconds_for_gdb_process
+ super().__init__(root_logger, dbg_output)
+
+ def _reduce_timeout_for_gdb_process(self, timeout_period: int):
+ """Reduce timeout for remaining gdb processes."""
+ self._timeout_seconds_for_gdb_process -= timeout_period
+
def _find_debugger(self, debugger):
"""Find the installed debugger."""
return find_program(debugger, ['/opt/mongodbtoolchain/v4/bin', '/usr/bin'])
@@ -443,12 +459,19 @@ class GDBDumper(Dumper):
debugger = "gdb"
dbg = self._find_debugger(debugger)
logger = _get_process_logger(self._dbg_output, pinfo.name)
+ _start_time = datetime.now()
if dbg is None:
self._root_logger.warning("Debugger %s not found, skipping dumping of %s", debugger,
str(pinfo.pidv))
return
+ if self._timeout_seconds_for_gdb_process <= 0:
+ self._root_logger.warning(
+ "Skipping dumping of %s processes with PIDs %s because the time limit expired",
+ pinfo.name, str(pinfo.pidv))
+ return
+
self._root_logger.info("Debugger %s, analyzing %s processes with PIDs %s", dbg, pinfo.name,
str(pinfo.pidv))
@@ -457,8 +480,11 @@ class GDBDumper(Dumper):
cmds = self._prefix() + self._process_specific(pinfo, take_dump, logger) + self._postfix()
call([dbg, "--quiet", "--nx"] + list(
- itertools.chain.from_iterable([['-ex', b] for b in cmds])), logger)
+ itertools.chain.from_iterable([['-ex', b] for b in cmds])), logger,
+ self._timeout_seconds_for_gdb_process, pinfo)
+ time_period = (datetime.now() - _start_time).total_seconds()
+ self._reduce_timeout_for_gdb_process(time_period)
self._root_logger.info("Done analyzing %s processes with PIDs %s", pinfo.name,
str(pinfo.pidv))
diff --git a/buildscripts/resmokelib/hang_analyzer/process.py b/buildscripts/resmokelib/hang_analyzer/process.py
index 9f40c20fe12..17d8713be2e 100644
--- a/buildscripts/resmokelib/hang_analyzer/process.py
+++ b/buildscripts/resmokelib/hang_analyzer/process.py
@@ -22,7 +22,7 @@ if _IS_WINDOWS:
PROCS_TIMEOUT_SECS = 60
-def call(args, logger):
+def call(args, logger, timeout_seconds=None, pinfo=None):
"""Call subprocess on args list."""
logger.info(str(args))
@@ -31,7 +31,16 @@ def call(args, logger):
logger_pipe = core.pipe.LoggerPipe(logger, logging.INFO, process.stdout)
logger_pipe.wait_until_started()
- ret = process.wait()
+ try:
+ ret = process.wait(timeout=timeout_seconds)
+ except subprocess.TimeoutExpired:
+ logger.error("Killing %s processes with PIDs %s because time limit expired", pinfo.name,
+ str(pinfo.pidv))
+ process.kill()
+ process.wait()
+ logger_pipe.wait_until_finished()
+ return
+
logger_pipe.wait_until_finished()
if ret != 0: