summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author jasurbeknurboyev <998946972365> 2022-11-25 17:34:19 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-11-25 18:03:49 +0000
commit 5f7a95abeefa77b1ae9ad9918b3319ec107e2364 (patch)
tree 12b433fdb3942fbecbe9936875fb02e6026ed85c
parent 8a01d8d90871e627134a553611d3f6b2caacf9c4 (diff)
download mongo-5f7a95abeefa77b1ae9ad9918b3319ec107e2364.tar.gz
SERVER-71548 Avoid deleting files after symbolization
-rw-r--r-- buildscripts/resmokelib/testing/symbolizer_service.py 78
-rw-r--r-- buildscripts/tests/resmokelib/testing/test_symbolizer_service.py 50
2 files changed, 86 insertions, 42 deletions
diff --git a/buildscripts/resmokelib/testing/symbolizer_service.py b/buildscripts/resmokelib/testing/symbolizer_service.py
index be4ca070b45..866a4209239 100644
--- a/buildscripts/resmokelib/testing/symbolizer_service.py
+++ b/buildscripts/resmokelib/testing/symbolizer_service.py
@@ -8,7 +8,7 @@ import time
from datetime import timedelta
from threading import Lock
-from typing import List, Optional, NamedTuple
+from typing import List, Optional, NamedTuple, Set
from buildscripts.resmokelib import config as _config
from buildscripts.resmokelib.testing.testcases.interface import TestCase
@@ -19,6 +19,7 @@ _lock = Lock()
STACKTRACE_FILE_EXTENSION = ".stacktrace"
SYMBOLIZE_RETRY_TIMEOUT_SECS = timedelta(minutes=4).total_seconds()
+PROCESSED_FILES_LIST_FILE_PATH = "symbolizer-processed-files.txt" # noqa
class ResmokeSymbolizerConfig(NamedTuple):
@@ -69,7 +70,8 @@ class ResmokeSymbolizer:
)
self.symbolizer_service = symbolizer_service if symbolizer_service is not None else SymbolizerService(
)
- self.file_service = file_service if file_service is not None else FileService()
+ self.file_service = file_service if file_service is not None else FileService(
+ PROCESSED_FILES_LIST_FILE_PATH)
def symbolize_test_logs(self, test: TestCase,
symbolize_retry_timeout: float = SYMBOLIZE_RETRY_TIMEOUT_SECS) -> None:
@@ -107,8 +109,9 @@ class ResmokeSymbolizer:
if time.perf_counter() - start_time > symbolize_retry_timeout:
break
- # To avoid performing the same actions on these files again, we remove them
- self.file_service.remove_all(files)
+ # To avoid performing the same actions on these files again, we mark them as processed
+ self.file_service.add_to_processed_files(files)
+ self.file_service.write_processed_files(PROCESSED_FILES_LIST_FILE_PATH)
test.logger.info("\nEND Symbolization \nSymbolization process completed. ")
@@ -162,8 +165,9 @@ class ResmokeSymbolizer:
files = self.file_service.find_all_children_recursively(dir_path)
files = self.file_service.filter_by_extension(files, STACKTRACE_FILE_EXTENSION)
- self.file_service.remove_empty(files)
+ files = self.file_service.filter_out_empty_files(files)
files = self.file_service.filter_out_non_files(files)
+ files = self.file_service.filter_out_already_processed_files(files)
return files
@@ -171,6 +175,51 @@ class ResmokeSymbolizer:
class FileService:
"""A service for working with files."""
+ def __init__(self, processed_files_list_path: str = PROCESSED_FILES_LIST_FILE_PATH):
+ """Initialize FileService instance."""
+ self._processed_files = self.load_processed_files(processed_files_list_path)
+
+ @staticmethod
+ def load_processed_files(file_path: str) -> Set[str]:
+ """
+ Load processed files info from a file.
+
+ :param file_path: path to a file where we store processed files info.
+ """
+ if os.path.exists(file_path):
+ with open(file_path, "r") as file:
+ return {line for line in set(file.readlines()) if line}
+ return set()
+
+ def add_to_processed_files(self, files: List[str]) -> None:
+ """
+ Bulk add to collection of processed files.
+
+ :param files: files to add to processed files collection
+ :return: None
+ """
+ for file in files:
+ self._processed_files.add(file)
+
+ def write_processed_files(self, file_path: str) -> None:
+ """
+ Write processed files info to a file.
+
+ :param file_path: path to a file where we store processed files info
+ :return: None
+ """
+ with open(file_path, "w") as file:
+ file.write("\n".join(self._processed_files))
+
+ def is_processed(self, file: str) -> bool:
+ """
+ Check if file is already processed or not.
+
+ :param file: file path
+ :return: whether the file is already processed or not
+ """
+ return file in self._processed_files
+
@staticmethod
def find_all_children_recursively(dir_path: str) -> List[str]:
"""
@@ -205,25 +254,24 @@ class FileService:
"""
return [f for f in files if os.path.isfile(f)]
- @staticmethod
- def remove_empty(files: List[str]) -> None:
+ def filter_out_already_processed_files(self, files: List[str]):
"""
- Delete files that are empty.
+ Filter out already processed files.
- :param files: list of paths
+ :param files: list of file paths
+ :return: non-processed files
"""
- for file in [f for f in files if os.stat(f).st_size == 0]:
- os.remove(file)
+ return [f for f in files if not self.is_processed(f)]
@staticmethod
- def remove_all(files: List[str]) -> None:
+ def filter_out_empty_files(files: List[str]) -> List[str]:
"""
- Delete all files.
+ Filter out files that are empty.
:param files: list of paths
+ :return: Non-empty files
"""
- for file in files:
- os.remove(file)
+ return [f for f in files if not os.stat(f).st_size == 0]
@staticmethod
def check_path_exists(path: str) -> bool:
diff --git a/buildscripts/tests/resmokelib/testing/test_symbolizer_service.py b/buildscripts/tests/resmokelib/testing/test_symbolizer_service.py
index 805182442ed..06b0cc9991e 100644
--- a/buildscripts/tests/resmokelib/testing/test_symbolizer_service.py
+++ b/buildscripts/tests/resmokelib/testing/test_symbolizer_service.py
@@ -30,24 +30,26 @@ class TestResmokeSymbolizer(unittest.TestCase):
def test_symbolize_test_logs_process_all_files(self):
stacktrace_files = [f"file{i}.stacktrace" for i in range(5)]
self.file_service_mock.filter_out_non_files.return_value = stacktrace_files
+ self.file_service_mock.filter_out_empty_files.return_value = stacktrace_files
+ self.file_service_mock.filter_out_already_processed_files.return_value = stacktrace_files
self.resmoke_symbolizer.symbolize_test_logs(MagicMock())
self.assertEqual(self.symbolizer_service_mock.run_symbolizer_script.call_count, 5)
for i, call in enumerate(self.symbolizer_service_mock.run_symbolizer_script.call_arg_list):
self.assertEqual(call.args[0], f"file{i}.stacktrace")
- self.file_service_mock.remove_all.assert_called_once_with(stacktrace_files)
def test_symbolize_test_logs_hit_timeout(self):
stacktrace_files = [f"file{i}.stacktrace" for i in range(5)]
self.file_service_mock.filter_out_non_files.return_value = stacktrace_files
+ self.file_service_mock.filter_out_empty_files.return_value = stacktrace_files
+ self.file_service_mock.filter_out_already_processed_files.return_value = stacktrace_files
self.resmoke_symbolizer.symbolize_test_logs(MagicMock(), 0)
self.assertEqual(self.symbolizer_service_mock.run_symbolizer_script.call_count, 1)
for i, call in enumerate(self.symbolizer_service_mock.run_symbolizer_script.call_arg_list):
self.assertEqual(call.args[0], f"file{i}.stacktrace")
- self.file_service_mock.remove_all.assert_called_once_with(stacktrace_files)
def test_symbolize_test_logs_should_not_symbolize(self):
self.config_mock.is_windows.return_value = True
@@ -191,11 +193,10 @@ class TestFileService(unittest.TestCase):
with open(file, "w") as fstream:
fstream.write("stacktrace")
- self.file_service.remove_empty(abs_file_paths)
- for file in abs_file_paths:
- self.assertTrue(os.path.exists(file))
+ self.assertEqual(
+ set(self.file_service.filter_out_empty_files(abs_file_paths)), set(abs_file_paths))
- def test_remove_empty_files_if_partly_empty(self):
+ def test_filter_out_empty_files_if_partly_empty(self):
with TemporaryDirectory() as tmpdir:
abs_dir_paths = [os.path.join(tmpdir, d) for d in self.relative_dir_paths]
abs_file_paths = [os.path.join(tmpdir, f) for f in self.relative_file_paths]
@@ -208,14 +209,14 @@ class TestFileService(unittest.TestCase):
fstream.write("stacktrace")
Path(abs_file_paths[3]).touch()
- self.file_service.remove_empty(abs_file_paths)
+ filtered = self.file_service.filter_out_empty_files(abs_file_paths)
- self.assertTrue(os.path.exists(abs_file_paths[0]))
- self.assertFalse(os.path.exists(abs_file_paths[1]))
- self.assertTrue(os.path.exists(abs_file_paths[2]))
- self.assertFalse(os.path.exists(abs_file_paths[3]))
+ self.assertTrue(abs_file_paths[0] in filtered)
+ self.assertFalse(abs_file_paths[1] in filtered)
+ self.assertTrue(abs_file_paths[2] in filtered)
+ self.assertFalse(abs_file_paths[3] in filtered)
- def test_remove_empty_files_if_all_empty(self):
+ def test_filter_out_empty_files_if_all_empty(self):
with TemporaryDirectory() as tmpdir:
abs_dir_paths = [os.path.join(tmpdir, d) for d in self.relative_dir_paths]
abs_file_paths = [os.path.join(tmpdir, f) for f in self.relative_file_paths]
@@ -224,20 +225,15 @@ class TestFileService(unittest.TestCase):
for file in abs_file_paths:
Path(file).touch()
- self.file_service.remove_empty(abs_file_paths)
- for file in abs_file_paths:
- self.assertFalse(os.path.exists(file))
-
- def test_remove_all_files(self):
- with TemporaryDirectory() as tmpdir:
- abs_dir_paths = [os.path.join(tmpdir, d) for d in self.relative_dir_paths]
- abs_file_paths = [os.path.join(tmpdir, f) for f in self.relative_file_paths]
- for dir_ in abs_dir_paths:
- Path(dir_).mkdir(parents=True)
+ filtered = self.file_service.filter_out_empty_files(abs_file_paths)
for file in abs_file_paths:
- with open(file, "w") as fstream:
- fstream.write("stacktrace")
+ self.assertFalse(file in filtered)
- self.file_service.remove_all(abs_file_paths)
- for file in abs_file_paths:
- self.assertFalse(os.path.exists(file))
+ def test_filter_out_already_processed_files(self):
+ processed_files = ["processed-file.stacktrace"]
+ files = [
+ "file.stacktrace", "other-file.stacktrace", "another-file.stacktrace", *processed_files
+ ]
+ self.file_service.add_to_processed_files(processed_files)
+ filtered = self.file_service.filter_out_already_processed_files(files)
+ self.assertTrue(all(file not in processed_files for file in filtered))