diff options
author | jasurbeknurboyev <998946972365> | 2022-11-25 17:34:19 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-11-25 18:03:49 +0000 |
commit | 5f7a95abeefa77b1ae9ad9918b3319ec107e2364 (patch) | |
tree | 12b433fdb3942fbecbe9936875fb02e6026ed85c | |
parent | 8a01d8d90871e627134a553611d3f6b2caacf9c4 (diff) | |
download | mongo-5f7a95abeefa77b1ae9ad9918b3319ec107e2364.tar.gz |
SERVER-71548 Avoid deleting files after symbolization
-rw-r--r-- | buildscripts/resmokelib/testing/symbolizer_service.py | 78 | ||||
-rw-r--r-- | buildscripts/tests/resmokelib/testing/test_symbolizer_service.py | 50 |
2 files changed, 86 insertions, 42 deletions
diff --git a/buildscripts/resmokelib/testing/symbolizer_service.py b/buildscripts/resmokelib/testing/symbolizer_service.py index be4ca070b45..866a4209239 100644 --- a/buildscripts/resmokelib/testing/symbolizer_service.py +++ b/buildscripts/resmokelib/testing/symbolizer_service.py @@ -8,7 +8,7 @@ import time from datetime import timedelta from threading import Lock -from typing import List, Optional, NamedTuple +from typing import List, Optional, NamedTuple, Set from buildscripts.resmokelib import config as _config from buildscripts.resmokelib.testing.testcases.interface import TestCase @@ -19,6 +19,7 @@ _lock = Lock() STACKTRACE_FILE_EXTENSION = ".stacktrace" SYMBOLIZE_RETRY_TIMEOUT_SECS = timedelta(minutes=4).total_seconds() +PROCESSED_FILES_LIST_FILE_PATH = "symbolizer-processed-files.txt" # noqa class ResmokeSymbolizerConfig(NamedTuple): @@ -69,7 +70,8 @@ class ResmokeSymbolizer: ) self.symbolizer_service = symbolizer_service if symbolizer_service is not None else SymbolizerService( ) - self.file_service = file_service if file_service is not None else FileService() + self.file_service = file_service if file_service is not None else FileService( + PROCESSED_FILES_LIST_FILE_PATH) def symbolize_test_logs(self, test: TestCase, symbolize_retry_timeout: float = SYMBOLIZE_RETRY_TIMEOUT_SECS) -> None: @@ -107,8 +109,9 @@ class ResmokeSymbolizer: if time.perf_counter() - start_time > symbolize_retry_timeout: break - # To avoid performing the same actions on these files again, we remove them - self.file_service.remove_all(files) + # To avoid performing the same actions on these files again, we mark them as processed + self.file_service.add_to_processed_files(files) + self.file_service.write_processed_files(PROCESSED_FILES_LIST_FILE_PATH) test.logger.info("\nEND Symbolization \nSymbolization process completed. ") @@ -162,8 +165,9 @@ class ResmokeSymbolizer: files = self.file_service.find_all_children_recursively(dir_path) files = self.file_service.filter_by_extension(files, STACKTRACE_FILE_EXTENSION) - self.file_service.remove_empty(files) + files = self.file_service.filter_out_empty_files(files) files = self.file_service.filter_out_non_files(files) + files = self.file_service.filter_out_already_processed_files(files) return files @@ -171,6 +175,51 @@ class ResmokeSymbolizer: class FileService: """A service for working with files.""" + def __init__(self, processed_files_list_path: str = PROCESSED_FILES_LIST_FILE_PATH): + """Initialize FileService instance.""" + self._processed_files = self.load_processed_files(processed_files_list_path) + + @staticmethod + def load_processed_files(file_path: str) -> Set[str]: + """ + Load processed files info from a file. + + :param: path to a file where we store processed files info. + """ + if os.path.exists(file_path): + with open(file_path, "r") as file: + return {line for line in set(file.readlines()) if line} + return set() + + def add_to_processed_files(self, files: List[str]) -> None: + """ + Bulk add to collection of processed files. + + :param files: files to add to processed files collection + :return: None + """ + for file in files: + self._processed_files.add(file) + + def write_processed_files(self, file_path: str) -> None: + """ + Write processed files info to a file. + + :param file_path: path to a file where we store processed files info + :return: None + """ + with open(file_path, "w") as file: + file.write("\n".join(self._processed_files)) + + def is_processed(self, file: str) -> bool: + """ + Check if file is already processed or not. + + :param file: file path + :return: whether the file is already processed or not + """ + return file in self._processed_files + @staticmethod def find_all_children_recursively(dir_path: str) -> List[str]: """ @@ -205,25 +254,24 @@ class FileService: """ return [f for f in files if os.path.isfile(f)] - @staticmethod - def remove_empty(files: List[str]) -> None: + def filter_out_already_processed_files(self, files: List[str]): """ - Delete files that are empty. + Filter out already processed files. - :param files: list of paths + :param files: list of file paths + :return: non-processed files """ - for file in [f for f in files if os.stat(f).st_size == 0]: - os.remove(file) + return [f for f in files if not self.is_processed(f)] @staticmethod - def remove_all(files: List[str]) -> None: + def filter_out_empty_files(files: List[str]) -> List[str]: """ - Delete all files. + Filter our files that are empty. :param files: list of paths + :return: Non-empty files """ - for file in files: - os.remove(file) + return [f for f in files if not os.stat(f).st_size == 0] @staticmethod def check_path_exists(path: str) -> bool: diff --git a/buildscripts/tests/resmokelib/testing/test_symbolizer_service.py b/buildscripts/tests/resmokelib/testing/test_symbolizer_service.py index 805182442ed..06b0cc9991e 100644 --- a/buildscripts/tests/resmokelib/testing/test_symbolizer_service.py +++ b/buildscripts/tests/resmokelib/testing/test_symbolizer_service.py @@ -30,24 +30,26 @@ class TestResmokeSymbolizer(unittest.TestCase): def test_symbolize_test_logs_process_all_files(self): stacktrace_files = [f"file{i}.stacktrace" for i in range(5)] self.file_service_mock.filter_out_non_files.return_value = stacktrace_files + self.file_service_mock.filter_out_empty_files.return_value = stacktrace_files + self.file_service_mock.filter_out_already_processed_files.return_value = stacktrace_files self.resmoke_symbolizer.symbolize_test_logs(MagicMock()) self.assertEqual(self.symbolizer_service_mock.run_symbolizer_script.call_count, 5) for i, call in enumerate(self.symbolizer_service_mock.run_symbolizer_script.call_arg_list): self.assertEqual(call.args[0], f"file{i}.stacktrace") - self.file_service_mock.remove_all.assert_called_once_with(stacktrace_files) def test_symbolize_test_logs_hit_timeout(self): stacktrace_files = [f"file{i}.stacktrace" for i in range(5)] self.file_service_mock.filter_out_non_files.return_value = stacktrace_files + self.file_service_mock.filter_out_empty_files.return_value = stacktrace_files + self.file_service_mock.filter_out_already_processed_files.return_value = stacktrace_files self.resmoke_symbolizer.symbolize_test_logs(MagicMock(), 0) self.assertEqual(self.symbolizer_service_mock.run_symbolizer_script.call_count, 1) for i, call in enumerate(self.symbolizer_service_mock.run_symbolizer_script.call_arg_list): self.assertEqual(call.args[0], f"file{i}.stacktrace") - self.file_service_mock.remove_all.assert_called_once_with(stacktrace_files) def test_symbolize_test_logs_should_not_symbolize(self): self.config_mock.is_windows.return_value = True @@ -191,11 +193,10 @@ class TestFileService(unittest.TestCase): with open(file, "w") as fstream: fstream.write("stacktrace") - self.file_service.remove_empty(abs_file_paths) - for file in abs_file_paths: - self.assertTrue(os.path.exists(file)) + self.assertEqual( + set(self.file_service.filter_out_empty_files(abs_file_paths)), set(abs_file_paths)) - def test_remove_empty_files_if_partly_empty(self): + def test_filter_out_empty_files_if_partly_empty(self): with TemporaryDirectory() as tmpdir: abs_dir_paths = [os.path.join(tmpdir, d) for d in self.relative_dir_paths] abs_file_paths = [os.path.join(tmpdir, f) for f in self.relative_file_paths] @@ -208,14 +209,14 @@ class TestFileService(unittest.TestCase): fstream.write("stacktrace") Path(abs_file_paths[3]).touch() - self.file_service.remove_empty(abs_file_paths) + filtered = self.file_service.filter_out_empty_files(abs_file_paths) - self.assertTrue(os.path.exists(abs_file_paths[0])) - self.assertFalse(os.path.exists(abs_file_paths[1])) - self.assertTrue(os.path.exists(abs_file_paths[2])) - self.assertFalse(os.path.exists(abs_file_paths[3])) + self.assertTrue(abs_file_paths[0] in filtered) + self.assertFalse(abs_file_paths[1] in filtered) + self.assertTrue(abs_file_paths[2] in filtered) + self.assertFalse(abs_file_paths[3] in filtered) - def test_remove_empty_files_if_all_empty(self): + def test_filter_out_empty_files_if_all_empty(self): with TemporaryDirectory() as tmpdir: abs_dir_paths = [os.path.join(tmpdir, d) for d in self.relative_dir_paths] abs_file_paths = [os.path.join(tmpdir, f) for f in self.relative_file_paths] @@ -224,20 +225,15 @@ class TestFileService(unittest.TestCase): for file in abs_file_paths: Path(file).touch() - self.file_service.remove_empty(abs_file_paths) - for file in abs_file_paths: - self.assertFalse(os.path.exists(file)) - - def test_remove_all_files(self): - with TemporaryDirectory() as tmpdir: - abs_dir_paths = [os.path.join(tmpdir, d) for d in self.relative_dir_paths] - abs_file_paths = [os.path.join(tmpdir, f) for f in self.relative_file_paths] - for dir_ in abs_dir_paths: - Path(dir_).mkdir(parents=True) + filtered = self.file_service.filter_out_empty_files(abs_file_paths) for file in abs_file_paths: - with open(file, "w") as fstream: - fstream.write("stacktrace") + self.assertFalse(file in filtered) - self.file_service.remove_all(abs_file_paths) - for file in abs_file_paths: - self.assertFalse(os.path.exists(file)) + def test_filter_out_already_processed_files(self): + processed_files = ["processed-file.stacktrace"] + files = [ + "file.stacktrace", "other-file.stacktrace", "another-file.stacktrace", *processed_files + ] + self.file_service.add_to_processed_files(processed_files) + filtered = self.file_service.filter_out_already_processed_files(files) + self.assertTrue(all(file not in processed_files for file in filtered)) |