From 7a70b5a8a8d55c969c00a4b0b2dc44f411cd043d Mon Sep 17 00:00:00 2001 From: Gguidini Date: Thu, 5 Oct 2023 18:43:28 -0300 Subject: [PATCH] fix: better handle missing static analysis info Fix codecov/engineering-team#650 Recently we made changes to static analysis process of the CLI so it doesn't fails completely if there are issues analysing a file or uploading it to GCS. I think this inadvertedly broke the expectation of label analysis that all files in the static analysis are uploaded and ready to be used. Or some other misterious bug is causing some snapshots to be missing This is causing issues that the label analysis is not processing correctly. So we are now guarding against missing snapshots in HEAD or BASE. Sadly for ats, it only as good as the data it has. And being conservative (and safe) is important to us. So if we can't make an educated guess about a change, the procedure so far is to just run everything. That's the fallback, assume the entire file is compromised and needs to be tested again closes codecov/engineering-team#650 --- services/static_analysis/__init__.py | 11 ++ .../unit/test_static_analysis_comparison.py | 125 +++++++++++++++++- 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/services/static_analysis/__init__.py b/services/static_analysis/__init__.py index f1c80fed9..a9672e0d4 100644 --- a/services/static_analysis/__init__.py +++ b/services/static_analysis/__init__.py @@ -132,6 +132,17 @@ def _analyze_single_change( ) if not head_analysis_file_data and not base_analysis_file_data: return None + if head_analysis_file_data is None or base_analysis_file_data is None: + log.warning( + "Failed to load snapshot for file. Fallback to all lines in the file", + extra=dict( + file_path=change.after_filepath, + is_missing_head=(head_analysis_file_data is None), + is_missing_base=(base_analysis_file_data is None), + ), + ) + return {"all": True, "lines": None} + for base_line in change.lines_only_on_base: corresponding_exec_line = ( base_analysis_file_data.get_corresponding_executable_line(base_line) diff --git a/services/static_analysis/tests/unit/test_static_analysis_comparison.py b/services/static_analysis/tests/unit/test_static_analysis_comparison.py index 1cf106c53..c14f519e6 100644 --- a/services/static_analysis/tests/unit/test_static_analysis_comparison.py +++ b/services/static_analysis/tests/unit/test_static_analysis_comparison.py @@ -515,7 +515,7 @@ def test_analyze_single_change_base_change(self, dbsession, mock_storage): ( 10, { - "len": 1, + "len": 0, "extra_connected_lines": [20], }, ), @@ -574,6 +574,18 @@ def test_analyze_single_change_base_change(self, dbsession, mock_storage): changed_snapshot_base.content_location, changed_snapshot_head.content_location, ) == {"all": True, "lines": None} + assert service._analyze_single_change( + dbsession, + DiffChange( + before_filepath="path/changed.py", + after_filepath="path/changed.py", + change_type=DiffChangeType.modified, + lines_only_on_base=[], + lines_only_on_head=[11], + ), + changed_snapshot_base.content_location, + changed_snapshot_head.content_location, + ) == {"all": False, "lines": {10}} assert service._analyze_single_change( dbsession, DiffChange( @@ -587,6 +599,117 @@ def test_analyze_single_change_base_change(self, dbsession, mock_storage): changed_snapshot_head.content_location, ) == {"all": False, "lines": set()} + def test_analyze_single_change_base_change_missing_head_snapshot( + self, dbsession, mock_storage + ): + repository = RepositoryFactory.create() + dbsession.add(repository) + dbsession.flush() + changed_snapshot_base = StaticAnalysisSingleFileSnapshotFactory.create( + repository=repository + ) + changed_snapshot_head = StaticAnalysisSingleFileSnapshotFactory.create( + repository=repository + ) + dbsession.add_all( + [ + changed_snapshot_base, + changed_snapshot_head, + ] + ) + dbsession.flush() + mock_storage.write_file( + "archive", + changed_snapshot_base.content_location, + json.dumps( + { + "functions": [ + { + "identifier": "banana_function", + "start_line": 3, + "end_line": 8, + } + ], + "statements": [ + ( + 1, + { + "len": 0, + "line_surety_ancestorship": None, + "extra_connected_lines": [], + }, + ), + ( + 2, + { + "len": 0, + "line_surety_ancestorship": 1, + "extra_connected_lines": [], + }, + ), + ], + } + ), + ) + head_static_analysis = StaticAnalysisSuiteFactory.create( + commit__repository=repository + ) + base_static_analysis = StaticAnalysisSuiteFactory.create( + commit__repository=repository + ) + dbsession.add(head_static_analysis) + dbsession.add(base_static_analysis) + dbsession.flush() + service = StaticAnalysisComparisonService( + base_static_analysis=base_static_analysis, + head_static_analysis=head_static_analysis, + git_diff=[ + DiffChange( + before_filepath="path/changed.py", + after_filepath="path/changed.py", + change_type=DiffChangeType.modified, + lines_only_on_base=[], + lines_only_on_head=[20], + ), + ], + ) + assert service._analyze_single_change( + dbsession, + DiffChange( + before_filepath="path/changed.py", + after_filepath="path/changed.py", + change_type=DiffChangeType.modified, + lines_only_on_base=[], + lines_only_on_head=[20], + ), + changed_snapshot_base.content_location, + changed_snapshot_head.content_location, + ) == {"all": True, "lines": None} + assert service._analyze_single_change( + dbsession, + DiffChange( + before_filepath="path/changed.py", + after_filepath="path/changed.py", + change_type=DiffChangeType.modified, + lines_only_on_base=[], + lines_only_on_head=[11], + ), + changed_snapshot_base.content_location, + changed_snapshot_head.content_location, + ) == {"all": True, "lines": None} + assert service._analyze_single_change( + dbsession, + DiffChange( + before_filepath="path/changed.py", + after_filepath="path/changed.py", + change_type=DiffChangeType.modified, + lines_only_on_base=[], + lines_only_on_head=[99, 100], + ), + changed_snapshot_base.content_location, + changed_snapshot_head.content_location, + ) == {"all": True, "lines": None} + def test_analyze_single_change_function_based(self, dbsession, mock_storage): repository = RepositoryFactory.create() dbsession.add(repository)