diff --git a/.github/workflows/ossf-scorecard.yml b/.github/workflows/ossf-scorecard.yml index 76a762a0..f3f9270a 100644 --- a/.github/workflows/ossf-scorecard.yml +++ b/.github/workflows/ossf-scorecard.yml @@ -49,7 +49,13 @@ jobs: - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: ossf-scorecard-results - path: scorecard-sarif + path: scorecard-artifact + skip-decompress: true + - name: Safely extract Scorecard SARIF artifact + run: >- + python3 scripts/checks/extract_scorecard_artifact.py + scorecard-artifact + scorecard-sarif - name: Normalize repository-level Scorecard SARIF locations run: >- python3 scripts/checks/normalize_scorecard_sarif.py diff --git a/scripts/checks/extract_scorecard_artifact.py b/scripts/checks/extract_scorecard_artifact.py new file mode 100644 index 00000000..3f0f9da2 --- /dev/null +++ b/scripts/checks/extract_scorecard_artifact.py @@ -0,0 +1,130 @@ +"""Safely extract the OSSF Scorecard SARIF artifact downloaded as a ZIP.""" + +from __future__ import annotations + +import argparse +import os +import stat +import zipfile +from pathlib import Path +from typing import IO + +EXPECTED_MEMBER = "results.sarif" +MAX_SARIF_BYTES = 10 * 1024 * 1024 +READ_CHUNK_BYTES = 64 * 1024 + + +def resolve_artifact_zip(source: Path) -> Path: + """Return the artifact ZIP file from a file path or single-ZIP directory.""" + if source.is_file(): + ensure_non_symlink_path(source, path_kind="artifact path") + return source + if not source.is_dir(): + raise ValueError(f"artifact source does not exist: {source}") + ensure_non_symlink_path(source, path_kind="artifact path") + candidates: list[Path] = [] + for path in sorted( + candidate for candidate in source.iterdir() if candidate.suffix == ".zip" + ): + ensure_non_symlink_path(path, path_kind="artifact path") + candidates.append(path) + if len(candidates) != 1: + raise ValueError( + f"expected exactly one Scorecard artifact zip in {source}, found {len(candidates)}" + ) + return candidates[0] + + +def validate_member(member: zipfile.ZipInfo) -> None: + """Reject unexpected or unsafe ZIP members.""" + member_path = Path(member.filename) + unix_mode = member.external_attr >> 16 + if ( + member.filename != EXPECTED_MEMBER + or member_path.is_absolute() + or ".." in member_path.parts + or member.is_dir() + or stat.S_ISLNK(unix_mode) + ): + raise ValueError(f"unexpected artifact member: {member.filename}") + if member.file_size > MAX_SARIF_BYTES: + raise ValueError(f"artifact member too large: {member.filename}") + + +def ensure_non_symlink_path(path: Path, *, path_kind: str = "output path") -> None: + """Raise when any existing component in ``path`` is a symlink.""" + absolute_path = path.absolute() + existing_components = [absolute_path] + existing_components.extend(absolute_path.parents) + for component in reversed(existing_components): + try: + metadata = os.lstat(component) + except FileNotFoundError: + continue + if stat.S_ISLNK(metadata.st_mode): + raise ValueError(f"symlinked {path_kind} is not allowed: {component}") + + +def write_new_file_without_following_symlinks( + target: Path, source_file: IO[bytes] +) -> None: + """Stream-write to a new file without following an existing symlink.""" + flags = os.O_CREAT | os.O_EXCL | os.O_WRONLY + if hasattr(os, "O_NOFOLLOW"): + flags |= os.O_NOFOLLOW + fd = os.open(target, flags, 0o600) + written = 0 + try: + with os.fdopen(fd, "wb") as target_file: + while chunk := source_file.read(READ_CHUNK_BYTES): + written += len(chunk) + if written > MAX_SARIF_BYTES: + raise ValueError("artifact member too large") + target_file.write(chunk) + except Exception: + target.unlink(missing_ok=True) + raise + + +def extract_scorecard_artifact(source: Path, output_dir: Path) -> Path: + """Extract exactly ``results.sarif`` into ``output_dir`` and return its path.""" + artifact_zip = resolve_artifact_zip(source) + with zipfile.ZipFile(artifact_zip) as archive: + members = archive.infolist() + for member in members: + validate_member(member) + if [member.filename for member in members] != [EXPECTED_MEMBER]: + raise ValueError("expected only results.sarif in Scorecard artifact") + member = members[0] + ensure_non_symlink_path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + ensure_non_symlink_path(output_dir) + target = output_dir / EXPECTED_MEMBER + with archive.open(member) as source_file: + write_new_file_without_following_symlinks(target, source_file) + return target + + +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" + parser = argparse.ArgumentParser( + description="Safely extract a zipped OSSF Scorecard SARIF artifact." + ) + parser.add_argument( + "source", + type=Path, + help="Artifact ZIP file or directory containing exactly one artifact ZIP", + ) + parser.add_argument("output_dir", type=Path, help="Directory for results.sarif") + return parser.parse_args() + + +def main() -> None: + """Run the extractor from the command line.""" + args = parse_args() + extracted = extract_scorecard_artifact(args.source, args.output_dir) + print(f"Extracted OSSF Scorecard SARIF to {extracted}") + + +if __name__ == "__main__": + main() diff --git a/scripts/checks/verify_supply_chain.py b/scripts/checks/verify_supply_chain.py index b4276355..7396c325 100644 --- a/scripts/checks/verify_supply_chain.py +++ b/scripts/checks/verify_supply_chain.py @@ -45,6 +45,11 @@ "ossf scorecard publishing job must only contain uses steps; split run steps " "into a separate non-publishing job" ) +OSSF_DOWNLOAD_DECOMPRESSION_VIOLATION = ( + "ossf scorecard artifact download must use skip-decompress: true and " + "repo-owned extraction before normalization" +) +OSSF_ARTIFACT_EXTRACTOR = "scripts/checks/extract_scorecard_artifact.py" OSSF_SARIF_NORMALIZER = "scripts/checks/normalize_scorecard_sarif.py" OSSF_NORMALIZED_SARIF = "normalized-scorecard-results.sarif" OSSF_NORMALIZED_SARIF_UPLOAD = f"sarif_file: {OSSF_NORMALIZED_SARIF}" @@ -107,6 +112,84 @@ "--title", } RELEASE_CREATE_ALLOWED_ASSET_TOKENS = {"${release_assets[@]}", "${release_assets[*]}"} +WorkflowStepBlock = tuple[int, int, list[str]] + + +def workflow_step_blocks(lines: list[str]) -> list[WorkflowStepBlock]: + """Return YAML step blocks nested under a workflow ``steps:`` parent.""" + step_blocks: list[WorkflowStepBlock] = [] + for index, line in enumerate(lines): + stripped = line.strip() + if not stripped.startswith("- "): + continue + step_indent = len(line) - len(line.lstrip(" ")) + if step_indent < 6: + continue + has_steps_parent = False + for previous_line in reversed(lines[:index]): + previous_stripped = previous_line.strip().partition("#")[0].strip() + previous_indent = len(previous_line) - len(previous_line.lstrip(" ")) + if previous_indent >= step_indent: + continue + if previous_stripped == "steps:": + has_steps_parent = True + break + if not has_steps_parent: + continue + step_lines = [line] + for following_line in lines[index + 1 :]: + following_stripped = following_line.strip() + following_indent = len(following_line) - len(following_line.lstrip(" ")) + if following_stripped.startswith("- ") and following_indent <= step_indent: + break + step_lines.append(following_line) + step_blocks.append((index, step_indent, step_lines)) + return step_blocks + + +def workflow_job_content_for_step(lines: list[str], line_index: int) -> str: + """Return the workflow job block containing ``line_index``.""" + job_start = 0 + for reverse_index in range(line_index, -1, -1): + candidate = lines[reverse_index] + candidate_without_comment = candidate.strip().partition("#")[0].strip() + if len(candidate) - len( + candidate.lstrip(" ") + ) == 2 and candidate_without_comment.endswith(":"): + job_start = reverse_index + break + job_end = len(lines) + for forward_index in range(job_start + 1, len(lines)): + candidate = lines[forward_index] + candidate_without_comment = candidate.strip().partition("#")[0].strip() + if len(candidate) - len( + candidate.lstrip(" ") + ) == 2 and candidate_without_comment.endswith(":"): + job_end = forward_index + break + return "\n".join(lines[job_start:job_end]) + + +def step_run_command_from_block(step_lines: list[str], step_indent: int) -> str: + """Return a workflow step run command with comments and YAML wrappers removed.""" + run_indent: int | None = None + command_lines: list[str] = [] + for step_line in step_lines: + raw_stripped = step_line.strip().partition("#")[0].strip() + stripped = raw_stripped + is_step_start = stripped.startswith("- ") + if is_step_start: + stripped = stripped[2:].strip() + indent = len(step_line) - len(step_line.lstrip(" ")) + if run_indent is None: + if stripped.startswith("run:") and (indent > step_indent or is_step_start): + run_indent = indent + command_lines.append(stripped.partition(":")[2].strip()) + continue + if stripped and indent <= run_indent: + break + command_lines.append(stripped) + return "\n".join(command_lines) def logical_workflow_lines(content: str) -> list[tuple[int, str]]: @@ -379,26 +462,6 @@ def upload_step_sarif_file(step_lines: list[str], step_indent: int) -> str | Non return stripped.partition(":")[2].partition("#")[0].strip().strip("'\"") return None - def step_run_command(step_lines: list[str], step_indent: int) -> str: - run_indent: int | None = None - command_lines: list[str] = [] - for step_line in step_lines: - raw_stripped = step_line.strip().partition("#")[0].strip() - stripped = raw_stripped - is_step_start = stripped.startswith("- ") - if is_step_start: - stripped = stripped[2:].strip() - indent = len(step_line) - len(step_line.lstrip(" ")) - if run_indent is None: - if stripped.startswith("run:") and (indent > step_indent or is_step_start): - run_indent = indent - command_lines.append(stripped.partition(":")[2].strip()) - continue - if stripped and indent <= run_indent: - break - command_lines.append(stripped) - return "\n".join(command_lines) - def normalizer_output_file(command: str) -> str | None: try: tokens = shlex.split(command) @@ -418,51 +481,17 @@ def normalizer_output_file(command: str) -> str | None: return None return positional_args[1] - def workflow_job_content(line_index: int) -> str: - job_start = 0 - for reverse_index in range(line_index, -1, -1): - candidate = lines[reverse_index] - candidate_without_comment = candidate.strip().partition("#")[0].strip() - if len(candidate) - len( - candidate.lstrip(" ") - ) == 2 and candidate_without_comment.endswith(":"): - job_start = reverse_index - break - job_end = len(lines) - for forward_index in range(job_start + 1, len(lines)): - candidate = lines[forward_index] - candidate_without_comment = candidate.strip().partition("#")[0].strip() - if len(candidate) - len( - candidate.lstrip(" ") - ) == 2 and candidate_without_comment.endswith(":"): - job_end = forward_index - break - return "\n".join(lines[job_start:job_end]) - def workflow_job_step_blocks(line_index: int) -> list[tuple[int, int, list[str]]]: - job_content = workflow_job_content(line_index) + job_content = workflow_job_content_for_step(lines, line_index) return [ block for block in step_blocks - if workflow_job_content(block[0]) == job_content + if workflow_job_content_for_step(lines, block[0]) == job_content ] lines = content.splitlines() - step_blocks: list[tuple[int, int, list[str]]] = [] - for index, line in enumerate(lines): - stripped = line.strip() - if not stripped.startswith("- "): - continue - step_indent = len(line) - len(line.lstrip(" ")) - step_lines = [line] - for following_line in lines[index + 1 :]: - following_stripped = following_line.strip() - following_indent = len(following_line) - len(following_line.lstrip(" ")) - if following_stripped.startswith("- ") and following_indent <= step_indent: - break - step_lines.append(following_line) - step_blocks.append((index, step_indent, step_lines)) + step_blocks = workflow_step_blocks(lines) violations: list[str] = [] for index, step_indent, step_lines in step_blocks: @@ -471,13 +500,13 @@ def workflow_job_step_blocks(line_index: int) -> list[tuple[int, int, list[str]] ): continue sarif_file = upload_step_sarif_file(step_lines, step_indent) - job_content = workflow_job_content(index) + job_content = workflow_job_content_for_step(lines, index) job_content_without_comments = "\n".join( line.partition("#")[0] for line in job_content.splitlines() ) job_blocks = workflow_job_step_blocks(index) normalizer_run_commands = [ - step_run_command(normalizer_step_lines, normalizer_step_indent) + step_run_command_from_block(normalizer_step_lines, normalizer_step_indent) for _, normalizer_step_indent, normalizer_step_lines in job_blocks ] normalizer_outputs = { @@ -517,6 +546,92 @@ def workflow_job_step_blocks(line_index: int) -> list[tuple[int, int, list[str]] return violations +def scorecard_artifact_download_decompression_violations(content: str) -> list[str]: + """Return Scorecard downloads that rely on action-owned ZIP decompression.""" + content_without_comments = "\n".join( + line.partition("#")[0] for line in content.splitlines() + ) + if "actions/download-artifact" not in content_without_comments: + return [] + if "ossf-scorecard-results" not in content_without_comments: + return [] + + lines = content.splitlines() + step_blocks = workflow_step_blocks(lines) + + def invokes_scorecard_extractor(command: str) -> bool: + try: + tokens = shlex.split(command) + except ValueError: + tokens = re.split(r"\s+", command) + cleaned_tokens = [token.strip("'\"") for token in tokens if token.strip("'\"")] + if cleaned_tokens and cleaned_tokens[0] in {">", ">-", "|", "|-"}: + cleaned_tokens = cleaned_tokens[1:] + return ( + len(cleaned_tokens) == 4 + and cleaned_tokens[0] in {"python", "python3"} + and cleaned_tokens[1] == OSSF_ARTIFACT_EXTRACTOR + and cleaned_tokens[2] == "scorecard-artifact" + and cleaned_tokens[3] == "scorecard-sarif" + ) + + violations: list[str] = [] + for index, _, step_lines in step_blocks: + step_content = "\n".join(line.partition("#")[0] for line in step_lines) + if "actions/download-artifact" not in step_content: + continue + if "ossf-scorecard-results" not in step_content: + continue + if "skip-decompress: true" not in step_content: + violations.append(OSSF_DOWNLOAD_DECOMPRESSION_VIOLATION) + continue + + job_content = workflow_job_content_for_step(lines, index) + job_step_blocks = [ + block + for block in step_blocks + if workflow_job_content_for_step(lines, block[0]) == job_content + ] + later_steps = [ + (block_indent, block_lines) + for block_index, block_indent, block_lines in job_step_blocks + if block_index > index + ] + extractor_step_position = next( + ( + position + for position, (block_indent, block_lines) in enumerate(later_steps) + if invokes_scorecard_extractor( + step_run_command_from_block(block_lines, block_indent) + ) + ), + None, + ) + normalizer_step_position = next( + ( + position + for position, (block_indent, block_lines) in enumerate(later_steps) + if ( + OSSF_SARIF_NORMALIZER + in step_run_command_from_block(block_lines, block_indent) + ) + ), + None, + ) + if extractor_step_position is None: + violations.append(OSSF_DOWNLOAD_DECOMPRESSION_VIOLATION) + continue + if ( + normalizer_step_position is not None + and extractor_step_position > normalizer_step_position + ): + violations.append(OSSF_DOWNLOAD_DECOMPRESSION_VIOLATION) + continue + if violations: + return [OSSF_DOWNLOAD_DECOMPRESSION_VIOLATION] + return [] + + def verify_workflow_coverage() -> list[str]: """Return workflow trigger and artifact coverage violations.""" missing: list[str] = [] @@ -628,6 +743,9 @@ def verify_workflow_coverage() -> list[str]: missing.extend( scorecard_sarif_upload_normalization_violations(workflow_content) ) + missing.extend( + scorecard_artifact_download_decompression_violations(workflow_content) + ) missing.extend( ossf_scorecard_publish_restriction_violations( workflow_content, workflow_path diff --git a/services/analysis-engine/tests/test_supply_chain_policy.py b/services/analysis-engine/tests/test_supply_chain_policy.py index 968093c0..7799aec2 100644 --- a/services/analysis-engine/tests/test_supply_chain_policy.py +++ b/services/analysis-engine/tests/test_supply_chain_policy.py @@ -5,6 +5,8 @@ import importlib import json import re +import stat +import zipfile from pathlib import Path import pytest @@ -1031,6 +1033,382 @@ def test_supply_chain_check_rejects_echo_spoofed_normalizer_command( ) in violations +def test_supply_chain_check_requires_scorecard_download_without_action_decompression( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure Scorecard downloads avoid action-owned legacy decompression paths.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_download_decompression_guard", + ) + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on: push", + "jobs:", + " scorecard-sarif-upload:", + " steps:", + " - uses: ", + " actions/download-artifact@" + "3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1", + " with:", + " name: ossf-scorecard-results", + " path: scorecard-sarif", + " - name: Normalize repository-level Scorecard SARIF locations", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py", + " scorecard-sarif/results.sarif", + " normalized-scorecard-results.sarif", + " - uses: ", + " github/codeql-action/upload-sarif@" + "95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: normalized-scorecard-results.sarif", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard artifact download must use skip-decompress: true and " + "repo-owned extraction before normalization" + ) in violations + + +def test_supply_chain_check_rejects_commented_scorecard_decompression_tokens( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure comments cannot spoof the Scorecard artifact extraction guard.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_download_comment_spoof_guard", + ) + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on: push", + "jobs:", + " scorecard-sarif-upload:", + " steps:", + " # skip-decompress: true", + " # python3 scripts/checks/extract_scorecard_artifact.py", + " # scorecard-artifact scorecard-sarif", + " - uses: actions/download-artifact@" + "3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1", + " with:", + " name: ossf-scorecard-results", + " path: scorecard-sarif", + " - name: Normalize repository-level Scorecard SARIF locations", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py", + " scorecard-sarif/results.sarif", + " normalized-scorecard-results.sarif", + " - uses: github/codeql-action/upload-sarif@" + "95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: normalized-scorecard-results.sarif", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard artifact download must use skip-decompress: true and " + "repo-owned extraction before normalization" + ) in violations + + +def test_supply_chain_check_rejects_echo_spoofed_scorecard_extractor( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + """Ensure echoing the extractor command cannot satisfy artifact extraction.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_download_echo_spoof_guard", + ) + workflow_dir = tmp_path / ".github" / "workflows" + workflow_dir.mkdir(parents=True) + (workflow_dir / "ossf-scorecard.yml").write_text( + "\n".join( + [ + "name: ossf-scorecard", + "on: push", + "jobs:", + " scorecard-sarif-upload:", + " steps:", + " - uses: actions/download-artifact@" + "3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1", + " with:", + " name: ossf-scorecard-results", + " path: scorecard-artifact", + " skip-decompress: true", + " - name: Mention extractor without running it", + " run: >-", + " echo python3 scripts/checks/extract_scorecard_artifact.py", + " scorecard-artifact", + " scorecard-sarif", + " - name: Normalize repository-level Scorecard SARIF locations", + " run: >-", + " python3 scripts/checks/normalize_scorecard_sarif.py", + " scorecard-sarif/results.sarif", + " normalized-scorecard-results.sarif", + " - uses: github/codeql-action/upload-sarif@" + "95e58e9a2cdfd71adc6e0353d5c52f41a045d225", + " with:", + " sarif_file: normalized-scorecard-results.sarif", + ] + ), + encoding="utf-8", + ) + + monkeypatch.chdir(tmp_path) + + violations = supply_chain.verify_workflow_coverage() + + assert ( + "ossf scorecard artifact download must use skip-decompress: true and " + "repo-owned extraction before normalization" + ) in violations + + +def test_supply_chain_check_accepts_repo_scorecard_download_decompression_guard( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Ensure checked-in Scorecard downloads use repo-owned artifact extraction.""" + supply_chain = load_module( + "scripts/checks/verify_supply_chain.py", + "verify_supply_chain_ossf_download_decompression_repo", + ) + repo_root = Path(__file__).resolve().parents[3] + + monkeypatch.chdir(repo_root) + + violations = supply_chain.verify_workflow_coverage() + + assert not any("skip-decompress" in violation for violation in violations) + + +def test_scorecard_artifact_extractor_extracts_expected_sarif(tmp_path: Path) -> None: + """Ensure the repo-owned extractor restores results.sarif from zipped artifacts.""" + extractor = load_module( + "scripts/checks/extract_scorecard_artifact.py", "extract_scorecard_artifact" + ) + source_zip = tmp_path / "ossf-scorecard-results.zip" + output_dir = tmp_path / "scorecard-sarif" + with zipfile.ZipFile(source_zip, "w") as archive: + archive.writestr("results.sarif", '{"version":"2.1.0","runs":[]}') + + extracted = extractor.extract_scorecard_artifact(source_zip, output_dir) + + assert extracted == output_dir / "results.sarif" + assert extracted.read_text(encoding="utf-8") == '{"version":"2.1.0","runs":[]}' + + artifact_dir = tmp_path / "scorecard-artifact" + artifact_dir.mkdir() + directory_source_zip = artifact_dir / "results.sarif.zip" + with zipfile.ZipFile(directory_source_zip, "w") as archive: + archive.writestr("results.sarif", '{"version":"2.1.0","runs":[{}]}') + + directory_output_dir = tmp_path / "directory-scorecard-sarif" + directory_extracted = extractor.extract_scorecard_artifact(artifact_dir, directory_output_dir) + + assert directory_extracted == directory_output_dir / "results.sarif" + assert directory_extracted.read_text(encoding="utf-8") == '{"version":"2.1.0","runs":[{}]}' + + empty_artifact_dir = tmp_path / "empty-scorecard-artifact" + empty_artifact_dir.mkdir() + with pytest.raises(ValueError, match="expected exactly one Scorecard artifact zip"): + extractor.extract_scorecard_artifact(empty_artifact_dir, tmp_path / "empty-output") + + multi_artifact_dir = tmp_path / "multi-scorecard-artifact" + multi_artifact_dir.mkdir() + with zipfile.ZipFile(multi_artifact_dir / "first.zip", "w") as archive: + archive.writestr("results.sarif", "{}") + with zipfile.ZipFile(multi_artifact_dir / "second.zip", "w") as archive: + archive.writestr("results.sarif", "{}") + with pytest.raises(ValueError, match="expected exactly one Scorecard artifact zip"): + extractor.extract_scorecard_artifact(multi_artifact_dir, tmp_path / "multi-output") + + +def test_scorecard_artifact_extractor_rejects_symlink_artifact_zip( + tmp_path: Path, +) -> None: + """Ensure input artifact paths are not followed through symlinks.""" + extractor = load_module( + "scripts/checks/extract_scorecard_artifact.py", + "extract_scorecard_artifact_input_symlink", + ) + real_zip = tmp_path / "real-scorecard-results.zip" + with zipfile.ZipFile(real_zip, "w") as archive: + archive.writestr("results.sarif", "{}") + symlink_zip = tmp_path / "ossf-scorecard-results.zip" + symlink_zip.symlink_to(real_zip) + + with pytest.raises(ValueError, match="symlinked artifact path"): + extractor.extract_scorecard_artifact(symlink_zip, tmp_path / "scorecard-sarif") + + +def test_scorecard_artifact_extractor_rejects_symlink_zip_in_artifact_directory( + tmp_path: Path, +) -> None: + """Ensure directory inputs reject symlinked ZIP candidates and fail closed.""" + extractor = load_module( + "scripts/checks/extract_scorecard_artifact.py", + "extract_scorecard_artifact_directory_symlink", + ) + artifact_dir = tmp_path / "scorecard-artifact" + artifact_dir.mkdir() + real_zip = tmp_path / "real-scorecard-results.zip" + with zipfile.ZipFile(real_zip, "w") as archive: + archive.writestr("results.sarif", "{}") + (artifact_dir / "results.sarif.zip").symlink_to(real_zip) + + with pytest.raises(ValueError, match="symlinked artifact path"): + extractor.extract_scorecard_artifact(artifact_dir, tmp_path / "scorecard-sarif") + + +def test_scorecard_artifact_extractor_rejects_mixed_symlink_zip_directory( + tmp_path: Path, +) -> None: + """Ensure any symlinked ZIP candidate taints directory artifact input.""" + extractor = load_module( + "scripts/checks/extract_scorecard_artifact.py", + "extract_scorecard_artifact_mixed_directory_symlink", + ) + artifact_dir = tmp_path / "scorecard-artifact" + artifact_dir.mkdir() + with zipfile.ZipFile(artifact_dir / "results.sarif.zip", "w") as archive: + archive.writestr("results.sarif", "{}") + real_zip = tmp_path / "real-scorecard-results.zip" + with zipfile.ZipFile(real_zip, "w") as archive: + archive.writestr("results.sarif", "{}") + (artifact_dir / "shadow.zip").symlink_to(real_zip) + + with pytest.raises(ValueError, match="symlinked artifact path"): + extractor.extract_scorecard_artifact(artifact_dir, tmp_path / "scorecard-sarif") + + +def test_scorecard_artifact_extractor_rejects_path_traversal(tmp_path: Path) -> None: + """Ensure malformed Scorecard artifacts cannot escape the extraction directory.""" + extractor = load_module( + "scripts/checks/extract_scorecard_artifact.py", + "extract_scorecard_artifact_traversal", + ) + source_zip = tmp_path / "ossf-scorecard-results.zip" + with zipfile.ZipFile(source_zip, "w") as archive: + archive.writestr("../results.sarif", "{}") + + with pytest.raises(ValueError, match="unexpected artifact member"): + extractor.extract_scorecard_artifact(source_zip, tmp_path / "scorecard-sarif") + + +def test_scorecard_artifact_extractor_rejects_zip_symlink(tmp_path: Path) -> None: + """Ensure symlink-like ZIP members are rejected even with the expected name.""" + extractor = load_module( + "scripts/checks/extract_scorecard_artifact.py", + "extract_scorecard_artifact_symlink", + ) + source_zip = tmp_path / "ossf-scorecard-results.zip" + symlink_info = zipfile.ZipInfo("results.sarif") + symlink_info.external_attr = (stat.S_IFLNK | 0o777) << 16 + with zipfile.ZipFile(source_zip, "w") as archive: + archive.writestr(symlink_info, "target") + + with pytest.raises(ValueError, match="unexpected artifact member"): + extractor.extract_scorecard_artifact(source_zip, tmp_path / "scorecard-sarif") + + +def test_scorecard_artifact_extractor_rejects_missing_results_sarif( + tmp_path: Path, +) -> None: + """Ensure artifacts without the expected Scorecard SARIF fail closed.""" + extractor = load_module( + "scripts/checks/extract_scorecard_artifact.py", + "extract_scorecard_artifact_missing", + ) + source_zip = tmp_path / "ossf-scorecard-results.zip" + with zipfile.ZipFile(source_zip, "w"): + pass + + with pytest.raises(ValueError, match="expected only results.sarif"): + extractor.extract_scorecard_artifact(source_zip, tmp_path / "scorecard-sarif") + + +def test_scorecard_artifact_extractor_rejects_symlink_output_dir(tmp_path: Path) -> None: + """Ensure output directories are not followed through symlinks.""" + extractor = load_module( + "scripts/checks/extract_scorecard_artifact.py", + "extract_scorecard_artifact_output_dir_symlink", + ) + source_zip = tmp_path / "ossf-scorecard-results.zip" + real_output = tmp_path / "real-output" + real_output.mkdir() + symlink_output = tmp_path / "scorecard-sarif" + symlink_output.symlink_to(real_output, target_is_directory=True) + with zipfile.ZipFile(source_zip, "w") as archive: + archive.writestr("results.sarif", "{}") + + with pytest.raises(ValueError, match="symlinked output path"): + extractor.extract_scorecard_artifact(source_zip, symlink_output) + + +def test_scorecard_artifact_extractor_rejects_existing_target_symlink( + tmp_path: Path, +) -> None: + """Ensure existing target symlinks cannot be overwritten by extraction.""" + extractor = load_module( + "scripts/checks/extract_scorecard_artifact.py", + "extract_scorecard_artifact_target_symlink", + ) + source_zip = tmp_path / "ossf-scorecard-results.zip" + output_dir = tmp_path / "scorecard-sarif" + output_dir.mkdir() + outside_target = tmp_path / "outside.sarif" + outside_target.write_text("outside", encoding="utf-8") + (output_dir / "results.sarif").symlink_to(outside_target) + with zipfile.ZipFile(source_zip, "w") as archive: + archive.writestr("results.sarif", "{}") + + with pytest.raises(FileExistsError): + extractor.extract_scorecard_artifact(source_zip, output_dir) + assert outside_target.read_text(encoding="utf-8") == "outside" + + +def test_scorecard_artifact_extractor_rejects_oversized_results_sarif( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Ensure oversized Scorecard SARIF artifacts fail before extraction.""" + extractor = load_module( + "scripts/checks/extract_scorecard_artifact.py", + "extract_scorecard_artifact_oversized", + ) + monkeypatch.setattr(extractor, "MAX_SARIF_BYTES", 1) + source_zip = tmp_path / "ossf-scorecard-results.zip" + output_dir = tmp_path / "scorecard-sarif" + with zipfile.ZipFile(source_zip, "w") as archive: + archive.writestr("results.sarif", "{}") + + with pytest.raises(ValueError, match="artifact member too large"): + extractor.extract_scorecard_artifact(source_zip, output_dir) + + assert not (output_dir / "results.sarif").exists() + + def test_scorecard_sarif_normalizer_replaces_repository_level_placeholder( tmp_path: Path, ) -> None: diff --git a/skills/ci-warning-root-cause-remediation/SKILL.md b/skills/ci-warning-root-cause-remediation/SKILL.md new file mode 100644 index 00000000..7d52c57f --- /dev/null +++ b/skills/ci-warning-root-cause-remediation/SKILL.md @@ -0,0 +1,67 @@ +--- +name: ci-warning-root-cause-remediation +description: Use when GitHub Actions logs, local verification commands, scanners, linters, dependency tools, or robot reviewers report warnings, deprecations, notices, fatal errors, agent implementation mistakes, or security defects in BandScope or similar repositories. +--- + +# CI Warning Root Cause Remediation + +## Overview + +Turn every CI warning or agent-introduced defect into a tracked root-cause fix, a narrow documented external-owner follow-up, or a verified non-issue. Prefer code/config/toolchain fixes over output filtering. + +## Workflow + +1. Capture exact evidence: command, working directory, run URL/job/step, warning text, tool/action version, commit SHA, and whether it appears locally, in CI, or both. +2. Classify the owner: repository code, workflow configuration, direct dependency, transitive dependency, toolchain/runtime mismatch, hosted runner image, robot reviewer, or external service. +3. Trace the owner chain with structured tools where possible: + - GitHub Actions: inspect workflow file, pinned action SHA, action inputs, and job logs. + - JavaScript: use `npm explain`, lockfile entries, and action package metadata. + - Python: use `uv tree`, `pip inspect`, or direct lockfile reads. + - Rust: use `cargo tree`, `cargo audit`, and `Cargo.lock`. + - Code/lint: search exact symbols and rule IDs, then read the narrow files. +4. Choose the maintained path: + - update project code or tests, + - align versions or inputs, + - remove unused dependencies, + - replace deprecated APIs, + - add a narrow repo-owned guard, or + - create a follow-up only when the remaining owner is external and no maintained fix exists. +5. Do not hide warnings with broad filters, `2>/dev/null`, blanket quiet flags, or reviewer dismissal. +6. Add regression coverage when the warning can recur. +7. Re-run the exact warning-producing command or CI job and the smallest relevant local verification. +8. Record evidence in the PR and linked issue. + +## Agent mistake and security defect handling + +When a linter error, test failure, security issue, or review finding is caused by agent work: + +- Treat it as in-scope remediation, not a blocker. +- Add or strengthen tests before changing production/security-sensitive code. +- Fix the smallest root cause and re-run the failing command. +- If the mistake reveals a reusable workflow gap, add a skill, guard script, or canonical doc update in the same PR when scoped. +- Keep security controls stricter or equal; never weaken checks to make CI green. + +## GitHub Actions warning pattern + +For action-runtime warnings: + +1. Identify the exact action SHA and semantic version comment. +2. Confirm whether the latest upstream release fixes the warning. +3. If no upstream fix exists, avoid the warning-producing code path with action-supported inputs while preserving digest, permission, and SHA-pin controls. +4. If repo code replaces action behavior, make it narrow and safe by default, with tests for malformed inputs and unexpected artifacts. +5. Keep a follow-up issue for upstream removal only when repo-controlled mitigation cannot eliminate the warning fully. + +## Done criteria + +- Original warning command or CI job has fresh evidence. +- Output is warning-free, or the remaining warning is explicitly external-owned with issue linkage and no maintained repo-controlled fix. +- Regression tests or guard scripts cover the path. +- Linked issue records owner chain, fix, verification, and follow-up state. +- PR current head has passing required checks and robot-review approval/skip evidence. + +## Forbidden shortcuts + +- Do not use broad log filtering to hide warnings. +- Do not request or wait for human review unless explicitly required by the user. +- Do not treat Strix/robot/security findings as blockers to ignore; triage and fix or rebut with evidence. +- Do not mark platform-owned warnings resolved without checking for upstream releases or maintained alternatives.