From 4ed916df4dfeb42beb2f5364f9091570267457f2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 14:13:26 +0000 Subject: [PATCH 1/4] Initial plan From 5062b645cbbc8a7617220300e69292db8b452cf3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 14:23:58 +0000 Subject: [PATCH 2/4] fix: improve metrics comparison CI readability - Fix backwards diff direction in compare_metrics.py: swap unified_diff args so - = in baseline but not current (regression), + = newly added - Update compare_metrics_test.py assertions to reflect correct semantics - Fix get_raw_diff_sample in metrics_summary.py to interleave - and + lines so both sides are visible when truncating modified metrics - Remove set -x from metrics_summary.sh and add GitHub group markers to fold verbose output sections in CI logs - Add ciRunUrl parameter to formatMetricsDetail in ci-summary-report- publish.js to link directly to CI run logs for detailed diff output - Update ci-summary-report-publish.test.js with new ciRunUrl tests Agent-Logs-Url: https://github.com/jaegertracing/jaeger/sessions/9a76075a-0fc6-4e8a-a951-4dfe09c297fe Co-authored-by: yurishkuro <3523016+yurishkuro@users.noreply.github.com> --- .github/scripts/ci-summary-report-publish.js | 15 ++++++--- .../scripts/ci-summary-report-publish.test.js | 33 +++++++++++++++---- scripts/e2e/compare_metrics.py | 7 +++- scripts/e2e/compare_metrics_test.py | 21 ++++++------ scripts/e2e/metrics_summary.py | 31 +++++++++++++---- scripts/e2e/metrics_summary.sh | 18 ++++++---- 6 files changed, 92 insertions(+), 33 deletions(-) diff --git a/.github/scripts/ci-summary-report-publish.js b/.github/scripts/ci-summary-report-publish.js index 31ae50865fc..611a64319d3 100644 --- a/.github/scripts/ci-summary-report-publish.js +++ b/.github/scripts/ci-summary-report-publish.js @@ -158,9 +158,10 @@ function computeCoverage(s) { * All text is built from trusted templates; metric names have been validated * through sanitizeMetricName() and are rendered in backtick-code spans. * @param {Array|null} snapshots - Sanitized snapshots from computeMetrics + * @param {string} [ciRunUrl] - URL to the CI run for linking to detailed diff logs * @returns {string} - Markdown detail block, or empty string if no data */ -function formatMetricsDetail(snapshots) { +function formatMetricsDetail(snapshots, ciRunUrl) { if (!snapshots || snapshots.length === 0) return ''; const lines = [ @@ -170,6 +171,11 @@ function formatMetricsDetail(snapshots) { '', ]; + if (ciRunUrl) { + lines.push(`_For label-level diff details, open the [CI run](${ciRunUrl}) and expand the "Compare metrics and generate summary" step logs._`); + lines.push(''); + } + for (const snap of snapshots) { lines.push(`**${snap.snapshot}**`); const parts = []; @@ -201,9 +207,10 @@ function formatMetricsDetail(snapshots) { * @param {string} footer - links + timestamp line * @param {object} [opts] * @param {Array|null} [opts.metricsSnapshots] - sanitized snapshot data for detail rendering + * @param {string} [opts.ciRunUrl] - URL to the CI run, passed through to formatMetricsDetail * @returns {string} */ -function buildCommentBody(metricsText, coverageText, footer, { metricsSnapshots } = {}) { +function buildCommentBody(metricsText, coverageText, footer, { metricsSnapshots, ciRunUrl } = {}) { const parts = [ COMMENT_TAG, '## CI Summary Report', @@ -212,7 +219,7 @@ function buildCommentBody(metricsText, coverageText, footer, { metricsSnapshots metricsText, ]; - const detail = formatMetricsDetail(metricsSnapshots); + const detail = formatMetricsDetail(metricsSnapshots, ciRunUrl); if (detail) { parts.push(detail); } @@ -349,7 +356,7 @@ async function handler({ github, core, fs, inputs }) { // after a green run. Only create a new comment when there is something to report. const hasIssues = metrics.conclusion === 'failure' || coverage.conclusion === 'failure' || metrics.totalChanges > 0; - const body = buildCommentBody(metrics.text, coverage.text, footer, { metricsSnapshots: metrics.snapshots }); + const body = buildCommentBody(metrics.text, coverage.text, footer, { metricsSnapshots: metrics.snapshots, ciRunUrl }); await postOrUpdateComment(github, owner, repo, prNumber, body, core, { createNew: hasIssues }); } else { core.info('No PR number; skipping PR comment.'); diff --git a/.github/scripts/ci-summary-report-publish.test.js b/.github/scripts/ci-summary-report-publish.test.js index 8334c131578..1bbd4c118b6 100644 --- a/.github/scripts/ci-summary-report-publish.test.js +++ b/.github/scripts/ci-summary-report-publish.test.js @@ -285,14 +285,23 @@ describe('formatMetricsDetail', () => { expect(detail).toContain('3 removed'); }); - test('omits zero counts from summary line', () => { + test('includes link to CI run when ciRunUrl provided', () => { const detail = formatMetricsDetail([{ - snapshot: 'test', added: 0, removed: 5, modified: 0, - metric_names: [], + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + }], 'https://github.com/org/repo/actions/runs/123'); + expect(detail).toContain('[CI run](https://github.com/org/repo/actions/runs/123)'); + expect(detail).toContain('Compare metrics and generate summary'); + }); + + test('omits CI run link when ciRunUrl not provided', () => { + const detail = formatMetricsDetail([{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], }]); - expect(detail).not.toContain('added'); - expect(detail).toContain('5 removed'); - expect(detail).not.toContain('modified'); + expect(detail).not.toContain('[CI run]'); }); }); @@ -420,6 +429,18 @@ describe('buildCommentBody', () => { expect(metricsPos).toBeLessThan(detailsPos); expect(detailsPos).toBeLessThan(coveragePos); }); + + test('includes CI run link in metrics detail when ciRunUrl provided', () => { + const snapshots = [{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + }]; + const ciRunUrl = 'https://github.com/org/repo/actions/runs/999'; + const body = buildCommentBody('❌ 1 metric change(s) detected', coverageText, footer, { metricsSnapshots: snapshots, ciRunUrl }); + expect(body).toContain(`[CI run](${ciRunUrl})`); + expect(body).toContain('Compare metrics and generate summary'); + }); }); // ── postCheckRun ────────────────────────────────────────────────────────────── diff --git a/scripts/e2e/compare_metrics.py b/scripts/e2e/compare_metrics.py index a64481a2eda..70873905a91 100644 --- a/scripts/e2e/compare_metrics.py +++ b/scripts/e2e/compare_metrics.py @@ -132,6 +132,9 @@ def generate_diff(file1_content, file2_content): The diff is performed on these sorted, value-free metric strings. If the two snapshots produce the same set of strings the diff is empty and this function returns ''. When there are differences, the return value is a unified diff + using the standard convention: + - lines = present in file2 (baseline) but absent from file1 (current) → regression/removed + + lines = present in file1 (current) but absent from file2 (baseline) → newly added followed by optional comment lines reporting how many metrics were excluded, e.g.: # Metrics excluded from A: 3 # Metrics excluded from B: 5 @@ -146,7 +149,9 @@ def generate_diff(file1_content, file2_content): metrics1,excluded_metrics_count1 = parse_metrics(file1_content) metrics2,excluded_metrics_count2 = parse_metrics(file2_content) - diff = list(unified_diff(metrics1, metrics2, lineterm='', n=0)) + # unified_diff(baseline, current): - = in baseline but not current (removed/regression), + # + = in current but not baseline (newly added). + diff = list(unified_diff(metrics2, metrics1, lineterm='', n=0)) # Exclusion counts are informational context appended to the diff output. # They must not be written when the diff itself is empty: two snapshots with diff --git a/scripts/e2e/compare_metrics_test.py b/scripts/e2e/compare_metrics_test.py index 10068cdba19..7d64739101f 100644 --- a/scripts/e2e/compare_metrics_test.py +++ b/scripts/e2e/compare_metrics_test.py @@ -54,11 +54,11 @@ def test_empty_snapshots_returns_empty(self): def test_regression_detected(self): """Metric present in baseline but absent from current snapshot → diff is non-empty.""" - # current=A only, baseline=A+B → B is missing from current (regression) + # current=A only, baseline=A+B → B is missing from current (regression/removed) result = generate_diff(_METRIC_A, _METRIC_A + _METRIC_B) self.assertNotEqual(result, '', 'Expected a non-empty diff for a regression') - # The diff must contain a '+' line for the missing metric (counter_b) - self.assertIn('+counter_b', result) + # '-' line = in baseline but not in current (regression) + self.assertIn('-counter_b', result) def test_new_metric_in_current_snapshot_produces_diff(self): """Metric present in current snapshot but absent from baseline → diff is non-empty. @@ -68,11 +68,11 @@ def test_new_metric_in_current_snapshot_produces_diff(self): Silently ignoring new metrics would mask intermittent behaviour where a metric alternates between appearing and disappearing across runs. """ - # current=A+B, baseline=A only → B is new in current + # current=A+B, baseline=A only → B is new in current (newly added) result = generate_diff(_METRIC_A + _METRIC_B, _METRIC_A) self.assertNotEqual(result, '', 'New metrics in current snapshot should produce a diff') - # '-' line = in current but not in baseline - self.assertIn('-counter_b', result) + # '+' line = in current but not in baseline (newly added) + self.assertIn('+counter_b', result) def test_exclusion_count_difference_does_not_produce_diff(self): """Snapshots that differ only in excluded-metric counts produce no diff. @@ -92,12 +92,13 @@ def test_exclusion_count_difference_does_not_produce_diff(self): def test_mixed_regression_and_new_metric_returns_diff(self): """When there is both a regression AND a new metric, the diff is non-empty.""" - # current=B only, baseline=A only → A is missing (regression), B is new + # current=B only, baseline=A only → A is missing (regression/removed), B is new (added) result = generate_diff(_METRIC_B, _METRIC_A) self.assertNotEqual(result, '') - self.assertIn('+counter_a', result) - # The new metric should still appear in the raw diff output for visibility - self.assertIn('-counter_b', result) + # '-' line = in baseline but not in current (regression) + self.assertIn('-counter_a', result) + # '+' line = in current but not in baseline (newly added) + self.assertIn('+counter_b', result) def test_regression_with_exclusions_includes_exclusion_summary(self): """When there is a regression and excluded metrics, the output includes counts.""" diff --git a/scripts/e2e/metrics_summary.py b/scripts/e2e/metrics_summary.py index f5b1c6e15dc..0ba2fe11119 100644 --- a/scripts/e2e/metrics_summary.py +++ b/scripts/e2e/metrics_summary.py @@ -73,19 +73,38 @@ def extract_metric_name(line): return line.split('{')[0].strip() return line.strip() -def get_raw_diff_sample(raw_lines, max_lines=7): +def get_raw_diff_sample(raw_lines, max_lines=8): """ Get sample raw diff lines, preserving original diff formatting. + + For modified metrics that have both removed (-) and added (+) lines, + interleaves them so both sides are visible when the sample is truncated, + avoiding the problem of showing only the removed side. """ if not raw_lines: return [] - # Take up to max_lines - sample_lines = raw_lines[:max_lines] - if len(raw_lines) > max_lines: - sample_lines.append("...") + removed_lines = [l for l in raw_lines if l.startswith('-')] + added_lines = [l for l in raw_lines if l.startswith('+')] + + # If only one direction of change (pure add or pure remove), just truncate. + if not removed_lines or not added_lines: + sample_lines = raw_lines[:max_lines] + if len(raw_lines) > max_lines: + sample_lines.append("...") + return sample_lines + + # Interleave pairs of (removed, added) lines so both sides are always visible. + max_pairs = max(1, max_lines // 2) + interleaved = [] + for i in range(min(max_pairs, len(removed_lines), len(added_lines))): + interleaved.append(removed_lines[i]) + interleaved.append(added_lines[i]) + + if len(removed_lines) > max_pairs or len(added_lines) > max_pairs: + interleaved.append("...") - return sample_lines + return interleaved def generate_diff_summary(changes, raw_diff_sections, exclusion_count): """ diff --git a/scripts/e2e/metrics_summary.sh b/scripts/e2e/metrics_summary.sh index 67983ece935..70439e7d05a 100644 --- a/scripts/e2e/metrics_summary.sh +++ b/scripts/e2e/metrics_summary.sh @@ -3,22 +3,24 @@ # Copyright (c) 2025 The Jaeger Authors. # SPDX-License-Identifier: Apache-2.0 -# Enable debug tracing and exit on error -set -exo pipefail +# Exit on error, treat unset variables as errors, fail on pipe errors. +set -eo pipefail METRICS_DIR="${METRICS_DIR:-./.artifacts}" declare -a summary_files=() declare -a json_files=() total_changes=0 +echo "::group::Metrics diff setup" echo "Starting metrics diff processing in directory: $METRICS_DIR" echo "Directory structure:" ls -la "$METRICS_DIR" || echo "Metrics directory listing failed" +echo "::endgroup::" # Verify 1-to-1: every metrics_snapshot_* artifact must have a diff_metrics_snapshot_* artifact. # verify-metrics-snapshot always uploads a diff artifact on PRs (empty stub if no baseline), # so a missing diff dir means that action never ran for that snapshot — an infra failure. -echo "=== Checking for missing diff artifacts ===" +echo "::group::Checking for missing diff artifacts" declare -a missing_diffs=() found_any_snapshot=false for snapshot_dir in "$METRICS_DIR"/metrics_snapshot_*/; do @@ -41,12 +43,14 @@ if [ ${#missing_diffs[@]} -gt 0 ]; then else echo "INFRA_ERRORS=false" >> "$GITHUB_OUTPUT" fi +echo "::endgroup::" # Debug: List all diff files found -echo "=== Searching for diff files ===" +echo "::group::Searching for diff files" find "$METRICS_DIR" -type f -name "diff_*.txt" | while read -r file; do echo "Found diff file: $file" done +echo "::endgroup::" # Process all non-empty diff files. # Empty diff files are stubs uploaded by verify-metrics-snapshot when there is no @@ -74,11 +78,12 @@ while IFS= read -r -d '' diff_file; do summary_file="$dir/summary_$snapshot_name.md" json_file="$dir/changes_$snapshot_name.json" - echo "Generating summary for $snapshot_name" + echo "::group::Generating summary for $snapshot_name" python3 ./scripts/e2e/metrics_summary.py \ --diff "$diff_file" \ --output "$summary_file" \ --json-output "$json_file" + echo "::endgroup::" summary_files+=("$summary_file") json_files+=("$json_file") @@ -152,10 +157,11 @@ if [ "$total_changes" -gt 0 ]; then echo "" for summary_file in "${summary_files[@]}"; do file_name=$(basename "$summary_file" .md) - echo "--- ${file_name} ---" + echo "::group::${file_name}" echo "" cat "$summary_file" echo "" + echo "::endgroup::" done elif [ ${#missing_diffs[@]} -gt 0 ]; then echo "No metric changes in available diffs, but some diff artifacts were missing (see above)." From abc5eab190040bb9bf64c50d62b4fa16296ecd2c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 15:31:44 +0000 Subject: [PATCH 3/4] fix: address review feedback - rename terminology, add tests, add artifact links - Rename A/B/1/2 terminology to baseline/current throughout: - compare_metrics.py: rename function args, vars, CLI args (--current/ --baseline), and exclusion count labels (current:/baseline:) - metrics_summary.py: update exclusion label parsing - compare_metrics_test.py: update assertions - action.yaml: update --file1/--file2 to --current/--baseline - Add metrics_summary_test.py with 25 tests covering get_raw_diff_sample, parse_diff_file, extract_metric_name, generate_diff_summary, generate_structured_json - Add per-snapshot diff artifact download links in PR comment: - metrics_summary.sh: query GitHub API for artifact IDs; include in metrics_snapshots.json via merge Python snippet - ci-summary-report.yml: add GH_TOKEN to compare-metrics step - ci-summary-report-publish.yml: pass sourceRunId to handler - ci-summary-report-publish.js: sanitize artifact_id; construct per- snapshot download URLs from trusted pieces; add artifactUrlPrefix parameter to formatMetricsDetail and buildCommentBody - ci-summary-report-publish.test.js: add tests for artifact_id sanitization and download link rendering Agent-Logs-Url: https://github.com/jaegertracing/jaeger/sessions/a021a645-ff42-4ce9-89f1-6a2918708a39 Co-authored-by: yurishkuro <3523016+yurishkuro@users.noreply.github.com> --- .../verify-metrics-snapshot/action.yaml | 2 +- .github/scripts/ci-summary-report-publish.js | 42 ++- .../scripts/ci-summary-report-publish.test.js | 83 ++++- .../workflows/ci-summary-report-publish.yml | 13 +- .github/workflows/ci-summary-report.yml | 2 + scripts/e2e/compare_metrics.py | 50 +-- scripts/e2e/compare_metrics_test.py | 4 +- scripts/e2e/metrics_summary.py | 2 +- scripts/e2e/metrics_summary.sh | 40 ++- scripts/e2e/metrics_summary_test.py | 288 ++++++++++++++++++ 10 files changed, 482 insertions(+), 44 deletions(-) create mode 100644 scripts/e2e/metrics_summary_test.py diff --git a/.github/actions/verify-metrics-snapshot/action.yaml b/.github/actions/verify-metrics-snapshot/action.yaml index 326a2861674..e0612a3242d 100644 --- a/.github/actions/verify-metrics-snapshot/action.yaml +++ b/.github/actions/verify-metrics-snapshot/action.yaml @@ -94,7 +94,7 @@ runs: shell: bash run: | python3 -m pip install prometheus-client - if python3 ./scripts/e2e/compare_metrics.py --file1 ./.metrics/${{ inputs.snapshot }}.txt --file2 ./.metrics/baseline_${{ inputs.snapshot }}.txt --output ./.metrics/diff_${{ inputs.snapshot }}.txt; then + if python3 ./scripts/e2e/compare_metrics.py --current ./.metrics/${{ inputs.snapshot }}.txt --baseline ./.metrics/baseline_${{ inputs.snapshot }}.txt --output ./.metrics/diff_${{ inputs.snapshot }}.txt; then echo "No differences found in metrics" else echo "🛑 Differences found in metrics" diff --git a/.github/scripts/ci-summary-report-publish.js b/.github/scripts/ci-summary-report-publish.js index 611a64319d3..06c20e92de9 100644 --- a/.github/scripts/ci-summary-report-publish.js +++ b/.github/scripts/ci-summary-report-publish.js @@ -97,7 +97,12 @@ function sanitizeSnapshots(raw) { if (clean) names.push(clean); } } - result.push({ snapshot, added, removed, modified, metric_names: names }); + // Validate artifact_id (optional, non-negative integer for the diff artifact download link) + const artifactId = safeNum(entry.artifact_id); + // artifact_id must be a positive integer (GitHub artifact IDs are always > 0) + const sanitizedArtifactId = (artifactId !== null && Number.isInteger(artifactId) && artifactId > 0) + ? artifactId : null; + result.push({ snapshot, added, removed, modified, metric_names: names, artifact_id: sanitizedArtifactId }); } return result.length > 0 ? result : null; } @@ -158,10 +163,14 @@ function computeCoverage(s) { * All text is built from trusted templates; metric names have been validated * through sanitizeMetricName() and are rendered in backtick-code spans. * @param {Array|null} snapshots - Sanitized snapshots from computeMetrics - * @param {string} [ciRunUrl] - URL to the CI run for linking to detailed diff logs + * @param {string} [ciRunUrl] - URL to the CI run for linking to step logs + * @param {string} [artifactUrlPrefix] - Base URL for artifact downloads: + * `https://github.com/{owner}/{repo}/actions/runs/{runId}/artifacts`. + * When provided, each snapshot with a valid artifact_id gets a direct + * download link appended to its header line. * @returns {string} - Markdown detail block, or empty string if no data */ -function formatMetricsDetail(snapshots, ciRunUrl) { +function formatMetricsDetail(snapshots, { ciRunUrl, artifactUrlPrefix } = {}) { if (!snapshots || snapshots.length === 0) return ''; const lines = [ @@ -177,7 +186,12 @@ function formatMetricsDetail(snapshots, ciRunUrl) { } for (const snap of snapshots) { - lines.push(`**${snap.snapshot}**`); + // Build the snapshot header, optionally with a direct diff artifact download link. + let snapHeader = `**${snap.snapshot}**`; + if (artifactUrlPrefix && snap.artifact_id !== null && snap.artifact_id !== undefined) { + snapHeader += ` — [⬇️ download diff](${artifactUrlPrefix}/${snap.artifact_id})`; + } + lines.push(snapHeader); const parts = []; if (snap.added !== null && snap.added > 0) parts.push(`${snap.added} added`); if (snap.removed !== null && snap.removed > 0) parts.push(`${snap.removed} removed`); @@ -208,9 +222,10 @@ function formatMetricsDetail(snapshots, ciRunUrl) { * @param {object} [opts] * @param {Array|null} [opts.metricsSnapshots] - sanitized snapshot data for detail rendering * @param {string} [opts.ciRunUrl] - URL to the CI run, passed through to formatMetricsDetail + * @param {string} [opts.artifactUrlPrefix] - Base URL for artifact downloads, passed through * @returns {string} */ -function buildCommentBody(metricsText, coverageText, footer, { metricsSnapshots, ciRunUrl } = {}) { +function buildCommentBody(metricsText, coverageText, footer, { metricsSnapshots, ciRunUrl, artifactUrlPrefix } = {}) { const parts = [ COMMENT_TAG, '## CI Summary Report', @@ -219,7 +234,7 @@ function buildCommentBody(metricsText, coverageText, footer, { metricsSnapshots, metricsText, ]; - const detail = formatMetricsDetail(metricsSnapshots, ciRunUrl); + const detail = formatMetricsDetail(metricsSnapshots, { ciRunUrl, artifactUrlPrefix }); if (detail) { parts.push(detail); } @@ -308,6 +323,8 @@ async function postOrUpdateComment(github, owner, repo, prNumber, body, core, { * @param {string} opts.inputs.prNumber - raw string from step output * @param {string} opts.inputs.ciRunUrl * @param {string} opts.inputs.publishUrl + * @param {string} [opts.inputs.sourceRunId] - numeric run ID of the CI Orchestrator run; + * used to construct direct artifact download URLs (e.g. .../artifacts/{id}) */ async function handler({ github, core, fs, inputs }) { const { owner, repo, headSha, ciRunUrl, publishUrl } = inputs; @@ -317,6 +334,13 @@ async function handler({ github, core, fs, inputs }) { const ts = new Date().toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC'); const footer = `${links}\n_${ts}_`; + // Construct the artifact URL prefix for per-snapshot diff download links. + // All parts come from trusted sources: owner/repo from context, runId validated as integer. + const sourceRunId = parseInt(inputs.sourceRunId, 10) || null; + const artifactUrlPrefix = (owner && repo && sourceRunId) + ? `https://github.com/${owner}/${repo}/actions/runs/${sourceRunId}/artifacts` + : null; + // Read structured data written by ci-summary-report.yml. // All fields are primitives (enums, numbers, booleans) — no free-form text. let s; @@ -356,7 +380,11 @@ async function handler({ github, core, fs, inputs }) { // after a green run. Only create a new comment when there is something to report. const hasIssues = metrics.conclusion === 'failure' || coverage.conclusion === 'failure' || metrics.totalChanges > 0; - const body = buildCommentBody(metrics.text, coverage.text, footer, { metricsSnapshots: metrics.snapshots, ciRunUrl }); + const body = buildCommentBody(metrics.text, coverage.text, footer, { + metricsSnapshots: metrics.snapshots, + ciRunUrl, + artifactUrlPrefix, + }); await postOrUpdateComment(github, owner, repo, prNumber, body, core, { createNew: hasIssues }); } else { core.info('No PR number; skipping PR comment.'); diff --git a/.github/scripts/ci-summary-report-publish.test.js b/.github/scripts/ci-summary-report-publish.test.js index 1bbd4c118b6..4b2ab570552 100644 --- a/.github/scripts/ci-summary-report-publish.test.js +++ b/.github/scripts/ci-summary-report-publish.test.js @@ -175,6 +175,44 @@ describe('sanitizeSnapshots', () => { expect(result).toHaveLength(1); expect(result[0].snapshot).toBe('valid'); }); + + test('sanitizes artifact_id as a positive integer', () => { + const input = [{ + snapshot: 'test_snapshot', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: 6359406281, + }]; + const result = sanitizeSnapshots(input); + expect(result[0].artifact_id).toBe(6359406281); + }); + + test('rejects non-integer artifact_id', () => { + const input = [{ + snapshot: 'test_snapshot', + added: 1, removed: 0, modified: 0, + metric_names: [], + artifact_id: 'evil_string', + }]; + const result = sanitizeSnapshots(input); + expect(result[0].artifact_id).toBeNull(); + }); + + test('rejects zero and negative artifact_id', () => { + const input = [ + { snapshot: 'snap_a', added: 0, removed: 0, modified: 0, metric_names: [], artifact_id: 0 }, + { snapshot: 'snap_b', added: 0, removed: 0, modified: 0, metric_names: [], artifact_id: -1 }, + ]; + const result = sanitizeSnapshots(input); + expect(result[0].artifact_id).toBeNull(); + expect(result[1].artifact_id).toBeNull(); + }); + + test('sets artifact_id to null when absent', () => { + const input = [{ snapshot: 'test_snapshot', added: 1, removed: 0, modified: 0, metric_names: [] }]; + const result = sanitizeSnapshots(input); + expect(result[0].artifact_id).toBeNull(); + }); }); // ── computeMetrics ──────────────────────────────────────────────────────────── @@ -290,7 +328,7 @@ describe('formatMetricsDetail', () => { snapshot: 'cassandra_v2', added: 1, removed: 0, modified: 0, metric_names: ['http_server_duration'], - }], 'https://github.com/org/repo/actions/runs/123'); + }], { ciRunUrl: 'https://github.com/org/repo/actions/runs/123' }); expect(detail).toContain('[CI run](https://github.com/org/repo/actions/runs/123)'); expect(detail).toContain('Compare metrics and generate summary'); }); @@ -303,6 +341,36 @@ describe('formatMetricsDetail', () => { }]); expect(detail).not.toContain('[CI run]'); }); + + test('renders per-snapshot artifact download link when artifactUrlPrefix and artifact_id provided', () => { + const detail = formatMetricsDetail([{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: 6359406281, + }], { artifactUrlPrefix: 'https://github.com/org/repo/actions/runs/123/artifacts' }); + expect(detail).toContain('[⬇️ download diff](https://github.com/org/repo/actions/runs/123/artifacts/6359406281)'); + }); + + test('omits artifact download link when artifact_id is null', () => { + const detail = formatMetricsDetail([{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: null, + }], { artifactUrlPrefix: 'https://github.com/org/repo/actions/runs/123/artifacts' }); + expect(detail).not.toContain('download diff'); + }); + + test('omits artifact download link when artifactUrlPrefix not provided', () => { + const detail = formatMetricsDetail([{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: 6359406281, + }]); + expect(detail).not.toContain('download diff'); + }); }); // ── computeCoverage ─────────────────────────────────────────────────────────── @@ -435,12 +503,25 @@ describe('buildCommentBody', () => { snapshot: 'cassandra_v2', added: 1, removed: 0, modified: 0, metric_names: ['http_server_duration'], + artifact_id: null, }]; const ciRunUrl = 'https://github.com/org/repo/actions/runs/999'; const body = buildCommentBody('❌ 1 metric change(s) detected', coverageText, footer, { metricsSnapshots: snapshots, ciRunUrl }); expect(body).toContain(`[CI run](${ciRunUrl})`); expect(body).toContain('Compare metrics and generate summary'); }); + + test('includes per-snapshot artifact download link when artifactUrlPrefix and artifact_id provided', () => { + const snapshots = [{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: 6359406281, + }]; + const artifactUrlPrefix = 'https://github.com/org/repo/actions/runs/999/artifacts'; + const body = buildCommentBody('❌ 1 metric change(s) detected', coverageText, footer, { metricsSnapshots: snapshots, artifactUrlPrefix }); + expect(body).toContain(`[⬇️ download diff](${artifactUrlPrefix}/6359406281)`); + }); }); // ── postCheckRun ────────────────────────────────────────────────────────────── diff --git a/.github/workflows/ci-summary-report-publish.yml b/.github/workflows/ci-summary-report-publish.yml index 88ad0559e48..cc532d2e821 100644 --- a/.github/workflows/ci-summary-report-publish.yml +++ b/.github/workflows/ci-summary-report-publish.yml @@ -116,12 +116,13 @@ jobs: await handler({ github, core, fs: require('fs'), inputs: { - owner: context.repo.owner, - repo: context.repo.repo, - headSha: '${{ steps.source-run.outputs.head_sha }}', - prNumber: '${{ steps.source-run.outputs.pr_number || steps.pr.outputs.pr_number }}', - ciRunUrl: '${{ steps.source-run.outputs.source_run_url }}', - publishUrl: '${{ steps.source-run.outputs.summary_run_url }}', + owner: context.repo.owner, + repo: context.repo.repo, + headSha: '${{ steps.source-run.outputs.head_sha }}', + prNumber: '${{ steps.source-run.outputs.pr_number || steps.pr.outputs.pr_number }}', + ciRunUrl: '${{ steps.source-run.outputs.source_run_url }}', + publishUrl: '${{ steps.source-run.outputs.summary_run_url }}', + sourceRunId: '${{ steps.source-run.outputs.run_id }}', }, }); env: diff --git a/.github/workflows/ci-summary-report.yml b/.github/workflows/ci-summary-report.yml index 1d26cb75d3f..b83bfbf1e66 100644 --- a/.github/workflows/ci-summary-report.yml +++ b/.github/workflows/ci-summary-report.yml @@ -40,6 +40,8 @@ jobs: - name: Compare metrics and generate summary id: compare-metrics + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} shell: bash run: bash ./scripts/e2e/metrics_summary.sh diff --git a/scripts/e2e/compare_metrics.py b/scripts/e2e/compare_metrics.py index 70873905a91..8e3d454715c 100644 --- a/scripts/e2e/compare_metrics.py +++ b/scripts/e2e/compare_metrics.py @@ -111,7 +111,7 @@ def parse_metrics(content): return metrics,metrics_exclusion_count -def generate_diff(file1_content, file2_content): +def generate_diff(current_content, baseline_content): """Compare two Prometheus metrics snapshots and return a unified diff of metric names. The input files are raw Prometheus text exposition format, scraped directly from @@ -133,25 +133,25 @@ def generate_diff(file1_content, file2_content): snapshots produce the same set of strings the diff is empty and this function returns ''. When there are differences, the return value is a unified diff using the standard convention: - - lines = present in file2 (baseline) but absent from file1 (current) → regression/removed - + lines = present in file1 (current) but absent from file2 (baseline) → newly added + - lines = present in baseline but absent from current → regression/removed + + lines = present in current but absent from baseline → newly added followed by optional comment lines reporting how many metrics were excluded, e.g.: - # Metrics excluded from A: 3 - # Metrics excluded from B: 5 + # Metrics excluded from current: 3 + # Metrics excluded from baseline: 5 These comment lines (prefixed with `# `) are appended only when the diff is non-empty; they are informational context, not metric differences themselves. """ - if isinstance(file1_content, list): - file1_content = ''.join(file1_content) - if isinstance(file2_content, list): - file2_content = ''.join(file2_content) + if isinstance(current_content, list): + current_content = ''.join(current_content) + if isinstance(baseline_content, list): + baseline_content = ''.join(baseline_content) - metrics1,excluded_metrics_count1 = parse_metrics(file1_content) - metrics2,excluded_metrics_count2 = parse_metrics(file2_content) + current_metrics, excluded_count_current = parse_metrics(current_content) + baseline_metrics, excluded_count_baseline = parse_metrics(baseline_content) # unified_diff(baseline, current): - = in baseline but not current (removed/regression), # + = in current but not baseline (newly added). - diff = list(unified_diff(metrics2, metrics1, lineterm='', n=0)) + diff = list(unified_diff(baseline_metrics, current_metrics, lineterm='', n=0)) # Exclusion counts are informational context appended to the diff output. # They must not be written when the diff itself is empty: two snapshots with @@ -160,12 +160,12 @@ def generate_diff(file1_content, file2_content): if len(diff) == 0: return '' - total_excluded = excluded_metrics_count1 + excluded_metrics_count2 - + total_excluded = excluded_count_current + excluded_count_baseline + exclusion_lines = '' if total_excluded > 0: - exclusion_lines = f'\n# Metrics excluded from A: {excluded_metrics_count1}\n# Metrics excluded from B: {excluded_metrics_count2}' - + exclusion_lines = f'\n# Metrics excluded from current: {excluded_count_current}\n# Metrics excluded from baseline: {excluded_count_baseline}' + return '\n'.join(diff) + exclusion_lines def write_diff_file(diff_lines, output_path): @@ -177,20 +177,20 @@ def write_diff_file(diff_lines, output_path): def main(): parser = argparse.ArgumentParser(description='Generate diff between two Jaeger metric files') - parser.add_argument('--file1', help='Path to first metric file') - parser.add_argument('--file2', help='Path to second metric file') + parser.add_argument('--current', help='Path to the current metric file (e.g. from the PR)') + parser.add_argument('--baseline', help='Path to the baseline metric file (e.g. from main branch)') parser.add_argument('--output', '-o', default='metrics_diff.txt', help='Output diff file path (default: metrics_diff.txt)') - + args = parser.parse_args() - + # Read input files - file1_lines = read_metric_file(args.file1) - file2_lines = read_metric_file(args.file2) - + current_lines = read_metric_file(args.current) + baseline_lines = read_metric_file(args.baseline) + # Generate diff - diff_lines = generate_diff(file1_lines, file2_lines) - + diff_lines = generate_diff(current_lines, baseline_lines) + # Check if there are any differences if diff_lines: print("differences found between the metric files.") diff --git a/scripts/e2e/compare_metrics_test.py b/scripts/e2e/compare_metrics_test.py index 7d64739101f..9750808932c 100644 --- a/scripts/e2e/compare_metrics_test.py +++ b/scripts/e2e/compare_metrics_test.py @@ -105,8 +105,8 @@ def test_regression_with_exclusions_includes_exclusion_summary(self): # current=excluded only, baseline=A+excluded → A is missing (regression) result = generate_diff(_METRIC_EXCLUDED_5XX, _METRIC_A + _METRIC_EXCLUDED_5XX) self.assertNotEqual(result, '') - self.assertIn('# Metrics excluded from A:', result) - self.assertIn('# Metrics excluded from B:', result) + self.assertIn('# Metrics excluded from current:', result) + self.assertIn('# Metrics excluded from baseline:', result) def test_no_exclusions_means_no_exclusion_summary(self): """When there are no excluded metrics, the exclusion summary is omitted.""" diff --git a/scripts/e2e/metrics_summary.py b/scripts/e2e/metrics_summary.py index 0ba2fe11119..6e4ed9cd655 100644 --- a/scripts/e2e/metrics_summary.py +++ b/scripts/e2e/metrics_summary.py @@ -28,7 +28,7 @@ def parse_diff_file(diff_path): original_line = line.rstrip('\n') stripped = original_line.strip() - if stripped.startswith('# Metrics excluded from A: ') or stripped.startswith('# Metrics excluded from B: '): + if stripped.startswith('# Metrics excluded from current: ') or stripped.startswith('# Metrics excluded from baseline: '): count_str = stripped.split(': ')[1] exclusion_count += int(count_str) continue diff --git a/scripts/e2e/metrics_summary.sh b/scripts/e2e/metrics_summary.sh index 70439e7d05a..2edc95e59e0 100644 --- a/scripts/e2e/metrics_summary.sh +++ b/scripts/e2e/metrics_summary.sh @@ -52,6 +52,27 @@ find "$METRICS_DIR" -type f -name "diff_*.txt" | while read -r file; do done echo "::endgroup::" +# Query artifact IDs for all diff_metrics_snapshot_* artifacts in this run. +# These IDs are used by the publish workflow to render direct download links +# in the PR comment. The query is optional: if it fails (e.g. when running +# locally without a token) we write an empty map and the publish workflow +# degrades gracefully (no download links, but the rest of the summary still works). +echo "::group::Querying diff artifact IDs" +if [ -n "${GITHUB_RUN_ID:-}" ] && [ -n "${GITHUB_REPOSITORY:-}" ]; then + if gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/artifacts?per_page=100" \ + --jq '[.artifacts[] | select(.name | startswith("diff_metrics_snapshot_")) | {key: .name, value: .id}] | from_entries' \ + > "$METRICS_DIR/artifact_ids.json" 2>/dev/null; then + echo "Artifact IDs written to $METRICS_DIR/artifact_ids.json" + else + echo "Could not query artifact IDs (OK for local runs without a token)" + echo '{}' > "$METRICS_DIR/artifact_ids.json" + fi +else + echo "GITHUB_RUN_ID or GITHUB_REPOSITORY not set; skipping artifact ID query" + echo '{}' > "$METRICS_DIR/artifact_ids.json" +fi +echo "::endgroup::" + # Process all non-empty diff files. # Empty diff files are stubs uploaded by verify-metrics-snapshot when there is no # baseline or when compare_metrics.py found no differences (it only writes to the @@ -115,13 +136,24 @@ else fi # Merge per-snapshot JSON files into a single metrics_snapshots.json. -# Each entry gets a "snapshot" field with the snapshot name. +# Each entry gets a "snapshot" field with the snapshot name and an optional +# "artifact_id" field (integer) for the corresponding diff artifact download link. # Capped at 50 entries to match the publish workflow's MAX_SNAPSHOTS limit. # The trusted publish workflow validates this data before rendering. python3 - "$METRICS_DIR" "${json_files[@]}" <<'PYEOF' import json, os, sys metrics_dir = sys.argv[1] MAX_SNAPSHOTS = 50 + +# Load artifact IDs if available (produced by the gh api query above). +# artifact_ids maps artifact name → artifact ID (integer). +artifact_ids = {} +try: + with open(os.path.join(metrics_dir, 'artifact_ids.json')) as f: + artifact_ids = json.load(f) +except (FileNotFoundError, json.JSONDecodeError): + pass + snapshots = [] for path in sys.argv[2:]: if len(snapshots) >= MAX_SNAPSHOTS: @@ -133,6 +165,12 @@ for path in sys.argv[2:]: basename = os.path.basename(path) name = basename.removeprefix('changes_').removesuffix('.json') data['snapshot'] = name + # Look up the artifact ID for this snapshot's diff artifact so the + # publish workflow can render a direct download link. + diff_artifact_name = f"diff_{name}" + artifact_id = artifact_ids.get(diff_artifact_name) + if isinstance(artifact_id, int): + data['artifact_id'] = artifact_id snapshots.append(data) except Exception as e: print(f"Warning: could not read {path}: {e}", file=sys.stderr) diff --git a/scripts/e2e/metrics_summary_test.py b/scripts/e2e/metrics_summary_test.py new file mode 100644 index 00000000000..cd8d54a3459 --- /dev/null +++ b/scripts/e2e/metrics_summary_test.py @@ -0,0 +1,288 @@ +# Copyright (c) 2025 The Jaeger Authors. +# SPDX-License-Identifier: Apache-2.0 + +import os +import tempfile +import unittest +from metrics_summary import ( + extract_metric_name, + get_raw_diff_sample, + parse_diff_file, + generate_diff_summary, + generate_structured_json, +) + + +# A minimal unified diff that exercises added/removed/modified cases and exclusion counts. +_DIFF_WITH_ALL_CATEGORIES = """\ +--- ++++ +@@ -1,3 +1,3 @@ +-baseline_only{job="a"} +-http_server_duration{le="+Inf"} ++current_only{job="b"} ++http_server_duration{http_route="/status",le="+Inf"} +# Metrics excluded from current: 2 +# Metrics excluded from baseline: 3 +""" + +# A diff with only added metrics (present in current, absent from baseline). +_DIFF_ADDED_ONLY = """\ +--- ++++ +@@ -1 +2 @@ ++new_metric{job="a"} +""" + +# A diff with only removed metrics (present in baseline, absent from current). +_DIFF_REMOVED_ONLY = """\ +--- ++++ +@@ -2 +1 @@ +-old_metric{job="b"} +""" + + +def _write_tmp(content): + """Write content to a temp file and return its path.""" + f = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) + f.write(content) + f.close() + return f.name + + +class TestExtractMetricName(unittest.TestCase): + """Tests for extract_metric_name().""" + + def test_extracts_name_before_braces(self): + self.assertEqual(extract_metric_name('http_server_duration{le="+Inf"}'), 'http_server_duration') + + def test_returns_bare_name_when_no_braces(self): + self.assertEqual(extract_metric_name('my_metric'), 'my_metric') + + def test_strips_whitespace(self): + self.assertEqual(extract_metric_name(' my_metric '), 'my_metric') + + +class TestGetRawDiffSample(unittest.TestCase): + """Tests for get_raw_diff_sample().""" + + def test_empty_input_returns_empty(self): + self.assertEqual(get_raw_diff_sample([]), []) + + def test_pure_removals_are_truncated(self): + lines = [f'-metric_{i}' for i in range(10)] + result = get_raw_diff_sample(lines, max_lines=4) + self.assertEqual(result[:4], lines[:4]) + self.assertEqual(result[-1], '...') + + def test_pure_additions_are_truncated(self): + lines = [f'+metric_{i}' for i in range(10)] + result = get_raw_diff_sample(lines, max_lines=4) + self.assertEqual(result[:4], lines[:4]) + self.assertEqual(result[-1], '...') + + def test_interleaves_removed_and_added_lines(self): + """Modified metrics with both - and + lines are interleaved.""" + removed = [f'-metric{{a="{i}"}}' for i in range(4)] + added = [f'+metric{{a="{i}",b="x"}}' for i in range(4)] + result = get_raw_diff_sample(removed + added, max_lines=4) + # Pairs: (-, +, -, +, ...) + self.assertTrue(result[0].startswith('-')) + self.assertTrue(result[1].startswith('+')) + self.assertTrue(result[2].startswith('-')) + self.assertTrue(result[3].startswith('+')) + + def test_interleaved_truncation_adds_ellipsis(self): + removed = [f'-metric{{a="{i}"}}' for i in range(10)] + added = [f'+metric{{a="{i}",b="x"}}' for i in range(10)] + result = get_raw_diff_sample(removed + added, max_lines=4) + self.assertIn('...', result) + # Should only show max_lines//2 = 2 pairs + non_ellipsis = [l for l in result if l != '...'] + self.assertEqual(len(non_ellipsis), 4) # 2 pairs × 2 lines each + + def test_no_ellipsis_when_within_limit(self): + lines = ['-m1', '+m1_v2'] + result = get_raw_diff_sample(lines, max_lines=8) + self.assertNotIn('...', result) + + +class TestParseDiffFile(unittest.TestCase): + """Tests for parse_diff_file().""" + + def test_parses_added_removed_and_modified(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + + # baseline_only is in baseline but not current → removed + self.assertIn('baseline_only', changes['removed']) + # current_only is in current but not baseline → added + self.assertIn('current_only', changes['added']) + # http_server_duration appears in both → modified + self.assertIn('http_server_duration', changes['modified']) + # Not in added or removed after reclassification + self.assertNotIn('http_server_duration', changes['added']) + self.assertNotIn('http_server_duration', changes['removed']) + + def test_accumulates_exclusion_counts(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + _, _, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + self.assertEqual(excl_count, 5) # 2 + 3 + + def test_zero_exclusion_count_when_no_exclusion_lines(self): + path = _write_tmp(_DIFF_ADDED_ONLY) + try: + _, _, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + self.assertEqual(excl_count, 0) + + def test_raw_diff_sections_populated(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + _, raw_sections, _ = parse_diff_file(path) + finally: + os.unlink(path) + self.assertIn('http_server_duration', raw_sections) + # The section should have at least one - line and one + line + lines = raw_sections['http_server_duration'] + self.assertTrue(any(l.startswith('-') for l in lines)) + self.assertTrue(any(l.startswith('+') for l in lines)) + + def test_added_only_diff(self): + path = _write_tmp(_DIFF_ADDED_ONLY) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + self.assertIn('new_metric', changes['added']) + self.assertEqual(len(changes['removed']), 0) + self.assertEqual(len(changes['modified']), 0) + + def test_removed_only_diff(self): + path = _write_tmp(_DIFF_REMOVED_ONLY) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + self.assertIn('old_metric', changes['removed']) + self.assertEqual(len(changes['added']), 0) + self.assertEqual(len(changes['modified']), 0) + + +class TestGenerateDiffSummary(unittest.TestCase): + """Tests for generate_diff_summary().""" + + def test_total_changes_header_present(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('**Total Changes:**', summary) + + def test_added_section_rendered(self): + path = _write_tmp(_DIFF_ADDED_ONLY) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('Added Metrics', summary) + self.assertIn('new_metric', summary) + + def test_removed_section_rendered(self): + path = _write_tmp(_DIFF_REMOVED_ONLY) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('Removed Metrics', summary) + self.assertIn('old_metric', summary) + + def test_modified_section_rendered(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('Modified Metrics', summary) + self.assertIn('http_server_duration', summary) + + def test_diff_sample_block_present_for_changed_metric(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('```diff', summary) + + def test_exclusion_count_shown(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('Excluded: 5', summary) + + +class TestGenerateStructuredJson(unittest.TestCase): + """Tests for generate_structured_json().""" + + def test_counts_are_correct(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + data = generate_structured_json(changes) + self.assertEqual(data['added'], 1) # current_only + self.assertEqual(data['removed'], 1) # baseline_only + self.assertEqual(data['modified'], 1) # http_server_duration + + def test_metric_names_list_sorted(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + data = generate_structured_json(changes) + names = data['metric_names'] + self.assertEqual(names, sorted(names)) + + def test_metric_names_deduped(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + data = generate_structured_json(changes) + self.assertEqual(len(data['metric_names']), len(set(data['metric_names']))) + + def test_added_only_produces_correct_output(self): + path = _write_tmp(_DIFF_ADDED_ONLY) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + data = generate_structured_json(changes) + self.assertEqual(data['added'], 1) + self.assertEqual(data['removed'], 0) + self.assertEqual(data['modified'], 0) + self.assertIn('new_metric', data['metric_names']) + + +if __name__ == '__main__': + unittest.main() From 2a74cfcf712e7fce9603176552c91123713c4d21 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 17:34:39 +0000 Subject: [PATCH 4/4] fix: use baseline-first ordering consistently throughout diff pipeline - compare_metrics.py: rename generate_diff signature to (baseline_content, current_content) matching diff command conventions; parse baseline before current; emit '# Metrics excluded from baseline:' before '# Metrics excluded from current:' in output; fix main() call order - compare_metrics_test.py: swap all generate_diff() call arguments to (baseline, current) order; update comments to say baseline first - metrics_summary.py: check '# Metrics excluded from baseline:' before '# Metrics excluded from current:' in condition - metrics_summary_test.py: update _DIFF_WITH_ALL_CATEGORIES fixture to list baseline exclusion count before current - metrics_summary.sh: expand artifact ID query section with detailed troubleshooting logs (show API URL, capture stderr, log jq errors, print artifact count and full map on success) Agent-Logs-Url: https://github.com/jaegertracing/jaeger/sessions/04585a94-2aa5-4d3e-8859-8a15d5ae435a Co-authored-by: yurishkuro <3523016+yurishkuro@users.noreply.github.com> --- scripts/e2e/compare_metrics.py | 20 +++++++------- scripts/e2e/compare_metrics_test.py | 24 ++++++++-------- scripts/e2e/metrics_summary.py | 2 +- scripts/e2e/metrics_summary.sh | 43 ++++++++++++++++++++++------- scripts/e2e/metrics_summary_test.py | 2 +- 5 files changed, 57 insertions(+), 34 deletions(-) diff --git a/scripts/e2e/compare_metrics.py b/scripts/e2e/compare_metrics.py index 8e3d454715c..0633f0b63d2 100644 --- a/scripts/e2e/compare_metrics.py +++ b/scripts/e2e/compare_metrics.py @@ -111,7 +111,7 @@ def parse_metrics(content): return metrics,metrics_exclusion_count -def generate_diff(current_content, baseline_content): +def generate_diff(baseline_content, current_content): """Compare two Prometheus metrics snapshots and return a unified diff of metric names. The input files are raw Prometheus text exposition format, scraped directly from @@ -136,18 +136,18 @@ def generate_diff(current_content, baseline_content): - lines = present in baseline but absent from current → regression/removed + lines = present in current but absent from baseline → newly added followed by optional comment lines reporting how many metrics were excluded, e.g.: - # Metrics excluded from current: 3 - # Metrics excluded from baseline: 5 + # Metrics excluded from baseline: 3 + # Metrics excluded from current: 5 These comment lines (prefixed with `# `) are appended only when the diff is non-empty; they are informational context, not metric differences themselves. """ - if isinstance(current_content, list): - current_content = ''.join(current_content) if isinstance(baseline_content, list): baseline_content = ''.join(baseline_content) + if isinstance(current_content, list): + current_content = ''.join(current_content) - current_metrics, excluded_count_current = parse_metrics(current_content) baseline_metrics, excluded_count_baseline = parse_metrics(baseline_content) + current_metrics, excluded_count_current = parse_metrics(current_content) # unified_diff(baseline, current): - = in baseline but not current (removed/regression), # + = in current but not baseline (newly added). @@ -160,11 +160,11 @@ def generate_diff(current_content, baseline_content): if len(diff) == 0: return '' - total_excluded = excluded_count_current + excluded_count_baseline + total_excluded = excluded_count_baseline + excluded_count_current exclusion_lines = '' if total_excluded > 0: - exclusion_lines = f'\n# Metrics excluded from current: {excluded_count_current}\n# Metrics excluded from baseline: {excluded_count_baseline}' + exclusion_lines = f'\n# Metrics excluded from baseline: {excluded_count_baseline}\n# Metrics excluded from current: {excluded_count_current}' return '\n'.join(diff) + exclusion_lines @@ -185,11 +185,11 @@ def main(): args = parser.parse_args() # Read input files - current_lines = read_metric_file(args.current) baseline_lines = read_metric_file(args.baseline) + current_lines = read_metric_file(args.current) # Generate diff - diff_lines = generate_diff(current_lines, baseline_lines) + diff_lines = generate_diff(baseline_lines, current_lines) # Check if there are any differences if diff_lines: diff --git a/scripts/e2e/compare_metrics_test.py b/scripts/e2e/compare_metrics_test.py index 9750808932c..fd6e9ef37a6 100644 --- a/scripts/e2e/compare_metrics_test.py +++ b/scripts/e2e/compare_metrics_test.py @@ -54,8 +54,8 @@ def test_empty_snapshots_returns_empty(self): def test_regression_detected(self): """Metric present in baseline but absent from current snapshot → diff is non-empty.""" - # current=A only, baseline=A+B → B is missing from current (regression/removed) - result = generate_diff(_METRIC_A, _METRIC_A + _METRIC_B) + # baseline=A+B, current=A only → B is missing from current (regression/removed) + result = generate_diff(_METRIC_A + _METRIC_B, _METRIC_A) self.assertNotEqual(result, '', 'Expected a non-empty diff for a regression') # '-' line = in baseline but not in current (regression) self.assertIn('-counter_b', result) @@ -68,8 +68,8 @@ def test_new_metric_in_current_snapshot_produces_diff(self): Silently ignoring new metrics would mask intermittent behaviour where a metric alternates between appearing and disappearing across runs. """ - # current=A+B, baseline=A only → B is new in current (newly added) - result = generate_diff(_METRIC_A + _METRIC_B, _METRIC_A) + # baseline=A only, current=A+B → B is new in current (newly added) + result = generate_diff(_METRIC_A, _METRIC_A + _METRIC_B) self.assertNotEqual(result, '', 'New metrics in current snapshot should produce a diff') # '+' line = in current but not in baseline (newly added) self.assertIn('+counter_b', result) @@ -82,8 +82,8 @@ def test_exclusion_count_difference_does_not_produce_diff(self): other), the exclusion-count lines are informational metadata and must not make the diff non-empty on their own. """ - # current has metric_a + one 5xx (excluded), baseline has metric_a + zero 5xx - result = generate_diff(_METRIC_A_AND_EXCLUDED, _METRIC_A) + # baseline has metric_a + zero 5xx, current has metric_a + one 5xx (excluded) + result = generate_diff(_METRIC_A, _METRIC_A_AND_EXCLUDED) self.assertEqual( result, '', @@ -92,8 +92,8 @@ def test_exclusion_count_difference_does_not_produce_diff(self): def test_mixed_regression_and_new_metric_returns_diff(self): """When there is both a regression AND a new metric, the diff is non-empty.""" - # current=B only, baseline=A only → A is missing (regression/removed), B is new (added) - result = generate_diff(_METRIC_B, _METRIC_A) + # baseline=A only, current=B only → A is missing (regression/removed), B is new (added) + result = generate_diff(_METRIC_A, _METRIC_B) self.assertNotEqual(result, '') # '-' line = in baseline but not in current (regression) self.assertIn('-counter_a', result) @@ -102,15 +102,15 @@ def test_mixed_regression_and_new_metric_returns_diff(self): def test_regression_with_exclusions_includes_exclusion_summary(self): """When there is a regression and excluded metrics, the output includes counts.""" - # current=excluded only, baseline=A+excluded → A is missing (regression) - result = generate_diff(_METRIC_EXCLUDED_5XX, _METRIC_A + _METRIC_EXCLUDED_5XX) + # baseline=A+excluded, current=excluded only → A is missing (regression) + result = generate_diff(_METRIC_A + _METRIC_EXCLUDED_5XX, _METRIC_EXCLUDED_5XX) self.assertNotEqual(result, '') - self.assertIn('# Metrics excluded from current:', result) self.assertIn('# Metrics excluded from baseline:', result) + self.assertIn('# Metrics excluded from current:', result) def test_no_exclusions_means_no_exclusion_summary(self): """When there are no excluded metrics, the exclusion summary is omitted.""" - result = generate_diff(_METRIC_A, _METRIC_A + _METRIC_B) + result = generate_diff(_METRIC_A + _METRIC_B, _METRIC_A) self.assertNotIn('Metrics excluded from', result) diff --git a/scripts/e2e/metrics_summary.py b/scripts/e2e/metrics_summary.py index 6e4ed9cd655..315fed8ca8d 100644 --- a/scripts/e2e/metrics_summary.py +++ b/scripts/e2e/metrics_summary.py @@ -28,7 +28,7 @@ def parse_diff_file(diff_path): original_line = line.rstrip('\n') stripped = original_line.strip() - if stripped.startswith('# Metrics excluded from current: ') or stripped.startswith('# Metrics excluded from baseline: '): + if stripped.startswith('# Metrics excluded from baseline: ') or stripped.startswith('# Metrics excluded from current: '): count_str = stripped.split(': ')[1] exclusion_count += int(count_str) continue diff --git a/scripts/e2e/metrics_summary.sh b/scripts/e2e/metrics_summary.sh index 2edc95e59e0..cc7d71777c4 100644 --- a/scripts/e2e/metrics_summary.sh +++ b/scripts/e2e/metrics_summary.sh @@ -58,18 +58,41 @@ echo "::endgroup::" # locally without a token) we write an empty map and the publish workflow # degrades gracefully (no download links, but the rest of the summary still works). echo "::group::Querying diff artifact IDs" -if [ -n "${GITHUB_RUN_ID:-}" ] && [ -n "${GITHUB_REPOSITORY:-}" ]; then - if gh api "repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/artifacts?per_page=100" \ - --jq '[.artifacts[] | select(.name | startswith("diff_metrics_snapshot_")) | {key: .name, value: .id}] | from_entries' \ - > "$METRICS_DIR/artifact_ids.json" 2>/dev/null; then - echo "Artifact IDs written to $METRICS_DIR/artifact_ids.json" - else - echo "Could not query artifact IDs (OK for local runs without a token)" - echo '{}' > "$METRICS_DIR/artifact_ids.json" - fi -else +if [ -z "${GITHUB_RUN_ID:-}" ] || [ -z "${GITHUB_REPOSITORY:-}" ]; then echo "GITHUB_RUN_ID or GITHUB_REPOSITORY not set; skipping artifact ID query" echo '{}' > "$METRICS_DIR/artifact_ids.json" +else + echo "Querying artifacts for run ${GITHUB_RUN_ID} in ${GITHUB_REPOSITORY}" + api_url="repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/artifacts?per_page=100" + echo "API URL: https://api.github.com/${api_url}" + + api_output=$(gh api "$api_url" 2>&1) + api_exit=$? + if [ $api_exit -ne 0 ]; then + echo "::warning::gh api call failed (exit $api_exit) — no artifact download links will be rendered" + echo "gh api error output:" + echo "$api_output" + echo '{}' > "$METRICS_DIR/artifact_ids.json" + else + echo "API call succeeded; filtering diff_metrics_snapshot_* artifacts" + jq_output=$(echo "$api_output" | \ + jq '[.artifacts[] | select(.name | startswith("diff_metrics_snapshot_")) | {key: .name, value: .id}] | from_entries' 2>&1) + jq_exit=$? + if [ $jq_exit -ne 0 ]; then + echo "::warning::jq processing failed (exit $jq_exit) — no artifact download links will be rendered" + echo "jq error output:" + echo "$jq_output" + echo '{}' > "$METRICS_DIR/artifact_ids.json" + else + echo "$jq_output" > "$METRICS_DIR/artifact_ids.json" + artifact_count=$(echo "$jq_output" | jq 'length') + echo "Artifact IDs written to $METRICS_DIR/artifact_ids.json (${artifact_count} diff artifact(s) found)" + if [ "${artifact_count}" -gt 0 ]; then + echo "Artifact ID map:" + echo "$jq_output" | jq '.' + fi + fi + fi fi echo "::endgroup::" diff --git a/scripts/e2e/metrics_summary_test.py b/scripts/e2e/metrics_summary_test.py index cd8d54a3459..79d5e2dfdec 100644 --- a/scripts/e2e/metrics_summary_test.py +++ b/scripts/e2e/metrics_summary_test.py @@ -22,8 +22,8 @@ -http_server_duration{le="+Inf"} +current_only{job="b"} +http_server_duration{http_route="/status",le="+Inf"} -# Metrics excluded from current: 2 # Metrics excluded from baseline: 3 +# Metrics excluded from current: 2 """ # A diff with only added metrics (present in current, absent from baseline).