diff --git a/.github/actions/verify-metrics-snapshot/action.yaml b/.github/actions/verify-metrics-snapshot/action.yaml
index 326a2861674..e0612a3242d 100644
--- a/.github/actions/verify-metrics-snapshot/action.yaml
+++ b/.github/actions/verify-metrics-snapshot/action.yaml
@@ -94,7 +94,7 @@ runs:
       shell: bash
       run: |
         python3 -m pip install prometheus-client
-        if python3 ./scripts/e2e/compare_metrics.py --file1 ./.metrics/${{ inputs.snapshot }}.txt --file2 ./.metrics/baseline_${{ inputs.snapshot }}.txt --output ./.metrics/diff_${{ inputs.snapshot }}.txt; then
+        if python3 ./scripts/e2e/compare_metrics.py --current ./.metrics/${{ inputs.snapshot }}.txt --baseline ./.metrics/baseline_${{ inputs.snapshot }}.txt --output ./.metrics/diff_${{ inputs.snapshot }}.txt; then
           echo "No differences found in metrics"
         else
           echo "🛑 Differences found in metrics"
diff --git a/.github/scripts/ci-summary-report-publish.js b/.github/scripts/ci-summary-report-publish.js
index 31ae50865fc..06c20e92de9 100644
--- a/.github/scripts/ci-summary-report-publish.js
+++ b/.github/scripts/ci-summary-report-publish.js
@@ -97,7 +97,12 @@ function sanitizeSnapshots(raw) {
         if (clean) names.push(clean);
       }
     }
-    result.push({ snapshot, added, removed, modified, metric_names: names });
+    // Validate artifact_id (optional, used for the diff artifact download link).
+    const artifactId = safeNum(entry.artifact_id);
+    // artifact_id must be a positive integer (GitHub artifact IDs are always > 0)
+    const sanitizedArtifactId = (artifactId !== null && Number.isInteger(artifactId) && artifactId > 0)
+      ? artifactId : null;
+    result.push({ snapshot, added, removed, modified, metric_names: names, artifact_id: sanitizedArtifactId });
   }
   return result.length > 0 ? result : null;
 }
@@ -158,9 +163,14 @@ function computeCoverage(s) {
  * All text is built from trusted templates; metric names have been validated
  * through sanitizeMetricName() and are rendered in backtick-code spans.
  * @param {Array|null} snapshots - Sanitized snapshots from computeMetrics
+ * @param {string} [ciRunUrl] - URL to the CI run for linking to step logs
+ * @param {string} [artifactUrlPrefix] - Base URL for artifact downloads:
+ *   `https://github.com/{owner}/{repo}/actions/runs/{runId}/artifacts`.
+ *   When provided, each snapshot with a valid artifact_id gets a direct
+ *   download link appended to its header line.
  * @returns {string} - Markdown detail block, or empty string if no data
  */
-function formatMetricsDetail(snapshots) {
+function formatMetricsDetail(snapshots, { ciRunUrl, artifactUrlPrefix } = {}) {
   if (!snapshots || snapshots.length === 0) return '';
 
   const lines = [
@@ -170,8 +180,18 @@
     '',
   ];
 
+  if (ciRunUrl) {
+    lines.push(`_For label-level diff details, open the [CI run](${ciRunUrl}) and expand the "Compare metrics and generate summary" step logs._`);
+    lines.push('');
+  }
+
   for (const snap of snapshots) {
-    lines.push(`**${snap.snapshot}**`);
+    // Build the snapshot header, optionally with a direct diff artifact download link.
+ let snapHeader = `**${snap.snapshot}**`; + if (artifactUrlPrefix && snap.artifact_id !== null && snap.artifact_id !== undefined) { + snapHeader += ` — [⬇️ download diff](${artifactUrlPrefix}/${snap.artifact_id})`; + } + lines.push(snapHeader); const parts = []; if (snap.added !== null && snap.added > 0) parts.push(`${snap.added} added`); if (snap.removed !== null && snap.removed > 0) parts.push(`${snap.removed} removed`); @@ -201,9 +221,11 @@ function formatMetricsDetail(snapshots) { * @param {string} footer - links + timestamp line * @param {object} [opts] * @param {Array|null} [opts.metricsSnapshots] - sanitized snapshot data for detail rendering + * @param {string} [opts.ciRunUrl] - URL to the CI run, passed through to formatMetricsDetail + * @param {string} [opts.artifactUrlPrefix] - Base URL for artifact downloads, passed through * @returns {string} */ -function buildCommentBody(metricsText, coverageText, footer, { metricsSnapshots } = {}) { +function buildCommentBody(metricsText, coverageText, footer, { metricsSnapshots, ciRunUrl, artifactUrlPrefix } = {}) { const parts = [ COMMENT_TAG, '## CI Summary Report', @@ -212,7 +234,7 @@ function buildCommentBody(metricsText, coverageText, footer, { metricsSnapshots metricsText, ]; - const detail = formatMetricsDetail(metricsSnapshots); + const detail = formatMetricsDetail(metricsSnapshots, { ciRunUrl, artifactUrlPrefix }); if (detail) { parts.push(detail); } @@ -301,6 +323,8 @@ async function postOrUpdateComment(github, owner, repo, prNumber, body, core, { * @param {string} opts.inputs.prNumber - raw string from step output * @param {string} opts.inputs.ciRunUrl * @param {string} opts.inputs.publishUrl + * @param {string} [opts.inputs.sourceRunId] - numeric run ID of the CI Orchestrator run; + * used to construct direct artifact download URLs (e.g. .../artifacts/{id}) */ async function handler({ github, core, fs, inputs }) { const { owner, repo, headSha, ciRunUrl, publishUrl } = inputs; @@ -310,6 +334,13 @@ async function handler({ github, core, fs, inputs }) { const ts = new Date().toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC'); const footer = `${links}\n_${ts}_`; + // Construct the artifact URL prefix for per-snapshot diff download links. + // All parts come from trusted sources: owner/repo from context, runId validated as integer. + const sourceRunId = parseInt(inputs.sourceRunId, 10) || null; + const artifactUrlPrefix = (owner && repo && sourceRunId) + ? `https://github.com/${owner}/${repo}/actions/runs/${sourceRunId}/artifacts` + : null; + // Read structured data written by ci-summary-report.yml. // All fields are primitives (enums, numbers, booleans) — no free-form text. let s; @@ -349,7 +380,11 @@ async function handler({ github, core, fs, inputs }) { // after a green run. Only create a new comment when there is something to report. 
const hasIssues = metrics.conclusion === 'failure' || coverage.conclusion === 'failure' || metrics.totalChanges > 0; - const body = buildCommentBody(metrics.text, coverage.text, footer, { metricsSnapshots: metrics.snapshots }); + const body = buildCommentBody(metrics.text, coverage.text, footer, { + metricsSnapshots: metrics.snapshots, + ciRunUrl, + artifactUrlPrefix, + }); await postOrUpdateComment(github, owner, repo, prNumber, body, core, { createNew: hasIssues }); } else { core.info('No PR number; skipping PR comment.'); diff --git a/.github/scripts/ci-summary-report-publish.test.js b/.github/scripts/ci-summary-report-publish.test.js index 8334c131578..4b2ab570552 100644 --- a/.github/scripts/ci-summary-report-publish.test.js +++ b/.github/scripts/ci-summary-report-publish.test.js @@ -175,6 +175,44 @@ describe('sanitizeSnapshots', () => { expect(result).toHaveLength(1); expect(result[0].snapshot).toBe('valid'); }); + + test('sanitizes artifact_id as a positive integer', () => { + const input = [{ + snapshot: 'test_snapshot', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: 6359406281, + }]; + const result = sanitizeSnapshots(input); + expect(result[0].artifact_id).toBe(6359406281); + }); + + test('rejects non-integer artifact_id', () => { + const input = [{ + snapshot: 'test_snapshot', + added: 1, removed: 0, modified: 0, + metric_names: [], + artifact_id: 'evil_string', + }]; + const result = sanitizeSnapshots(input); + expect(result[0].artifact_id).toBeNull(); + }); + + test('rejects zero and negative artifact_id', () => { + const input = [ + { snapshot: 'snap_a', added: 0, removed: 0, modified: 0, metric_names: [], artifact_id: 0 }, + { snapshot: 'snap_b', added: 0, removed: 0, modified: 0, metric_names: [], artifact_id: -1 }, + ]; + const result = sanitizeSnapshots(input); + expect(result[0].artifact_id).toBeNull(); + expect(result[1].artifact_id).toBeNull(); + }); + + test('sets artifact_id to null when absent', () => { + const input = [{ snapshot: 'test_snapshot', added: 1, removed: 0, modified: 0, metric_names: [] }]; + const result = sanitizeSnapshots(input); + expect(result[0].artifact_id).toBeNull(); + }); }); // ── computeMetrics ──────────────────────────────────────────────────────────── @@ -285,14 +323,53 @@ describe('formatMetricsDetail', () => { expect(detail).toContain('3 removed'); }); - test('omits zero counts from summary line', () => { + test('includes link to CI run when ciRunUrl provided', () => { const detail = formatMetricsDetail([{ - snapshot: 'test', added: 0, removed: 5, modified: 0, - metric_names: [], + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + }], { ciRunUrl: 'https://github.com/org/repo/actions/runs/123' }); + expect(detail).toContain('[CI run](https://github.com/org/repo/actions/runs/123)'); + expect(detail).toContain('Compare metrics and generate summary'); + }); + + test('omits CI run link when ciRunUrl not provided', () => { + const detail = formatMetricsDetail([{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], }]); - expect(detail).not.toContain('added'); - expect(detail).toContain('5 removed'); - expect(detail).not.toContain('modified'); + expect(detail).not.toContain('[CI run]'); + }); + + test('renders per-snapshot artifact download link when artifactUrlPrefix and artifact_id provided', () => { + const detail = formatMetricsDetail([{ + snapshot: 'cassandra_v2', + added: 1, 
removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: 6359406281, + }], { artifactUrlPrefix: 'https://github.com/org/repo/actions/runs/123/artifacts' }); + expect(detail).toContain('[⬇️ download diff](https://github.com/org/repo/actions/runs/123/artifacts/6359406281)'); + }); + + test('omits artifact download link when artifact_id is null', () => { + const detail = formatMetricsDetail([{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: null, + }], { artifactUrlPrefix: 'https://github.com/org/repo/actions/runs/123/artifacts' }); + expect(detail).not.toContain('download diff'); + }); + + test('omits artifact download link when artifactUrlPrefix not provided', () => { + const detail = formatMetricsDetail([{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: 6359406281, + }]); + expect(detail).not.toContain('download diff'); }); }); @@ -420,6 +497,31 @@ describe('buildCommentBody', () => { expect(metricsPos).toBeLessThan(detailsPos); expect(detailsPos).toBeLessThan(coveragePos); }); + + test('includes CI run link in metrics detail when ciRunUrl provided', () => { + const snapshots = [{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: null, + }]; + const ciRunUrl = 'https://github.com/org/repo/actions/runs/999'; + const body = buildCommentBody('❌ 1 metric change(s) detected', coverageText, footer, { metricsSnapshots: snapshots, ciRunUrl }); + expect(body).toContain(`[CI run](${ciRunUrl})`); + expect(body).toContain('Compare metrics and generate summary'); + }); + + test('includes per-snapshot artifact download link when artifactUrlPrefix and artifact_id provided', () => { + const snapshots = [{ + snapshot: 'cassandra_v2', + added: 1, removed: 0, modified: 0, + metric_names: ['http_server_duration'], + artifact_id: 6359406281, + }]; + const artifactUrlPrefix = 'https://github.com/org/repo/actions/runs/999/artifacts'; + const body = buildCommentBody('❌ 1 metric change(s) detected', coverageText, footer, { metricsSnapshots: snapshots, artifactUrlPrefix }); + expect(body).toContain(`[⬇️ download diff](${artifactUrlPrefix}/6359406281)`); + }); }); // ── postCheckRun ────────────────────────────────────────────────────────────── diff --git a/.github/workflows/ci-summary-report-publish.yml b/.github/workflows/ci-summary-report-publish.yml index 88ad0559e48..cc532d2e821 100644 --- a/.github/workflows/ci-summary-report-publish.yml +++ b/.github/workflows/ci-summary-report-publish.yml @@ -116,12 +116,13 @@ jobs: await handler({ github, core, fs: require('fs'), inputs: { - owner: context.repo.owner, - repo: context.repo.repo, - headSha: '${{ steps.source-run.outputs.head_sha }}', - prNumber: '${{ steps.source-run.outputs.pr_number || steps.pr.outputs.pr_number }}', - ciRunUrl: '${{ steps.source-run.outputs.source_run_url }}', - publishUrl: '${{ steps.source-run.outputs.summary_run_url }}', + owner: context.repo.owner, + repo: context.repo.repo, + headSha: '${{ steps.source-run.outputs.head_sha }}', + prNumber: '${{ steps.source-run.outputs.pr_number || steps.pr.outputs.pr_number }}', + ciRunUrl: '${{ steps.source-run.outputs.source_run_url }}', + publishUrl: '${{ steps.source-run.outputs.summary_run_url }}', + sourceRunId: '${{ steps.source-run.outputs.run_id }}', }, }); env: diff --git a/.github/workflows/ci-summary-report.yml 
b/.github/workflows/ci-summary-report.yml index 1d26cb75d3f..b83bfbf1e66 100644 --- a/.github/workflows/ci-summary-report.yml +++ b/.github/workflows/ci-summary-report.yml @@ -40,6 +40,8 @@ jobs: - name: Compare metrics and generate summary id: compare-metrics + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} shell: bash run: bash ./scripts/e2e/metrics_summary.sh diff --git a/scripts/e2e/compare_metrics.py b/scripts/e2e/compare_metrics.py index a64481a2eda..0633f0b63d2 100644 --- a/scripts/e2e/compare_metrics.py +++ b/scripts/e2e/compare_metrics.py @@ -111,7 +111,7 @@ def parse_metrics(content): return metrics,metrics_exclusion_count -def generate_diff(file1_content, file2_content): +def generate_diff(baseline_content, current_content): """Compare two Prometheus metrics snapshots and return a unified diff of metric names. The input files are raw Prometheus text exposition format, scraped directly from @@ -132,21 +132,26 @@ def generate_diff(file1_content, file2_content): The diff is performed on these sorted, value-free metric strings. If the two snapshots produce the same set of strings the diff is empty and this function returns ''. When there are differences, the return value is a unified diff + using the standard convention: + - lines = present in baseline but absent from current → regression/removed + + lines = present in current but absent from baseline → newly added followed by optional comment lines reporting how many metrics were excluded, e.g.: - # Metrics excluded from A: 3 - # Metrics excluded from B: 5 + # Metrics excluded from baseline: 3 + # Metrics excluded from current: 5 These comment lines (prefixed with `# `) are appended only when the diff is non-empty; they are informational context, not metric differences themselves. """ - if isinstance(file1_content, list): - file1_content = ''.join(file1_content) - if isinstance(file2_content, list): - file2_content = ''.join(file2_content) + if isinstance(baseline_content, list): + baseline_content = ''.join(baseline_content) + if isinstance(current_content, list): + current_content = ''.join(current_content) - metrics1,excluded_metrics_count1 = parse_metrics(file1_content) - metrics2,excluded_metrics_count2 = parse_metrics(file2_content) + baseline_metrics, excluded_count_baseline = parse_metrics(baseline_content) + current_metrics, excluded_count_current = parse_metrics(current_content) - diff = list(unified_diff(metrics1, metrics2, lineterm='', n=0)) + # unified_diff(baseline, current): - = in baseline but not current (removed/regression), + # + = in current but not baseline (newly added). + diff = list(unified_diff(baseline_metrics, current_metrics, lineterm='', n=0)) # Exclusion counts are informational context appended to the diff output. 
# They must not be written when the diff itself is empty: two snapshots with @@ -155,12 +160,12 @@ def generate_diff(file1_content, file2_content): if len(diff) == 0: return '' - total_excluded = excluded_metrics_count1 + excluded_metrics_count2 - + total_excluded = excluded_count_baseline + excluded_count_current + exclusion_lines = '' if total_excluded > 0: - exclusion_lines = f'\n# Metrics excluded from A: {excluded_metrics_count1}\n# Metrics excluded from B: {excluded_metrics_count2}' - + exclusion_lines = f'\n# Metrics excluded from baseline: {excluded_count_baseline}\n# Metrics excluded from current: {excluded_count_current}' + return '\n'.join(diff) + exclusion_lines def write_diff_file(diff_lines, output_path): @@ -172,20 +177,20 @@ def write_diff_file(diff_lines, output_path): def main(): parser = argparse.ArgumentParser(description='Generate diff between two Jaeger metric files') - parser.add_argument('--file1', help='Path to first metric file') - parser.add_argument('--file2', help='Path to second metric file') + parser.add_argument('--current', help='Path to the current metric file (e.g. from the PR)') + parser.add_argument('--baseline', help='Path to the baseline metric file (e.g. from main branch)') parser.add_argument('--output', '-o', default='metrics_diff.txt', help='Output diff file path (default: metrics_diff.txt)') - + args = parser.parse_args() - + # Read input files - file1_lines = read_metric_file(args.file1) - file2_lines = read_metric_file(args.file2) - + baseline_lines = read_metric_file(args.baseline) + current_lines = read_metric_file(args.current) + # Generate diff - diff_lines = generate_diff(file1_lines, file2_lines) - + diff_lines = generate_diff(baseline_lines, current_lines) + # Check if there are any differences if diff_lines: print("differences found between the metric files.") diff --git a/scripts/e2e/compare_metrics_test.py b/scripts/e2e/compare_metrics_test.py index 10068cdba19..fd6e9ef37a6 100644 --- a/scripts/e2e/compare_metrics_test.py +++ b/scripts/e2e/compare_metrics_test.py @@ -54,11 +54,11 @@ def test_empty_snapshots_returns_empty(self): def test_regression_detected(self): """Metric present in baseline but absent from current snapshot → diff is non-empty.""" - # current=A only, baseline=A+B → B is missing from current (regression) - result = generate_diff(_METRIC_A, _METRIC_A + _METRIC_B) + # baseline=A+B, current=A only → B is missing from current (regression/removed) + result = generate_diff(_METRIC_A + _METRIC_B, _METRIC_A) self.assertNotEqual(result, '', 'Expected a non-empty diff for a regression') - # The diff must contain a '+' line for the missing metric (counter_b) - self.assertIn('+counter_b', result) + # '-' line = in baseline but not in current (regression) + self.assertIn('-counter_b', result) def test_new_metric_in_current_snapshot_produces_diff(self): """Metric present in current snapshot but absent from baseline → diff is non-empty. @@ -68,11 +68,11 @@ def test_new_metric_in_current_snapshot_produces_diff(self): Silently ignoring new metrics would mask intermittent behaviour where a metric alternates between appearing and disappearing across runs. 
""" - # current=A+B, baseline=A only → B is new in current - result = generate_diff(_METRIC_A + _METRIC_B, _METRIC_A) + # baseline=A only, current=A+B → B is new in current (newly added) + result = generate_diff(_METRIC_A, _METRIC_A + _METRIC_B) self.assertNotEqual(result, '', 'New metrics in current snapshot should produce a diff') - # '-' line = in current but not in baseline - self.assertIn('-counter_b', result) + # '+' line = in current but not in baseline (newly added) + self.assertIn('+counter_b', result) def test_exclusion_count_difference_does_not_produce_diff(self): """Snapshots that differ only in excluded-metric counts produce no diff. @@ -82,8 +82,8 @@ def test_exclusion_count_difference_does_not_produce_diff(self): other), the exclusion-count lines are informational metadata and must not make the diff non-empty on their own. """ - # current has metric_a + one 5xx (excluded), baseline has metric_a + zero 5xx - result = generate_diff(_METRIC_A_AND_EXCLUDED, _METRIC_A) + # baseline has metric_a + zero 5xx, current has metric_a + one 5xx (excluded) + result = generate_diff(_METRIC_A, _METRIC_A_AND_EXCLUDED) self.assertEqual( result, '', @@ -92,24 +92,25 @@ def test_exclusion_count_difference_does_not_produce_diff(self): def test_mixed_regression_and_new_metric_returns_diff(self): """When there is both a regression AND a new metric, the diff is non-empty.""" - # current=B only, baseline=A only → A is missing (regression), B is new - result = generate_diff(_METRIC_B, _METRIC_A) + # baseline=A only, current=B only → A is missing (regression/removed), B is new (added) + result = generate_diff(_METRIC_A, _METRIC_B) self.assertNotEqual(result, '') - self.assertIn('+counter_a', result) - # The new metric should still appear in the raw diff output for visibility - self.assertIn('-counter_b', result) + # '-' line = in baseline but not in current (regression) + self.assertIn('-counter_a', result) + # '+' line = in current but not in baseline (newly added) + self.assertIn('+counter_b', result) def test_regression_with_exclusions_includes_exclusion_summary(self): """When there is a regression and excluded metrics, the output includes counts.""" - # current=excluded only, baseline=A+excluded → A is missing (regression) - result = generate_diff(_METRIC_EXCLUDED_5XX, _METRIC_A + _METRIC_EXCLUDED_5XX) + # baseline=A+excluded, current=excluded only → A is missing (regression) + result = generate_diff(_METRIC_A + _METRIC_EXCLUDED_5XX, _METRIC_EXCLUDED_5XX) self.assertNotEqual(result, '') - self.assertIn('# Metrics excluded from A:', result) - self.assertIn('# Metrics excluded from B:', result) + self.assertIn('# Metrics excluded from baseline:', result) + self.assertIn('# Metrics excluded from current:', result) def test_no_exclusions_means_no_exclusion_summary(self): """When there are no excluded metrics, the exclusion summary is omitted.""" - result = generate_diff(_METRIC_A, _METRIC_A + _METRIC_B) + result = generate_diff(_METRIC_A + _METRIC_B, _METRIC_A) self.assertNotIn('Metrics excluded from', result) diff --git a/scripts/e2e/metrics_summary.py b/scripts/e2e/metrics_summary.py index f5b1c6e15dc..315fed8ca8d 100644 --- a/scripts/e2e/metrics_summary.py +++ b/scripts/e2e/metrics_summary.py @@ -28,7 +28,7 @@ def parse_diff_file(diff_path): original_line = line.rstrip('\n') stripped = original_line.strip() - if stripped.startswith('# Metrics excluded from A: ') or stripped.startswith('# Metrics excluded from B: '): + if stripped.startswith('# Metrics excluded from baseline: ') or 
stripped.startswith('# Metrics excluded from current: '):
             count_str = stripped.split(': ')[1]
             exclusion_count += int(count_str)
             continue
@@ -73,19 +73,38 @@ def extract_metric_name(line):
         return line.split('{')[0].strip()
     return line.strip()
 
-def get_raw_diff_sample(raw_lines, max_lines=7):
+def get_raw_diff_sample(raw_lines, max_lines=8):
     """
     Get sample raw diff lines, preserving original diff formatting.
+
+    For modified metrics that have both removed (-) and added (+) lines,
+    interleaves them so both sides are visible when the sample is truncated,
+    avoiding the problem of showing only the removed side.
     """
     if not raw_lines:
         return []
 
-    # Take up to max_lines
-    sample_lines = raw_lines[:max_lines]
-    if len(raw_lines) > max_lines:
-        sample_lines.append("...")
+    removed_lines = [l for l in raw_lines if l.startswith('-')]
+    added_lines = [l for l in raw_lines if l.startswith('+')]
+
+    # If only one direction of change (pure add or pure remove), just truncate.
+    if not removed_lines or not added_lines:
+        sample_lines = raw_lines[:max_lines]
+        if len(raw_lines) > max_lines:
+            sample_lines.append("...")
+        return sample_lines
+
+    # Interleave pairs of (removed, added) lines so both sides are always visible.
+    shown_pairs = min(max(1, max_lines // 2), len(removed_lines), len(added_lines))
+    interleaved = []
+    for i in range(shown_pairs):
+        interleaved.append(removed_lines[i])
+        interleaved.append(added_lines[i])
+
+    if len(removed_lines) > shown_pairs or len(added_lines) > shown_pairs:
+        interleaved.append("...")
 
-    return sample_lines
+    return interleaved
 
 def generate_diff_summary(changes, raw_diff_sections, exclusion_count):
     """
diff --git a/scripts/e2e/metrics_summary.sh b/scripts/e2e/metrics_summary.sh
index 67983ece935..cc7d71777c4 100644
--- a/scripts/e2e/metrics_summary.sh
+++ b/scripts/e2e/metrics_summary.sh
@@ -3,22 +3,24 @@
 # Copyright (c) 2025 The Jaeger Authors.
 # SPDX-License-Identifier: Apache-2.0
 
-# Enable debug tracing and exit on error
-set -exo pipefail
+# Exit on error, treat unset variables as errors, fail on pipe errors.
+set -euo pipefail
 
 METRICS_DIR="${METRICS_DIR:-./.artifacts}"
 declare -a summary_files=()
 declare -a json_files=()
 total_changes=0
 
+echo "::group::Metrics diff setup"
 echo "Starting metrics diff processing in directory: $METRICS_DIR"
 echo "Directory structure:"
 ls -la "$METRICS_DIR" || echo "Metrics directory listing failed"
+echo "::endgroup::"
 
 # Verify 1-to-1: every metrics_snapshot_* artifact must have a diff_metrics_snapshot_* artifact.
 # verify-metrics-snapshot always uploads a diff artifact on PRs (empty stub if no baseline),
 # so a missing diff dir means that action never ran for that snapshot — an infra failure.
-echo "=== Checking for missing diff artifacts ==="
+echo "::group::Checking for missing diff artifacts"
 declare -a missing_diffs=()
 found_any_snapshot=false
 for snapshot_dir in "$METRICS_DIR"/metrics_snapshot_*/; do
@@ -41,12 +43,58 @@ if [ ${#missing_diffs[@]} -gt 0 ]; then
 else
   echo "INFRA_ERRORS=false" >> "$GITHUB_OUTPUT"
 fi
+echo "::endgroup::"
 
 # Debug: List all diff files found
-echo "=== Searching for diff files ==="
+echo "::group::Searching for diff files"
 find "$METRICS_DIR" -type f -name "diff_*.txt" | while read -r file; do
   echo "Found diff file: $file"
 done
+echo "::endgroup::"
+
+# Query artifact IDs for all diff_metrics_snapshot_* artifacts in this run.
+# These IDs are used by the publish workflow to render direct download links
+# in the PR comment. The query is optional: if it fails (e.g. when running
+# locally without a token) we write an empty map and the publish workflow
+# degrades gracefully (no download links, but the rest of the summary still works).
+echo "::group::Querying diff artifact IDs"
+if [ -z "${GITHUB_RUN_ID:-}" ] || [ -z "${GITHUB_REPOSITORY:-}" ]; then
+  echo "GITHUB_RUN_ID or GITHUB_REPOSITORY not set; skipping artifact ID query"
+  echo '{}' > "$METRICS_DIR/artifact_ids.json"
+else
+  echo "Querying artifacts for run ${GITHUB_RUN_ID} in ${GITHUB_REPOSITORY}"
+  api_url="repos/${GITHUB_REPOSITORY}/actions/runs/${GITHUB_RUN_ID}/artifacts?per_page=100"
+  echo "API URL: https://api.github.com/${api_url}"
+  # "|| var=$?" keeps a failed command substitution from exiting the script under "set -e".
+  api_exit=0
+  api_output=$(gh api "$api_url" 2>&1) || api_exit=$?
+  if [ $api_exit -ne 0 ]; then
+    echo "::warning::gh api call failed (exit $api_exit) — no artifact download links will be rendered"
+    echo "gh api error output:"
+    echo "$api_output"
+    echo '{}' > "$METRICS_DIR/artifact_ids.json"
+  else
+    echo "API call succeeded; filtering diff_metrics_snapshot_* artifacts"
+    jq_exit=0
+    jq_output=$(echo "$api_output" | \
+      jq '[.artifacts[] | select(.name | startswith("diff_metrics_snapshot_")) | {key: .name, value: .id}] | from_entries' 2>&1) || jq_exit=$?
+    if [ $jq_exit -ne 0 ]; then
+      echo "::warning::jq processing failed (exit $jq_exit) — no artifact download links will be rendered"
+      echo "jq error output:"
+      echo "$jq_output"
+      echo '{}' > "$METRICS_DIR/artifact_ids.json"
+    else
+      echo "$jq_output" > "$METRICS_DIR/artifact_ids.json"
+      artifact_count=$(echo "$jq_output" | jq 'length')
+      echo "Artifact IDs written to $METRICS_DIR/artifact_ids.json (${artifact_count} diff artifact(s) found)"
+      if [ "${artifact_count}" -gt 0 ]; then
+        echo "Artifact ID map:"
+        echo "$jq_output" | jq '.'
+      fi
+    fi
+  fi
+fi
+echo "::endgroup::"
 
 # Process all non-empty diff files.
 # Empty diff files are stubs uploaded by verify-metrics-snapshot when there is no
@@ -74,11 +122,12 @@ while IFS= read -r -d '' diff_file; do
     summary_file="$dir/summary_$snapshot_name.md"
     json_file="$dir/changes_$snapshot_name.json"
 
-    echo "Generating summary for $snapshot_name"
+    echo "::group::Generating summary for $snapshot_name"
     python3 ./scripts/e2e/metrics_summary.py \
      --diff "$diff_file" \
      --output "$summary_file" \
      --json-output "$json_file"
+    echo "::endgroup::"
 
     summary_files+=("$summary_file")
     json_files+=("$json_file")
@@ -110,13 +159,24 @@ else
 fi
 
 # Merge per-snapshot JSON files into a single metrics_snapshots.json.
-# Each entry gets a "snapshot" field with the snapshot name.
+# Each entry gets a "snapshot" field with the snapshot name and an optional
+# "artifact_id" field (integer) for the corresponding diff artifact download link.
 # Capped at 50 entries to match the publish workflow's MAX_SNAPSHOTS limit.
 # The trusted publish workflow validates this data before rendering.
 python3 - "$METRICS_DIR" "${json_files[@]}" <<'PYEOF'
 import json, os, sys
 metrics_dir = sys.argv[1]
 MAX_SNAPSHOTS = 50
+
+# Load artifact IDs if available (produced by the gh api query above).
+# artifact_ids maps artifact name → artifact ID (integer).
+artifact_ids = {} +try: + with open(os.path.join(metrics_dir, 'artifact_ids.json')) as f: + artifact_ids = json.load(f) +except (FileNotFoundError, json.JSONDecodeError): + pass + snapshots = [] for path in sys.argv[2:]: if len(snapshots) >= MAX_SNAPSHOTS: @@ -128,6 +188,12 @@ for path in sys.argv[2:]: basename = os.path.basename(path) name = basename.removeprefix('changes_').removesuffix('.json') data['snapshot'] = name + # Look up the artifact ID for this snapshot's diff artifact so the + # publish workflow can render a direct download link. + diff_artifact_name = f"diff_{name}" + artifact_id = artifact_ids.get(diff_artifact_name) + if isinstance(artifact_id, int): + data['artifact_id'] = artifact_id snapshots.append(data) except Exception as e: print(f"Warning: could not read {path}: {e}", file=sys.stderr) @@ -152,10 +218,11 @@ if [ "$total_changes" -gt 0 ]; then echo "" for summary_file in "${summary_files[@]}"; do file_name=$(basename "$summary_file" .md) - echo "--- ${file_name} ---" + echo "::group::${file_name}" echo "" cat "$summary_file" echo "" + echo "::endgroup::" done elif [ ${#missing_diffs[@]} -gt 0 ]; then echo "No metric changes in available diffs, but some diff artifacts were missing (see above)." diff --git a/scripts/e2e/metrics_summary_test.py b/scripts/e2e/metrics_summary_test.py new file mode 100644 index 00000000000..79d5e2dfdec --- /dev/null +++ b/scripts/e2e/metrics_summary_test.py @@ -0,0 +1,288 @@ +# Copyright (c) 2025 The Jaeger Authors. +# SPDX-License-Identifier: Apache-2.0 + +import os +import tempfile +import unittest +from metrics_summary import ( + extract_metric_name, + get_raw_diff_sample, + parse_diff_file, + generate_diff_summary, + generate_structured_json, +) + + +# A minimal unified diff that exercises added/removed/modified cases and exclusion counts. +_DIFF_WITH_ALL_CATEGORIES = """\ +--- ++++ +@@ -1,3 +1,3 @@ +-baseline_only{job="a"} +-http_server_duration{le="+Inf"} ++current_only{job="b"} ++http_server_duration{http_route="/status",le="+Inf"} +# Metrics excluded from baseline: 3 +# Metrics excluded from current: 2 +""" + +# A diff with only added metrics (present in current, absent from baseline). +_DIFF_ADDED_ONLY = """\ +--- ++++ +@@ -1 +2 @@ ++new_metric{job="a"} +""" + +# A diff with only removed metrics (present in baseline, absent from current). 
+_DIFF_REMOVED_ONLY = """\ +--- ++++ +@@ -2 +1 @@ +-old_metric{job="b"} +""" + + +def _write_tmp(content): + """Write content to a temp file and return its path.""" + f = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) + f.write(content) + f.close() + return f.name + + +class TestExtractMetricName(unittest.TestCase): + """Tests for extract_metric_name().""" + + def test_extracts_name_before_braces(self): + self.assertEqual(extract_metric_name('http_server_duration{le="+Inf"}'), 'http_server_duration') + + def test_returns_bare_name_when_no_braces(self): + self.assertEqual(extract_metric_name('my_metric'), 'my_metric') + + def test_strips_whitespace(self): + self.assertEqual(extract_metric_name(' my_metric '), 'my_metric') + + +class TestGetRawDiffSample(unittest.TestCase): + """Tests for get_raw_diff_sample().""" + + def test_empty_input_returns_empty(self): + self.assertEqual(get_raw_diff_sample([]), []) + + def test_pure_removals_are_truncated(self): + lines = [f'-metric_{i}' for i in range(10)] + result = get_raw_diff_sample(lines, max_lines=4) + self.assertEqual(result[:4], lines[:4]) + self.assertEqual(result[-1], '...') + + def test_pure_additions_are_truncated(self): + lines = [f'+metric_{i}' for i in range(10)] + result = get_raw_diff_sample(lines, max_lines=4) + self.assertEqual(result[:4], lines[:4]) + self.assertEqual(result[-1], '...') + + def test_interleaves_removed_and_added_lines(self): + """Modified metrics with both - and + lines are interleaved.""" + removed = [f'-metric{{a="{i}"}}' for i in range(4)] + added = [f'+metric{{a="{i}",b="x"}}' for i in range(4)] + result = get_raw_diff_sample(removed + added, max_lines=4) + # Pairs: (-, +, -, +, ...) + self.assertTrue(result[0].startswith('-')) + self.assertTrue(result[1].startswith('+')) + self.assertTrue(result[2].startswith('-')) + self.assertTrue(result[3].startswith('+')) + + def test_interleaved_truncation_adds_ellipsis(self): + removed = [f'-metric{{a="{i}"}}' for i in range(10)] + added = [f'+metric{{a="{i}",b="x"}}' for i in range(10)] + result = get_raw_diff_sample(removed + added, max_lines=4) + self.assertIn('...', result) + # Should only show max_lines//2 = 2 pairs + non_ellipsis = [l for l in result if l != '...'] + self.assertEqual(len(non_ellipsis), 4) # 2 pairs × 2 lines each + + def test_no_ellipsis_when_within_limit(self): + lines = ['-m1', '+m1_v2'] + result = get_raw_diff_sample(lines, max_lines=8) + self.assertNotIn('...', result) + + +class TestParseDiffFile(unittest.TestCase): + """Tests for parse_diff_file().""" + + def test_parses_added_removed_and_modified(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + + # baseline_only is in baseline but not current → removed + self.assertIn('baseline_only', changes['removed']) + # current_only is in current but not baseline → added + self.assertIn('current_only', changes['added']) + # http_server_duration appears in both → modified + self.assertIn('http_server_duration', changes['modified']) + # Not in added or removed after reclassification + self.assertNotIn('http_server_duration', changes['added']) + self.assertNotIn('http_server_duration', changes['removed']) + + def test_accumulates_exclusion_counts(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + _, _, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + self.assertEqual(excl_count, 5) # 2 + 3 + + def test_zero_exclusion_count_when_no_exclusion_lines(self): + 
path = _write_tmp(_DIFF_ADDED_ONLY) + try: + _, _, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + self.assertEqual(excl_count, 0) + + def test_raw_diff_sections_populated(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + _, raw_sections, _ = parse_diff_file(path) + finally: + os.unlink(path) + self.assertIn('http_server_duration', raw_sections) + # The section should have at least one - line and one + line + lines = raw_sections['http_server_duration'] + self.assertTrue(any(l.startswith('-') for l in lines)) + self.assertTrue(any(l.startswith('+') for l in lines)) + + def test_added_only_diff(self): + path = _write_tmp(_DIFF_ADDED_ONLY) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + self.assertIn('new_metric', changes['added']) + self.assertEqual(len(changes['removed']), 0) + self.assertEqual(len(changes['modified']), 0) + + def test_removed_only_diff(self): + path = _write_tmp(_DIFF_REMOVED_ONLY) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + self.assertIn('old_metric', changes['removed']) + self.assertEqual(len(changes['added']), 0) + self.assertEqual(len(changes['modified']), 0) + + +class TestGenerateDiffSummary(unittest.TestCase): + """Tests for generate_diff_summary().""" + + def test_total_changes_header_present(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('**Total Changes:**', summary) + + def test_added_section_rendered(self): + path = _write_tmp(_DIFF_ADDED_ONLY) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('Added Metrics', summary) + self.assertIn('new_metric', summary) + + def test_removed_section_rendered(self): + path = _write_tmp(_DIFF_REMOVED_ONLY) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('Removed Metrics', summary) + self.assertIn('old_metric', summary) + + def test_modified_section_rendered(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('Modified Metrics', summary) + self.assertIn('http_server_duration', summary) + + def test_diff_sample_block_present_for_changed_metric(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('```diff', summary) + + def test_exclusion_count_shown(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, raw_sections, excl_count = parse_diff_file(path) + finally: + os.unlink(path) + summary = generate_diff_summary(changes, raw_sections, excl_count) + self.assertIn('Excluded: 5', summary) + + +class TestGenerateStructuredJson(unittest.TestCase): + """Tests for generate_structured_json().""" + + def test_counts_are_correct(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + data = generate_structured_json(changes) + 
self.assertEqual(data['added'], 1) # current_only + self.assertEqual(data['removed'], 1) # baseline_only + self.assertEqual(data['modified'], 1) # http_server_duration + + def test_metric_names_list_sorted(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + data = generate_structured_json(changes) + names = data['metric_names'] + self.assertEqual(names, sorted(names)) + + def test_metric_names_deduped(self): + path = _write_tmp(_DIFF_WITH_ALL_CATEGORIES) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + data = generate_structured_json(changes) + self.assertEqual(len(data['metric_names']), len(set(data['metric_names']))) + + def test_added_only_produces_correct_output(self): + path = _write_tmp(_DIFF_ADDED_ONLY) + try: + changes, _, _ = parse_diff_file(path) + finally: + os.unlink(path) + data = generate_structured_json(changes) + self.assertEqual(data['added'], 1) + self.assertEqual(data['removed'], 0) + self.assertEqual(data['modified'], 0) + self.assertIn('new_metric', data['metric_names']) + + +if __name__ == '__main__': + unittest.main()