From 4639c597d9b3aa06644feed014632248941ad5db Mon Sep 17 00:00:00 2001 From: Tianlei WU Date: Thu, 12 Feb 2026 15:14:02 -0800 Subject: [PATCH 01/11] Add cherry-pick script --- tools/python/cherry_pick.py | 162 ++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 tools/python/cherry_pick.py diff --git a/tools/python/cherry_pick.py b/tools/python/cherry_pick.py new file mode 100644 index 0000000000000..fc2fa8cb55b9b --- /dev/null +++ b/tools/python/cherry_pick.py @@ -0,0 +1,162 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. +# +# Usage: +# python create_cherry_pick.py --label "release:1.24.2" --output cherry_pick.cmd --branch "origin/rel-1.24.2" +# +# Arguments: +# --label: Label to filter PRs (required) +# --output: Output cmd file path (required) +# --repo: Repository (default: microsoft/onnxruntime) +# --branch: Target branch to compare against for dependency checks (default: HEAD) +# +# This script fetches merged PRs with the specified label from the onnxruntime repository, +# sorts them by merge date, and generates: +# 1. A batch file (specified by --output) containing git cherry-pick commands. +# 2. A markdown file (cherry_pick_pr_description.md) summarizing the cherry-picked PRs for pull request description. +# +# It also checks for potential missing dependencies (conflicts) by verifying if files modified +# by the cherry-picked commits have any other modifications in the target branch history +# that are not included in the cherry-pick list. 
+import argparse +import subprocess +import json +import sys +from collections import defaultdict + +def main(): + parser = argparse.ArgumentParser(description="Generate cherry-pick script from PRs with a specific label.") + parser.add_argument("--label", required=True, help="Label to filter PRs") + parser.add_argument("--output", required=True, help="Output cmd file path") + parser.add_argument("--repo", default="microsoft/onnxruntime", help="Repository (default: microsoft/onnxruntime)") + parser.add_argument("--branch", default="HEAD", help="Target branch to compare against for dependency checks (default: HEAD)") + args = parser.parse_args() + + # Fetch merged PRs with the specified label using gh CLI + print(f"Fetching merged PRs with label '{args.label}' from {args.repo}...") + cmd = [ + "gh", "pr", "list", + "--repo", args.repo, + "--label", args.label, + "--state", "merged", + "--json", "number,title,mergeCommit,mergedAt", + "-L", "200" + ] + + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + prs = json.loads(result.stdout) + except subprocess.CalledProcessError as e: + print(f"Error running gh command: {e}", file=sys.stderr) + print(e.stderr, file=sys.stderr) + sys.exit(1) + except json.JSONDecodeError as e: + print(f"Error parsing gh output: {e}", file=sys.stderr) + print(f"Output was: {result.stdout}", file=sys.stderr) + sys.exit(1) + + if not prs: + print(f"No PRs found with label '{args.label}'.") + return + + # Sort by mergedAt (ISO 8601 strings sort correctly in chronological order) + prs.sort(key=lambda x: x['mergedAt']) + + # Write to output cmd file + commit_count = 0 + with open(args.output, "w", encoding="utf-8") as f: + f.write("@echo off\n") + f.write(f"rem Cherry-pick {args.label} commits\n") + f.write("rem Sorted by merge time (oldest first)\n\n") + + for pr in prs: + number = pr['number'] + title = pr['title'] + safe_title = title.replace('\n', ' ') + + if not pr.get('mergeCommit'): + print(f"Warning: PR 
#{number} has no merge commit OID. Skipping.", file=sys.stderr) + continue + + oid = pr['mergeCommit']['oid'] + f.write(f"rem PR {number}: {safe_title}\n") + f.write(f"git cherry-pick {oid}\n\n") + commit_count += 1 + + print(f"Generated {args.output} with {commit_count} commits.") + + # Write to markdown file. You can use it as the pull request description. + md_output = "cherry_pick_pr_description.md" + with open(md_output, "w", encoding="utf-8") as f: + f.write(f"This cherry-picks the following commits for the release:\n") + for pr in prs: + if not pr.get('mergeCommit'): + continue + number = pr['number'] + f.write(f"- #{number}\n") + + print(f"Generated {md_output} with {commit_count} commits.") + + # Check for potential missing dependencies + print("\nChecking for potential missing dependencies (conflicts)...") + + # Collect OIDs being cherry-picked + cherry_pick_oids = set() + for pr in prs: + if pr.get('mergeCommit'): + cherry_pick_oids.add(pr['mergeCommit']['oid']) + + for pr in prs: + if not pr.get('mergeCommit'): + continue + + oid = pr['mergeCommit']['oid'] + number = pr['number'] + + # Get files changed by this commit + try: + res = subprocess.run( + ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", oid], + capture_output=True, text=True, check=True + ) + files = res.stdout.strip().splitlines() + except subprocess.CalledProcessError as e: + print(f"Error getting changed files for {oid}: {e}", file=sys.stderr) + continue + + # For each file, find commits that modified it between the target branch and the cherry-picked commit. + # Deduplicate warnings: group affected files by missing commit. 
+ # missing_commits maps: missing_commit_oid -> (title, [list of affected files]) + missing_commits = defaultdict(lambda: ("", [])) + for filepath in files: + try: + res = subprocess.run( + ["git", "log", oid, "--not", args.branch, "--format=%H %s", "--", filepath], + capture_output=True, text=True, check=True + ) + for line in res.stdout.strip().splitlines(): + parts = line.split(' ', 1) + c = parts[0] + title = parts[1] if len(parts) > 1 else "" + + if c == oid: + continue + if c not in cherry_pick_oids: + existing_title, existing_files = missing_commits[c] + if not existing_title: + existing_title = title + existing_files.append(filepath) + missing_commits[c] = (existing_title, existing_files) + + except subprocess.CalledProcessError as e: + print(f"Error checking history for {filepath}: {e}", file=sys.stderr) + continue + + # Print deduplicated warnings + for missing_oid, (title, affected_files) in missing_commits.items(): + files_str = ", ".join(affected_files) + print(f"WARNING: PR #{number} ({oid}) depends on commit {missing_oid} ({title}) " + f"which is not in the cherry-pick list. Affected files: {files_str}") + +if __name__ == "__main__": + main() From dd0d487acaa8233efda159da4a4da9f8eacf09d8 Mon Sep 17 00:00:00 2001 From: Tianlei WU Date: Thu, 12 Feb 2026 15:21:06 -0800 Subject: [PATCH 02/11] lintrunner --- tools/python/cherry_pick.py | 71 +++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/tools/python/cherry_pick.py b/tools/python/cherry_pick.py index fc2fa8cb55b9b..035d923725e4e 100644 --- a/tools/python/cherry_pick.py +++ b/tools/python/cherry_pick.py @@ -19,28 +19,38 @@ # by the cherry-picked commits have any other modifications in the target branch history # that are not included in the cherry-pick list. 
import argparse -import subprocess import json +import subprocess import sys from collections import defaultdict + def main(): parser = argparse.ArgumentParser(description="Generate cherry-pick script from PRs with a specific label.") parser.add_argument("--label", required=True, help="Label to filter PRs") parser.add_argument("--output", required=True, help="Output cmd file path") parser.add_argument("--repo", default="microsoft/onnxruntime", help="Repository (default: microsoft/onnxruntime)") - parser.add_argument("--branch", default="HEAD", help="Target branch to compare against for dependency checks (default: HEAD)") + parser.add_argument( + "--branch", default="HEAD", help="Target branch to compare against for dependency checks (default: HEAD)" + ) args = parser.parse_args() # Fetch merged PRs with the specified label using gh CLI print(f"Fetching merged PRs with label '{args.label}' from {args.repo}...") cmd = [ - "gh", "pr", "list", - "--repo", args.repo, - "--label", args.label, - "--state", "merged", - "--json", "number,title,mergeCommit,mergedAt", - "-L", "200" + "gh", + "pr", + "list", + "--repo", + args.repo, + "--label", + args.label, + "--state", + "merged", + "--json", + "number,title,mergeCommit,mergedAt", + "-L", + "200", ] try: @@ -60,7 +70,7 @@ def main(): return # Sort by mergedAt (ISO 8601 strings sort correctly in chronological order) - prs.sort(key=lambda x: x['mergedAt']) + prs.sort(key=lambda x: x["mergedAt"]) # Write to output cmd file commit_count = 0 @@ -70,15 +80,15 @@ def main(): f.write("rem Sorted by merge time (oldest first)\n\n") for pr in prs: - number = pr['number'] - title = pr['title'] - safe_title = title.replace('\n', ' ') + number = pr["number"] + title = pr["title"] + safe_title = title.replace("\n", " ") - if not pr.get('mergeCommit'): + if not pr.get("mergeCommit"): print(f"Warning: PR #{number} has no merge commit OID. 
Skipping.", file=sys.stderr) continue - oid = pr['mergeCommit']['oid'] + oid = pr["mergeCommit"]["oid"] f.write(f"rem PR {number}: {safe_title}\n") f.write(f"git cherry-pick {oid}\n\n") commit_count += 1 @@ -88,11 +98,11 @@ def main(): # Write to markdown file. You can use it as the pull request description. md_output = "cherry_pick_pr_description.md" with open(md_output, "w", encoding="utf-8") as f: - f.write(f"This cherry-picks the following commits for the release:\n") + f.write("This cherry-picks the following commits for the release:\n") for pr in prs: - if not pr.get('mergeCommit'): + if not pr.get("mergeCommit"): continue - number = pr['number'] + number = pr["number"] f.write(f"- #{number}\n") print(f"Generated {md_output} with {commit_count} commits.") @@ -103,21 +113,23 @@ def main(): # Collect OIDs being cherry-picked cherry_pick_oids = set() for pr in prs: - if pr.get('mergeCommit'): - cherry_pick_oids.add(pr['mergeCommit']['oid']) + if pr.get("mergeCommit"): + cherry_pick_oids.add(pr["mergeCommit"]["oid"]) for pr in prs: - if not pr.get('mergeCommit'): + if not pr.get("mergeCommit"): continue - oid = pr['mergeCommit']['oid'] - number = pr['number'] + oid = pr["mergeCommit"]["oid"] + number = pr["number"] # Get files changed by this commit try: res = subprocess.run( ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", oid], - capture_output=True, text=True, check=True + capture_output=True, + text=True, + check=True, ) files = res.stdout.strip().splitlines() except subprocess.CalledProcessError as e: @@ -132,10 +144,12 @@ def main(): try: res = subprocess.run( ["git", "log", oid, "--not", args.branch, "--format=%H %s", "--", filepath], - capture_output=True, text=True, check=True + capture_output=True, + text=True, + check=True, ) for line in res.stdout.strip().splitlines(): - parts = line.split(' ', 1) + parts = line.split(" ", 1) c = parts[0] title = parts[1] if len(parts) > 1 else "" @@ -155,8 +169,11 @@ def main(): # Print deduplicated 
warnings for missing_oid, (title, affected_files) in missing_commits.items(): files_str = ", ".join(affected_files) - print(f"WARNING: PR #{number} ({oid}) depends on commit {missing_oid} ({title}) " - f"which is not in the cherry-pick list. Affected files: {files_str}") + print( + f"WARNING: PR #{number} ({oid}) depends on commit {missing_oid} ({title}) " + f"which is not in the cherry-pick list. Affected files: {files_str}" + ) + if __name__ == "__main__": main() From 6a44998644beac778c5138bf756fadd3c9c5fa2e Mon Sep 17 00:00:00 2001 From: Tianlei WU Date: Thu, 12 Feb 2026 15:46:21 -0800 Subject: [PATCH 03/11] address copilot feedback --- tools/python/cherry_pick.py | 287 ++++++++++++++++++++++-------------- 1 file changed, 177 insertions(+), 110 deletions(-) diff --git a/tools/python/cherry_pick.py b/tools/python/cherry_pick.py index 035d923725e4e..2d44f02da665f 100644 --- a/tools/python/cherry_pick.py +++ b/tools/python/cherry_pick.py @@ -1,23 +1,27 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -# -# Usage: -# python create_cherry_pick.py --label "release:1.24.2" --output cherry_pick.cmd --branch "origin/rel-1.24.2" -# -# Arguments: -# --label: Label to filter PRs (required) -# --output: Output cmd file path (required) -# --repo: Repository (default: microsoft/onnxruntime) -# --branch: Target branch to compare against for dependency checks (default: HEAD) -# -# This script fetches merged PRs with the specified label from the onnxruntime repository, -# sorts them by merge date, and generates: -# 1. A batch file (specified by --output) containing git cherry-pick commands. -# 2. A markdown file (cherry_pick_pr_description.md) summarizing the cherry-picked PRs for pull request description. 
-# -# It also checks for potential missing dependencies (conflicts) by verifying if files modified -# by the cherry-picked commits have any other modifications in the target branch history -# that are not included in the cherry-pick list. + +""" +Cherry-Pick Helper Script +------------------------- +Description: + This script automates the process of cherry-picking commits for a release branch. + It fetches merged PRs with a specific label, sorts them by merge date, and generates: + 1. A batch file (.cmd) with git cherry-pick commands. + 2. A markdown file (.md) for the PR description. + It also checks for potential missing dependencies (conflicts) by verifying if files modified + by the cherry-picked commits have any other modifications in commits that are not in the + specified target branch and are not included in the cherry-pick list. + +Usage: + python cherry_pick.py --label "release:1.24.2" --output cherry_pick.cmd --branch "origin/rel-1.24.2" + +Requirements: + - Python 3.7+ + - GitHub CLI (gh) logged in. + - Git available in PATH. 
+""" + import argparse import json import subprocess @@ -25,45 +29,170 @@ from collections import defaultdict -def main(): - parser = argparse.ArgumentParser(description="Generate cherry-pick script from PRs with a specific label.") - parser.add_argument("--label", required=True, help="Label to filter PRs") - parser.add_argument("--output", required=True, help="Output cmd file path") - parser.add_argument("--repo", default="microsoft/onnxruntime", help="Repository (default: microsoft/onnxruntime)") - parser.add_argument( - "--branch", default="HEAD", help="Target branch to compare against for dependency checks (default: HEAD)" - ) - args = parser.parse_args() - - # Fetch merged PRs with the specified label using gh CLI - print(f"Fetching merged PRs with label '{args.label}' from {args.repo}...") +def run_command(command_list, cwd=None, silent=False): + """Run a command using a list of arguments for security (no shell=True).""" + try: + result = subprocess.run(command_list, check=False, capture_output=True, text=True, cwd=cwd, encoding="utf-8") + if result.returncode != 0: + if not silent: + log_str = " ".join(command_list) + print(f"Error running command: {log_str}", file=sys.stderr) + if result.stderr: + print(f"Stderr: {result.stderr.strip()}", file=sys.stderr) + return None + return result.stdout + except FileNotFoundError: + if not silent: + cmd = command_list[0] + print(f"Error: '{cmd}' command not found.", file=sys.stderr) + if cmd == "gh": + print( + "Please install GitHub CLI (https://cli.github.com/) and ensure 'gh' is available on your PATH.", + file=sys.stderr, + ) + return None + except Exception as e: + if not silent: + print(f"Exception running command {' '.join(command_list)}: {e}", file=sys.stderr) + return None + + +def check_preflight(): + """Verify gh CLI and git repository early.""" + # Check git + git_check = run_command(["git", "rev-parse", "--is-inside-work-tree"], silent=True) + if not git_check: + print("Error: This script must be run 
inside a git repository.", file=sys.stderr) + return False + + # Check gh + gh_check = run_command(["gh", "--version"], silent=True) + if not gh_check: + print("Error: GitHub CLI (gh) not found or not in PATH.", file=sys.stderr) + print( + "Please install GitHub CLI (https://cli.github.com/) and ensure 'gh' is available on your PATH.", + file=sys.stderr, + ) + return False + + gh_auth = run_command(["gh", "auth", "status"], silent=True) + if not gh_auth: + print("Error: GitHub CLI not authenticated. Please run 'gh auth login'.", file=sys.stderr) + return False + + return True + + +def get_merged_prs(repo, label, limit=200): + """Fetch merged PRs with the specific label.""" + print(f"Fetching merged PRs with label '{label}' from {repo}...") cmd = [ "gh", "pr", "list", "--repo", - args.repo, + repo, "--label", - args.label, + label, "--state", "merged", "--json", "number,title,mergeCommit,mergedAt", "-L", - "200", + str(limit), ] + output = run_command(cmd) + if not output: + return [] try: - result = subprocess.run(cmd, capture_output=True, text=True, check=True) - prs = json.loads(result.stdout) - except subprocess.CalledProcessError as e: - print(f"Error running gh command: {e}", file=sys.stderr) - print(e.stderr, file=sys.stderr) - sys.exit(1) + return json.loads(output) except json.JSONDecodeError as e: print(f"Error parsing gh output: {e}", file=sys.stderr) - print(f"Output was: {result.stdout}", file=sys.stderr) - sys.exit(1) + return [] + + +def get_changed_files(oid): + """Get list of files changed in a commit.""" + output = run_command(["git", "diff-tree", "--no-commit-id", "--name-only", "-r", oid], silent=True) + if output: + return output.strip().splitlines() + return [] + + +def check_missing_dependencies(prs, branch): + """Check for potential missing dependencies (conflicts).""" + print("\nChecking for potential missing dependencies (conflicts)...") + + # Collect OIDs being cherry-picked + cherry_pick_oids = set() + for pr in prs: + if 
pr.get("mergeCommit"): + cherry_pick_oids.add(pr["mergeCommit"]["oid"]) + + for pr in prs: + if not pr.get("mergeCommit"): + continue + + oid = pr["mergeCommit"]["oid"] + number = pr["number"] + + files = get_changed_files(oid) + if not files: + continue + + # For each file, find commits that modified it between the target branch and the cherry-picked commit. + # Deduplicate warnings: group affected files by missing commit. + # missing_commits maps: missing_commit_oid -> (title, [list of affected files]) + missing_commits = defaultdict(lambda: ("", [])) + + for filepath in files: + # git log <oid> --not <branch> -- <filepath> + output = run_command(["git", "log", oid, "--not", branch, "--format=%H %s", "--", filepath], silent=True) + + if not output: + continue + + for line in output.strip().splitlines(): + parts = line.split(" ", 1) + c = parts[0] + title = parts[1] if len(parts) > 1 else "" + + if c == oid: + continue + if c not in cherry_pick_oids: + existing_title, existing_files = missing_commits[c] + if not existing_title: + existing_title = title + existing_files.append(filepath) + missing_commits[c] = (existing_title, existing_files) + + # Print deduplicated warnings + for missing_oid, (title, affected_files) in missing_commits.items(): + files_str = ", ".join(affected_files) + print( + f"WARNING: PR #{number} ({oid}) modifies files that were also changed by commit {missing_oid} ({title}), " + f"which is not in the cherry-pick list. This may indicate missing related changes. 
Affected files: {files_str}" + ) + + +def main(): + parser = argparse.ArgumentParser(description="Generate cherry-pick script from PRs with a specific label.") + parser.add_argument("--label", required=True, help="Label to filter PRs") + parser.add_argument("--output", required=True, help="Output cmd file path") + parser.add_argument("--repo", default="microsoft/onnxruntime", help="Repository (default: microsoft/onnxruntime)") + parser.add_argument( + "--branch", default="HEAD", help="Target branch to compare against for dependency checks (default: HEAD)" + ) + parser.add_argument("--limit", type=int, default=200, help="Wait limitation for PR fetching (default: 200)") + args = parser.parse_args() + + # Preflight Check + if not check_preflight(): + return + + # 1. Fetch Merged PRs + prs = get_merged_prs(args.repo, args.label, args.limit) if not prs: print(f"No PRs found with label '{args.label}'.") @@ -72,7 +201,7 @@ def main(): # Sort by mergedAt (ISO 8601 strings sort correctly in chronological order) prs.sort(key=lambda x: x["mergedAt"]) - # Write to output cmd file + # 2. Write Output Script commit_count = 0 with open(args.output, "w", encoding="utf-8") as f: f.write("@echo off\n") @@ -95,7 +224,7 @@ def main(): print(f"Generated {args.output} with {commit_count} commits.") - # Write to markdown file. You can use it as the pull request description. + # 3. 
Write PR Description Markdown md_output = "cherry_pick_pr_description.md" with open(md_output, "w", encoding="utf-8") as f: f.write("This cherry-picks the following commits for the release:\n") @@ -103,76 +232,14 @@ def main(): if not pr.get("mergeCommit"): continue number = pr["number"] - f.write(f"- #{number}\n") + title = pr["title"] + # Markdown link format: - #123 Title + f.write(f"- #{number} {title}\n") print(f"Generated {md_output} with {commit_count} commits.") - # Check for potential missing dependencies - print("\nChecking for potential missing dependencies (conflicts)...") - - # Collect OIDs being cherry-picked - cherry_pick_oids = set() - for pr in prs: - if pr.get("mergeCommit"): - cherry_pick_oids.add(pr["mergeCommit"]["oid"]) - - for pr in prs: - if not pr.get("mergeCommit"): - continue - - oid = pr["mergeCommit"]["oid"] - number = pr["number"] - - # Get files changed by this commit - try: - res = subprocess.run( - ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", oid], - capture_output=True, - text=True, - check=True, - ) - files = res.stdout.strip().splitlines() - except subprocess.CalledProcessError as e: - print(f"Error getting changed files for {oid}: {e}", file=sys.stderr) - continue - - # For each file, find commits that modified it between the target branch and the cherry-picked commit. - # Deduplicate warnings: group affected files by missing commit. 
- # missing_commits maps: missing_commit_oid -> (title, [list of affected files]) - missing_commits = defaultdict(lambda: ("", [])) - for filepath in files: - try: - res = subprocess.run( - ["git", "log", oid, "--not", args.branch, "--format=%H %s", "--", filepath], - capture_output=True, - text=True, - check=True, - ) - for line in res.stdout.strip().splitlines(): - parts = line.split(" ", 1) - c = parts[0] - title = parts[1] if len(parts) > 1 else "" - - if c == oid: - continue - if c not in cherry_pick_oids: - existing_title, existing_files = missing_commits[c] - if not existing_title: - existing_title = title - existing_files.append(filepath) - missing_commits[c] = (existing_title, existing_files) - - except subprocess.CalledProcessError as e: - print(f"Error checking history for {filepath}: {e}", file=sys.stderr) - continue - - # Print deduplicated warnings - for missing_oid, (title, affected_files) in missing_commits.items(): - files_str = ", ".join(affected_files) - print( - f"WARNING: PR #{number} ({oid}) depends on commit {missing_oid} ({title}) " - f"which is not in the cherry-pick list. Affected files: {files_str}" - ) + # 4. Dependency Check + check_missing_dependencies(prs, args.branch) if __name__ == "__main__": From 1e6bbf915e5b27c09a259b463f01365f1a88d3b7 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Thu, 19 Feb 2026 09:10:29 +0000 Subject: [PATCH 04/11] update compile contributors patterns --- tools/python/compile_contributors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/python/compile_contributors.py b/tools/python/compile_contributors.py index 494b0f91c5381..79e5166cfb4b7 100644 --- a/tools/python/compile_contributors.py +++ b/tools/python/compile_contributors.py @@ -219,6 +219,7 @@ def extract_pr_numbers(text, strict=False): # And it avoids matching truncated headlines like (#25... 
as PR #25 patterns = [ r"\(#(\d+)\)", # (#123) + r"(?:^|\s|-)#(\d+)(?:\s|$)", # #123 at start, or preceded by space/dash, and followed by space or end r"microsoft/onnxruntime/pull/(\d+)", ] results = [] @@ -280,7 +281,7 @@ def get_prs_from_log(log_output, prs_base=None, log_file=None, scan_depth=100): # Reuse commits already fetched in get_pr_details to avoid an extra gh CLI call for commit in details.get("commits", []): all_extracted_nums.extend(extract_pr_numbers(commit.get("messageHeadline", ""), strict=True)) - all_extracted_nums.extend(extract_pr_numbers(commit.get("messageBody", ""), strict=True)) + # DO NOT scan messageBody for expansion to avoid historical context PRs # Filter and Normalize current_pr_int = int(pr_num_str) From 1c40b36abf04f1f87e62ce1e7b26b5499be25c0d Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Fri, 27 Feb 2026 01:38:39 +0000 Subject: [PATCH 05/11] output sh and markdown table --- tools/python/cherry_pick.py | 76 ++++++++++++++++++++++++---- tools/python/compile_contributors.py | 7 ++- 2 files changed, 70 insertions(+), 13 deletions(-) diff --git a/tools/python/cherry_pick.py b/tools/python/cherry_pick.py index 2d44f02da665f..12beb44fb05bd 100644 --- a/tools/python/cherry_pick.py +++ b/tools/python/cherry_pick.py @@ -24,6 +24,7 @@ import argparse import json +import re import subprocess import sys from collections import defaultdict @@ -120,6 +121,31 @@ def get_changed_files(oid): return [] +def get_pr_number_from_subject(subject): + """Extract PR number from a commit subject like 'Some title (#12345)'.""" + match = re.search(r"\(#(\d+)\)$", subject.strip()) + if match: + return match.group(1) + return None + + +def get_existing_pr_numbers(branch): + """Get the set of PR numbers already present in the target branch.""" + output = run_command(["git", "log", branch, "--oneline", "-n", "500"], silent=True) + if not output: + return set() + pr_numbers = set() + for line in output.strip().splitlines(): + parts = line.split(" ", 1) + if 
len(parts) < 2: + continue + subject = parts[1] + pr_num = get_pr_number_from_subject(subject) + if pr_num: + pr_numbers.add(int(pr_num)) + return pr_numbers + + def check_missing_dependencies(prs, branch): """Check for potential missing dependencies (conflicts).""" print("\nChecking for potential missing dependencies (conflicts)...") @@ -179,7 +205,9 @@ def check_missing_dependencies(prs, branch): def main(): parser = argparse.ArgumentParser(description="Generate cherry-pick script from PRs with a specific label.") parser.add_argument("--label", required=True, help="Label to filter PRs") - parser.add_argument("--output", required=True, help="Output cmd file path") + parser.add_argument( + "--output", required=True, help="Output script file path (.sh for bash, .cmd for Windows batch)" + ) parser.add_argument("--repo", default="microsoft/onnxruntime", help="Repository (default: microsoft/onnxruntime)") parser.add_argument( "--branch", default="HEAD", help="Target branch to compare against for dependency checks (default: HEAD)" @@ -201,12 +229,27 @@ def main(): # Sort by mergedAt (ISO 8601 strings sort correctly in chronological order) prs.sort(key=lambda x: x["mergedAt"]) + # 1.5. Check which PRs are already in the target branch + existing_prs = get_existing_pr_numbers(args.branch) + if existing_prs: + print(f"Found {len(existing_prs)} PRs already in branch '{args.branch}'.") + + # Determine output format based on file extension + is_shell = args.output.endswith(".sh") + # 2. 
Write Output Script commit_count = 0 + skipped_count = 0 with open(args.output, "w", encoding="utf-8") as f: - f.write("@echo off\n") - f.write(f"rem Cherry-pick {args.label} commits\n") - f.write("rem Sorted by merge time (oldest first)\n\n") + if is_shell: + f.write("#!/bin/bash\n") + f.write(f"# Cherry-pick {args.label} commits\n") + f.write("# Sorted by merge time (oldest first)\n") + f.write("set -e\n\n") + else: + f.write("@echo off\n") + f.write(f"rem Cherry-pick {args.label} commits\n") + f.write("rem Sorted by merge time (oldest first)\n\n") for pr in prs: number = pr["number"] @@ -217,24 +260,35 @@ def main(): print(f"Warning: PR #{number} has no merge commit OID. Skipping.", file=sys.stderr) continue + if number in existing_prs: + print(f"Skipping PR #{number} (already in branch '{args.branch}'): {safe_title}") + skipped_count += 1 + continue + oid = pr["mergeCommit"]["oid"] - f.write(f"rem PR {number}: {safe_title}\n") + comment = "#" if is_shell else "rem" + f.write(f"{comment} PR {number}: {safe_title}\n") f.write(f"git cherry-pick {oid}\n\n") commit_count += 1 - print(f"Generated {args.output} with {commit_count} commits.") + print(f"Generated {args.output} with {commit_count} commits ({skipped_count} skipped, already in branch).") - # 3. Write PR Description Markdown + # 3. 
Write PR Description Markdown (table format) md_output = "cherry_pick_pr_description.md" with open(md_output, "w", encoding="utf-8") as f: - f.write("This cherry-picks the following commits for the release:\n") + f.write("This cherry-picks the following commits for the release:\n\n") + f.write("| Commit ID | PR Number | Commit Title |\n") + f.write("|-----------|-----------|-------------|\n") for pr in prs: if not pr.get("mergeCommit"): continue number = pr["number"] - title = pr["title"] - # Markdown link format: - #123 Title - f.write(f"- #{number} {title}\n") + if number in existing_prs: + continue + title = pr["title"].replace("\n", " ") + oid = pr["mergeCommit"]["oid"] + short_oid = oid[:10] + f.write(f"| {short_oid} | #{number} | {title} |\n") print(f"Generated {md_output} with {commit_count} commits.") diff --git a/tools/python/compile_contributors.py b/tools/python/compile_contributors.py index 79e5166cfb4b7..5324c90aad94c 100644 --- a/tools/python/compile_contributors.py +++ b/tools/python/compile_contributors.py @@ -214,14 +214,17 @@ def extract_pr_numbers(text, strict=False): return [] if strict: - # Strict mode: Only look for (#123) with closing paren or full onnxruntime URLs + # Strict mode: Only look for (#123) with closing paren, full onnxruntime URLs, + # or PR numbers in markdown table cells (| #123 |), or standalone #123 with clear boundaries. # This avoids noise from version numbers or external repo PRs # And it avoids matching truncated headlines like (#25... 
as PR #25 patterns = [ r"\(#(\d+)\)", # (#123) - r"(?:^|\s|-)#(\d+)(?:\s|$)", # #123 at start, or preceded by space/dash, and followed by space or end r"microsoft/onnxruntime/pull/(\d+)", + r"(?:^|\s|-)#(\d+)(?:\s|$)", # #123 at start, or preceded by space/dash, and followed by space or end + r"\|\s*#(\d+)\s*\|", # | #123 | (markdown table cell) ] + results = [] for p in patterns: results.extend(re.findall(p, text)) From 90d5604e007b59c1ed93772134c676a73101ced0 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Sat, 28 Feb 2026 20:55:14 +0000 Subject: [PATCH 06/11] refactoring --- tools/python/cherry_pick.py | 117 ++++++++---------- tools/python/cherry_pick_utils.py | 92 ++++++++++++++ tools/python/compile_contributors.py | 171 ++++++++------------------- 3 files changed, 194 insertions(+), 186 deletions(-) create mode 100644 tools/python/cherry_pick_utils.py diff --git a/tools/python/cherry_pick.py b/tools/python/cherry_pick.py index 12beb44fb05bd..85bc7cba03617 100644 --- a/tools/python/cherry_pick.py +++ b/tools/python/cherry_pick.py @@ -25,63 +25,14 @@ import argparse import json import re -import subprocess import sys from collections import defaultdict - - -def run_command(command_list, cwd=None, silent=False): - """Run a command using a list of arguments for security (no shell=True).""" - try: - result = subprocess.run(command_list, check=False, capture_output=True, text=True, cwd=cwd, encoding="utf-8") - if result.returncode != 0: - if not silent: - log_str = " ".join(command_list) - print(f"Error running command: {log_str}", file=sys.stderr) - if result.stderr: - print(f"Stderr: {result.stderr.strip()}", file=sys.stderr) - return None - return result.stdout - except FileNotFoundError: - if not silent: - cmd = command_list[0] - print(f"Error: '{cmd}' command not found.", file=sys.stderr) - if cmd == "gh": - print( - "Please install GitHub CLI (https://cli.github.com/) and ensure 'gh' is available on your PATH.", - file=sys.stderr, - ) - return None - except 
Exception as e: - if not silent: - print(f"Exception running command {' '.join(command_list)}: {e}", file=sys.stderr) - return None - - -def check_preflight(): - """Verify gh CLI and git repository early.""" - # Check git - git_check = run_command(["git", "rev-parse", "--is-inside-work-tree"], silent=True) - if not git_check: - print("Error: This script must be run inside a git repository.", file=sys.stderr) - return False - - # Check gh - gh_check = run_command(["gh", "--version"], silent=True) - if not gh_check: - print("Error: GitHub CLI (gh) not found or not in PATH.", file=sys.stderr) - print( - "Please install GitHub CLI (https://cli.github.com/) and ensure 'gh' is available on your PATH.", - file=sys.stderr, - ) - return False - - gh_auth = run_command(["gh", "auth", "status"], silent=True) - if not gh_auth: - print("Error: GitHub CLI not authenticated. Please run 'gh auth login'.", file=sys.stderr) - return False - - return True +from cherry_pick_utils import ( + run_command, + check_preflight, + get_pr_number_from_subject, + extract_pr_numbers, +) def get_merged_prs(repo, label, limit=200): @@ -121,28 +72,62 @@ def get_changed_files(oid): return [] -def get_pr_number_from_subject(subject): - """Extract PR number from a commit subject like 'Some title (#12345)'.""" - match = re.search(r"\(#(\d+)\)$", subject.strip()) - if match: - return match.group(1) - return None - - def get_existing_pr_numbers(branch): """Get the set of PR numbers already present in the target branch.""" output = run_command(["git", "log", branch, "--oneline", "-n", "500"], silent=True) if not output: return set() pr_numbers = set() - for line in output.strip().splitlines(): + + # Pre-fetch PR cache to avoid redundant gh calls + pr_cache = {} + + # Process commit log + lines = output.strip().splitlines() + for line in lines: parts = line.split(" ", 1) if len(parts) < 2: continue subject = parts[1] + pr_num = get_pr_number_from_subject(subject) - if pr_num: - pr_numbers.add(int(pr_num)) 
+ if not pr_num: + continue + + pr_num_int = int(pr_num) + pr_numbers.add(pr_num_int) + + # Check if it's a cherry-pick / meta-PR + is_meta_pr = ( + "cherry pick" in subject.lower() or "cherry-pick" in subject.lower() or "cherrypick" in subject.lower() + ) + + if is_meta_pr: + # Query gh to get more details (body/commits) to find squashed sub-PRs + if pr_num not in pr_cache: + gh_out = run_command(["gh", "pr", "view", pr_num, "--json", "title,body,commits"], silent=True) + if gh_out: + try: + pr_cache[pr_num] = json.loads(gh_out) + except json.JSONDecodeError: + pr_cache[pr_num] = None + else: + pr_cache[pr_num] = None + + details = pr_cache.get(pr_num) + if details: + # Collect sub-PRs from title, body, and commits + extracted_nums = [] + extracted_nums.extend(extract_pr_numbers(details.get("title", ""))) + extracted_nums.extend(extract_pr_numbers(details.get("body", ""))) + + for commit in details.get("commits", []): + extracted_nums.extend(extract_pr_numbers(commit.get("messageHeadline", ""))) + + for num in set(extracted_nums): + if num != pr_num_int: + pr_numbers.add(num) + return pr_numbers diff --git a/tools/python/cherry_pick_utils.py b/tools/python/cherry_pick_utils.py new file mode 100644 index 0000000000000..34e3f8f0f87eb --- /dev/null +++ b/tools/python/cherry_pick_utils.py @@ -0,0 +1,92 @@ +import subprocess +import sys +import re +import json + +def run_command(command_list, cwd=None, silent=False): + """Run a command using a list of arguments for security (no shell=True).""" + try: + result = subprocess.run(command_list, check=False, capture_output=True, text=True, cwd=cwd, encoding="utf-8") + if result.returncode != 0: + if not silent: + log_str = " ".join(command_list) + print(f"Error running command: {log_str}", file=sys.stderr) + if result.stderr: + print(f"Stderr: {result.stderr.strip()}", file=sys.stderr) + return None + return result.stdout + except FileNotFoundError: + if not silent: + cmd = command_list[0] + print(f"Error: '{cmd}' command 
not found.", file=sys.stderr) + if cmd == "gh": + print( + "Please install GitHub CLI (https://cli.github.com/) and ensure 'gh' is available on your PATH.", + file=sys.stderr, + ) + return None + except Exception as e: + if not silent: + print(f"Exception running command {' '.join(command_list)}: {e}", file=sys.stderr) + return None + + +def check_preflight(): + """Verify gh CLI and git repository early.""" + # Check git + git_check = run_command(["git", "rev-parse", "--is-inside-work-tree"], silent=True) + if not git_check: + print("Error: This script must be run inside a git repository.", file=sys.stderr) + return False + + # Check gh + gh_check = run_command(["gh", "--version"], silent=True) + if not gh_check: + print("Error: GitHub CLI (gh) not found or not in PATH.", file=sys.stderr) + print( + "Please install GitHub CLI (https://cli.github.com/) and ensure 'gh' is available on your PATH.", + file=sys.stderr, + ) + return False + + gh_auth = run_command(["gh", "auth", "status"], silent=True) + if not gh_auth: + print("Error: GitHub CLI not authenticated. Please run 'gh auth login'.", file=sys.stderr) + return False + + return True + + +def get_pr_number_from_subject(subject): + """Extract PR number from a commit subject like 'Some title (#12345)'.""" + match = re.search(r"\(#(\d+)\)$", subject.strip()) + if match: + return match.group(1) + return None + + +def extract_pr_numbers(text, strict=False): + if not text: + return [] + + if strict: + # Strict mode: Only look for (#123) with closing paren, full onnxruntime URLs, + # or PR numbers in markdown table cells (| #123 |), or standalone #123 with clear boundaries. + # This avoids noise from version numbers or external repo PRs + # And it avoids matching truncated headlines like (#25... 
as PR #25 + patterns = [ + r"\(#(\d+)\)", # (#123) + r"microsoft/onnxruntime/pull/(\d+)", + r"(?:^|\s|-)#(\d+)(?:\s|$)", # #123 at start, or preceded by space/dash, and followed by space or end + r"\|\s*#(\d+)\s*\|", # | #123 | (markdown table cell) + ] + + results = [] + for p in patterns: + results.extend(re.findall(p, text)) + return [int(x) for x in set(results)] + + # Matches patterns like #123 or https://github.com/microsoft/onnxruntime/pull/123 + # Also handles ( #123) or similar in titles + prs = re.findall(r"(?:#|/pull/)(\d+)", text) + return [int(x) for x in set(prs)] diff --git a/tools/python/compile_contributors.py b/tools/python/compile_contributors.py index 5324c90aad94c..892f0f1648502 100644 --- a/tools/python/compile_contributors.py +++ b/tools/python/compile_contributors.py @@ -30,8 +30,12 @@ import json import os import re -import subprocess - +from cherry_pick_utils import ( + run_command, + check_preflight, + get_pr_number_from_subject as get_pr_number, + extract_pr_numbers, +) def log_event(message, log_file=None): """Log a message to the console and an optional log file.""" @@ -40,100 +44,9 @@ def log_event(message, log_file=None): print(message) # Clean print for console UI if log_file: log_file.write(full_message + "\n") - - -def run_command(command_list, cwd=".", silent=False): - """Run a command using a list of arguments for security (no shell=True).""" - result = subprocess.run(command_list, check=False, capture_output=True, text=True, cwd=cwd, encoding="utf-8") - if result.returncode != 0: - if not silent: - log_str = " ".join(command_list) - print(f"Error running command: {log_str}") - if result.stderr: - print(f"Stderr: {result.stderr.strip()}") - return None - return result.stdout - - -def check_preflight(): - """Verify gh CLI and git repository early.""" - # Check git - git_check = run_command(["git", "rev-parse", "--is-inside-work-tree"], silent=True) - if not git_check: - print("Error: This script must be run inside a git 
repository.") - return False - - # Check gh - gh_check = run_command(["gh", "--version"], silent=True) - if not gh_check: - print("Error: GitHub CLI (gh) not found or not in PATH.") - return False - - gh_auth = run_command(["gh", "auth", "status"], silent=True) - if not gh_auth: - print("Error: GitHub CLI not authenticated. Please run 'gh auth login'.") - return False - - return True - - -# Constants -PR_CACHE = {} # Cache for PR details to speed up multiple rounds referencing same PRs NAME_TO_LOGIN = {} # Map full names to GitHub logins for consolidation VERIFIED_LOGINS = set() # Track IDs known to be valid GitHub logins (vs free-form names) - -# Bots to exclude from contributor lists -BOT_NAMES = { - "Copilot", - "dependabot[bot]", - "app/dependabot", - "github-actions[bot]", - "app/copilot-swe-agent", - "CI Bot", - "github-advanced-security[bot]", - "GitHub Actions", - "dependabot", - "github-actions", - "Gemini", - "CI", -} - - -def is_bot(name): - if not name: - return True - name_clean = name.strip().lstrip("@") - # Known bots and patterns - if name_clean in BOT_NAMES: - return True - if "[bot]" in name_clean.lower(): - return True - if name_clean.lower().startswith("app/"): - return True - return False - - -def is_invalid(name): - if not name: - return True - # If it's a bot, it's considered a valid identity for the CSV - if is_bot(name): - return False - - name_clean = name.strip().lstrip("@") - # Paths, brackets, and code extensions - if "/" in name_clean or "\\" in name_clean or "[" in name_clean or "]" in name_clean: - return True - if any(name_clean.lower().endswith(ext) for ext in [".cmake", ".py", ".h", ".cc", ".cpp", ".txt", ".md"]): - return True - return False - - -def get_pr_number(subject): - match = re.search(r"\(#(\d+)\)$", subject.strip()) - if match: - return match.group(1) - return None +PR_CACHE = {} # Cache for PR details to speed up multiple rounds referencing same PRs def get_pr_details(pr_number): @@ -150,7 +63,6 @@ def 
get_pr_details(pr_number): PR_CACHE[pr_number] = None return None - def extract_authors_from_pr(details): authors = set() if not details: @@ -208,32 +120,51 @@ def extract_authors_from_commit(commit_id): return authors +# Bots to exclude from contributor lists +BOT_NAMES = { + "Copilot", + "dependabot[bot]", + "app/dependabot", + "github-actions[bot]", + "app/copilot-swe-agent", + "CI Bot", + "github-advanced-security[bot]", + "GitHub Actions", + "dependabot", + "github-actions", + "Gemini", + "CI", +} -def extract_pr_numbers(text, strict=False): - if not text: - return [] - - if strict: - # Strict mode: Only look for (#123) with closing paren, full onnxruntime URLs, - # or PR numbers in markdown table cells (| #123 |), or standalone #123 with clear boundaries. - # This avoids noise from version numbers or external repo PRs - # And it avoids matching truncated headlines like (#25... as PR #25 - patterns = [ - r"\(#(\d+)\)", # (#123) - r"microsoft/onnxruntime/pull/(\d+)", - r"(?:^|\s|-)#(\d+)(?:\s|$)", # #123 at start, or preceded by space/dash, and followed by space or end - r"\|\s*#(\d+)\s*\|", # | #123 | (markdown table cell) - ] - - results = [] - for p in patterns: - results.extend(re.findall(p, text)) - return [int(x) for x in set(results)] - - # Matches patterns like #123 or https://github.com/microsoft/onnxruntime/pull/123 - # Also handles ( #123) or similar in titles - prs = re.findall(r"(?:#|/pull/)(\d+)", text) - return [int(x) for x in set(prs)] + +def is_bot(name): + if not name: + return True + name_clean = name.strip().lstrip("@") + # Known bots and patterns + if name_clean in BOT_NAMES: + return True + if "[bot]" in name_clean.lower(): + return True + if name_clean.lower().startswith("app/"): + return True + return False + + +def is_invalid(name): + if not name: + return True + # If it's a bot, it's considered a valid identity for the CSV + if is_bot(name): + return False + + name_clean = name.strip().lstrip("@") + # Paths, brackets, and code 
extensions + if "/" in name_clean or "\\" in name_clean or "[" in name_clean or "]" in name_clean: + return True + if any(name_clean.lower().endswith(ext) for ext in [".cmake", ".py", ".h", ".cc", ".cpp", ".txt", ".md"]): + return True + return False def get_prs_from_log(log_output, prs_base=None, log_file=None, scan_depth=100): From ff6f523b24355ec75c0b8763fbc3e77311c20fd9 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Sat, 28 Feb 2026 20:56:01 +0000 Subject: [PATCH 07/11] format --- tools/python/cherry_pick.py | 6 +++--- tools/python/cherry_pick_utils.py | 7 +++++-- tools/python/compile_contributors.py | 12 ++++++++++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/tools/python/cherry_pick.py b/tools/python/cherry_pick.py index 85bc7cba03617..3557ea467448c 100644 --- a/tools/python/cherry_pick.py +++ b/tools/python/cherry_pick.py @@ -24,14 +24,14 @@ import argparse import json -import re import sys from collections import defaultdict + from cherry_pick_utils import ( - run_command, check_preflight, - get_pr_number_from_subject, extract_pr_numbers, + get_pr_number_from_subject, + run_command, ) diff --git a/tools/python/cherry_pick_utils.py b/tools/python/cherry_pick_utils.py index 34e3f8f0f87eb..cd58f3b589c35 100644 --- a/tools/python/cherry_pick_utils.py +++ b/tools/python/cherry_pick_utils.py @@ -1,7 +1,10 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. 
+ +import re import subprocess import sys -import re -import json + def run_command(command_list, cwd=None, silent=False): """Run a command using a list of arguments for security (no shell=True).""" diff --git a/tools/python/compile_contributors.py b/tools/python/compile_contributors.py index 892f0f1648502..bb02c2807d08c 100644 --- a/tools/python/compile_contributors.py +++ b/tools/python/compile_contributors.py @@ -30,13 +30,17 @@ import json import os import re + from cherry_pick_utils import ( - run_command, check_preflight, - get_pr_number_from_subject as get_pr_number, extract_pr_numbers, + run_command, +) +from cherry_pick_utils import ( + get_pr_number_from_subject as get_pr_number, ) + def log_event(message, log_file=None): """Log a message to the console and an optional log file.""" timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") @@ -44,6 +48,8 @@ def log_event(message, log_file=None): print(message) # Clean print for console UI if log_file: log_file.write(full_message + "\n") + + NAME_TO_LOGIN = {} # Map full names to GitHub logins for consolidation VERIFIED_LOGINS = set() # Track IDs known to be valid GitHub logins (vs free-form names) PR_CACHE = {} # Cache for PR details to speed up multiple rounds referencing same PRs @@ -63,6 +69,7 @@ def get_pr_details(pr_number): PR_CACHE[pr_number] = None return None + def extract_authors_from_pr(details): authors = set() if not details: @@ -120,6 +127,7 @@ def extract_authors_from_commit(commit_id): return authors + # Bots to exclude from contributor lists BOT_NAMES = { "Copilot", From 33687c55f8b5bcc137dc0411f2a0b4b500a63cfa Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Wed, 4 Mar 2026 16:03:47 -0800 Subject: [PATCH 08/11] status --- tools/python/cherry_pick.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tools/python/cherry_pick.py b/tools/python/cherry_pick.py index 3557ea467448c..a26cfb6d92ddd 100644 --- a/tools/python/cherry_pick.py +++ 
b/tools/python/cherry_pick.py @@ -141,6 +141,7 @@ def check_missing_dependencies(prs, branch): if pr.get("mergeCommit"): cherry_pick_oids.add(pr["mergeCommit"]["oid"]) + conflicting_prs_count = 0 for pr in prs: if not pr.get("mergeCommit"): continue @@ -179,12 +180,19 @@ def check_missing_dependencies(prs, branch): missing_commits[c] = (existing_title, existing_files) # Print deduplicated warnings - for missing_oid, (title, affected_files) in missing_commits.items(): - files_str = ", ".join(affected_files) - print( - f"WARNING: PR #{number} ({oid}) modifies files that were also changed by commit {missing_oid} ({title}), " - f"which is not in the cherry-pick list. This may indicate missing related changes. Affected files: {files_str}" - ) + if missing_commits: + conflicting_prs_count += 1 + for missing_oid, (title, affected_files) in missing_commits.items(): + files_str = ", ".join(affected_files) + print( + f"WARNING: PR #{number} ({oid}) modifies files that were also changed by commit {missing_oid} ({title}), " + f"which is not in the cherry-pick list. This may indicate missing related changes. Affected files: {files_str}" + ) + + if conflicting_prs_count == 0: + print("No potential missing dependencies found.") + else: + print(f"\nDone. 
Found potential missing dependencies for {conflicting_prs_count} PRs.") def main(): From 433e8e32e3778a657680f8ebe6994c7f398f8ff9 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Mon, 9 Mar 2026 21:13:56 -0700 Subject: [PATCH 09/11] review feedback --- tools/python/cherry_pick.py | 91 +++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/tools/python/cherry_pick.py b/tools/python/cherry_pick.py index a26cfb6d92ddd..7ac463b1ec274 100644 --- a/tools/python/cherry_pick.py +++ b/tools/python/cherry_pick.py @@ -24,8 +24,8 @@ import argparse import json +import os import sys -from collections import defaultdict from cherry_pick_utils import ( check_preflight, @@ -72,6 +72,16 @@ def get_changed_files(oid): return [] +def sanitize_title(title): + """Normalize PR titles for single-line text output.""" + return title.replace("\n", " ").strip() + + +def escape_markdown_table_cell(text): + """Escape markdown table delimiters in cell content.""" + return sanitize_title(text).replace("|", "\\|") + + def get_existing_pr_numbers(branch): """Get the set of PR numbers already present in the target branch.""" output = run_command(["git", "log", branch, "--oneline", "-n", "500"], silent=True) @@ -118,11 +128,11 @@ def get_existing_pr_numbers(branch): if details: # Collect sub-PRs from title, body, and commits extracted_nums = [] - extracted_nums.extend(extract_pr_numbers(details.get("title", ""))) - extracted_nums.extend(extract_pr_numbers(details.get("body", ""))) + extracted_nums.extend(extract_pr_numbers(details.get("title", ""), strict=True)) + extracted_nums.extend(extract_pr_numbers(details.get("body", ""), strict=True)) for commit in details.get("commits", []): - extracted_nums.extend(extract_pr_numbers(commit.get("messageHeadline", ""))) + extracted_nums.extend(extract_pr_numbers(commit.get("messageHeadline", ""), strict=True)) for num in set(extracted_nums): if num != pr_num_int: @@ -155,8 +165,8 @@ def 
check_missing_dependencies(prs, branch): # For each file, find commits that modified it between the target branch and the cherry-picked commit. # Deduplicate warnings: group affected files by missing commit. - # missing_commits maps: missing_commit_oid -> (title, [list of affected files]) - missing_commits = defaultdict(lambda: ("", [])) + # missing_commits maps: missing_commit_oid -> {"title": ..., "files": [...]} + missing_commits = {} for filepath in files: # git log --not -- @@ -173,19 +183,18 @@ def check_missing_dependencies(prs, branch): if c == oid: continue if c not in cherry_pick_oids: - existing_title, existing_files = missing_commits[c] - if not existing_title: - existing_title = title - existing_files.append(filepath) - missing_commits[c] = (existing_title, existing_files) + entry = missing_commits.setdefault(c, {"title": title, "files": []}) + if not entry["title"]: + entry["title"] = title + entry["files"].append(filepath) # Print deduplicated warnings if missing_commits: conflicting_prs_count += 1 - for missing_oid, (title, affected_files) in missing_commits.items(): - files_str = ", ".join(affected_files) + for missing_oid, entry in missing_commits.items(): + files_str = ", ".join(entry["files"]) print( - f"WARNING: PR #{number} ({oid}) modifies files that were also changed by commit {missing_oid} ({title}), " + f"WARNING: PR #{number} ({oid}) modifies files that were also changed by commit {missing_oid} ({entry['title']}), " f"which is not in the cherry-pick list. This may indicate missing related changes. 
Affected files: {files_str}" ) @@ -205,7 +214,11 @@ def main(): parser.add_argument( "--branch", default="HEAD", help="Target branch to compare against for dependency checks (default: HEAD)" ) - parser.add_argument("--limit", type=int, default=200, help="Wait limitation for PR fetching (default: 200)") + parser.add_argument("--limit", type=int, default=200, help="Maximum number of PRs to fetch (default: 200)") + parser.add_argument( + "--md-output", + help="Output markdown file path for the PR description (default: next to --output)", + ) args = parser.parse_args() # Preflight Check @@ -227,12 +240,28 @@ def main(): if existing_prs: print(f"Found {len(existing_prs)} PRs already in branch '{args.branch}'.") + cherry_pick_prs = [] + skipped_count = 0 + for pr in prs: + number = pr["number"] + safe_title = sanitize_title(pr["title"]) + + if not pr.get("mergeCommit"): + print(f"Warning: PR #{number} has no merge commit OID. Skipping.", file=sys.stderr) + continue + + if number in existing_prs: + print(f"Skipping PR #{number} (already in branch '{args.branch}'): {safe_title}") + skipped_count += 1 + continue + + cherry_pick_prs.append(pr) + # Determine output format based on file extension is_shell = args.output.endswith(".sh") # 2. Write Output Script - commit_count = 0 - skipped_count = 0 + commit_count = len(cherry_pick_prs) with open(args.output, "w", encoding="utf-8") as f: if is_shell: f.write("#!/bin/bash\n") @@ -244,41 +273,27 @@ def main(): f.write(f"rem Cherry-pick {args.label} commits\n") f.write("rem Sorted by merge time (oldest first)\n\n") - for pr in prs: + for pr in cherry_pick_prs: number = pr["number"] - title = pr["title"] - safe_title = title.replace("\n", " ") - - if not pr.get("mergeCommit"): - print(f"Warning: PR #{number} has no merge commit OID. 
Skipping.", file=sys.stderr) - continue - - if number in existing_prs: - print(f"Skipping PR #{number} (already in branch '{args.branch}'): {safe_title}") - skipped_count += 1 - continue + safe_title = sanitize_title(pr["title"]) oid = pr["mergeCommit"]["oid"] comment = "#" if is_shell else "rem" f.write(f"{comment} PR {number}: {safe_title}\n") f.write(f"git cherry-pick {oid}\n\n") - commit_count += 1 print(f"Generated {args.output} with {commit_count} commits ({skipped_count} skipped, already in branch).") # 3. Write PR Description Markdown (table format) - md_output = "cherry_pick_pr_description.md" + output_dir = os.path.dirname(args.output) + md_output = args.md_output or os.path.join(output_dir, "cherry_pick_pr_description.md") with open(md_output, "w", encoding="utf-8") as f: f.write("This cherry-picks the following commits for the release:\n\n") f.write("| Commit ID | PR Number | Commit Title |\n") f.write("|-----------|-----------|-------------|\n") - for pr in prs: - if not pr.get("mergeCommit"): - continue + for pr in cherry_pick_prs: number = pr["number"] - if number in existing_prs: - continue - title = pr["title"].replace("\n", " ") + title = escape_markdown_table_cell(pr["title"]) oid = pr["mergeCommit"]["oid"] short_oid = oid[:10] f.write(f"| {short_oid} | #{number} | {title} |\n") @@ -286,7 +301,7 @@ def main(): print(f"Generated {md_output} with {commit_count} commits.") # 4. 
Dependency Check - check_missing_dependencies(prs, args.branch) + check_missing_dependencies(cherry_pick_prs, args.branch) if __name__ == "__main__": From d7e922b8f482e9aa044871834a3234a55b2dd93a Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Mon, 9 Mar 2026 21:35:46 -0700 Subject: [PATCH 10/11] address more feedback --- tools/python/cherry_pick.py | 22 ++++++++++++++++------ tools/python/cherry_pick_utils.py | 12 +++++++++--- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/python/cherry_pick.py b/tools/python/cherry_pick.py index 7ac463b1ec274..b90924a785680 100644 --- a/tools/python/cherry_pick.py +++ b/tools/python/cherry_pick.py @@ -82,9 +82,9 @@ def escape_markdown_table_cell(text): return sanitize_title(text).replace("|", "\\|") -def get_existing_pr_numbers(branch): +def get_existing_pr_numbers(branch, repo=None, log_limit=500): """Get the set of PR numbers already present in the target branch.""" - output = run_command(["git", "log", branch, "--oneline", "-n", "500"], silent=True) + output = run_command(["git", "log", branch, "--oneline", "-n", str(log_limit)], silent=True) if not output: return set() pr_numbers = set() @@ -115,7 +115,10 @@ def get_existing_pr_numbers(branch): if is_meta_pr: # Query gh to get more details (body/commits) to find squashed sub-PRs if pr_num not in pr_cache: - gh_out = run_command(["gh", "pr", "view", pr_num, "--json", "title,body,commits"], silent=True) + gh_cmd = ["gh", "pr", "view", pr_num, "--json", "title,body,commits"] + if repo: + gh_cmd.extend(["--repo", repo]) + gh_out = run_command(gh_cmd, silent=True) if gh_out: try: pr_cache[pr_num] = json.loads(gh_out) @@ -145,11 +148,18 @@ def check_missing_dependencies(prs, branch): """Check for potential missing dependencies (conflicts).""" print("\nChecking for potential missing dependencies (conflicts)...") - # Collect OIDs being cherry-picked + # Collect OIDs being cherry-picked and all their ancestor commits cherry_pick_oids = set() for pr in prs: if 
pr.get("mergeCommit"): - cherry_pick_oids.add(pr["mergeCommit"]["oid"]) + merge_oid = pr["mergeCommit"]["oid"] + cherry_pick_oids.add(merge_oid) + # Include ancestor commits of merge commits to avoid false-positive warnings + # for PRs that used a regular merge (not squash) strategy + ancestor_output = run_command(["git", "log", "--format=%H", merge_oid, "--not", branch], silent=True) + if ancestor_output: + for ancestor_oid in ancestor_output.strip().splitlines(): + cherry_pick_oids.add(ancestor_oid.strip()) conflicting_prs_count = 0 for pr in prs: @@ -236,7 +246,7 @@ def main(): prs.sort(key=lambda x: x["mergedAt"]) # 1.5. Check which PRs are already in the target branch - existing_prs = get_existing_pr_numbers(args.branch) + existing_prs = get_existing_pr_numbers(args.branch, repo=args.repo) if existing_prs: print(f"Found {len(existing_prs)} PRs already in branch '{args.branch}'.") diff --git a/tools/python/cherry_pick_utils.py b/tools/python/cherry_pick_utils.py index cd58f3b589c35..7460761593a8c 100644 --- a/tools/python/cherry_pick_utils.py +++ b/tools/python/cherry_pick_utils.py @@ -52,9 +52,15 @@ def check_preflight(): ) return False - gh_auth = run_command(["gh", "auth", "status"], silent=True) - if not gh_auth: - print("Error: GitHub CLI not authenticated. Please run 'gh auth login'.", file=sys.stderr) + # gh auth status outputs to stderr, so run_command returns empty stdout even on success. + # Use subprocess directly to check the return code. + try: + auth_result = subprocess.run(["gh", "auth", "status"], capture_output=True, text=True, check=False) + if auth_result.returncode != 0: + print("Error: GitHub CLI not authenticated. 
Please run 'gh auth login'.", file=sys.stderr) + return False + except FileNotFoundError: + print("Error: GitHub CLI (gh) not found.", file=sys.stderr) return False return True From 51e1db159f978e2680008a636c5a1a1d243c6a15 Mon Sep 17 00:00:00 2001 From: Tianlei Wu Date: Mon, 9 Mar 2026 22:09:52 -0700 Subject: [PATCH 11/11] final feedback --- tools/python/cherry_pick.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/python/cherry_pick.py b/tools/python/cherry_pick.py index b90924a785680..96a91c5e74c22 100644 --- a/tools/python/cherry_pick.py +++ b/tools/python/cherry_pick.py @@ -66,7 +66,7 @@ def get_merged_prs(repo, label, limit=200): def get_changed_files(oid): """Get list of files changed in a commit.""" - output = run_command(["git", "diff-tree", "--no-commit-id", "--name-only", "-r", oid], silent=True) + output = run_command(["git", "diff-tree", "--no-commit-id", "--name-only", "-m", "-r", oid], silent=True) if output: return output.strip().splitlines() return []