-
Notifications
You must be signed in to change notification settings - Fork 29k
[Project Infra] SPARK-1684: Merge script should standardize SPARK-XXX prefix #5149
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
8f4a7d1
042099d
48520ba
aa20a6e
25229c6
43b5aed
df73f6a
4f1ed46
8c195bb
7d5fa20
9b6b0a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,6 +30,7 @@ | |
| import subprocess | ||
| import sys | ||
| import urllib2 | ||
| from collections import deque | ||
|
|
||
| try: | ||
| import jira.client | ||
|
|
@@ -55,8 +56,6 @@ | |
| # Prefix added to temporary branches | ||
| BRANCH_PREFIX = "PR_TOOL" | ||
|
|
||
| os.chdir(SPARK_HOME) | ||
|
|
||
|
|
||
| def get_json(url): | ||
| try: | ||
|
|
@@ -85,10 +84,6 @@ def continue_maybe(prompt): | |
| if result.lower() != "y": | ||
| fail("Okay, exiting") | ||
|
|
||
|
|
||
| original_head = run_cmd("git rev-parse HEAD")[:8] | ||
|
|
||
|
|
||
| def clean_up(): | ||
| print "Restoring head pointer to %s" % original_head | ||
| run_cmd("git checkout %s" % original_head) | ||
|
|
@@ -274,7 +269,7 @@ def get_version_json(version_str): | |
| asf_jira.transition_issue( | ||
| jira_id, resolve["id"], fixVersions=jira_fix_versions, comment=comment) | ||
|
|
||
| print "Succesfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions) | ||
| print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions) | ||
|
|
||
|
|
||
| def resolve_jira_issues(title, merge_branches, comment): | ||
|
|
@@ -286,68 +281,137 @@ def resolve_jira_issues(title, merge_branches, comment): | |
| resolve_jira_issue(merge_branches, comment, jira_id) | ||
|
|
||
|
|
||
| branches = get_json("%s/branches" % GITHUB_API_BASE) | ||
| branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches]) | ||
| # Assumes branch names can be sorted lexicographically | ||
| latest_branch = sorted(branch_names, reverse=True)[0] | ||
|
|
||
| pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ") | ||
| pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)) | ||
| pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num)) | ||
|
|
||
| url = pr["url"] | ||
| title = pr["title"] | ||
| body = pr["body"] | ||
| target_ref = pr["base"]["ref"] | ||
| user_login = pr["user"]["login"] | ||
| base_ref = pr["head"]["ref"] | ||
| pr_repo_desc = "%s/%s" % (user_login, base_ref) | ||
|
|
||
| # Merged pull requests don't appear as merged in the GitHub API; | ||
| # Instead, they're closed by asfgit. | ||
| merge_commits = \ | ||
| [e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"] | ||
|
|
||
| if merge_commits: | ||
| merge_hash = merge_commits[0]["commit_id"] | ||
| message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"] | ||
|
|
||
| print "Pull request %s has already been merged, assuming you want to backport" % pr_num | ||
| commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify', | ||
| def standardize_jira_ref(text): | ||
| """ | ||
| Standardize the [MODULE] SPARK-XXXXX prefix | ||
| Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue" | ||
|
|
||
| >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful") | ||
| '[SQL] SPARK-5821: ParquetRelation2 CTAS should check if delete is successful' | ||
| >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests") | ||
| '[PROJECT INFRA] [WIP] SPARK-4123: Show new dependencies added in pull requests' | ||
| >>> standardize_jira_ref("[MLlib] Spark 5954: Top by key") | ||
| '[MLLIB] SPARK-5954: Top by key' | ||
| """ | ||
| #If the string is compliant, no need to process any further | ||
| if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{3,5}: \S+', text)): | ||
| return text | ||
|
|
||
| # Extract JIRA ref(s): | ||
| jira_refs = deque() | ||
| pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,5})', re.IGNORECASE) | ||
| while (pattern.search(text) is not None): | ||
| ref = pattern.search(text).groups()[0] | ||
| # Replace any whitespace with a dash & convert to uppercase | ||
| jira_refs.append(re.sub(r'\s+', '-', ref.upper())) | ||
| text = text.replace(ref, '') | ||
|
|
||
| # Extract spark component(s): | ||
| components = deque() | ||
| # Look for alphanumeric chars, spaces, and/or commas | ||
| pattern = re.compile(r'(\[[\w\s,]+\])', re.IGNORECASE) | ||
| while (pattern.search(text) is not None): | ||
| component = pattern.search(text).groups()[0] | ||
| # Convert to uppercase | ||
| components.append(component.upper()) | ||
| text = text.replace(component, '') | ||
|
|
||
| # Cleanup remaining symbols: | ||
| pattern = re.compile(r'^\W+(.*)', re.IGNORECASE) | ||
| if (pattern.search(text) is not None): | ||
| text = pattern.search(text).groups()[0] | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe I don't follow this totally, but could the rest of this function be written a bit more simply? |
||
| # Assemble full text (module(s), JIRA ref(s), remaining text) | ||
| if (len(components) < 1): | ||
| components = "" | ||
| component_text = ' '.join(components).strip() | ||
| if (len(jira_refs) < 1): | ||
| jira_ref_text = "" | ||
| jira_ref_text = ' '.join(jira_refs).strip() | ||
|
|
||
| if (len(jira_ref_text) < 1 and len(component_text) < 1): | ||
| clean_text = text.strip() | ||
| elif (len(jira_ref_text) < 1): | ||
| clean_text = component_text + ' ' + text.strip() | ||
| elif (len(component_text) < 1): | ||
| clean_text = jira_ref_text + ': ' + text.strip() | ||
| else: | ||
| clean_text = component_text + ' ' + jira_ref_text + ': ' + text.strip() | ||
|
|
||
| return clean_text | ||
|
|
||
| def main(): | ||
| os.chdir(SPARK_HOME) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just to be sure, since it's a bit tricky with the diff here - all of this is simply re-organization, correct?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Right - everything inside main() was previously at module level, outside any function. All statements not part of a function were moved into main(), and doctest execution was added at the end. The only new code is the function standardize_jira_ref(), along with its reference on line 365. |
||
| original_head = run_cmd("git rev-parse HEAD")[:8] | ||
|
|
||
| branches = get_json("%s/branches" % GITHUB_API_BASE) | ||
| branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches]) | ||
| # Assumes branch names can be sorted lexicographically | ||
| latest_branch = sorted(branch_names, reverse=True)[0] | ||
|
|
||
| pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ") | ||
| pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)) | ||
| pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num)) | ||
|
|
||
| url = pr["url"] | ||
| title = standardize_jira_ref(pr["title"]) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If the title has been modified, I'd prompt the user here for a yes/no as to whether to use the modified title (i.e. just say "I've re-written the title as follows to match the format: `<original title>` `<new title>`. Would you like to use the new title?"... with `<original title>` and `<new title>` each on a new line). That way, if our heuristic is broken in some way, the committer can just ignore the re-writing. |
||
| body = pr["body"] | ||
| target_ref = pr["base"]["ref"] | ||
| user_login = pr["user"]["login"] | ||
| base_ref = pr["head"]["ref"] | ||
| pr_repo_desc = "%s/%s" % (user_login, base_ref) | ||
|
|
||
| # Merged pull requests don't appear as merged in the GitHub API; | ||
| # Instead, they're closed by asfgit. | ||
| merge_commits = \ | ||
| [e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"] | ||
|
|
||
| if merge_commits: | ||
| merge_hash = merge_commits[0]["commit_id"] | ||
| message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"] | ||
|
|
||
| print "Pull request %s has already been merged, assuming you want to backport" % pr_num | ||
| commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify', | ||
| "%s^{commit}" % merge_hash]).strip() != "" | ||
| if not commit_is_downloaded: | ||
| fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num) | ||
| if not commit_is_downloaded: | ||
| fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num) | ||
|
|
||
| print "Found commit %s:\n%s" % (merge_hash, message) | ||
| cherry_pick(pr_num, merge_hash, latest_branch) | ||
| sys.exit(0) | ||
| print "Found commit %s:\n%s" % (merge_hash, message) | ||
| cherry_pick(pr_num, merge_hash, latest_branch) | ||
| sys.exit(0) | ||
|
|
||
| if not bool(pr["mergeable"]): | ||
| msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \ | ||
| "Continue? (experts only!)" | ||
| continue_maybe(msg) | ||
| if not bool(pr["mergeable"]): | ||
| msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \ | ||
| "Continue? (experts only!)" | ||
| continue_maybe(msg) | ||
|
|
||
| print ("\n=== Pull Request #%s ===" % pr_num) | ||
| print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % ( | ||
| title, pr_repo_desc, target_ref, url)) | ||
| continue_maybe("Proceed with merging pull request #%s?" % pr_num) | ||
| print ("\n=== Pull Request #%s ===" % pr_num) | ||
| print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % ( | ||
| title, pr_repo_desc, target_ref, url)) | ||
| continue_maybe("Proceed with merging pull request #%s?" % pr_num) | ||
|
|
||
| merged_refs = [target_ref] | ||
| merged_refs = [target_ref] | ||
|
|
||
| merge_hash = merge_pr(pr_num, target_ref) | ||
| merge_hash = merge_pr(pr_num, target_ref) | ||
|
|
||
| pick_prompt = "Would you like to pick %s into another branch?" % merge_hash | ||
| while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y": | ||
| merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)] | ||
| pick_prompt = "Would you like to pick %s into another branch?" % merge_hash | ||
| while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y": | ||
| merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)] | ||
|
|
||
| if JIRA_IMPORTED: | ||
| if JIRA_USERNAME and JIRA_PASSWORD: | ||
| continue_maybe("Would you like to update an associated JIRA?") | ||
| jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num) | ||
| resolve_jira_issues(title, merged_refs, jira_comment) | ||
| if JIRA_IMPORTED: | ||
| if JIRA_USERNAME and JIRA_PASSWORD: | ||
| continue_maybe("Would you like to update an associated JIRA?") | ||
| jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num) | ||
| resolve_jira_issues(title, merged_refs, jira_comment) | ||
| else: | ||
| print "JIRA_USERNAME and JIRA_PASSWORD not set" | ||
| print "Exiting without trying to close the associated JIRA." | ||
| else: | ||
| print "JIRA_USERNAME and JIRA_PASSWORD not set" | ||
| print "Could not find jira-python library. Run 'sudo pip install jira-python' to install." | ||
| print "Exiting without trying to close the associated JIRA." | ||
| else: | ||
| print "Could not find jira-python library. Run 'sudo pip install jira-python' to install." | ||
| print "Exiting without trying to close the associated JIRA." | ||
|
|
||
| if __name__ == "__main__": | ||
| import doctest | ||
| doctest.testmod() | ||
|
|
||
| main() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any reason this (`jira_refs`) and `components` can't just be normal Python lists?
i.e. `jira_refs = []` and `components = []`
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If you do convert them to lists, you can remove the `deque` import.