Skip to content
Closed
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 126 additions & 62 deletions dev/merge_spark_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import subprocess
import sys
import urllib2
from collections import deque

try:
import jira.client
Expand All @@ -55,8 +56,6 @@
# Prefix added to temporary branches
BRANCH_PREFIX = "PR_TOOL"

os.chdir(SPARK_HOME)


def get_json(url):
try:
Expand Down Expand Up @@ -85,10 +84,6 @@ def continue_maybe(prompt):
if result.lower() != "y":
fail("Okay, exiting")


original_head = run_cmd("git rev-parse HEAD")[:8]


def clean_up():
print "Restoring head pointer to %s" % original_head
run_cmd("git checkout %s" % original_head)
Expand Down Expand Up @@ -274,7 +269,7 @@ def get_version_json(version_str):
asf_jira.transition_issue(
jira_id, resolve["id"], fixVersions=jira_fix_versions, comment=comment)

print "Succesfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)
print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)


def resolve_jira_issues(title, merge_branches, comment):
Expand All @@ -286,68 +281,137 @@ def resolve_jira_issues(title, merge_branches, comment):
resolve_jira_issue(merge_branches, comment, jira_id)


branches = get_json("%s/branches" % GITHUB_API_BASE)
branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
# Assumes branch names can be sorted lexicographically
latest_branch = sorted(branch_names, reverse=True)[0]

pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ")
pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))

url = pr["url"]
title = pr["title"]
body = pr["body"]
target_ref = pr["base"]["ref"]
user_login = pr["user"]["login"]
base_ref = pr["head"]["ref"]
pr_repo_desc = "%s/%s" % (user_login, base_ref)

# Merged pull requests don't appear as merged in the GitHub API;
# Instead, they're closed by asfgit.
merge_commits = \
[e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"]

if merge_commits:
merge_hash = merge_commits[0]["commit_id"]
message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"]

print "Pull request %s has already been merged, assuming you want to backport" % pr_num
commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
def standardize_jira_ref(text):
"""
Standardize the [MODULE] SPARK-XXXXX prefix
Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue"

>>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
'[SQL] SPARK-5821: ParquetRelation2 CTAS should check if delete is successful'
>>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
'[PROJECT INFRA] [WIP] SPARK-4123: Show new dependencies added in pull requests'
>>> standardize_jira_ref("[MLlib] Spark 5954: Top by key")
'[MLLIB] SPARK-5954: Top by key'
"""
#If the string is compliant, no need to process any further
if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{3,5}: \S+', text)):
return text

# Extract JIRA ref(s):
jira_refs = deque()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason this and components can't just be normal Python lists?

i.e.

jira_refs = []

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you do convert them to lists, you can remove the deque import.

pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,5})', re.IGNORECASE)
while (pattern.search(text) is not None):
ref = pattern.search(text).groups()[0]
# Replace any whitespace with a dash & convert to uppercase
jira_refs.append(re.sub(r'\s+', '-', ref.upper()))
text = text.replace(ref, '')

# Extract spark component(s):
components = deque()
# Look for alphanumeric chars, spaces, and/or commas
pattern = re.compile(r'(\[[\w\s,]+\])', re.IGNORECASE)
while (pattern.search(text) is not None):
component = pattern.search(text).groups()[0]
# Convert to uppercase
components.append(component.upper())
text = text.replace(component, '')

# Cleanup remaining symbols:
pattern = re.compile(r'^\W+(.*)', re.IGNORECASE)
if (pattern.search(text) is not None):
text = pattern.search(text).groups()[0]

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I don't follow this totally, but could the rest of this function be written a bit more simply?

out_string = text.strip()
for tag in components + jira_refs:
  out_string = tag + " " + out_string
return out_string

# Assemble full text (module(s), JIRA ref(s), remaining text)
if (len(components) < 1):
components = ""
component_text = ' '.join(components).strip()
if (len(jira_refs) < 1):
jira_ref_text = ""
jira_ref_text = ' '.join(jira_refs).strip()

if (len(jira_ref_text) < 1 and len(component_text) < 1):
clean_text = text.strip()
elif (len(jira_ref_text) < 1):
clean_text = component_text + ' ' + text.strip()
elif (len(component_text) < 1):
clean_text = jira_ref_text + ': ' + text.strip()
else:
clean_text = component_text + ' ' + jira_ref_text + ': ' + text.strip()

return clean_text

def main():
os.chdir(SPARK_HOME)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to be sure since it's a bit tricky with the diff here - all of this is simply re-organization, correct?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right - everything inside main() was previously containerless. All statements not part of a function were moved into main() and doctest execution was added at the end. The only new code is the function standardize_jira_ref(), along with its reference on line 365.

original_head = run_cmd("git rev-parse HEAD")[:8]

branches = get_json("%s/branches" % GITHUB_API_BASE)
branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
# Assumes branch names can be sorted lexicographically
latest_branch = sorted(branch_names, reverse=True)[0]

pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ")
pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))

url = pr["url"]
title = standardize_jira_ref(pr["title"])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the title has been modified, I'd prompt the user here for a yes/no as to whether to use the modified title. (i.e. just say "I've re-written the title as follows to match the format: . Would you like to use the new title?"... with and each on a new line). That way if our heuristic is broken in some way the committer can just ignore the re-writing.

body = pr["body"]
target_ref = pr["base"]["ref"]
user_login = pr["user"]["login"]
base_ref = pr["head"]["ref"]
pr_repo_desc = "%s/%s" % (user_login, base_ref)

# Merged pull requests don't appear as merged in the GitHub API;
# Instead, they're closed by asfgit.
merge_commits = \
[e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"]

if merge_commits:
merge_hash = merge_commits[0]["commit_id"]
message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"]

print "Pull request %s has already been merged, assuming you want to backport" % pr_num
commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
"%s^{commit}" % merge_hash]).strip() != ""
if not commit_is_downloaded:
fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)
if not commit_is_downloaded:
fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)

print "Found commit %s:\n%s" % (merge_hash, message)
cherry_pick(pr_num, merge_hash, latest_branch)
sys.exit(0)
print "Found commit %s:\n%s" % (merge_hash, message)
cherry_pick(pr_num, merge_hash, latest_branch)
sys.exit(0)

if not bool(pr["mergeable"]):
msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \
"Continue? (experts only!)"
continue_maybe(msg)
if not bool(pr["mergeable"]):
msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \
"Continue? (experts only!)"
continue_maybe(msg)

print ("\n=== Pull Request #%s ===" % pr_num)
print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
title, pr_repo_desc, target_ref, url))
continue_maybe("Proceed with merging pull request #%s?" % pr_num)
print ("\n=== Pull Request #%s ===" % pr_num)
print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
title, pr_repo_desc, target_ref, url))
continue_maybe("Proceed with merging pull request #%s?" % pr_num)

merged_refs = [target_ref]
merged_refs = [target_ref]

merge_hash = merge_pr(pr_num, target_ref)
merge_hash = merge_pr(pr_num, target_ref)

pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y":
merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)]
pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y":
merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)]

if JIRA_IMPORTED:
if JIRA_USERNAME and JIRA_PASSWORD:
continue_maybe("Would you like to update an associated JIRA?")
jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num)
resolve_jira_issues(title, merged_refs, jira_comment)
if JIRA_IMPORTED:
if JIRA_USERNAME and JIRA_PASSWORD:
continue_maybe("Would you like to update an associated JIRA?")
jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num)
resolve_jira_issues(title, merged_refs, jira_comment)
else:
print "JIRA_USERNAME and JIRA_PASSWORD not set"
print "Exiting without trying to close the associated JIRA."
else:
print "JIRA_USERNAME and JIRA_PASSWORD not set"
print "Could not find jira-python library. Run 'sudo pip install jira-python' to install."
print "Exiting without trying to close the associated JIRA."
else:
print "Could not find jira-python library. Run 'sudo pip install jira-python' to install."
print "Exiting without trying to close the associated JIRA."

if __name__ == "__main__":
import doctest
doctest.testmod()

main()