From 8f4a7d1bb1e415e7d916b9291bc702a4f5741b58 Mon Sep 17 00:00:00 2001 From: texasmichelle Date: Mon, 23 Mar 2015 20:18:41 -0500 Subject: [PATCH 1/9] SPARK-1684 Merge script should standardize SPARK-XXX prefix Added text parsing capability to merge script so that titles are standardized to [MODULE] SPARK-XXX: Description. --- dev/merge_spark_pr.py | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index 3062e9c3c665..3ec285455c6d 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -30,6 +30,7 @@ import subprocess import sys import urllib2 +import Queue try: import jira.client @@ -286,6 +287,42 @@ def resolve_jira_issues(title, merge_branches, comment): resolve_jira_issue(merge_branches, comment, jira_id) +def standardize_jira_ref(text): + # Standardize the [MODULE] SPARK-XXXXX prefix + # Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue" + + # Check for compliance + if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{4,5}: \S+', text)): + return text + + # Extract JIRA ref(s): + jira_refs = Queue.Queue() + pattern = re.compile(r'(SPARK[-\s]*[0-9]{4,5})', re.IGNORECASE) + while (pattern.search(text) is not None): + jira_refs.put(re.sub(r'\s', '-', pattern.search(text).groups()[0].upper())) + text = text.replace(pattern.search(text).groups()[0], '') + + # Extract spark component(s): + components = Queue.Queue() + pattern = re.compile(r'(\[\w+\])', re.IGNORECASE) + while (pattern.search(text) is not None): + components.put(pattern.search(text).groups()[0]) + text = text.replace(pattern.search(text).groups()[0], '') + + # Cleanup remaining symbols: + pattern = re.compile(r'\W+(.*)', re.IGNORECASE) + text = pattern.search(text).groups()[0] + + # Assemble full text (module(s), JIRA ref(s), remaining text) + clean_text = '' + while (not components.empty()): + clean_text += components.get() + ' ' + 
while (not jira_refs.empty()): + clean_text += jira_refs.get() + ' ' + clean_text = clean_text.rstrip() + ': ' + text.strip() + + return clean_text + branches = get_json("%s/branches" % GITHUB_API_BASE) branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches]) # Assumes branch names can be sorted lexicographically @@ -296,7 +333,7 @@ def resolve_jira_issues(title, merge_branches, comment): pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num)) url = pr["url"] -title = pr["title"] +title = standardize_jira_ref(pr["title"]) body = pr["body"] target_ref = pr["base"]["ref"] user_login = pr["user"]["login"] From 042099dd0d9ed4b42718729a9780182922c4bef3 Mon Sep 17 00:00:00 2001 From: texasmichelle Date: Mon, 23 Mar 2015 20:46:19 -0500 Subject: [PATCH 2/9] SPARK-1684 Merge script should standardize SPARK-XXX prefix Added accuracy tweaks based on test results --- dev/merge_spark_pr.py | 52 +++++++++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index 3ec285455c6d..e09c64bb3288 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -286,40 +286,54 @@ def resolve_jira_issues(title, merge_branches, comment): for jira_id in jira_ids: resolve_jira_issue(merge_branches, comment, jira_id) - def standardize_jira_ref(text): # Standardize the [MODULE] SPARK-XXXXX prefix # Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. 
Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue" - # Check for compliance - if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{4,5}: \S+', text)): + #If the string is compliant, no need to process any further + if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{3,5}: \S+', text)): return text # Extract JIRA ref(s): - jira_refs = Queue.Queue() - pattern = re.compile(r'(SPARK[-\s]*[0-9]{4,5})', re.IGNORECASE) + jira_refs = deque() + pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,5})', re.IGNORECASE) while (pattern.search(text) is not None): - jira_refs.put(re.sub(r'\s', '-', pattern.search(text).groups()[0].upper())) - text = text.replace(pattern.search(text).groups()[0], '') + ref = pattern.search(text).groups()[0] + # Replace any whitespace with a dash & convert to uppercase + jira_refs.append(re.sub(r'\s', '-', ref.upper())) + text = text.replace(ref, '') # Extract spark component(s): - components = Queue.Queue() - pattern = re.compile(r'(\[\w+\])', re.IGNORECASE) + components = deque() + # Look for alphanumeric chars, spaces, and/or commas + pattern = re.compile(r'(\[[\w\s,]+\])', re.IGNORECASE) while (pattern.search(text) is not None): - components.put(pattern.search(text).groups()[0]) - text = text.replace(pattern.search(text).groups()[0], '') + component = pattern.search(text).groups()[0] + # Convert to uppercase + components.append(component.upper()) + text = text.replace(component, '') # Cleanup remaining symbols: - pattern = re.compile(r'\W+(.*)', re.IGNORECASE) - text = pattern.search(text).groups()[0] + pattern = re.compile(r'^\W+(.*)', re.IGNORECASE) + if (pattern.search(text) is not None): + text = pattern.search(text).groups()[0] # Assemble full text (module(s), JIRA ref(s), remaining text) - clean_text = '' - while (not components.empty()): - clean_text += components.get() + ' ' - while (not jira_refs.empty()): - clean_text += jira_refs.get() + ' ' - clean_text = clean_text.rstrip() + ': ' + text.strip() + if (len(components) < 1): + components = "" + 
component_text = ' '.join(components).strip() + if (len(jira_refs) < 1): + jira_ref_text = "" + jira_ref_text = ' '.join(jira_refs).strip() + + if (len(jira_ref_text) < 1 and len(component_text) < 1): + clean_text = text.strip() + elif (len(jira_ref_text) < 1): + clean_text = component_text + ' ' + text.strip() + elif (len(component_text) < 1): + clean_text = jira_ref_text + ': ' + text.strip() + else: + clean_text = component_text + ' ' + jira_ref_text + ': ' + text.strip() return clean_text From 48520ba748649a4ddee4b2b7897c5f08b7a99e42 Mon Sep 17 00:00:00 2001 From: texasmichelle Date: Mon, 23 Mar 2015 21:00:23 -0500 Subject: [PATCH 3/9] SPARK-1684: Corrected import statement Replaced queue with deque --- dev/merge_spark_pr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index e09c64bb3288..609c31fecb00 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -30,7 +30,7 @@ import subprocess import sys import urllib2 -import Queue +from collections import deque try: import jira.client From aa20a6e14d5f7b204124638d7cb147a2f8e6190d Mon Sep 17 00:00:00 2001 From: texasmichelle Date: Tue, 24 Mar 2015 22:07:10 -0500 Subject: [PATCH 4/9] Move code into main() and add doctest for new text parsing method Also corrected a spelling mistake in one of the print statements. 
--- dev/merge_spark_pr.py | 143 +++++++++++++++++++++++------------------- 1 file changed, 78 insertions(+), 65 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index 609c31fecb00..c43b94d409cb 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -56,8 +56,6 @@ # Prefix added to temporary branches BRANCH_PREFIX = "PR_TOOL" -os.chdir(SPARK_HOME) - def get_json(url): try: @@ -86,10 +84,6 @@ def continue_maybe(prompt): if result.lower() != "y": fail("Okay, exiting") - -original_head = run_cmd("git rev-parse HEAD")[:8] - - def clean_up(): print "Restoring head pointer to %s" % original_head run_cmd("git checkout %s" % original_head) @@ -275,7 +269,7 @@ def get_version_json(version_str): asf_jira.transition_issue( jira_id, resolve["id"], fixVersions=jira_fix_versions, comment=comment) - print "Succesfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions) + print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions) def resolve_jira_issues(title, merge_branches, comment): @@ -286,10 +280,19 @@ def resolve_jira_issues(title, merge_branches, comment): for jira_id in jira_ids: resolve_jira_issue(merge_branches, comment, jira_id) + def standardize_jira_ref(text): - # Standardize the [MODULE] SPARK-XXXXX prefix - # Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue" + """ + Standardize the [MODULE] SPARK-XXXXX prefix + Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. 
Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue" + >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful") + '[SQL] SPARK-5821: ParquetRelation2 CTAS should check if delete is successful' + >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests") + '[PROJECT INFRA] [WIP] SPARK-4123: Show new dependencies added in pull requests' + >>> standardize_jira_ref("[MLlib] Spark 5954: Top by key") + '[MLLIB] SPARK-5954: Top by key' + """ #If the string is compliant, no need to process any further if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{3,5}: \S+', text)): return text @@ -300,7 +303,7 @@ def standardize_jira_ref(text): while (pattern.search(text) is not None): ref = pattern.search(text).groups()[0] # Replace any whitespace with a dash & convert to uppercase - jira_refs.append(re.sub(r'\s', '-', ref.upper())) + jira_refs.append(re.sub(r'\s+', '-', ref.upper())) text = text.replace(ref, '') # Extract spark component(s): @@ -337,68 +340,78 @@ def standardize_jira_ref(text): return clean_text -branches = get_json("%s/branches" % GITHUB_API_BASE) -branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches]) -# Assumes branch names can be sorted lexicographically -latest_branch = sorted(branch_names, reverse=True)[0] - -pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ") -pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)) -pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num)) - -url = pr["url"] -title = standardize_jira_ref(pr["title"]) -body = pr["body"] -target_ref = pr["base"]["ref"] -user_login = pr["user"]["login"] -base_ref = pr["head"]["ref"] -pr_repo_desc = "%s/%s" % (user_login, base_ref) - -# Merged pull requests don't appear as merged in the GitHub API; -# Instead, they're closed by asfgit. 
-merge_commits = \ - [e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"] - -if merge_commits: - merge_hash = merge_commits[0]["commit_id"] - message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"] - - print "Pull request %s has already been merged, assuming you want to backport" % pr_num - commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify', +def main(): + os.chdir(SPARK_HOME) + original_head = run_cmd("git rev-parse HEAD")[:8] + + branches = get_json("%s/branches" % GITHUB_API_BASE) + branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches]) + # Assumes branch names can be sorted lexicographically + latest_branch = sorted(branch_names, reverse=True)[0] + + pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ") + pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num)) + pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num)) + + url = pr["url"] + title = standardize_jira_ref(pr["title"]) + body = pr["body"] + target_ref = pr["base"]["ref"] + user_login = pr["user"]["login"] + base_ref = pr["head"]["ref"] + pr_repo_desc = "%s/%s" % (user_login, base_ref) + + # Merged pull requests don't appear as merged in the GitHub API; + # Instead, they're closed by asfgit. + merge_commits = \ + [e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"] + + if merge_commits: + merge_hash = merge_commits[0]["commit_id"] + message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"] + + print "Pull request %s has already been merged, assuming you want to backport" % pr_num + commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify', "%s^{commit}" % merge_hash]).strip() != "" - if not commit_is_downloaded: - fail("Couldn't find any merge commit for #%s, you may need to update HEAD." 
% pr_num) + if not commit_is_downloaded: + fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num) - print "Found commit %s:\n%s" % (merge_hash, message) - cherry_pick(pr_num, merge_hash, latest_branch) - sys.exit(0) + print "Found commit %s:\n%s" % (merge_hash, message) + cherry_pick(pr_num, merge_hash, latest_branch) + sys.exit(0) -if not bool(pr["mergeable"]): - msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \ - "Continue? (experts only!)" - continue_maybe(msg) + if not bool(pr["mergeable"]): + msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \ + "Continue? (experts only!)" + continue_maybe(msg) -print ("\n=== Pull Request #%s ===" % pr_num) -print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % ( - title, pr_repo_desc, target_ref, url)) -continue_maybe("Proceed with merging pull request #%s?" % pr_num) + print ("\n=== Pull Request #%s ===" % pr_num) + print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % ( + title, pr_repo_desc, target_ref, url)) + continue_maybe("Proceed with merging pull request #%s?" % pr_num) -merged_refs = [target_ref] + merged_refs = [target_ref] -merge_hash = merge_pr(pr_num, target_ref) + merge_hash = merge_pr(pr_num, target_ref) -pick_prompt = "Would you like to pick %s into another branch?" % merge_hash -while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y": - merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)] + pick_prompt = "Would you like to pick %s into another branch?" 
% merge_hash + while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y": + merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)] -if JIRA_IMPORTED: - if JIRA_USERNAME and JIRA_PASSWORD: - continue_maybe("Would you like to update an associated JIRA?") - jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num) - resolve_jira_issues(title, merged_refs, jira_comment) + if JIRA_IMPORTED: + if JIRA_USERNAME and JIRA_PASSWORD: + continue_maybe("Would you like to update an associated JIRA?") + jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num) + resolve_jira_issues(title, merged_refs, jira_comment) + else: + print "JIRA_USERNAME and JIRA_PASSWORD not set" + print "Exiting without trying to close the associated JIRA." else: - print "JIRA_USERNAME and JIRA_PASSWORD not set" + print "Could not find jira-python library. Run 'sudo pip install jira-python' to install." print "Exiting without trying to close the associated JIRA." -else: - print "Could not find jira-python library. Run 'sudo pip install jira-python' to install." - print "Exiting without trying to close the associated JIRA." + +if __name__ == "__main__": + import doctest + doctest.testmod() + + main() From df73f6a68e2c097ce4898e66752a757dd7c328a7 Mon Sep 17 00:00:00 2001 From: texasmichelle Date: Sat, 18 Apr 2015 21:51:42 -0500 Subject: [PATCH 5/9] reworked regex's to enforce brackets around JIRA ref --- dev/merge_spark_pr.py | 42 +++++++++++++++++++++++++----------------- 1 file changed, 25 insertions(+), 17 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index c43b94d409cb..e81dff53917b 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -283,33 +283,41 @@ def resolve_jira_issues(title, merge_branches, comment): def standardize_jira_ref(text): """ - Standardize the [MODULE] SPARK-XXXXX prefix - Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. 
Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue" + Standardize the [SPARK-XXXXX] [MODULE] prefix + Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX] [MLLIB] Issue" >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful") - '[SQL] SPARK-5821: ParquetRelation2 CTAS should check if delete is successful' + '[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful' >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests") - '[PROJECT INFRA] [WIP] SPARK-4123: Show new dependencies added in pull requests' + '[SPARK-4123] [PROJECT INFRA] [WIP] Show new dependencies added in pull requests' >>> standardize_jira_ref("[MLlib] Spark 5954: Top by key") - '[MLLIB] SPARK-5954: Top by key' + '[SPARK-5954] [MLLIB] Top by key' + >>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl") + '[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl' + >>> standardize_jira_ref("SPARK-1094 Support MiMa for reporting binary compatibility accross versions.") + '[SPARK-1094] Support MiMa for reporting binary compatibility accross versions.' + >>> standardize_jira_ref("[WIP] [SPARK-1146] Vagrant support for Spark") + '[SPARK-1146] [WIP] Vagrant support for Spark' + >>> standardize_jira_ref("SPARK-1032. If Yarn app fails before registering, app master stays aroun...") + '[SPARK-1032] If Yarn app fails before registering, app master stays aroun...' 
""" - #If the string is compliant, no need to process any further - if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{3,5}: \S+', text)): + # If the string is compliant, no need to process any further + if (re.search(r'^\[SPARK-[0-9]{3,6}\] (\[[A-Z0-9_\s,]+\] )+\S+', text)): return text # Extract JIRA ref(s): jira_refs = deque() - pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,5})', re.IGNORECASE) + pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,6})', re.IGNORECASE) while (pattern.search(text) is not None): ref = pattern.search(text).groups()[0] # Replace any whitespace with a dash & convert to uppercase - jira_refs.append(re.sub(r'\s+', '-', ref.upper())) + jira_refs.append('[' + re.sub(r'\s+', '-', ref.upper()) + ']') text = text.replace(ref, '') # Extract spark component(s): components = deque() - # Look for alphanumeric chars, spaces, and/or commas - pattern = re.compile(r'(\[[\w\s,]+\])', re.IGNORECASE) + # Look for alphanumeric chars, spaces, dashes, periods, and/or commas + pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE) while (pattern.search(text) is not None): component = pattern.search(text).groups()[0] # Convert to uppercase @@ -321,22 +329,22 @@ def standardize_jira_ref(text): if (pattern.search(text) is not None): text = pattern.search(text).groups()[0] - # Assemble full text (module(s), JIRA ref(s), remaining text) - if (len(components) < 1): - components = "" - component_text = ' '.join(components).strip() + # Assemble full text (JIRA ref(s), module(s), remaining text) if (len(jira_refs) < 1): jira_ref_text = "" jira_ref_text = ' '.join(jira_refs).strip() + if (len(components) < 1): + components = "" + component_text = ' '.join(components).strip() if (len(jira_ref_text) < 1 and len(component_text) < 1): clean_text = text.strip() elif (len(jira_ref_text) < 1): clean_text = component_text + ' ' + text.strip() elif (len(component_text) < 1): - clean_text = jira_ref_text + ': ' + text.strip() + clean_text = jira_ref_text + ' ' + text.strip() else: - 
clean_text = component_text + ' ' + jira_ref_text + ': ' + text.strip() + clean_text = jira_ref_text + ' ' + component_text + ' ' + text.strip() return clean_text From 4f1ed46a8b6624871c37db868ce473da0e60f6a2 Mon Sep 17 00:00:00 2001 From: texasmichelle Date: Mon, 20 Apr 2015 12:38:39 -0500 Subject: [PATCH 6/9] Deque removal, logic simplifications, & prompt user to pick a title (orig or modified) --- dev/merge_spark_pr.py | 55 +++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index e81dff53917b..c37bf73e2380 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -30,7 +30,6 @@ import subprocess import sys import urllib2 -from collections import deque try: import jira.client @@ -300,51 +299,42 @@ def standardize_jira_ref(text): '[SPARK-1146] [WIP] Vagrant support for Spark' >>> standardize_jira_ref("SPARK-1032. If Yarn app fails before registering, app master stays aroun...") '[SPARK-1032] If Yarn app fails before registering, app master stays aroun...' + >>> standardize_jira_ref("[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.") + '[SPARK-6250] [SPARK-6146] [SPARK-5911] [SQL] Types are now reserved words in DDL parser.' 
+ >>> standardize_jira_ref("Additional information for users building from source code") + 'Additional information for users building from source code' """ + jira_refs = [] + components = [] + # If the string is compliant, no need to process any further if (re.search(r'^\[SPARK-[0-9]{3,6}\] (\[[A-Z0-9_\s,]+\] )+\S+', text)): return text # Extract JIRA ref(s): - jira_refs = deque() - pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,6})', re.IGNORECASE) - while (pattern.search(text) is not None): - ref = pattern.search(text).groups()[0] - # Replace any whitespace with a dash & convert to uppercase + pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,6})+', re.IGNORECASE) + for ref in pattern.findall(text): + # Add brackets, replace spaces with a dash, & convert to uppercase jira_refs.append('[' + re.sub(r'\s+', '-', ref.upper()) + ']') text = text.replace(ref, '') # Extract spark component(s): - components = deque() # Look for alphanumeric chars, spaces, dashes, periods, and/or commas pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE) - while (pattern.search(text) is not None): - component = pattern.search(text).groups()[0] - # Convert to uppercase + for component in pattern.findall(text): components.append(component.upper()) text = text.replace(component, '') - # Cleanup remaining symbols: + # Cleanup any remaining symbols: pattern = re.compile(r'^\W+(.*)', re.IGNORECASE) if (pattern.search(text) is not None): text = pattern.search(text).groups()[0] # Assemble full text (JIRA ref(s), module(s), remaining text) - if (len(jira_refs) < 1): - jira_ref_text = "" - jira_ref_text = ' '.join(jira_refs).strip() - if (len(components) < 1): - components = "" - component_text = ' '.join(components).strip() + clean_text = ' '.join(jira_refs).strip() + " " + ' '.join(components).strip() + " " + text.strip() - if (len(jira_ref_text) < 1 and len(component_text) < 1): - clean_text = text.strip() - elif (len(jira_ref_text) < 1): - clean_text = component_text + ' ' + text.strip() - elif 
(len(component_text) < 1): - clean_text = jira_ref_text + ' ' + text.strip() - else: - clean_text = jira_ref_text + ' ' + component_text + ' ' + text.strip() + # Replace multiple spaces with a single space, e.g. if no jira refs and/or components were included + clean_text = re.sub(r'\s+', ' ', clean_text.strip()) return clean_text @@ -362,6 +352,21 @@ def main(): pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num)) url = pr["url"] + + # Decide whether to use the modified title or not + print "I've re-written the title as follows to match the standard format:" + print "Original: %s" % pr["title"] + print "Modified: %s" % standardize_jira_ref(pr["title"]) + prompt = "Would you like to use the modified title?" + result = raw_input("%s (y/n): " % prompt) + if result.lower() == "y": + title = standardize_jira_ref(pr["title"]) + print "Using modified title:" + else: + title = pr["title"] + print "Using original title:" + print title + title = standardize_jira_ref(pr["title"]) body = pr["body"] target_ref = pr["base"]["ref"] From 8c195bb199b1e325929100e34f6d37cb0f824fc2 Mon Sep 17 00:00:00 2001 From: texasmichelle Date: Mon, 20 Apr 2015 13:08:30 -0500 Subject: [PATCH 7/9] removed erroneous line --- dev/merge_spark_pr.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index c37bf73e2380..4d1ab5022627 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -357,8 +357,7 @@ def main(): print "I've re-written the title as follows to match the standard format:" print "Original: %s" % pr["title"] print "Modified: %s" % standardize_jira_ref(pr["title"]) - prompt = "Would you like to use the modified title?" - result = raw_input("%s (y/n): " % prompt) + result = raw_input("Would you like to use the modified title? 
(y/n): ") if result.lower() == "y": title = standardize_jira_ref(pr["title"]) print "Using modified title:" @@ -367,7 +366,6 @@ def main(): print "Using original title:" print title - title = standardize_jira_ref(pr["title"]) body = pr["body"] target_ref = pr["base"]["ref"] user_login = pr["user"]["login"] From 7d5fa20b59fe87de8451c0260cacce632c8da872 Mon Sep 17 00:00:00 2001 From: texasmichelle Date: Mon, 20 Apr 2015 19:50:32 -0500 Subject: [PATCH 8/9] only prompt if title has been modified --- dev/merge_spark_pr.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index 4d1ab5022627..f17e1360a133 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -354,17 +354,21 @@ def main(): url = pr["url"] # Decide whether to use the modified title or not - print "I've re-written the title as follows to match the standard format:" - print "Original: %s" % pr["title"] - print "Modified: %s" % standardize_jira_ref(pr["title"]) - result = raw_input("Would you like to use the modified title? (y/n): ") - if result.lower() == "y": - title = standardize_jira_ref(pr["title"]) - print "Using modified title:" + modified_title = standardize_jira_ref(pr["title"]) + if modified_title != pr["title"]: + print "I've re-written the title as follows to match the standard format:" + print "Original: %s" % pr["title"] + print "Modified: %s" % modified_title + result = raw_input("Would you like to use the modified title? 
(y/n): ") + if result.lower() == "y": + title = modified_title + print "Using modified title:" + else: + title = pr["title"] + print "Using original title:" + print title else: title = pr["title"] - print "Using original title:" - print title body = pr["body"] target_ref = pr["base"]["ref"] From 9b6b0a761692941eca30385124424fa09ca1f52b Mon Sep 17 00:00:00 2001 From: texasmichelle Date: Tue, 21 Apr 2015 08:07:25 -0500 Subject: [PATCH 9/9] resolved variable scope issue --- dev/merge_spark_pr.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index f17e1360a133..b69cd15f99f6 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -95,7 +95,7 @@ def clean_up(): # merge the requested PR and return the merge hash -def merge_pr(pr_num, target_ref): +def merge_pr(pr_num, target_ref, title, body, pr_repo_desc): pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num) target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, pr_num, target_ref.upper()) run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name)) @@ -339,6 +339,8 @@ def standardize_jira_ref(text): return clean_text def main(): + global original_head + os.chdir(SPARK_HOME) original_head = run_cmd("git rev-parse HEAD")[:8] @@ -407,7 +409,7 @@ def main(): merged_refs = [target_ref] - merge_hash = merge_pr(pr_num, target_ref) + merge_hash = merge_pr(pr_num, target_ref, title, body, pr_repo_desc) pick_prompt = "Would you like to pick %s into another branch?" % merge_hash while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y":