From 8f4a7d1bb1e415e7d916b9291bc702a4f5741b58 Mon Sep 17 00:00:00 2001
From: texasmichelle <texasmichelle@gmail.com>
Date: Mon, 23 Mar 2015 20:18:41 -0500
Subject: [PATCH 1/9] SPARK-1684 Merge script should standardize SPARK-XXX
 prefix

Added text parsing capability to merge script so that titles are
standardized to [MODULE] SPARK-XXX: Description.
---
 dev/merge_spark_pr.py | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index 3062e9c3c665..3ec285455c6d 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -30,6 +30,7 @@
 import subprocess
 import sys
 import urllib2
+import Queue
 
 try:
     import jira.client
@@ -286,6 +287,42 @@ def resolve_jira_issues(title, merge_branches, comment):
         resolve_jira_issue(merge_branches, comment, jira_id)
 
 
+def standardize_jira_ref(text):
+    # Standardize the [MODULE] SPARK-XXXXX prefix
+    # Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue"
+    
+    # Check for compliance
+    if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{4,5}: \S+', text)):
+        return text
+    
+    # Extract JIRA ref(s):
+    jira_refs = Queue.Queue()
+    pattern = re.compile(r'(SPARK[-\s]*[0-9]{4,5})', re.IGNORECASE)
+    while (pattern.search(text) is not None):
+        jira_refs.put(re.sub(r'\s', '-', pattern.search(text).groups()[0].upper()))
+        text = text.replace(pattern.search(text).groups()[0], '')
+
+    # Extract spark component(s):
+    components = Queue.Queue()
+    pattern = re.compile(r'(\[\w+\])', re.IGNORECASE)
+    while (pattern.search(text) is not None):
+        components.put(pattern.search(text).groups()[0])
+        text = text.replace(pattern.search(text).groups()[0], '')
+
+    # Cleanup remaining symbols:
+    pattern = re.compile(r'\W+(.*)', re.IGNORECASE)
+    text = pattern.search(text).groups()[0]
+
+    # Assemble full text (module(s), JIRA ref(s), remaining text)
+    clean_text = ''
+    while (not components.empty()):
+        clean_text += components.get() + ' '
+    while (not jira_refs.empty()):
+        clean_text += jira_refs.get() + ' '
+    clean_text = clean_text.rstrip() + ': ' + text.strip()
+    
+    return clean_text
+
 branches = get_json("%s/branches" % GITHUB_API_BASE)
 branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
 # Assumes branch names can be sorted lexicographically
@@ -296,7 +333,7 @@ def resolve_jira_issues(title, merge_branches, comment):
 pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
 
 url = pr["url"]
-title = pr["title"]
+title = standardize_jira_ref(pr["title"])
 body = pr["body"]
 target_ref = pr["base"]["ref"]
 user_login = pr["user"]["login"]

From 042099dd0d9ed4b42718729a9780182922c4bef3 Mon Sep 17 00:00:00 2001
From: texasmichelle <texasmichelle@gmail.com>
Date: Mon, 23 Mar 2015 20:46:19 -0500
Subject: [PATCH 2/9] SPARK-1684 Merge script should standardize SPARK-XXX
 prefix

Added accuracy tweaks based on test results
---
 dev/merge_spark_pr.py | 52 +++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 19 deletions(-)

diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index 3ec285455c6d..e09c64bb3288 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -286,40 +286,54 @@ def resolve_jira_issues(title, merge_branches, comment):
     for jira_id in jira_ids:
         resolve_jira_issue(merge_branches, comment, jira_id)
 
-
 def standardize_jira_ref(text):
     # Standardize the [MODULE] SPARK-XXXXX prefix
     # Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue"
     
-    # Check for compliance
-    if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{4,5}: \S+', text)):
+    #If the string is compliant, no need to process any further
+    if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{3,5}: \S+', text)):
         return text
     
     # Extract JIRA ref(s):
-    jira_refs = Queue.Queue()
-    pattern = re.compile(r'(SPARK[-\s]*[0-9]{4,5})', re.IGNORECASE)
+    jira_refs = deque()
+    pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,5})', re.IGNORECASE)
     while (pattern.search(text) is not None):
-        jira_refs.put(re.sub(r'\s', '-', pattern.search(text).groups()[0].upper()))
-        text = text.replace(pattern.search(text).groups()[0], '')
+        ref = pattern.search(text).groups()[0]
+        # Replace any whitespace with a dash & convert to uppercase
+        jira_refs.append(re.sub(r'\s', '-', ref.upper()))
+        text = text.replace(ref, '')
 
     # Extract spark component(s):
-    components = Queue.Queue()
-    pattern = re.compile(r'(\[\w+\])', re.IGNORECASE)
+    components = deque()
+    # Look for alphanumeric chars, spaces, and/or commas
+    pattern = re.compile(r'(\[[\w\s,]+\])', re.IGNORECASE)
     while (pattern.search(text) is not None):
-        components.put(pattern.search(text).groups()[0])
-        text = text.replace(pattern.search(text).groups()[0], '')
+        component = pattern.search(text).groups()[0]
+        # Convert to uppercase
+        components.append(component.upper())
+        text = text.replace(component, '')
 
     # Cleanup remaining symbols:
-    pattern = re.compile(r'\W+(.*)', re.IGNORECASE)
-    text = pattern.search(text).groups()[0]
+    pattern = re.compile(r'^\W+(.*)', re.IGNORECASE)
+    if (pattern.search(text) is not None):
+        text = pattern.search(text).groups()[0]
 
     # Assemble full text (module(s), JIRA ref(s), remaining text)
-    clean_text = ''
-    while (not components.empty()):
-        clean_text += components.get() + ' '
-    while (not jira_refs.empty()):
-        clean_text += jira_refs.get() + ' '
-    clean_text = clean_text.rstrip() + ': ' + text.strip()
+    if (len(components) < 1):
+        components = ""
+    component_text = ' '.join(components).strip()
+    if (len(jira_refs) < 1):
+        jira_ref_text = ""
+    jira_ref_text = ' '.join(jira_refs).strip()
+    
+    if (len(jira_ref_text) < 1 and len(component_text) < 1):
+        clean_text = text.strip()
+    elif (len(jira_ref_text) < 1):
+        clean_text = component_text + ' ' + text.strip()
+    elif (len(component_text) < 1):
+        clean_text = jira_ref_text + ': ' + text.strip()
+    else:
+        clean_text = component_text + ' ' + jira_ref_text + ': ' + text.strip()
     
     return clean_text
 

From 48520ba748649a4ddee4b2b7897c5f08b7a99e42 Mon Sep 17 00:00:00 2001
From: texasmichelle <texasmichelle@gmail.com>
Date: Mon, 23 Mar 2015 21:00:23 -0500
Subject: [PATCH 3/9] SPARK-1684: Corrected import statement

Replaced the Queue import with collections.deque
---
 dev/merge_spark_pr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index e09c64bb3288..609c31fecb00 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -30,7 +30,7 @@
 import subprocess
 import sys
 import urllib2
-import Queue
+from collections import deque
 
 try:
     import jira.client

From aa20a6e14d5f7b204124638d7cb147a2f8e6190d Mon Sep 17 00:00:00 2001
From: texasmichelle <texasmichelle@gmail.com>
Date: Tue, 24 Mar 2015 22:07:10 -0500
Subject: [PATCH 4/9] Move code into main() and add doctest for new text
 parsing method

Also corrected a spelling mistake in one of the print statements.
---
 dev/merge_spark_pr.py | 143 +++++++++++++++++++++++-------------------
 1 file changed, 78 insertions(+), 65 deletions(-)

diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index 609c31fecb00..c43b94d409cb 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -56,8 +56,6 @@
 # Prefix added to temporary branches
 BRANCH_PREFIX = "PR_TOOL"
 
-os.chdir(SPARK_HOME)
-
 
 def get_json(url):
     try:
@@ -86,10 +84,6 @@ def continue_maybe(prompt):
     if result.lower() != "y":
         fail("Okay, exiting")
 
-
-original_head = run_cmd("git rev-parse HEAD")[:8]
-
-
 def clean_up():
     print "Restoring head pointer to %s" % original_head
     run_cmd("git checkout %s" % original_head)
@@ -275,7 +269,7 @@ def get_version_json(version_str):
     asf_jira.transition_issue(
         jira_id, resolve["id"], fixVersions=jira_fix_versions, comment=comment)
 
-    print "Succesfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)
+    print "Successfully resolved %s with fixVersions=%s!" % (jira_id, fix_versions)
 
 
 def resolve_jira_issues(title, merge_branches, comment):
@@ -286,10 +280,19 @@ def resolve_jira_issues(title, merge_branches, comment):
     for jira_id in jira_ids:
         resolve_jira_issue(merge_branches, comment, jira_id)
 
+
 def standardize_jira_ref(text):
-    # Standardize the [MODULE] SPARK-XXXXX prefix
-    # Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue"
+    """
+    Standardize the [MODULE] SPARK-XXXXX prefix
+    Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue"
     
+    >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
+    '[SQL] SPARK-5821: ParquetRelation2 CTAS should check if delete is successful'
+    >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
+    '[PROJECT INFRA] [WIP] SPARK-4123: Show new dependencies added in pull requests'
+    >>> standardize_jira_ref("[MLlib] Spark  5954: Top by key")
+    '[MLLIB] SPARK-5954: Top by key'
+    """
     #If the string is compliant, no need to process any further
     if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{3,5}: \S+', text)):
         return text
@@ -300,7 +303,7 @@ def standardize_jira_ref(text):
     while (pattern.search(text) is not None):
         ref = pattern.search(text).groups()[0]
         # Replace any whitespace with a dash & convert to uppercase
-        jira_refs.append(re.sub(r'\s', '-', ref.upper()))
+        jira_refs.append(re.sub(r'\s+', '-', ref.upper()))
         text = text.replace(ref, '')
 
     # Extract spark component(s):
@@ -337,68 +340,78 @@ def standardize_jira_ref(text):
     
     return clean_text
 
-branches = get_json("%s/branches" % GITHUB_API_BASE)
-branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
-# Assumes branch names can be sorted lexicographically
-latest_branch = sorted(branch_names, reverse=True)[0]
-
-pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ")
-pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
-pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
-
-url = pr["url"]
-title = standardize_jira_ref(pr["title"])
-body = pr["body"]
-target_ref = pr["base"]["ref"]
-user_login = pr["user"]["login"]
-base_ref = pr["head"]["ref"]
-pr_repo_desc = "%s/%s" % (user_login, base_ref)
-
-# Merged pull requests don't appear as merged in the GitHub API;
-# Instead, they're closed by asfgit.
-merge_commits = \
-    [e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"]
-
-if merge_commits:
-    merge_hash = merge_commits[0]["commit_id"]
-    message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"]
-
-    print "Pull request %s has already been merged, assuming you want to backport" % pr_num
-    commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
+def main():
+    os.chdir(SPARK_HOME)
+    original_head = run_cmd("git rev-parse HEAD")[:8]
+    
+    branches = get_json("%s/branches" % GITHUB_API_BASE)
+    branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
+    # Assumes branch names can be sorted lexicographically
+    latest_branch = sorted(branch_names, reverse=True)[0]
+
+    pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): ")
+    pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
+    pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
+
+    url = pr["url"]
+    title = standardize_jira_ref(pr["title"])
+    body = pr["body"]
+    target_ref = pr["base"]["ref"]
+    user_login = pr["user"]["login"]
+    base_ref = pr["head"]["ref"]
+    pr_repo_desc = "%s/%s" % (user_login, base_ref)
+
+    # Merged pull requests don't appear as merged in the GitHub API;
+    # Instead, they're closed by asfgit.
+    merge_commits = \
+        [e for e in pr_events if e["actor"]["login"] == "asfgit" and e["event"] == "closed"]
+
+    if merge_commits:
+        merge_hash = merge_commits[0]["commit_id"]
+        message = get_json("%s/commits/%s" % (GITHUB_API_BASE, merge_hash))["commit"]["message"]
+
+        print "Pull request %s has already been merged, assuming you want to backport" % pr_num
+        commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
                                     "%s^{commit}" % merge_hash]).strip() != ""
-    if not commit_is_downloaded:
-        fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)
+        if not commit_is_downloaded:
+            fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)
 
-    print "Found commit %s:\n%s" % (merge_hash, message)
-    cherry_pick(pr_num, merge_hash, latest_branch)
-    sys.exit(0)
+        print "Found commit %s:\n%s" % (merge_hash, message)
+        cherry_pick(pr_num, merge_hash, latest_branch)
+        sys.exit(0)
 
-if not bool(pr["mergeable"]):
-    msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \
-        "Continue? (experts only!)"
-    continue_maybe(msg)
+    if not bool(pr["mergeable"]):
+        msg = "Pull request %s is not mergeable in its current form.\n" % pr_num + \
+            "Continue? (experts only!)"
+        continue_maybe(msg)
 
-print ("\n=== Pull Request #%s ===" % pr_num)
-print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
-    title, pr_repo_desc, target_ref, url))
-continue_maybe("Proceed with merging pull request #%s?" % pr_num)
+    print ("\n=== Pull Request #%s ===" % pr_num)
+    print ("title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s" % (
+        title, pr_repo_desc, target_ref, url))
+    continue_maybe("Proceed with merging pull request #%s?" % pr_num)
 
-merged_refs = [target_ref]
+    merged_refs = [target_ref]
 
-merge_hash = merge_pr(pr_num, target_ref)
+    merge_hash = merge_pr(pr_num, target_ref)
 
-pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
-while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y":
-    merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)]
+    pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
+    while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y":
+        merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)]
 
-if JIRA_IMPORTED:
-    if JIRA_USERNAME and JIRA_PASSWORD:
-        continue_maybe("Would you like to update an associated JIRA?")
-        jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num)
-        resolve_jira_issues(title, merged_refs, jira_comment)
+    if JIRA_IMPORTED:
+        if JIRA_USERNAME and JIRA_PASSWORD:
+            continue_maybe("Would you like to update an associated JIRA?")
+            jira_comment = "Issue resolved by pull request %s\n[%s/%s]" % (pr_num, GITHUB_BASE, pr_num)
+            resolve_jira_issues(title, merged_refs, jira_comment)
+        else:
+            print "JIRA_USERNAME and JIRA_PASSWORD not set"
+            print "Exiting without trying to close the associated JIRA."
     else:
-        print "JIRA_USERNAME and JIRA_PASSWORD not set"
+        print "Could not find jira-python library. Run 'sudo pip install jira-python' to install."
         print "Exiting without trying to close the associated JIRA."
-else:
-    print "Could not find jira-python library. Run 'sudo pip install jira-python' to install."
-    print "Exiting without trying to close the associated JIRA."
+
+if __name__ == "__main__":
+    import doctest
+    doctest.testmod()
+    
+    main()

From df73f6a68e2c097ce4898e66752a757dd7c328a7 Mon Sep 17 00:00:00 2001
From: texasmichelle <texasmichelle@gmail.com>
Date: Sat, 18 Apr 2015 21:51:42 -0500
Subject: [PATCH 5/9] reworked regexes to enforce brackets around JIRA ref

---
 dev/merge_spark_pr.py | 42 +++++++++++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index c43b94d409cb..e81dff53917b 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -283,33 +283,41 @@ def resolve_jira_issues(title, merge_branches, comment):
 
 def standardize_jira_ref(text):
     """
-    Standardize the [MODULE] SPARK-XXXXX prefix
-    Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[MLLIB] SPARK-XXX: Issue"
+    Standardize the [SPARK-XXXXX] [MODULE] prefix
+    Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or "SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX] [MLLIB] Issue"
     
     >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful")
-    '[SQL] SPARK-5821: ParquetRelation2 CTAS should check if delete is successful'
+    '[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is successful'
     >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new dependencies added in pull requests")
-    '[PROJECT INFRA] [WIP] SPARK-4123: Show new dependencies added in pull requests'
+    '[SPARK-4123] [PROJECT INFRA] [WIP] Show new dependencies added in pull requests'
     >>> standardize_jira_ref("[MLlib] Spark  5954: Top by key")
-    '[MLLIB] SPARK-5954: Top by key'
+    '[SPARK-5954] [MLLIB] Top by key'
+    >>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl")
+    '[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
+    >>> standardize_jira_ref("SPARK-1094 Support MiMa for reporting binary compatibility accross versions.")
+    '[SPARK-1094] Support MiMa for reporting binary compatibility accross versions.'
+    >>> standardize_jira_ref("[WIP]  [SPARK-1146] Vagrant support for Spark")
+    '[SPARK-1146] [WIP] Vagrant support for Spark'
+    >>> standardize_jira_ref("SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
+    '[SPARK-1032] If Yarn app fails before registering, app master stays aroun...'
     """
-    #If the string is compliant, no need to process any further
-    if (re.search(r'\[[A-Z0-9_]+\] SPARK-[0-9]{3,5}: \S+', text)):
+    # If the string is compliant, no need to process any further
+    if (re.search(r'^\[SPARK-[0-9]{3,6}\] (\[[A-Z0-9_\s,]+\] )+\S+', text)):
         return text
     
     # Extract JIRA ref(s):
     jira_refs = deque()
-    pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,5})', re.IGNORECASE)
+    pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,6})', re.IGNORECASE)
     while (pattern.search(text) is not None):
         ref = pattern.search(text).groups()[0]
         # Replace any whitespace with a dash & convert to uppercase
-        jira_refs.append(re.sub(r'\s+', '-', ref.upper()))
+        jira_refs.append('[' + re.sub(r'\s+', '-', ref.upper()) + ']')
         text = text.replace(ref, '')
 
     # Extract spark component(s):
     components = deque()
-    # Look for alphanumeric chars, spaces, and/or commas
-    pattern = re.compile(r'(\[[\w\s,]+\])', re.IGNORECASE)
+    # Look for alphanumeric chars, spaces, dashes, periods, and/or commas
+    pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE)
     while (pattern.search(text) is not None):
         component = pattern.search(text).groups()[0]
         # Convert to uppercase
@@ -321,22 +329,22 @@ def standardize_jira_ref(text):
     if (pattern.search(text) is not None):
         text = pattern.search(text).groups()[0]
 
-    # Assemble full text (module(s), JIRA ref(s), remaining text)
-    if (len(components) < 1):
-        components = ""
-    component_text = ' '.join(components).strip()
+    # Assemble full text (JIRA ref(s), module(s), remaining text)
     if (len(jira_refs) < 1):
         jira_ref_text = ""
     jira_ref_text = ' '.join(jira_refs).strip()
+    if (len(components) < 1):
+        components = ""
+    component_text = ' '.join(components).strip()
     
     if (len(jira_ref_text) < 1 and len(component_text) < 1):
         clean_text = text.strip()
     elif (len(jira_ref_text) < 1):
         clean_text = component_text + ' ' + text.strip()
     elif (len(component_text) < 1):
-        clean_text = jira_ref_text + ': ' + text.strip()
+        clean_text = jira_ref_text + ' ' + text.strip()
     else:
-        clean_text = component_text + ' ' + jira_ref_text + ': ' + text.strip()
+        clean_text = jira_ref_text + ' ' + component_text + ' ' + text.strip()
     
     return clean_text
 

From 4f1ed46a8b6624871c37db868ce473da0e60f6a2 Mon Sep 17 00:00:00 2001
From: texasmichelle <texasmichelle@gmail.com>
Date: Mon, 20 Apr 2015 12:38:39 -0500
Subject: [PATCH 6/9] Deque removal, logic simplifications, & prompt user to
 pick a title (orig or modified)

---
 dev/merge_spark_pr.py | 55 +++++++++++++++++++++++--------------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index e81dff53917b..c37bf73e2380 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -30,7 +30,6 @@
 import subprocess
 import sys
 import urllib2
-from collections import deque
 
 try:
     import jira.client
@@ -300,51 +299,42 @@ def standardize_jira_ref(text):
     '[SPARK-1146] [WIP] Vagrant support for Spark'
     >>> standardize_jira_ref("SPARK-1032. If Yarn app fails before registering, app master stays aroun...")
     '[SPARK-1032] If Yarn app fails before registering, app master stays aroun...'
+    >>> standardize_jira_ref("[SPARK-6250][SPARK-6146][SPARK-5911][SQL] Types are now reserved words in DDL parser.")
+    '[SPARK-6250] [SPARK-6146] [SPARK-5911] [SQL] Types are now reserved words in DDL parser.'
+    >>> standardize_jira_ref("Additional information for users building from source code")
+    'Additional information for users building from source code'
     """
+    jira_refs = []
+    components = []
+    
     # If the string is compliant, no need to process any further
     if (re.search(r'^\[SPARK-[0-9]{3,6}\] (\[[A-Z0-9_\s,]+\] )+\S+', text)):
         return text
     
     # Extract JIRA ref(s):
-    jira_refs = deque()
-    pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,6})', re.IGNORECASE)
-    while (pattern.search(text) is not None):
-        ref = pattern.search(text).groups()[0]
-        # Replace any whitespace with a dash & convert to uppercase
+    pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,6})+', re.IGNORECASE)
+    for ref in pattern.findall(text):
+        # Add brackets, replace spaces with a dash, & convert to uppercase
         jira_refs.append('[' + re.sub(r'\s+', '-', ref.upper()) + ']')
         text = text.replace(ref, '')
 
     # Extract spark component(s):
-    components = deque()
     # Look for alphanumeric chars, spaces, dashes, periods, and/or commas
     pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE)
-    while (pattern.search(text) is not None):
-        component = pattern.search(text).groups()[0]
-        # Convert to uppercase
+    for component in pattern.findall(text):
         components.append(component.upper())
         text = text.replace(component, '')
 
-    # Cleanup remaining symbols:
+    # Cleanup any remaining symbols:
     pattern = re.compile(r'^\W+(.*)', re.IGNORECASE)
     if (pattern.search(text) is not None):
         text = pattern.search(text).groups()[0]
 
     # Assemble full text (JIRA ref(s), module(s), remaining text)
-    if (len(jira_refs) < 1):
-        jira_ref_text = ""
-    jira_ref_text = ' '.join(jira_refs).strip()
-    if (len(components) < 1):
-        components = ""
-    component_text = ' '.join(components).strip()
+    clean_text = ' '.join(jira_refs).strip() + " " + ' '.join(components).strip() + " " + text.strip()
     
-    if (len(jira_ref_text) < 1 and len(component_text) < 1):
-        clean_text = text.strip()
-    elif (len(jira_ref_text) < 1):
-        clean_text = component_text + ' ' + text.strip()
-    elif (len(component_text) < 1):
-        clean_text = jira_ref_text + ' ' + text.strip()
-    else:
-        clean_text = jira_ref_text + ' ' + component_text + ' ' + text.strip()
+    # Replace multiple spaces with a single space, e.g. if no jira refs and/or components were included
+    clean_text = re.sub(r'\s+', ' ', clean_text.strip())
     
     return clean_text
 
@@ -362,6 +352,21 @@ def main():
     pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
 
     url = pr["url"]
+
+    # Decide whether to use the modified title or not
+    print "I've re-written the title as follows to match the standard format:"
+    print "Original: %s" % pr["title"]
+    print "Modified: %s" % standardize_jira_ref(pr["title"])
+    prompt = "Would you like to use the modified title?"
+    result = raw_input("%s (y/n): " % prompt)
+    if result.lower() == "y":
+        title = standardize_jira_ref(pr["title"])
+        print "Using modified title:"
+    else:
+        title = pr["title"]
+        print "Using original title:"
+    print title
+
     title = standardize_jira_ref(pr["title"])
     body = pr["body"]
     target_ref = pr["base"]["ref"]

From 8c195bb199b1e325929100e34f6d37cb0f824fc2 Mon Sep 17 00:00:00 2001
From: texasmichelle <texasmichelle@gmail.com>
Date: Mon, 20 Apr 2015 13:08:30 -0500
Subject: [PATCH 7/9] removed erroneous line

---
 dev/merge_spark_pr.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index c37bf73e2380..4d1ab5022627 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -357,8 +357,7 @@ def main():
     print "I've re-written the title as follows to match the standard format:"
     print "Original: %s" % pr["title"]
     print "Modified: %s" % standardize_jira_ref(pr["title"])
-    prompt = "Would you like to use the modified title?"
-    result = raw_input("%s (y/n): " % prompt)
+    result = raw_input("Would you like to use the modified title? (y/n): ")
     if result.lower() == "y":
         title = standardize_jira_ref(pr["title"])
         print "Using modified title:"
@@ -367,7 +366,6 @@ def main():
         print "Using original title:"
     print title
 
-    title = standardize_jira_ref(pr["title"])
     body = pr["body"]
     target_ref = pr["base"]["ref"]
     user_login = pr["user"]["login"]

From 7d5fa20b59fe87de8451c0260cacce632c8da872 Mon Sep 17 00:00:00 2001
From: texasmichelle <texasmichelle@gmail.com>
Date: Mon, 20 Apr 2015 19:50:32 -0500
Subject: [PATCH 8/9] only prompt if title has been modified

---
 dev/merge_spark_pr.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index 4d1ab5022627..f17e1360a133 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -354,17 +354,21 @@ def main():
     url = pr["url"]
 
     # Decide whether to use the modified title or not
-    print "I've re-written the title as follows to match the standard format:"
-    print "Original: %s" % pr["title"]
-    print "Modified: %s" % standardize_jira_ref(pr["title"])
-    result = raw_input("Would you like to use the modified title? (y/n): ")
-    if result.lower() == "y":
-        title = standardize_jira_ref(pr["title"])
-        print "Using modified title:"
+    modified_title = standardize_jira_ref(pr["title"])
+    if modified_title != pr["title"]:
+        print "I've re-written the title as follows to match the standard format:"
+        print "Original: %s" % pr["title"]
+        print "Modified: %s" % modified_title
+        result = raw_input("Would you like to use the modified title? (y/n): ")
+        if result.lower() == "y":
+            title = modified_title
+            print "Using modified title:"
+        else:
+            title = pr["title"]
+            print "Using original title:"
+        print title
     else:
         title = pr["title"]
-        print "Using original title:"
-    print title
 
     body = pr["body"]
     target_ref = pr["base"]["ref"]

From 9b6b0a761692941eca30385124424fa09ca1f52b Mon Sep 17 00:00:00 2001
From: texasmichelle <texasmichelle@gmail.com>
Date: Tue, 21 Apr 2015 08:07:25 -0500
Subject: [PATCH 9/9] resolved variable scope issue

---
 dev/merge_spark_pr.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py
index f17e1360a133..b69cd15f99f6 100755
--- a/dev/merge_spark_pr.py
+++ b/dev/merge_spark_pr.py
@@ -95,7 +95,7 @@ def clean_up():
 
 
 # merge the requested PR and return the merge hash
-def merge_pr(pr_num, target_ref):
+def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
     pr_branch_name = "%s_MERGE_PR_%s" % (BRANCH_PREFIX, pr_num)
     target_branch_name = "%s_MERGE_PR_%s_%s" % (BRANCH_PREFIX, pr_num, target_ref.upper())
     run_cmd("git fetch %s pull/%s/head:%s" % (PR_REMOTE_NAME, pr_num, pr_branch_name))
@@ -339,6 +339,8 @@ def standardize_jira_ref(text):
     return clean_text
 
 def main():
+    global original_head
+    
     os.chdir(SPARK_HOME)
     original_head = run_cmd("git rev-parse HEAD")[:8]
     
@@ -407,7 +409,7 @@ def main():
 
     merged_refs = [target_ref]
 
-    merge_hash = merge_pr(pr_num, target_ref)
+    merge_hash = merge_pr(pr_num, target_ref, title, body, pr_repo_desc)
 
     pick_prompt = "Would you like to pick %s into another branch?" % merge_hash
     while raw_input("\n%s (y/n): " % pick_prompt).lower() == "y":