Skip to content

Commit 8f4a7d1

Browse files
committed
SPARK-1684 Merge script should standardize SPARK-XXX prefix
Added text parsing capability to the merge script so that titles are standardized to "[MODULE] SPARK-XXX: Description".
1 parent bfd3ee9 commit 8f4a7d1

File tree

1 file changed

+38
-1
lines changed

1 file changed

+38
-1
lines changed

dev/merge_spark_pr.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import subprocess
3131
import sys
3232
import urllib2
33+
import Queue
3334

3435
try:
3536
import jira.client
@@ -286,6 +287,42 @@ def resolve_jira_issues(title, merge_branches, comment):
286287
resolve_jira_issue(merge_branches, comment, jira_id)
287288

288289

290+
def standardize_jira_ref(text):
    """
    Standardize the "[MODULE] SPARK-XXXX" prefix of a pull request title.

    Converts e.g. "[SPARK-1234][mllib] Issue", "[MLLib] SPARK-1234. Issue" or
    "SPARK 1234 [MLLIB]: Issue" to "[MLLIB] SPARK-1234: Issue".

    :param text: the raw pull request title
    :return: the title rewritten as "[MODULE]... SPARK-XXXX...: Description".
        Titles that are already compliant, or that contain neither a JIRA
        reference nor a bracketed component, are returned unchanged.
    """
    # Check for compliance; anchored so that only a real *prefix* qualifies
    if re.search(r'^(\[[A-Z0-9_]+\] )+SPARK-[0-9]{4,5}: \S+', text):
        return text

    # Extract JIRA ref(s), normalised to "SPARK-<digits>" whatever separator
    # (dash, whitespace, or nothing) the author typed. Duplicates are dropped,
    # first occurrence wins.
    ref_pattern = re.compile(r'SPARK[-\s]*([0-9]{4,5})', re.IGNORECASE)
    jira_refs = []
    for number in ref_pattern.findall(text):
        ref = 'SPARK-' + number
        if ref not in jira_refs:
            jira_refs.append(ref)
    remaining = ref_pattern.sub('', text)

    # Extract spark component(s), upper-cased so "[mllib]" becomes "[MLLIB]"
    component_pattern = re.compile(r'\[\w+\]')
    components = []
    for component in component_pattern.findall(remaining):
        component = component.upper()
        if component not in components:
            components.append(component)
    remaining = component_pattern.sub('', remaining)

    prefix = ' '.join(components + jira_refs)
    if not prefix:
        # Nothing to standardize: leave the title exactly as written
        return text

    # Cleanup leftover symbols (e.g. "[]", ".", ":") at the *start* only, so
    # that leading words of the description are never discarded, then
    # collapse the gaps left behind by the removed refs/components.
    remaining = re.sub(r'^\W+', '', remaining)
    remaining = re.sub(r'\s+', ' ', remaining).strip()

    # Assemble full text (module(s), JIRA ref(s), remaining text)
    if not remaining:
        return prefix
    return prefix + ': ' + remaining
325+
289326
branches = get_json("%s/branches" % GITHUB_API_BASE)
290327
branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x in branches])
291328
# Assumes branch names can be sorted lexicographically
@@ -296,7 +333,7 @@ def resolve_jira_issues(title, merge_branches, comment):
296333
pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
297334

298335
url = pr["url"]
299-
title = pr["title"]
336+
title = standardize_jira_ref(pr["title"])
300337
body = pr["body"]
301338
target_ref = pr["base"]["ref"]
302339
user_login = pr["user"]["login"]

0 commit comments

Comments
 (0)