Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 110 additions & 10 deletions dev/merge_arrow_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import sys
import requests
import getpass
import json

from six.moves import input
import six
Expand Down Expand Up @@ -135,6 +136,14 @@ def fix_version_from_branch(branch, versions):
for project in SUPPORTED_PROJECTS]


def get_jira_id(title):
for project, regex in PR_TITLE_REGEXEN:
m = regex.search(title)
if m:
return (m.group(1), project)
return (None, None)


class JiraIssue(object):

def __init__(self, jira_con, jira_id, project, cmd):
Expand Down Expand Up @@ -263,6 +272,21 @@ def get_pr_data(self, number):
return get_json("%s/pulls/%s" % (self.github_api, number),
headers=self.headers)

def update_pr_title(self, number, title):
if not self.headers:
msg = ("Can not update PR {} title to '{}'. " +
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

s/Can not/Cannot/

"ARROW_GITHUB_API_TOKEN environment not set").format(
number, title
)
raise Exception(msg)

# note need to use the issues URL to update
url = "%s/issues/%s" % (self.github_api, number)
data = json.dumps({'title': title})
resp = requests.patch(url, data=data, headers=self.headers)
resp.raise_for_status()
return resp.json()


class CommandInput(object):
"""
Expand Down Expand Up @@ -292,12 +316,12 @@ def continue_maybe(self, prompt):

class PullRequest(object):

def __init__(self, cmd, github_api, git_remote, jira_con, number):
def __init__(self, cmd, pr_data, git_remote, jira_con, number):
self.cmd = cmd
self.git_remote = git_remote
self.con = jira_con
self.number = number
self._pr_data = github_api.get_pr_data(number)
self._pr_data = pr_data
try:
self.url = self._pr_data["url"]
self.title = self._pr_data["title"]
Expand Down Expand Up @@ -327,12 +351,7 @@ def is_mergeable(self):
return bool(self._pr_data["mergeable"])

def _get_jira(self):
jira_id = None
for project, regex in PR_TITLE_REGEXEN:
m = regex.search(self.title)
if m:
jira_id = m.group(1)
break
(jira_id, project) = get_jira_id(self.title)

if jira_id is None:
options = ' or '.join('{0}-XXX'.format(project)
Expand Down Expand Up @@ -370,7 +389,7 @@ def merge(self):
had_conflicts = True

commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
'--pretty=format:%an <%ae>']).split("\n")
'--pretty=format:%an <%ae>']).split("\n")
distinct_authors = sorted(set(commit_authors),
key=lambda x: commit_authors.count(x),
reverse=True)
Expand Down Expand Up @@ -551,6 +570,86 @@ def get_pr_num():
return input("Which pull request would you like to merge? (e.g. 34): ")


_JIRA_COMPONENT_REGEX = re.compile(r'(\[[^\]]*\])+')

# Maps PR title prefixes to JIRA components
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should Python be in this mapping?

PR_COMPONENTS_TO_JIRA_COMPONENTS = {
'[Rust]': 'Rust',
'[Rust][DataFusion]': 'Rust - DataFusion',
'[C++]': 'C++',
'[R]': 'R',
}


# Return the best matching JIRA component from a PR title, if any
# "[Rust] Fix something" --> Rust
# "[Rust][DataFusion] Fix" --> Rust - DataFusion
# "[CPP] Fix " --> "C++"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# "[CPP] Fix " --> "C++"
# "[C++] Fix " --> "C++"

def jira_component_name_from_title(title):
match = _JIRA_COMPONENT_REGEX.match(title.strip())
if match:
pr_component = str(match.group(0))
return PR_COMPONENTS_TO_JIRA_COMPONENTS.get(pr_component)


# If no jira ID can be found for the PR, offer to create one
# returns returns the github pr_data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Duplicate word: returns returns

def make_auto_jira(github_api, jira_con, cmd, pr_num):
pr_data = github_api.get_pr_data(pr_num)

try:
title = pr_data["title"]
html_link = pr_data["_links"]["html"]["href"]
except KeyError:
pprint.pprint(pr_data)
raise

# had valid JIRA already
if get_jira_id(title)[0]:
return pr_data

print("No JIRA link found for PR", pr_num)
options = ' or '.join('{0}-XXX'.format(project)
for project in SUPPORTED_PROJECTS)
print(" Looked for PR title prefixed by {}".format(options))

# try to make a JIRA issue in the ARROW project with a
# component extracted from the PR title
component = jira_component_name_from_title(title)

if not component:
print(" Could not determine component from title")
print(" Expected '[Component] description', found '{}'".format(title))
print(" Known components: {}".format(
", ".join(PR_COMPONENTS_TO_JIRA_COMPONENTS)))
return pr_data

components = [{"name": component}]
summary = "{}".format(title)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Alternatively, f-strings could be used in most of the places where format is currently being used.

Before:

>>> title = "Hello"
>>> summary = "{}".format(title)
>>> summary
'Hello'

After:

>>> title = "Hello"
>>> summary = f"{title}"
>>> summary
'Hello'


print("=== NEW JIRA ===")
print("Summary\t\t{}".format(summary))
print("Assignee\tNONE")
print("Components\t{}".format(component))
print("Status\t\tNew")
description = ("Issue automatically created " +
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about using the pull request description as is?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My rationale to add a note on provenance was:

  1. I felt for auto generated tickets it was unlikely the author would keep JIRA and the Pull Request synced, and the PR was likely to be the source of truth
  2. Hint to anyone reading the JIRA issue they should look at the PR if they wanted to know the current status of the change.

I am happy to remove the auto-link if that is the consensus

"from Pull Request [{}|{}]\n{}").format(
pr_num, html_link, pr_data.get("body")
)

cmd.continue_maybe("Create JIRA and link to PR?")
issue = jira_con.create_issue(project='ARROW',
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit:

The bracketing style isn't totally consistent in this file, but it appears to be either:

    issue = jira_con.create_issue(project='ARROW',
                                  summary=summary,
                                  description=description,
                                  issuetype={'name': 'Bug'},
                                  components=components)

or:

    issue = jira_con.create_issue(
        project='ARROW',
        summary=summary,
        description=description,
        issuetype={'name': 'Bug'},
        components=components,
    )

But not:

    issue = jira_con.create_issue(project='ARROW',
                                  summary=summary,
                                  description=description,
                                  issuetype={'name': 'Bug'},
                                  components=components,
                                  )

summary=summary,
description=description,
issuetype={'name': 'Bug'},
components=components,
)
key = issue.key
print(" Created", key)
github_api.update_pr_title(pr_num, "{}: {}".format(key, title))
return github_api.get_pr_data(pr_num)


def cli():
# Location of your Arrow git clone
ARROW_HOME = os.path.abspath(os.path.dirname(__file__))
Expand All @@ -567,7 +666,8 @@ def cli():
github_api = GitHubAPI(PROJECT_NAME)

jira_con = connect_jira(cmd)
pr = PullRequest(cmd, github_api, PR_REMOTE_NAME, jira_con, pr_num)
pr_data = make_auto_jira(github_api, jira_con, cmd, pr_num)
pr = PullRequest(cmd, pr_data, PR_REMOTE_NAME, jira_con, pr_num)

if pr.is_merged:
print("Pull request %s has already been merged")
Expand Down
19 changes: 19 additions & 0 deletions dev/test_merge_arrow_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,3 +315,22 @@ def test_jira_output_no_components():
Components\tC++, Python
Status\t\tResolved
URL\t\thttps://issues.apache.org/jira/browse/ARROW-1234"""


def test_jira_component_name_from_title():
assert merge_arrow_pr.jira_component_name_from_title(
'[Rust] Example PR') == 'Rust'
assert merge_arrow_pr.jira_component_name_from_title(
'[Rust ] Example PR') is None
assert merge_arrow_pr.jira_component_name_from_title(
' [Rust] Example PR') == 'Rust'
assert merge_arrow_pr.jira_component_name_from_title(
'[Rust][DataFusion] Example PR') == 'Rust - DataFusion'
assert merge_arrow_pr.jira_component_name_from_title(
'[DataFusion][Rust] Example PR') is None
assert merge_arrow_pr.jira_component_name_from_title(
'[Rust][ddFusion] Example PR') is None
assert merge_arrow_pr.jira_component_name_from_title(
'[C++] Example PR') == 'C++'
assert merge_arrow_pr.jira_component_name_from_title(
'[R] Example PR') == 'R'