Skip to content

Commit 5e026a3

Browse files
author
Andrew Or
committed
[Release] Translate unknown author names automatically
1 parent 658fe8f commit 5e026a3

File tree

2 files changed

+111
-18
lines changed

2 files changed

+111
-18
lines changed

dev/create-release/generate-contributors.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -26,23 +26,11 @@
2626

2727
# You must set the following before use!
2828
JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
29+
JIRA_USERNAME = os.environ.get("JIRA_USERNAME", None)
30+
JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", None)
2931
START_COMMIT = os.environ.get("START_COMMIT", "37b100")
3032
END_COMMIT = os.environ.get("END_COMMIT", "3693ae")
3133

32-
try:
33-
from jira.client import JIRA
34-
except ImportError:
35-
print "This tool requires the jira-python library"
36-
print "Install using 'sudo pip install jira-python'"
37-
sys.exit(-1)
38-
39-
try:
40-
import unidecode
41-
except ImportError:
42-
print "This tool requires the unidecode library to decode obscure github usernames"
43-
print "Install using 'sudo pip install unidecode'"
44-
sys.exit(-1)
45-
4634
# If commit range is not specified, prompt the user to provide it
4735
if not START_COMMIT or not END_COMMIT:
4836
print "A commit range is required to proceed."
@@ -52,6 +40,8 @@
5240
END_COMMIT = raw_input("Please specify ending commit hash (non-inclusive): ")
5341

5442
# Verify provided arguments
43+
if not JIRA_USERNAME: sys.exit("JIRA_USERNAME must be provided")
44+
if not JIRA_PASSWORD: sys.exit("JIRA_PASSWORD must be provided")
5545
start_commit_line = get_one_line(START_COMMIT)
5646
end_commit_line = get_one_line(END_COMMIT)
5747
num_commits = num_commits_in_range(START_COMMIT, END_COMMIT)
@@ -70,6 +60,14 @@
7060
sys.exit("Ok, exiting")
7161
print "==================================================================================\n"
7262

63+
# Setup JIRA and github clients. We use two JIRA clients, one with authentication
64+
# and one without, because authentication is slow and required only when we query
65+
# JIRA user details but not Spark issues
66+
jira_options = { "server": JIRA_API_BASE }
67+
jira_client = JIRA(options = jira_options)
68+
jira_client_auth = JIRA(options = jira_options, basic_auth = (JIRA_USERNAME, JIRA_PASSWORD))
69+
github_client = Github()
70+
7371
# Find all commits within this range
7472
print "Gathering commits within range [%s..%s)" % (START_COMMIT, END_COMMIT)
7573
commits = get_one_line_commits(START_COMMIT, END_COMMIT)
@@ -129,14 +127,16 @@ def print_indented(_list):
129127
# }
130128
#
131129
author_info = {}
132-
jira_options = { "server": JIRA_API_BASE }
133-
jira = JIRA(jira_options)
134130
print "\n=========================== Compiling contributor list ==========================="
135131
for commit in filtered_commits:
136132
commit_hash = re.findall("^[a-z0-9]+", commit)[0]
137133
issues = re.findall("SPARK-[0-9]+", commit.upper())
134+
# Translate the author in case the github username is not an actual name
135+
# Also guard against any special characters used in the name
136+
# Note the JIRA client we use here must have authentication enabled
138137
author = get_author(commit_hash)
139-
author = unidecode.unidecode(unicode(author, "UTF-8")) # guard against special characters
138+
author = unidecode.unidecode(unicode(author, "UTF-8"))
139+
author = translate_author(author, github_client, jira_client_auth, warnings)
140140
date = get_date(commit_hash)
141141
# Parse components from the commit message, if any
142142
commit_components = find_components(commit, commit_hash)
@@ -151,7 +151,7 @@ def populate(issue_type, components):
151151
author_info[author][issue_type].add(component)
152152
# Find issues and components associated with this commit
153153
for issue in issues:
154-
jira_issue = jira.issue(issue)
154+
jira_issue = jira_client.issue(issue)
155155
jira_type = jira_issue.fields.issuetype.name
156156
jira_type = translate_issue_type(jira_type, issue, warnings)
157157
jira_components = [translate_component(c.name, commit_hash, warnings)\

dev/create-release/releaseutils.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,29 @@
2121
import re
2222
from subprocess import Popen, PIPE
2323

24+
try:
25+
from jira.client import JIRA
26+
from jira.exceptions import JIRAError
27+
except ImportError:
28+
print "This tool requires the jira-python library"
29+
print "Install using 'sudo pip install jira-python'"
30+
sys.exit(-1)
31+
32+
try:
33+
from github import Github
34+
from github import GithubException
35+
except ImportError:
36+
print "This tool requires the PyGithub library"
37+
print "Install using 'sudo pip install PyGithub'"
38+
sys.exit(-1)
39+
40+
try:
41+
import unidecode
42+
except ImportError:
43+
print "This tool requires the unidecode library to decode obscure github usernames"
44+
print "Install using 'sudo pip install unidecode'"
45+
sys.exit(-1)
46+
2447
# Utility functions that run git commands (written with Git 1.8.5)
2548
def run_cmd(cmd):
    # Run the given command (an argument list handed to Popen) and return
    # everything it wrote to stdout. stderr is not captured.
    process = Popen(cmd, stdout=PIPE)
    stdout_data, _ = process.communicate()
    return stdout_data
2649
def get_author(commit_hash):
@@ -122,3 +145,73 @@ def nice_join(str_list):
122145
else:
123146
return ", ".join(str_list[:-1]) + ", and " + str_list[-1]
124147

148+
# Look up the full name of the specified user on Github.
# Returns None when no client is given or when Github answers 404 for the
# user; any other Github error is re-raised to the caller.
def get_github_name(author, github_client):
    if not github_client:
        return None
    try:
        user = github_client.get_user(author)
        return user.name
    except GithubException as e:
        # A 404 simply means there is no such user
        if e.status == 404:
            return None
        raise e
159+
160+
# Look up the display name of the specified user on JIRA.
# Returns None when no client is given or when JIRA answers 404 for the
# user; any other JIRA error is re-raised to the caller.
def get_jira_name(author, jira_client):
    if not jira_client:
        return None
    try:
        jira_user = jira_client.user(author)
        return jira_user.displayName
    except JIRAError as e:
        # A 404 simply means there is no such user
        if e.status_code == 404:
            return None
        raise e
171+
172+
# Return whether the given name looks like an actual person's name, i.e. it
# consists of two or three whitespace-separated words, such as
# <First Name> <Last Name> or <First Name> <Middle Name> <Last Name>.
# None and empty strings are never valid.
def is_valid_author(author):
    if not author:
        return False
    # Split on runs of whitespace so that leading, trailing or repeated
    # spaces are not counted as (empty) words. This matches capitalize_author,
    # which also ignores empty words.
    num_words = len(author.split())
    return num_words in (2, 3)
177+
178+
# Upper-case the first letter of every word in the given author name and
# leave the remaining letters untouched. Empty words (produced by repeated
# spaces) are dropped. Returns None if the author is None or empty.
def capitalize_author(author):
    if not author:
        return None
    capitalized = []
    for word in author.split(" "):
        if not word:
            continue
        capitalized.append(word[0].capitalize() + word[1:])
    return " ".join(capitalized)
184+
185+
# Cache of already translated authors. Keys are the author strings exactly as
# they were passed to translate_author; values are the names it returned.
translated_authors = {}

# Format the given author in a format appropriate for the contributors list.
#
# If the author already looks like an actual name, it is only capitalized.
# Otherwise github and JIRA are searched for a replacement:
#   - a valid github name is used as the replacement automatically;
#   - a JIRA name is only ever reported as a candidate, because the JIRA
#     account with the same username can belong to someone else.
# Every lookup appends a message describing the outcome to `warnings`, and the
# result is cached in `translated_authors` so each unknown author is looked up
# (and warned about) only once.
#
# Either client may be None, in which case that source is skipped.
# Errors other than "user not found" raised by the clients are propagated.
def translate_author(github_author, github_client, jira_client, warnings):
    if is_valid_author(github_author):
        return capitalize_author(github_author)
    # If the translated author is already cached, just return it
    if github_author in translated_authors:
        return translated_authors[github_author]
    # Otherwise, author name is not found, so we need to search for an alternative name
    github_name = get_github_name(github_author, github_client)
    jira_name = get_jira_name(github_author, jira_client)
    if is_valid_author(github_name):
        github_name = capitalize_author(github_name)
    if is_valid_author(jira_name):
        jira_name = capitalize_author(jira_name)
    candidates = set(name for name in (github_name, jira_name) if name)
    # Only use the github name as a replacement automatically
    # The JIRA name may not make sense because it can belong to someone else
    if is_valid_author(github_name):
        candidates_message = " (another candidate is %s)" % jira_name if jira_name else ""
        warnings.append("Replacing github user %s with %s%s" % (github_author, github_name, candidates_message))
        # Key the cache by the original author: that is what later calls look up
        translated_authors[github_author] = github_name
        return github_name
    # No direct replacement, so return the original author and list any candidates found
    # Candidates are sorted so the message is deterministic and a list is passed to nice_join
    candidates_message = " (candidates: %s)" % nice_join(sorted(candidates)) if candidates else ""
    warnings.append("Unable to find a replacement for github user %s%s" % (github_author, candidates_message))
    translated_authors[github_author] = github_author
    return github_author
217+

0 commit comments

Comments
 (0)