From 545bea866a1a72aade101138ce7267fc19b2a66f Mon Sep 17 00:00:00 2001 From: AA Turner <9087854+AA-Turner@users.noreply.github.com> Date: Mon, 27 Apr 2020 23:44:44 +0100 Subject: [PATCH 1/9] Fix name parsing in PEP 0 --- pep0/pep.py | 108 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 34 deletions(-) diff --git a/pep0/pep.py b/pep0/pep.py index e01518df539..db23e56b6dc 100644 --- a/pep0/pep.py +++ b/pep0/pep.py @@ -62,23 +62,29 @@ class Author(object): def __init__(self, author_and_email_tuple): """Parse the name and email address of an author.""" + self.first = self.last = '' + name, email = author_and_email_tuple self.first_last = name.strip() self.email = email.lower() - last_name_fragment, suffix = self._last_name(name) - name_sep = name.index(last_name_fragment) - self.first = name[:name_sep].rstrip() - self.last = last_name_fragment - if self.last[1] == u'.': - # Add an escape to avoid docutils turning `v.` into `22.`. - self.last = u'\\' + self.last - self.suffix = suffix - if not self.first: - self.last_first = self.last + + name_dict = self._parse_name(name) + self.suffix = name_dict.get("suffix") + if name_dict.get("name"): + self.last_first = name_dict["name"] + self.nick = name_dict["name"] else: - self.last_first = u', '.join([self.last, self.first]) - if self.suffix: - self.last_first += u', ' + self.suffix + self.first = name_dict["forename"].rstrip() + self.last = name_dict["surname"] + if self.last[1] == ".": + # Add an escape to avoid docutils turning `v.` into `22.`. + self.last = "\\" + self.last + self.last_first = ", ".join([self.last, self.first]) + self.nick = self.last + + if self.suffix: + self.last_first += ", " + self.suffix + if self.last == "van Rossum": # Special case for our beloved BDFL. :) if self.first == "Guido": @@ -86,10 +92,8 @@ def __init__(self, author_and_email_tuple): elif self.first == "Just": self.nick = "JvR" else: - raise ValueError("unknown van Rossum %r!" 
% self) - self.last_first += " (%s)" % (self.nick,) - else: - self.nick = self.last + raise ValueError(f"unknown van Rossum {self}!") + self.last_first += f" ({self.nick})" def __hash__(self): return hash(self.first_last) @@ -109,28 +113,64 @@ def sort_by(self): base = self.last.lower() return unicodedata.normalize('NFKD', base).encode('ASCII', 'ignore') - def _last_name(self, full_name): - """Find the last name (or nickname) of a full name. + @staticmethod + def _parse_name(full_name): + """Decompose a full name into parts. - If no last name (e.g, 'Aahz') then return the full name. If there is - a leading, lowercase portion to the last name (e.g., 'van' or 'von') - then include it. If there is a suffix (e.g., 'Jr.') that is appended - through a comma, then drop the suffix. + If a mononym (e.g, 'Aahz') then return the full name. If there are + suffixes in the name (e.g. ', Jr.' or 'III'), then find and extract + them. If there is a middle initial followed by a full stop, then + combine the following words into a surname (e.g. N. Vander Weele). If + there is a leading, lowercase portion to the last name (e.g. 'van' or + 'von') then include it in the surname. 
""" - name_partition = full_name.partition(u',') - no_suffix = name_partition[0].strip() - suffix = name_partition[2].strip() - name_parts = no_suffix.split() - part_count = len(name_parts) - if part_count == 1 or part_count == 2: - return name_parts[-1], suffix - else: - assert part_count > 2 + possible_suffixes = ["Jr", "Jr.", "II", "III"] + special_cases = ["The Python core team and community"] + + if full_name in special_cases: + return {"name": full_name} + + suffix_partition = full_name.partition(",") + pre_suffix = suffix_partition[0].strip() + suffix = suffix_partition[2].strip() + + name_parts = pre_suffix.split(" ") + num_parts = len(name_parts) + name = {"suffix": suffix} + + if num_parts == 0: + raise ValueError("Name is empty!") + elif num_parts == 1: + name.update(name=name_parts[0]) + elif num_parts == 2: + name.update(forename=name_parts[0], surname=name_parts[1]) + elif num_parts > 2: + # handles III etc. + if name_parts[-1] in possible_suffixes: + new_suffix = " ".join([*name_parts[-1:], suffix]).strip() + name_parts.pop(-1) + name.update(suffix=new_suffix) + + # handles von, van, v. etc. if name_parts[-2].islower(): - return u' '.join(name_parts[-2:]), suffix + forename = " ".join(name_parts[:-2]) + surname = " ".join(name_parts[-2:]) + name.update(forename=forename, surname=surname) + + # handles double surnames after a middle initial (e.g. N. 
Vander Weele) + elif any(s.endswith(".") for s in name_parts): + split_position = [i for i, x in enumerate(name_parts) if x.endswith(".")][-1] + 1 + forename = " ".join(name_parts[:split_position]) + surname = " ".join(name_parts[split_position:]) + name.update(forename=forename, surname=surname) + else: - return name_parts[-1], suffix + forename = " ".join(name_parts[:-1]) + surname = " ".join(name_parts[-1:]) + name.update(forename=forename, surname=surname) + + return name class PEP(object): From 90bbb4ccbdaa223f7569f198d50a1e3001925a08 Mon Sep 17 00:00:00 2001 From: AA Turner <9087854+AA-Turner@users.noreply.github.com> Date: Tue, 28 Apr 2020 03:22:24 +0100 Subject: [PATCH 2/9] Fixes as per comments --- pep0/pep.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pep0/pep.py b/pep0/pep.py index db23e56b6dc..3e3e263599f 100644 --- a/pep0/pep.py +++ b/pep0/pep.py @@ -70,7 +70,7 @@ def __init__(self, author_and_email_tuple): name_dict = self._parse_name(name) self.suffix = name_dict.get("suffix") - if name_dict.get("name"): + if "name" in name_dict: self.last_first = name_dict["name"] self.nick = name_dict["name"] else: @@ -92,7 +92,7 @@ def __init__(self, author_and_email_tuple): elif self.first == "Just": self.nick = "JvR" else: - raise ValueError(f"unknown van Rossum {self}!") + raise ValueError(f"unknown van Rossum ({name})!") self.last_first += f" ({self.nick})" def __hash__(self): From a1013cef64a724bd6765ea16afce41d8a39d29f1 Mon Sep 17 00:00:00 2001 From: AA Turner <9087854+AA-Turner@users.noreply.github.com> Date: Tue, 28 Apr 2020 18:16:32 +0100 Subject: [PATCH 3/9] Move to author metadata lookup for PEP index --- AUTHORS.csv | 249 +++++++++++++++++++++++++++++++++++++++++++++++++ genpepindex.py | 14 ++- pep0/pep.py | 95 ++----------------- 3 files changed, 268 insertions(+), 90 deletions(-) create mode 100644 AUTHORS.csv diff --git a/AUTHORS.csv b/AUTHORS.csv new file mode 100644 index 00000000000..55a712b0fe0 --- /dev/null +++ 
b/AUTHORS.csv @@ -0,0 +1,249 @@ +"Full Name"; "Surname First"; "Name Reference" +"Aahz"; "Aahz"; "Aahz" +"James C. Ahlstrom"; "Ahlstrom, James C."; "Ahlstrom" +"Jim Althoff"; "Althoff, Jim"; "Althoff" +"Kevin Altis"; "Altis, Kevin"; "Altis" +"Chris Angelico"; "Angelico, Chris"; "Angelico" +"Philipp Angerer"; "Angerer, Philipp"; "Angerer" +"David Ascher"; "Ascher, David"; "Ascher" +"Peter Astrand"; "Astrand, Peter"; "Astrand" +"Carl Banks"; "Banks, Carl"; "Banks" +"Christopher Barker"; "Barker, Christopher"; "Barker" +"Paul Barrett"; "Barrett, Paul"; "Barrett" +"Facundo Batista"; "Batista, Facundo"; "Batista" +"Anthony Baxter"; "Baxter, Anthony"; "Baxter" +"Stefan Behnel"; "Behnel, Stefan"; "Behnel" +"Thomas Bellman"; "Bellman, Thomas"; "Bellman" +"Alexander Belopolsky"; "Belopolsky, Alexander"; "Belopolsky" +"Eli Bendersky"; "Bendersky, Eli"; "Bendersky" +"Cory Benfield"; "Benfield, Cory"; "Benfield" +"Steven Bethard"; "Bethard, Steven"; "Bethard" +"Stéphane Bidoul"; "Bidoul, Stéphane"; "Bidoul" +"Stefano Borini"; "Borini, Stefano"; "Borini" +"Georg Brandl"; "Brandl, Georg"; "Brandl" +"Erik M. Bray"; "Bray, Erik M."; "Bray" +"Gerald Britton"; "Britton, Gerald"; "Britton" +"Oleg Broytman"; "Broytman, Oleg"; "Broytman" +"Benoit Bryon"; "Bryon, Benoit"; "Bryon" +"Brandt Bucher"; "Bucher, Brandt"; "Bucher" +"Brett Cannon"; "Cannon, Brett"; "Cannon" +"Justin Cappos"; "Cappos, Justin"; "Cappos" +"Josiah Carlson"; "Carlson, Josiah"; "Carlson" +"W Isaac Carroll"; "Carroll, W Isaac"; "Carroll" +"Matt Chisholm"; "Chisholm, Matt"; "Chisholm" +"Nick Coghlan"; "Coghlan, Nick"; "Coghlan" +"Dave Cole"; "Cole, Dave"; "Cole" +"Robert Collins"; "Collins, Robert"; "Collins" +"Paul Colomiets"; "Colomiets, Paul"; "Colomiets" +"Mario Corchero"; "Corchero, Mario"; "Corchero" +"Christopher A. 
Craig"; "Craig, Christopher A."; "Craig" +"Laura Creighton"; "Creighton, Laura"; "Creighton" +"Steven D'Aprano"; "D'Aprano, Steven"; "D'Aprano" +"Kushal Das"; "Das, Kushal"; "Das" +"Ned Deily"; "Deily, Ned"; "Deily" +"Tim Delaney"; "Delaney, Tim"; "Delaney" +"Lois Anne DeLong"; "DeLong, Lois Anne"; "DeLong" +"Jeroen Demeyer"; "Demeyer, Jeroen"; "Demeyer" +"Vladimir Diaz"; "Diaz, Vladimir"; "Diaz" +"Jack Diederich"; "Diederich, Jack"; "Diederich" +"Steve Dower"; "Dower, Steve"; "Dower" +"Walter Dörwald"; "Dörwald, Walter"; "Dörwald" +"Fred L. Drake, Jr."; "Drake, Fred L., Jr."; "Drake" +"Michael P. Dubner"; "Dubner, Michael P."; "Dubner" +"Paul F. Dubois"; "Dubois, Paul F."; "Dubois" +"Ernest W. Durbin III"; "Durbin, Ernest W., III"; "Durbin" +"P.J. Eby"; "Eby, P.J."; "Eby" +"Phillip J. Eby"; "Eby, Phillip J."; "Eby" +"Tal Einat"; "Einat, Tal"; "Einat" +"Micah Elliott"; "Elliott, Micah"; "Elliott" +"Jeff Epler"; "Epler, Jeff"; "Epler" +"David Eppstein"; "Eppstein, David"; "Eppstein" +"Clark C. Evans"; "Evans, Clark C."; "Evans" +"Gregory Ewing"; "Ewing, Gregory"; "Ewing" +"Greg Ewing"; "Ewing, Greg"; "Ewing" +"Martijn Faassen"; "Faassen, Martijn"; "Faassen" +"Ben Finney"; "Finney, Ben"; "Finney" +"Michael Foord"; "Foord, Michael"; "Foord" +"Ethan Furman"; "Furman, Ethan"; "Furman" +"Pablo Galindo"; "Galindo, Pablo"; "Galindo" +"Paul Ganssle"; "Ganssle, Paul"; "Ganssle" +"Alex Gaynor"; "Gaynor, Alex"; "Gaynor" +"Pradyun Gedam"; "Gedam, Pradyun"; "Gedam" +"Damien George"; "George, Damien"; "George" +"Frédéric B. Giacometti"; "Giacometti, Frédéric B."; "Giacometti" +"Scott Gilbert"; "Gilbert, Scott"; "Gilbert" +"Ryan Gonzalez"; "Gonzalez, Ryan"; "Gonzalez" +"David Goodger"; "Goodger, David"; "Goodger" +"Grant Griffin"; "Griffin, Grant"; "Griffin" +"Mark E. 
Haase"; "Haase, Mark E."; "Haase" +"Mark Hammond"; "Hammond, Mark"; "Hammond" +"Peter Harris"; "Harris, Peter"; "Harris" +"Larry Hastings"; "Hastings, Larry"; "Hastings" +"Christian Heimes"; "Heimes, Christian"; "Heimes" +"Thomas Heller"; "Heller, Thomas"; "Heller" +"Doug Hellmann"; "Hellmann, Doug"; "Hellmann" +"Magnus Lie Hetland"; "Hetland, Magnus Lie"; "Hetland" +"Raymond Hettinger"; "Hettinger, Raymond"; "Hettinger" +"Neil Hodgson"; "Hodgson, Neil"; "Hodgson" +"Daniel Holth"; "Holth, Daniel"; "Holth" +"Philip House"; "House, Philip"; "House" +"Laurens Van Houtven"; "Van Houtven, Laurens"; "Houtven" +"Ben Hoyt"; "Hoyt, Ben"; "Hoyt" +"Miro Hrončok"; "Hrončok, Miro"; "Hrončok" +"Michael Hudson"; "Hudson, Michael"; "Hudson" +"Jeremy Hylton"; "Hylton, Jeremy"; "Hylton" +"Inada Naoki"; "Inada, Naoki"; "Inada" +"Dustin Ingram"; "Ingram, Dustin"; "Ingram" +"Atsuo Ishimoto"; "Ishimoto, Atsuo"; "Ishimoto" +"Jack Jansen"; "Jansen, Jack"; "Jansen" +"Chris Jerdonek"; "Jerdonek, Chris"; "Jerdonek" +"Joseph Jevnik"; "Jevnik, Joseph"; "Jevnik" +"Jim J. Jewett"; "Jewett, Jim J."; "Jewett" +"Jim Jewett"; "Jewett, Jim"; "Jewett" +"Ewa Jodlowska"; "Jodlowska, Ewa"; "Jodlowska" +"Richard Jones"; "Jones, Richard"; "Jones" +"Konstantin Kashin"; "Kashin, Konstantin"; "Kashin" +"Reid Kleckner"; "Kleckner, Reid"; "Kleckner" +"Thomas Kluyver"; "Kluyver, Thomas"; "Kluyver" +"Stepan Koltsov"; "Koltsov, Stepan"; "Koltsov" +"Stefan Krah"; "Krah, Stefan"; "Krah" +"Sebastian Kreft"; "Kreft, Sebastian"; "Kreft" +"Holger Krekel"; "Krekel, Holger"; "Krekel" +"A.M. 
Kuchling"; "Kuchling, A.M."; "Kuchling" +"Trishank Karthik Kuppusamy"; "Kuppusamy, Trishank Karthik"; "Kuppusamy" +"Robert Kuska"; "Kuska, Robert"; "Kuska" +"Joshua Landau"; "Landau, Joshua"; "Landau" +"Łukasz Langa"; "Langa, Łukasz"; "Langa" +"Michael Lee"; "Lee, Michael"; "Lee" +"Jukka Lehtosalo"; "Lehtosalo, Jukka"; "Lehtosalo" +"Marc-André Lemburg"; "Lemburg, Marc-André"; "Lemburg" +"Ivan Levkivskyi"; "Levkivskyi, Ivan"; "Levkivskyi" +"Gregory Lielens"; "Lielens, Gregory"; "Lielens" +"Björn Lindqvist"; "Lindqvist, Björn"; "Lindqvist" +"Joshua Lock"; "Lock, Joshua"; "Lock" +"Tony Lownds"; "Lownds, Tony"; "Lownds" +"Martin von Löwis"; "von Löwis, Martin"; "von Löwis" +"Martin v. Löwis"; "\v. Löwis, Martin"; "\v. Löwis" +"Mariatta"; "Mariatta"; "Mariatta" +"Alex Martelli"; "Martelli, Alex"; "Martelli" +"Joseph Martinot-Lagarde"; "Martinot-Lagarde, Joseph"; "Martinot-Lagarde" +"Lino Mastrodomenico"; "Mastrodomenico, Lino"; "Mastrodomenico" +"Patrick Maupin"; "Maupin, Patrick"; "Maupin" +"Andrew McClelland"; "McClelland, Andrew"; "McClelland" +"Charles R. McCreary"; "McCreary, Charles R."; "McCreary" +"Chris McDonough"; "McDonough, Chris"; "McDonough" +"Robert T. McGibbon"; "McGibbon, Robert T."; "McGibbon" +"Gordon McMillan"; "McMillan, Gordon"; "McMillan" +"Andrew McNamara"; "McNamara, Andrew"; "McNamara" +"Ezio Melotti"; "Melotti, Ezio"; "Melotti" +"Mark Mendoza"; "Mendoza, Mark"; "Mendoza" +"Markus Meskanen"; "Meskanen, Markus"; "Meskanen" +"Mike Meyer"; "Meyer, Mike"; "Meyer" +"Carl Meyer"; "Meyer, Carl"; "Meyer" +"Trent Mick"; "Mick, Trent"; "Mick" +"Mike G. 
Miller"; "Miller, Mike G."; "Miller" +"Skip Montanaro"; "Montanaro, Skip"; "Montanaro" +"Peter Moody"; "Moody, Peter"; "Moody" +"Marina Moore"; "Moore, Marina"; "Moore" +"Paul Moore"; "Moore, Paul"; "Moore" +"R David Murray"; "Murray, R David"; "Murray" +"Charles-François Natali"; "Natali, Charles-François"; "Natali" +"Lysandros Nikolaou"; "Nikolaou, Lysandros"; "Nikolaou" +"Jesse Noller"; "Noller, Jesse"; "Noller" +"Ben North"; "North, Ben"; "North" +"Neal Norwitz"; "Norwitz, Neal"; "Norwitz" +"Dirkjan Ochtman"; "Ochtman, Dirkjan"; "Ochtman" +"Travis Oliphant"; "Oliphant, Travis"; "Oliphant" +"Jason Orendorff"; "Orendorff, Jason"; "Orendorff" +"Tomáš Orsava"; "Orsava, Tomáš"; "Orsava" +"Richard Oudkerk"; "Oudkerk, Richard"; "Oudkerk" +"Ronald Oussoren"; "Oussoren, Ronald"; "Oussoren" +"Julien Palard"; "Palard, Julien"; "Palard" +"Samuele Pedroni"; "Pedroni, Samuele"; "Pedroni" +"Berker Peksag"; "Peksag, Berker"; "Peksag" +"Michel Pelletier"; "Pelletier, Michel"; "Pelletier" +"Tim Peters"; "Peters, Tim"; "Peters" +"Benjamin Peterson"; "Peterson, Benjamin"; "Peterson" +"Jason Petrone"; "Petrone, Jason"; "Petrone" +"Antoine Pitrou"; "Pitrou, Antoine"; "Pitrou" +"Marcel Plch"; "Plch, Marcel"; "Plch" +"James Polley"; "Polley, James"; "Polley" +"Philippe PRADOS"; "Prados, Philippe"; "Prados" +"Elvis Pranskevichus"; "Pranskevichus, Elvis"; "Pranskevichus" +"Paul Prescod"; "Prescod, Paul"; "Prescod" +"(James) Eric Pruitt"; "Pruitt, (James) Eric"; "Pruitt" +"Lukas Puehringer"; "Puehringer, Lukas"; "Puehringer" +"Brian Quinlan"; "Quinlan, Brian"; "Quinlan" +"Terry Reedy"; "Reedy, Terry"; "Reedy" +"Lennart Regebro"; "Regebro, Lennart"; "Regebro" +"Sean Reifschneider"; "Reifschneider, Sean"; "Reifschneider" +"Christian R. 
Reis"; "Reis, Christian R."; "Reis" +"Jonathan Riehl"; "Riehl, Jonathan"; "Riehl" +"Lisa Roach"; "Roach, Lisa"; "Roach" +"Andre Roberge"; "Roberge, Andre"; "Roberge" +"Armin Ronacher"; "Ronacher, Armin"; "Ronacher" +"Guido van Rossum"; "van Rossum, Guido (GvR)"; "GvR" +"Just van Rossum"; "van Rossum, Just (JvR)"; "JvR" +"Todd Rovito"; "Rovito, Todd"; "Rovito" +"Lie Ryan"; "Ryan, Lie"; "Ryan" +"Vinay Sajip"; "Sajip, Vinay"; "Sajip" +"Pablo Galindo Salgado"; "Salgado, Pablo Galindo"; "Salgado" +"Neil Schemenauer"; "Schemenauer, Neil"; "Schemenauer" +"Peter Schneider-Kamp"; "Schneider-Kamp, Peter"; "Schneider-Kamp" +"Ed Schofield"; "Schofield, Ed"; "Schofield" +"Yury Selivanov"; "Selivanov, Yury"; "Selivanov" +"Jiwon Seo"; "Seo, Jiwon"; "Seo" +"Mark Shannon"; "Shannon, Mark"; "Shannon" +"Cameron Simpson"; "Simpson, Cameron"; "Simpson" +"Greg Slodkowicz"; "Slodkowicz, Greg"; "Slodkowicz" +"Nathaniel J. Smith"; "Smith, Nathaniel J."; "Smith" +"Gregory P. Smith"; "Smith, Gregory P."; "Smith" +"Kevin D. Smith"; "Smith, Kevin D."; "Smith" +"Ethan Smith"; "Smith, Ethan"; "Smith" +"Nathaniel Smith"; "Smith, Nathaniel"; "Smith" +"Eric V. Smith"; "Smith, Eric V."; "Smith" +"Eric Snow"; "Snow, Eric"; "Snow" +"Calvin Spealman"; "Spealman, Calvin"; "Spealman" +"Kerrick Staley"; "Staley, Kerrick"; "Staley" +"Greg Stein"; "Stein, Greg"; "Stein" +"Victor Stinner"; "Stinner, Victor"; "Stinner" +"Serhiy Storchaka"; "Storchaka, Serhiy"; "Storchaka" +"Donald Stufft"; "Stufft, Donald"; "Stufft" +"Daniel Stutzbach"; "Stutzbach, Daniel"; "Stutzbach" +"Michael J. 
Sullivan"; "Sullivan, Michael J."; "Sullivan" +"Roman Suzi"; "Suzi, Roman"; "Suzi" +"Dennis Sweeney"; "Sweeney, Dennis"; "Sweeney" +"Talin"; "Talin"; "Talin" +"Steven Taschuk"; "Taschuk, Steven"; "Taschuk" +"Batuhan Taskaya"; "Taskaya, Batuhan"; "Taskaya" +"Martin Teichmann"; "Teichmann, Martin"; "Teichmann" +"The Python core team and community"; "The Python core team and community"; "The Python core team and community" +"Geoffrey Thomas"; "Thomas, Geoffrey"; "Thomas" +"Oren Tirosh"; "Tirosh, Oren"; "Tirosh" +"Stephen J. Turnbull"; "Turnbull, Stephen J."; "Turnbull" +"Daniel Urban"; "Urban, Daniel"; "Urban" +"Eric N. Vander Weele"; "Vander Weele, Eric N."; "Vander Weele" +"Till Varoquaux"; "Varoquaux, Till"; "Varoquaux" +"Alexandre Vassalotti"; "Vassalotti, Alexandre"; "Vassalotti" +"Mike Verdone"; "Verdone, Mike"; "Verdone" +"Dino Viehland"; "Viehland, Dino"; "Viehland" +"Petr Viktorin"; "Viktorin, Petr"; "Viktorin" +"Zachary Ware"; "Ware, Zachary"; "Ware" +"Gregory R. Warnes"; "Warnes, Gregory R."; "Warnes" +"Barry Warsaw"; "Warsaw, Barry"; "Warsaw" +"Terence Way"; "Way, Terence"; "Way" +"Cliff Wells"; "Wells, Cliff"; "Wells" +"Jervis Whitley"; "Whitley, Jervis"; "Whitley" +"Mark Williams"; "Williams, Mark"; "Williams" +"Carol Willing"; "Willing, Carol"; "Willing" +"Greg Wilson"; "Wilson, Greg"; "Wilson" +"Collin Winter"; "Winter, Collin"; "Winter" +"Thomas Wouters"; "Wouters, Thomas"; "Wouters" +"Masayuki Yamamoto"; "Yamamoto, Masayuki"; "Yamamoto" +"Jeffrey Yasskin"; "Yasskin, Jeffrey"; "Yasskin" +"Ka-Ping Yee"; "Yee, Ka-Ping"; "Yee" +"Moshe Zadka"; "Zadka, Moshe"; "Zadka" +"Koos Zevenhoven"; "Zevenhoven, Koos"; "Zevenhoven" +"Huaiyu Zhu"; "Zhu, Huaiyu"; "Zhu" +"Shannon Zhu"; "Zhu, Shannon"; "Zhu" +"Tarek Ziadé"; "Ziadé, Tarek"; "Ziadé" \ No newline at end of file diff --git a/genpepindex.py b/genpepindex.py index 2ab6698a05a..30845f9a6b7 100755 --- a/genpepindex.py +++ b/genpepindex.py @@ -19,6 +19,7 @@ import sys import os +import csv import codecs from 
operator import attrgetter @@ -33,6 +34,14 @@ def main(argv): else: path = argv[1] + with open("AUTHORS.csv", "r", encoding="UTF8") as f: + read = csv.DictReader(f, delimiter=";") + author_data = {} + for line in read: + full_name = line.pop("Full Name").strip().strip("\"") + details = {k.strip().strip("\""): v.strip().strip("\"") for k, v in line.items()} + author_data[full_name] = details + peps = [] if os.path.isdir(path): for file_path in os.listdir(path): @@ -44,7 +53,7 @@ def main(argv): if file_path.startswith("pep-") and file_path.endswith((".txt", "rst")): with codecs.open(abs_file_path, 'r', encoding='UTF-8') as pep_file: try: - pep = PEP(pep_file) + pep = PEP(pep_file, author_data) if pep.number != int(file_path[4:-4]): raise PEPError('PEP number does not match file name', file_path, pep.number) @@ -57,12 +66,13 @@ def main(argv): peps.sort(key=attrgetter('number')) elif os.path.isfile(path): with open(path, 'r') as pep_file: - peps.append(PEP(pep_file)) + peps.append(PEP(pep_file, author_data)) else: raise ValueError("argument must be a directory or file path") with codecs.open('pep-0000.rst', 'w', encoding='UTF-8') as pep0_file: write_pep0(peps, pep0_file) + if __name__ == "__main__": main(sys.argv) diff --git a/pep0/pep.py b/pep0/pep.py index 3e3e263599f..fc7889bfc41 100644 --- a/pep0/pep.py +++ b/pep0/pep.py @@ -60,7 +60,7 @@ class Author(object): The author's email address. 
""" - def __init__(self, author_and_email_tuple): + def __init__(self, author_and_email_tuple, authors_lookup): """Parse the name and email address of an author.""" self.first = self.last = '' @@ -68,32 +68,10 @@ def __init__(self, author_and_email_tuple): self.first_last = name.strip() self.email = email.lower() - name_dict = self._parse_name(name) - self.suffix = name_dict.get("suffix") - if "name" in name_dict: - self.last_first = name_dict["name"] - self.nick = name_dict["name"] - else: - self.first = name_dict["forename"].rstrip() - self.last = name_dict["surname"] - if self.last[1] == ".": - # Add an escape to avoid docutils turning `v.` into `22.`. - self.last = "\\" + self.last - self.last_first = ", ".join([self.last, self.first]) - self.nick = self.last - - if self.suffix: - self.last_first += ", " + self.suffix - - if self.last == "van Rossum": - # Special case for our beloved BDFL. :) - if self.first == "Guido": - self.nick = "GvR" - elif self.first == "Just": - self.nick = "JvR" - else: - raise ValueError(f"unknown van Rossum ({name})!") - self.last_first += f" ({self.nick})" + name_dict = authors_lookup[self.first_last] + + self.last_first = name_dict["Surname First"] + self.nick = name_dict["Name Reference"] def __hash__(self): return hash(self.first_last) @@ -113,65 +91,6 @@ def sort_by(self): base = self.last.lower() return unicodedata.normalize('NFKD', base).encode('ASCII', 'ignore') - @staticmethod - def _parse_name(full_name): - """Decompose a full name into parts. - - If a mononym (e.g, 'Aahz') then return the full name. If there are - suffixes in the name (e.g. ', Jr.' or 'III'), then find and extract - them. If there is a middle initial followed by a full stop, then - combine the following words into a surname (e.g. N. Vander Weele). If - there is a leading, lowercase portion to the last name (e.g. 'van' or - 'von') then include it in the surname. 
- - """ - possible_suffixes = ["Jr", "Jr.", "II", "III"] - special_cases = ["The Python core team and community"] - - if full_name in special_cases: - return {"name": full_name} - - suffix_partition = full_name.partition(",") - pre_suffix = suffix_partition[0].strip() - suffix = suffix_partition[2].strip() - - name_parts = pre_suffix.split(" ") - num_parts = len(name_parts) - name = {"suffix": suffix} - - if num_parts == 0: - raise ValueError("Name is empty!") - elif num_parts == 1: - name.update(name=name_parts[0]) - elif num_parts == 2: - name.update(forename=name_parts[0], surname=name_parts[1]) - elif num_parts > 2: - # handles III etc. - if name_parts[-1] in possible_suffixes: - new_suffix = " ".join([*name_parts[-1:], suffix]).strip() - name_parts.pop(-1) - name.update(suffix=new_suffix) - - # handles von, van, v. etc. - if name_parts[-2].islower(): - forename = " ".join(name_parts[:-2]) - surname = " ".join(name_parts[-2:]) - name.update(forename=forename, surname=surname) - - # handles double surnames after a middle initial (e.g. N. Vander Weele) - elif any(s.endswith(".") for s in name_parts): - split_position = [i for i, x in enumerate(name_parts) if x.endswith(".")][-1] + 1 - forename = " ".join(name_parts[:split_position]) - surname = " ".join(name_parts[split_position:]) - name.update(forename=forename, surname=surname) - - else: - forename = " ".join(name_parts[:-1]) - surname = " ".join(name_parts[-1:]) - name.update(forename=forename, surname=surname) - - return name - class PEP(object): @@ -216,7 +135,7 @@ class PEP(object): u"Rejected", u"Withdrawn", u"Deferred", u"Final", u"Active", u"Draft", u"Superseded") - def __init__(self, pep_file): + def __init__(self, pep_file, author_lookup: dict): """Init object from an open PEP file object.""" # Parse the headers. 
self.filename = pep_file @@ -284,7 +203,7 @@ def __init__(self, pep_file): if len(authors_and_emails) < 1: raise PEPError("no authors found", pep_file.name, self.number) - self.authors = list(map(Author, authors_and_emails)) + self.authors = [Author(author_email, author_lookup) for author_email in authors_and_emails] def _parse_author(self, data): """Return a list of author names and emails.""" From 552a7b662ede551c262dfd50c0c42586fb717c1f Mon Sep 17 00:00:00 2001 From: AA Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 30 Apr 2020 00:37:31 +0100 Subject: [PATCH 4/9] Move CSV to comma separated --- AUTHORS.csv | 498 ++++++++++++++++++++++++------------------------- genpepindex.py | 2 +- 2 files changed, 250 insertions(+), 250 deletions(-) diff --git a/AUTHORS.csv b/AUTHORS.csv index 55a712b0fe0..7450a4a5cea 100644 --- a/AUTHORS.csv +++ b/AUTHORS.csv @@ -1,249 +1,249 @@ -"Full Name"; "Surname First"; "Name Reference" -"Aahz"; "Aahz"; "Aahz" -"James C. Ahlstrom"; "Ahlstrom, James C."; "Ahlstrom" -"Jim Althoff"; "Althoff, Jim"; "Althoff" -"Kevin Altis"; "Altis, Kevin"; "Altis" -"Chris Angelico"; "Angelico, Chris"; "Angelico" -"Philipp Angerer"; "Angerer, Philipp"; "Angerer" -"David Ascher"; "Ascher, David"; "Ascher" -"Peter Astrand"; "Astrand, Peter"; "Astrand" -"Carl Banks"; "Banks, Carl"; "Banks" -"Christopher Barker"; "Barker, Christopher"; "Barker" -"Paul Barrett"; "Barrett, Paul"; "Barrett" -"Facundo Batista"; "Batista, Facundo"; "Batista" -"Anthony Baxter"; "Baxter, Anthony"; "Baxter" -"Stefan Behnel"; "Behnel, Stefan"; "Behnel" -"Thomas Bellman"; "Bellman, Thomas"; "Bellman" -"Alexander Belopolsky"; "Belopolsky, Alexander"; "Belopolsky" -"Eli Bendersky"; "Bendersky, Eli"; "Bendersky" -"Cory Benfield"; "Benfield, Cory"; "Benfield" -"Steven Bethard"; "Bethard, Steven"; "Bethard" -"Stéphane Bidoul"; "Bidoul, Stéphane"; "Bidoul" -"Stefano Borini"; "Borini, Stefano"; "Borini" -"Georg Brandl"; "Brandl, Georg"; "Brandl" -"Erik M. 
Bray"; "Bray, Erik M."; "Bray" -"Gerald Britton"; "Britton, Gerald"; "Britton" -"Oleg Broytman"; "Broytman, Oleg"; "Broytman" -"Benoit Bryon"; "Bryon, Benoit"; "Bryon" -"Brandt Bucher"; "Bucher, Brandt"; "Bucher" -"Brett Cannon"; "Cannon, Brett"; "Cannon" -"Justin Cappos"; "Cappos, Justin"; "Cappos" -"Josiah Carlson"; "Carlson, Josiah"; "Carlson" -"W Isaac Carroll"; "Carroll, W Isaac"; "Carroll" -"Matt Chisholm"; "Chisholm, Matt"; "Chisholm" -"Nick Coghlan"; "Coghlan, Nick"; "Coghlan" -"Dave Cole"; "Cole, Dave"; "Cole" -"Robert Collins"; "Collins, Robert"; "Collins" -"Paul Colomiets"; "Colomiets, Paul"; "Colomiets" -"Mario Corchero"; "Corchero, Mario"; "Corchero" -"Christopher A. Craig"; "Craig, Christopher A."; "Craig" -"Laura Creighton"; "Creighton, Laura"; "Creighton" -"Steven D'Aprano"; "D'Aprano, Steven"; "D'Aprano" -"Kushal Das"; "Das, Kushal"; "Das" -"Ned Deily"; "Deily, Ned"; "Deily" -"Tim Delaney"; "Delaney, Tim"; "Delaney" -"Lois Anne DeLong"; "DeLong, Lois Anne"; "DeLong" -"Jeroen Demeyer"; "Demeyer, Jeroen"; "Demeyer" -"Vladimir Diaz"; "Diaz, Vladimir"; "Diaz" -"Jack Diederich"; "Diederich, Jack"; "Diederich" -"Steve Dower"; "Dower, Steve"; "Dower" -"Walter Dörwald"; "Dörwald, Walter"; "Dörwald" -"Fred L. Drake, Jr."; "Drake, Fred L., Jr."; "Drake" -"Michael P. Dubner"; "Dubner, Michael P."; "Dubner" -"Paul F. Dubois"; "Dubois, Paul F."; "Dubois" -"Ernest W. Durbin III"; "Durbin, Ernest W., III"; "Durbin" -"P.J. Eby"; "Eby, P.J."; "Eby" -"Phillip J. Eby"; "Eby, Phillip J."; "Eby" -"Tal Einat"; "Einat, Tal"; "Einat" -"Micah Elliott"; "Elliott, Micah"; "Elliott" -"Jeff Epler"; "Epler, Jeff"; "Epler" -"David Eppstein"; "Eppstein, David"; "Eppstein" -"Clark C. 
Evans"; "Evans, Clark C."; "Evans" -"Gregory Ewing"; "Ewing, Gregory"; "Ewing" -"Greg Ewing"; "Ewing, Greg"; "Ewing" -"Martijn Faassen"; "Faassen, Martijn"; "Faassen" -"Ben Finney"; "Finney, Ben"; "Finney" -"Michael Foord"; "Foord, Michael"; "Foord" -"Ethan Furman"; "Furman, Ethan"; "Furman" -"Pablo Galindo"; "Galindo, Pablo"; "Galindo" -"Paul Ganssle"; "Ganssle, Paul"; "Ganssle" -"Alex Gaynor"; "Gaynor, Alex"; "Gaynor" -"Pradyun Gedam"; "Gedam, Pradyun"; "Gedam" -"Damien George"; "George, Damien"; "George" -"Frédéric B. Giacometti"; "Giacometti, Frédéric B."; "Giacometti" -"Scott Gilbert"; "Gilbert, Scott"; "Gilbert" -"Ryan Gonzalez"; "Gonzalez, Ryan"; "Gonzalez" -"David Goodger"; "Goodger, David"; "Goodger" -"Grant Griffin"; "Griffin, Grant"; "Griffin" -"Mark E. Haase"; "Haase, Mark E."; "Haase" -"Mark Hammond"; "Hammond, Mark"; "Hammond" -"Peter Harris"; "Harris, Peter"; "Harris" -"Larry Hastings"; "Hastings, Larry"; "Hastings" -"Christian Heimes"; "Heimes, Christian"; "Heimes" -"Thomas Heller"; "Heller, Thomas"; "Heller" -"Doug Hellmann"; "Hellmann, Doug"; "Hellmann" -"Magnus Lie Hetland"; "Hetland, Magnus Lie"; "Hetland" -"Raymond Hettinger"; "Hettinger, Raymond"; "Hettinger" -"Neil Hodgson"; "Hodgson, Neil"; "Hodgson" -"Daniel Holth"; "Holth, Daniel"; "Holth" -"Philip House"; "House, Philip"; "House" -"Laurens Van Houtven"; "Van Houtven, Laurens"; "Houtven" -"Ben Hoyt"; "Hoyt, Ben"; "Hoyt" -"Miro Hrončok"; "Hrončok, Miro"; "Hrončok" -"Michael Hudson"; "Hudson, Michael"; "Hudson" -"Jeremy Hylton"; "Hylton, Jeremy"; "Hylton" -"Inada Naoki"; "Inada, Naoki"; "Inada" -"Dustin Ingram"; "Ingram, Dustin"; "Ingram" -"Atsuo Ishimoto"; "Ishimoto, Atsuo"; "Ishimoto" -"Jack Jansen"; "Jansen, Jack"; "Jansen" -"Chris Jerdonek"; "Jerdonek, Chris"; "Jerdonek" -"Joseph Jevnik"; "Jevnik, Joseph"; "Jevnik" -"Jim J. 
Jewett"; "Jewett, Jim J."; "Jewett" -"Jim Jewett"; "Jewett, Jim"; "Jewett" -"Ewa Jodlowska"; "Jodlowska, Ewa"; "Jodlowska" -"Richard Jones"; "Jones, Richard"; "Jones" -"Konstantin Kashin"; "Kashin, Konstantin"; "Kashin" -"Reid Kleckner"; "Kleckner, Reid"; "Kleckner" -"Thomas Kluyver"; "Kluyver, Thomas"; "Kluyver" -"Stepan Koltsov"; "Koltsov, Stepan"; "Koltsov" -"Stefan Krah"; "Krah, Stefan"; "Krah" -"Sebastian Kreft"; "Kreft, Sebastian"; "Kreft" -"Holger Krekel"; "Krekel, Holger"; "Krekel" -"A.M. Kuchling"; "Kuchling, A.M."; "Kuchling" -"Trishank Karthik Kuppusamy"; "Kuppusamy, Trishank Karthik"; "Kuppusamy" -"Robert Kuska"; "Kuska, Robert"; "Kuska" -"Joshua Landau"; "Landau, Joshua"; "Landau" -"Łukasz Langa"; "Langa, Łukasz"; "Langa" -"Michael Lee"; "Lee, Michael"; "Lee" -"Jukka Lehtosalo"; "Lehtosalo, Jukka"; "Lehtosalo" -"Marc-André Lemburg"; "Lemburg, Marc-André"; "Lemburg" -"Ivan Levkivskyi"; "Levkivskyi, Ivan"; "Levkivskyi" -"Gregory Lielens"; "Lielens, Gregory"; "Lielens" -"Björn Lindqvist"; "Lindqvist, Björn"; "Lindqvist" -"Joshua Lock"; "Lock, Joshua"; "Lock" -"Tony Lownds"; "Lownds, Tony"; "Lownds" -"Martin von Löwis"; "von Löwis, Martin"; "von Löwis" -"Martin v. Löwis"; "\v. Löwis, Martin"; "\v. Löwis" -"Mariatta"; "Mariatta"; "Mariatta" -"Alex Martelli"; "Martelli, Alex"; "Martelli" -"Joseph Martinot-Lagarde"; "Martinot-Lagarde, Joseph"; "Martinot-Lagarde" -"Lino Mastrodomenico"; "Mastrodomenico, Lino"; "Mastrodomenico" -"Patrick Maupin"; "Maupin, Patrick"; "Maupin" -"Andrew McClelland"; "McClelland, Andrew"; "McClelland" -"Charles R. McCreary"; "McCreary, Charles R."; "McCreary" -"Chris McDonough"; "McDonough, Chris"; "McDonough" -"Robert T. 
McGibbon"; "McGibbon, Robert T."; "McGibbon" -"Gordon McMillan"; "McMillan, Gordon"; "McMillan" -"Andrew McNamara"; "McNamara, Andrew"; "McNamara" -"Ezio Melotti"; "Melotti, Ezio"; "Melotti" -"Mark Mendoza"; "Mendoza, Mark"; "Mendoza" -"Markus Meskanen"; "Meskanen, Markus"; "Meskanen" -"Mike Meyer"; "Meyer, Mike"; "Meyer" -"Carl Meyer"; "Meyer, Carl"; "Meyer" -"Trent Mick"; "Mick, Trent"; "Mick" -"Mike G. Miller"; "Miller, Mike G."; "Miller" -"Skip Montanaro"; "Montanaro, Skip"; "Montanaro" -"Peter Moody"; "Moody, Peter"; "Moody" -"Marina Moore"; "Moore, Marina"; "Moore" -"Paul Moore"; "Moore, Paul"; "Moore" -"R David Murray"; "Murray, R David"; "Murray" -"Charles-François Natali"; "Natali, Charles-François"; "Natali" -"Lysandros Nikolaou"; "Nikolaou, Lysandros"; "Nikolaou" -"Jesse Noller"; "Noller, Jesse"; "Noller" -"Ben North"; "North, Ben"; "North" -"Neal Norwitz"; "Norwitz, Neal"; "Norwitz" -"Dirkjan Ochtman"; "Ochtman, Dirkjan"; "Ochtman" -"Travis Oliphant"; "Oliphant, Travis"; "Oliphant" -"Jason Orendorff"; "Orendorff, Jason"; "Orendorff" -"Tomáš Orsava"; "Orsava, Tomáš"; "Orsava" -"Richard Oudkerk"; "Oudkerk, Richard"; "Oudkerk" -"Ronald Oussoren"; "Oussoren, Ronald"; "Oussoren" -"Julien Palard"; "Palard, Julien"; "Palard" -"Samuele Pedroni"; "Pedroni, Samuele"; "Pedroni" -"Berker Peksag"; "Peksag, Berker"; "Peksag" -"Michel Pelletier"; "Pelletier, Michel"; "Pelletier" -"Tim Peters"; "Peters, Tim"; "Peters" -"Benjamin Peterson"; "Peterson, Benjamin"; "Peterson" -"Jason Petrone"; "Petrone, Jason"; "Petrone" -"Antoine Pitrou"; "Pitrou, Antoine"; "Pitrou" -"Marcel Plch"; "Plch, Marcel"; "Plch" -"James Polley"; "Polley, James"; "Polley" -"Philippe PRADOS"; "Prados, Philippe"; "Prados" -"Elvis Pranskevichus"; "Pranskevichus, Elvis"; "Pranskevichus" -"Paul Prescod"; "Prescod, Paul"; "Prescod" -"(James) Eric Pruitt"; "Pruitt, (James) Eric"; "Pruitt" -"Lukas Puehringer"; "Puehringer, Lukas"; "Puehringer" -"Brian Quinlan"; "Quinlan, Brian"; "Quinlan" -"Terry Reedy"; 
"Reedy, Terry"; "Reedy" -"Lennart Regebro"; "Regebro, Lennart"; "Regebro" -"Sean Reifschneider"; "Reifschneider, Sean"; "Reifschneider" -"Christian R. Reis"; "Reis, Christian R."; "Reis" -"Jonathan Riehl"; "Riehl, Jonathan"; "Riehl" -"Lisa Roach"; "Roach, Lisa"; "Roach" -"Andre Roberge"; "Roberge, Andre"; "Roberge" -"Armin Ronacher"; "Ronacher, Armin"; "Ronacher" -"Guido van Rossum"; "van Rossum, Guido (GvR)"; "GvR" -"Just van Rossum"; "van Rossum, Just (JvR)"; "JvR" -"Todd Rovito"; "Rovito, Todd"; "Rovito" -"Lie Ryan"; "Ryan, Lie"; "Ryan" -"Vinay Sajip"; "Sajip, Vinay"; "Sajip" -"Pablo Galindo Salgado"; "Salgado, Pablo Galindo"; "Salgado" -"Neil Schemenauer"; "Schemenauer, Neil"; "Schemenauer" -"Peter Schneider-Kamp"; "Schneider-Kamp, Peter"; "Schneider-Kamp" -"Ed Schofield"; "Schofield, Ed"; "Schofield" -"Yury Selivanov"; "Selivanov, Yury"; "Selivanov" -"Jiwon Seo"; "Seo, Jiwon"; "Seo" -"Mark Shannon"; "Shannon, Mark"; "Shannon" -"Cameron Simpson"; "Simpson, Cameron"; "Simpson" -"Greg Slodkowicz"; "Slodkowicz, Greg"; "Slodkowicz" -"Nathaniel J. Smith"; "Smith, Nathaniel J."; "Smith" -"Gregory P. Smith"; "Smith, Gregory P."; "Smith" -"Kevin D. Smith"; "Smith, Kevin D."; "Smith" -"Ethan Smith"; "Smith, Ethan"; "Smith" -"Nathaniel Smith"; "Smith, Nathaniel"; "Smith" -"Eric V. Smith"; "Smith, Eric V."; "Smith" -"Eric Snow"; "Snow, Eric"; "Snow" -"Calvin Spealman"; "Spealman, Calvin"; "Spealman" -"Kerrick Staley"; "Staley, Kerrick"; "Staley" -"Greg Stein"; "Stein, Greg"; "Stein" -"Victor Stinner"; "Stinner, Victor"; "Stinner" -"Serhiy Storchaka"; "Storchaka, Serhiy"; "Storchaka" -"Donald Stufft"; "Stufft, Donald"; "Stufft" -"Daniel Stutzbach"; "Stutzbach, Daniel"; "Stutzbach" -"Michael J. 
Sullivan"; "Sullivan, Michael J."; "Sullivan" -"Roman Suzi"; "Suzi, Roman"; "Suzi" -"Dennis Sweeney"; "Sweeney, Dennis"; "Sweeney" -"Talin"; "Talin"; "Talin" -"Steven Taschuk"; "Taschuk, Steven"; "Taschuk" -"Batuhan Taskaya"; "Taskaya, Batuhan"; "Taskaya" -"Martin Teichmann"; "Teichmann, Martin"; "Teichmann" -"The Python core team and community"; "The Python core team and community"; "The Python core team and community" -"Geoffrey Thomas"; "Thomas, Geoffrey"; "Thomas" -"Oren Tirosh"; "Tirosh, Oren"; "Tirosh" -"Stephen J. Turnbull"; "Turnbull, Stephen J."; "Turnbull" -"Daniel Urban"; "Urban, Daniel"; "Urban" -"Eric N. Vander Weele"; "Vander Weele, Eric N."; "Vander Weele" -"Till Varoquaux"; "Varoquaux, Till"; "Varoquaux" -"Alexandre Vassalotti"; "Vassalotti, Alexandre"; "Vassalotti" -"Mike Verdone"; "Verdone, Mike"; "Verdone" -"Dino Viehland"; "Viehland, Dino"; "Viehland" -"Petr Viktorin"; "Viktorin, Petr"; "Viktorin" -"Zachary Ware"; "Ware, Zachary"; "Ware" -"Gregory R. Warnes"; "Warnes, Gregory R."; "Warnes" -"Barry Warsaw"; "Warsaw, Barry"; "Warsaw" -"Terence Way"; "Way, Terence"; "Way" -"Cliff Wells"; "Wells, Cliff"; "Wells" -"Jervis Whitley"; "Whitley, Jervis"; "Whitley" -"Mark Williams"; "Williams, Mark"; "Williams" -"Carol Willing"; "Willing, Carol"; "Willing" -"Greg Wilson"; "Wilson, Greg"; "Wilson" -"Collin Winter"; "Winter, Collin"; "Winter" -"Thomas Wouters"; "Wouters, Thomas"; "Wouters" -"Masayuki Yamamoto"; "Yamamoto, Masayuki"; "Yamamoto" -"Jeffrey Yasskin"; "Yasskin, Jeffrey"; "Yasskin" -"Ka-Ping Yee"; "Yee, Ka-Ping"; "Yee" -"Moshe Zadka"; "Zadka, Moshe"; "Zadka" -"Koos Zevenhoven"; "Zevenhoven, Koos"; "Zevenhoven" -"Huaiyu Zhu"; "Zhu, Huaiyu"; "Zhu" -"Shannon Zhu"; "Zhu, Shannon"; "Zhu" -"Tarek Ziadé"; "Ziadé, Tarek"; "Ziadé" \ No newline at end of file +"Full Name", "Surname First", "Name Reference" +"Aahz", "Aahz", "Aahz" +"James C. 
Ahlstrom", "Ahlstrom, James C.", "Ahlstrom" +"Jim Althoff", "Althoff, Jim", "Althoff" +"Kevin Altis", "Altis, Kevin", "Altis" +"Chris Angelico", "Angelico, Chris", "Angelico" +"Philipp Angerer", "Angerer, Philipp", "Angerer" +"David Ascher", "Ascher, David", "Ascher" +"Peter Astrand", "Astrand, Peter", "Astrand" +"Carl Banks", "Banks, Carl", "Banks" +"Christopher Barker", "Barker, Christopher", "Barker" +"Paul Barrett", "Barrett, Paul", "Barrett" +"Facundo Batista", "Batista, Facundo", "Batista" +"Anthony Baxter", "Baxter, Anthony", "Baxter" +"Stefan Behnel", "Behnel, Stefan", "Behnel" +"Thomas Bellman", "Bellman, Thomas", "Bellman" +"Alexander Belopolsky", "Belopolsky, Alexander", "Belopolsky" +"Eli Bendersky", "Bendersky, Eli", "Bendersky" +"Cory Benfield", "Benfield, Cory", "Benfield" +"Steven Bethard", "Bethard, Steven", "Bethard" +"Stéphane Bidoul", "Bidoul, Stéphane", "Bidoul" +"Stefano Borini", "Borini, Stefano", "Borini" +"Georg Brandl", "Brandl, Georg", "Brandl" +"Erik M. Bray", "Bray, Erik M.", "Bray" +"Gerald Britton", "Britton, Gerald", "Britton" +"Oleg Broytman", "Broytman, Oleg", "Broytman" +"Benoit Bryon", "Bryon, Benoit", "Bryon" +"Brandt Bucher", "Bucher, Brandt", "Bucher" +"Brett Cannon", "Cannon, Brett", "Cannon" +"Justin Cappos", "Cappos, Justin", "Cappos" +"Josiah Carlson", "Carlson, Josiah", "Carlson" +"W Isaac Carroll", "Carroll, W Isaac", "Carroll" +"Matt Chisholm", "Chisholm, Matt", "Chisholm" +"Nick Coghlan", "Coghlan, Nick", "Coghlan" +"Dave Cole", "Cole, Dave", "Cole" +"Robert Collins", "Collins, Robert", "Collins" +"Paul Colomiets", "Colomiets, Paul", "Colomiets" +"Mario Corchero", "Corchero, Mario", "Corchero" +"Christopher A. 
Craig", "Craig, Christopher A.", "Craig" +"Laura Creighton", "Creighton, Laura", "Creighton" +"Steven D'Aprano", "D'Aprano, Steven", "D'Aprano" +"Kushal Das", "Das, Kushal", "Das" +"Ned Deily", "Deily, Ned", "Deily" +"Tim Delaney", "Delaney, Tim", "Delaney" +"Lois Anne DeLong", "DeLong, Lois Anne", "DeLong" +"Jeroen Demeyer", "Demeyer, Jeroen", "Demeyer" +"Vladimir Diaz", "Diaz, Vladimir", "Diaz" +"Jack Diederich", "Diederich, Jack", "Diederich" +"Steve Dower", "Dower, Steve", "Dower" +"Walter Dörwald", "Dörwald, Walter", "Dörwald" +"Fred L. Drake, Jr.", "Drake, Fred L., Jr.", "Drake" +"Michael P. Dubner", "Dubner, Michael P.", "Dubner" +"Paul F. Dubois", "Dubois, Paul F.", "Dubois" +"Ernest W. Durbin III", "Durbin, Ernest W., III", "Durbin" +"P.J. Eby", "Eby, P.J.", "Eby" +"Phillip J. Eby", "Eby, Phillip J.", "Eby" +"Tal Einat", "Einat, Tal", "Einat" +"Micah Elliott", "Elliott, Micah", "Elliott" +"Jeff Epler", "Epler, Jeff", "Epler" +"David Eppstein", "Eppstein, David", "Eppstein" +"Clark C. Evans", "Evans, Clark C.", "Evans" +"Gregory Ewing", "Ewing, Gregory", "Ewing" +"Greg Ewing", "Ewing, Greg", "Ewing" +"Martijn Faassen", "Faassen, Martijn", "Faassen" +"Ben Finney", "Finney, Ben", "Finney" +"Michael Foord", "Foord, Michael", "Foord" +"Ethan Furman", "Furman, Ethan", "Furman" +"Pablo Galindo", "Galindo, Pablo", "Galindo" +"Paul Ganssle", "Ganssle, Paul", "Ganssle" +"Alex Gaynor", "Gaynor, Alex", "Gaynor" +"Pradyun Gedam", "Gedam, Pradyun", "Gedam" +"Damien George", "George, Damien", "George" +"Frédéric B. Giacometti", "Giacometti, Frédéric B.", "Giacometti" +"Scott Gilbert", "Gilbert, Scott", "Gilbert" +"Ryan Gonzalez", "Gonzalez, Ryan", "Gonzalez" +"David Goodger", "Goodger, David", "Goodger" +"Grant Griffin", "Griffin, Grant", "Griffin" +"Mark E. 
Haase", "Haase, Mark E.", "Haase" +"Mark Hammond", "Hammond, Mark", "Hammond" +"Peter Harris", "Harris, Peter", "Harris" +"Larry Hastings", "Hastings, Larry", "Hastings" +"Christian Heimes", "Heimes, Christian", "Heimes" +"Thomas Heller", "Heller, Thomas", "Heller" +"Doug Hellmann", "Hellmann, Doug", "Hellmann" +"Magnus Lie Hetland", "Hetland, Magnus Lie", "Hetland" +"Raymond Hettinger", "Hettinger, Raymond", "Hettinger" +"Neil Hodgson", "Hodgson, Neil", "Hodgson" +"Daniel Holth", "Holth, Daniel", "Holth" +"Philip House", "House, Philip", "House" +"Laurens Van Houtven", "Van Houtven, Laurens", "Houtven" +"Ben Hoyt", "Hoyt, Ben", "Hoyt" +"Miro Hrončok", "Hrončok, Miro", "Hrončok" +"Michael Hudson", "Hudson, Michael", "Hudson" +"Jeremy Hylton", "Hylton, Jeremy", "Hylton" +"Inada Naoki", "Inada, Naoki", "Inada" +"Dustin Ingram", "Ingram, Dustin", "Ingram" +"Atsuo Ishimoto", "Ishimoto, Atsuo", "Ishimoto" +"Jack Jansen", "Jansen, Jack", "Jansen" +"Chris Jerdonek", "Jerdonek, Chris", "Jerdonek" +"Joseph Jevnik", "Jevnik, Joseph", "Jevnik" +"Jim J. Jewett", "Jewett, Jim J.", "Jewett" +"Jim Jewett", "Jewett, Jim", "Jewett" +"Ewa Jodlowska", "Jodlowska, Ewa", "Jodlowska" +"Richard Jones", "Jones, Richard", "Jones" +"Konstantin Kashin", "Kashin, Konstantin", "Kashin" +"Reid Kleckner", "Kleckner, Reid", "Kleckner" +"Thomas Kluyver", "Kluyver, Thomas", "Kluyver" +"Stepan Koltsov", "Koltsov, Stepan", "Koltsov" +"Stefan Krah", "Krah, Stefan", "Krah" +"Sebastian Kreft", "Kreft, Sebastian", "Kreft" +"Holger Krekel", "Krekel, Holger", "Krekel" +"A.M. 
Kuchling", "Kuchling, A.M.", "Kuchling" +"Trishank Karthik Kuppusamy", "Kuppusamy, Trishank Karthik", "Kuppusamy" +"Robert Kuska", "Kuska, Robert", "Kuska" +"Joshua Landau", "Landau, Joshua", "Landau" +"Łukasz Langa", "Langa, Łukasz", "Langa" +"Michael Lee", "Lee, Michael", "Lee" +"Jukka Lehtosalo", "Lehtosalo, Jukka", "Lehtosalo" +"Marc-André Lemburg", "Lemburg, Marc-André", "Lemburg" +"Ivan Levkivskyi", "Levkivskyi, Ivan", "Levkivskyi" +"Gregory Lielens", "Lielens, Gregory", "Lielens" +"Björn Lindqvist", "Lindqvist, Björn", "Lindqvist" +"Joshua Lock", "Lock, Joshua", "Lock" +"Tony Lownds", "Lownds, Tony", "Lownds" +"Martin von Löwis", "von Löwis, Martin", "von Löwis" +"Martin v. Löwis", "\v. Löwis, Martin", "\v. Löwis" +"Mariatta", "Mariatta", "Mariatta" +"Alex Martelli", "Martelli, Alex", "Martelli" +"Joseph Martinot-Lagarde", "Martinot-Lagarde, Joseph", "Martinot-Lagarde" +"Lino Mastrodomenico", "Mastrodomenico, Lino", "Mastrodomenico" +"Patrick Maupin", "Maupin, Patrick", "Maupin" +"Andrew McClelland", "McClelland, Andrew", "McClelland" +"Charles R. McCreary", "McCreary, Charles R.", "McCreary" +"Chris McDonough", "McDonough, Chris", "McDonough" +"Robert T. McGibbon", "McGibbon, Robert T.", "McGibbon" +"Gordon McMillan", "McMillan, Gordon", "McMillan" +"Andrew McNamara", "McNamara, Andrew", "McNamara" +"Ezio Melotti", "Melotti, Ezio", "Melotti" +"Mark Mendoza", "Mendoza, Mark", "Mendoza" +"Markus Meskanen", "Meskanen, Markus", "Meskanen" +"Mike Meyer", "Meyer, Mike", "Meyer" +"Carl Meyer", "Meyer, Carl", "Meyer" +"Trent Mick", "Mick, Trent", "Mick" +"Mike G. 
Miller", "Miller, Mike G.", "Miller" +"Skip Montanaro", "Montanaro, Skip", "Montanaro" +"Peter Moody", "Moody, Peter", "Moody" +"Marina Moore", "Moore, Marina", "Moore" +"Paul Moore", "Moore, Paul", "Moore" +"R David Murray", "Murray, R David", "Murray" +"Charles-François Natali", "Natali, Charles-François", "Natali" +"Lysandros Nikolaou", "Nikolaou, Lysandros", "Nikolaou" +"Jesse Noller", "Noller, Jesse", "Noller" +"Ben North", "North, Ben", "North" +"Neal Norwitz", "Norwitz, Neal", "Norwitz" +"Dirkjan Ochtman", "Ochtman, Dirkjan", "Ochtman" +"Travis Oliphant", "Oliphant, Travis", "Oliphant" +"Jason Orendorff", "Orendorff, Jason", "Orendorff" +"Tomáš Orsava", "Orsava, Tomáš", "Orsava" +"Richard Oudkerk", "Oudkerk, Richard", "Oudkerk" +"Ronald Oussoren", "Oussoren, Ronald", "Oussoren" +"Julien Palard", "Palard, Julien", "Palard" +"Samuele Pedroni", "Pedroni, Samuele", "Pedroni" +"Berker Peksag", "Peksag, Berker", "Peksag" +"Michel Pelletier", "Pelletier, Michel", "Pelletier" +"Tim Peters", "Peters, Tim", "Peters" +"Benjamin Peterson", "Peterson, Benjamin", "Peterson" +"Jason Petrone", "Petrone, Jason", "Petrone" +"Antoine Pitrou", "Pitrou, Antoine", "Pitrou" +"Marcel Plch", "Plch, Marcel", "Plch" +"James Polley", "Polley, James", "Polley" +"Philippe PRADOS", "Prados, Philippe", "Prados" +"Elvis Pranskevichus", "Pranskevichus, Elvis", "Pranskevichus" +"Paul Prescod", "Prescod, Paul", "Prescod" +"(James) Eric Pruitt", "Pruitt, (James) Eric", "Pruitt" +"Lukas Puehringer", "Puehringer, Lukas", "Puehringer" +"Brian Quinlan", "Quinlan, Brian", "Quinlan" +"Terry Reedy", "Reedy, Terry", "Reedy" +"Lennart Regebro", "Regebro, Lennart", "Regebro" +"Sean Reifschneider", "Reifschneider, Sean", "Reifschneider" +"Christian R. 
Reis", "Reis, Christian R.", "Reis" +"Jonathan Riehl", "Riehl, Jonathan", "Riehl" +"Lisa Roach", "Roach, Lisa", "Roach" +"Andre Roberge", "Roberge, Andre", "Roberge" +"Armin Ronacher", "Ronacher, Armin", "Ronacher" +"Guido van Rossum", "van Rossum, Guido (GvR)", "GvR" +"Just van Rossum", "van Rossum, Just (JvR)", "JvR" +"Todd Rovito", "Rovito, Todd", "Rovito" +"Lie Ryan", "Ryan, Lie", "Ryan" +"Vinay Sajip", "Sajip, Vinay", "Sajip" +"Pablo Galindo Salgado", "Salgado, Pablo Galindo", "Salgado" +"Neil Schemenauer", "Schemenauer, Neil", "Schemenauer" +"Peter Schneider-Kamp", "Schneider-Kamp, Peter", "Schneider-Kamp" +"Ed Schofield", "Schofield, Ed", "Schofield" +"Yury Selivanov", "Selivanov, Yury", "Selivanov" +"Jiwon Seo", "Seo, Jiwon", "Seo" +"Mark Shannon", "Shannon, Mark", "Shannon" +"Cameron Simpson", "Simpson, Cameron", "Simpson" +"Greg Slodkowicz", "Slodkowicz, Greg", "Slodkowicz" +"Nathaniel J. Smith", "Smith, Nathaniel J.", "Smith" +"Gregory P. Smith", "Smith, Gregory P.", "Smith" +"Kevin D. Smith", "Smith, Kevin D.", "Smith" +"Ethan Smith", "Smith, Ethan", "Smith" +"Nathaniel Smith", "Smith, Nathaniel", "Smith" +"Eric V. Smith", "Smith, Eric V.", "Smith" +"Eric Snow", "Snow, Eric", "Snow" +"Calvin Spealman", "Spealman, Calvin", "Spealman" +"Kerrick Staley", "Staley, Kerrick", "Staley" +"Greg Stein", "Stein, Greg", "Stein" +"Victor Stinner", "Stinner, Victor", "Stinner" +"Serhiy Storchaka", "Storchaka, Serhiy", "Storchaka" +"Donald Stufft", "Stufft, Donald", "Stufft" +"Daniel Stutzbach", "Stutzbach, Daniel", "Stutzbach" +"Michael J. 
Sullivan", "Sullivan, Michael J.", "Sullivan" +"Roman Suzi", "Suzi, Roman", "Suzi" +"Dennis Sweeney", "Sweeney, Dennis", "Sweeney" +"Talin", "Talin", "Talin" +"Steven Taschuk", "Taschuk, Steven", "Taschuk" +"Batuhan Taskaya", "Taskaya, Batuhan", "Taskaya" +"Martin Teichmann", "Teichmann, Martin", "Teichmann" +"The Python core team and community", "The Python core team and community", "The Python core team and community" +"Geoffrey Thomas", "Thomas, Geoffrey", "Thomas" +"Oren Tirosh", "Tirosh, Oren", "Tirosh" +"Stephen J. Turnbull", "Turnbull, Stephen J.", "Turnbull" +"Daniel Urban", "Urban, Daniel", "Urban" +"Eric N. Vander Weele", "Vander Weele, Eric N.", "Vander Weele" +"Till Varoquaux", "Varoquaux, Till", "Varoquaux" +"Alexandre Vassalotti", "Vassalotti, Alexandre", "Vassalotti" +"Mike Verdone", "Verdone, Mike", "Verdone" +"Dino Viehland", "Viehland, Dino", "Viehland" +"Petr Viktorin", "Viktorin, Petr", "Viktorin" +"Zachary Ware", "Ware, Zachary", "Ware" +"Gregory R. Warnes", "Warnes, Gregory R.", "Warnes" +"Barry Warsaw", "Warsaw, Barry", "Warsaw" +"Terence Way", "Way, Terence", "Way" +"Cliff Wells", "Wells, Cliff", "Wells" +"Jervis Whitley", "Whitley, Jervis", "Whitley" +"Mark Williams", "Williams, Mark", "Williams" +"Carol Willing", "Willing, Carol", "Willing" +"Greg Wilson", "Wilson, Greg", "Wilson" +"Collin Winter", "Winter, Collin", "Winter" +"Thomas Wouters", "Wouters, Thomas", "Wouters" +"Masayuki Yamamoto", "Yamamoto, Masayuki", "Yamamoto" +"Jeffrey Yasskin", "Yasskin, Jeffrey", "Yasskin" +"Ka-Ping Yee", "Yee, Ka-Ping", "Yee" +"Moshe Zadka", "Zadka, Moshe", "Zadka" +"Koos Zevenhoven", "Zevenhoven, Koos", "Zevenhoven" +"Huaiyu Zhu", "Zhu, Huaiyu", "Zhu" +"Shannon Zhu", "Zhu, Shannon", "Zhu" +"Tarek Ziadé", "Ziadé, Tarek", "Ziadé" \ No newline at end of file diff --git a/genpepindex.py b/genpepindex.py index 30845f9a6b7..820176afc11 100755 --- a/genpepindex.py +++ b/genpepindex.py @@ -35,7 +35,7 @@ def main(argv): path = argv[1] with open("AUTHORS.csv", 
"r", encoding="UTF8") as f: - read = csv.DictReader(f, delimiter=";") + read = csv.DictReader(f, quotechar='"', skipinitialspace=True) author_data = {} for line in read: full_name = line.pop("Full Name").strip().strip("\"") From 7a0b5b5083248f5e6bb32e438de7ca0b5e457cb2 Mon Sep 17 00:00:00 2001 From: AA Turner <9087854+AA-Turner@users.noreply.github.com> Date: Thu, 30 Apr 2020 00:42:18 +0100 Subject: [PATCH 5/9] Fix Mark Williams --- AUTHORS.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS.csv b/AUTHORS.csv index 7450a4a5cea..0270e4383c4 100644 --- a/AUTHORS.csv +++ b/AUTHORS.csv @@ -234,7 +234,7 @@ "Terence Way", "Way, Terence", "Way" "Cliff Wells", "Wells, Cliff", "Wells" "Jervis Whitley", "Whitley, Jervis", "Whitley" -"Mark Williams", "Williams, Mark", "Williams" +"Mark Williams", "Williams, Mark", "Williams" "Carol Willing", "Willing, Carol", "Willing" "Greg Wilson", "Wilson, Greg", "Wilson" "Collin Winter", "Winter, Collin", "Winter" From 3c6520dc5fd5d0a4fad59c166b5c558cda2aab6a Mon Sep 17 00:00:00 2001 From: AA Turner <9087854+AA-Turner@users.noreply.github.com> Date: Wed, 17 Jun 2020 18:29:24 +0100 Subject: [PATCH 6/9] Rollback name parsing changes and move to using author exception file as per Victor's suggestion --- AUTHORS.csv | 252 +------------------------------------------------ genpepindex.py | 13 +-- pep0/pep.py | 103 ++++++++++++++++++-- 3 files changed, 106 insertions(+), 262 deletions(-) diff --git a/AUTHORS.csv b/AUTHORS.csv index 0270e4383c4..44f9ca161c1 100644 --- a/AUTHORS.csv +++ b/AUTHORS.csv @@ -1,249 +1,3 @@ -"Full Name", "Surname First", "Name Reference" -"Aahz", "Aahz", "Aahz" -"James C. 
Ahlstrom", "Ahlstrom, James C.", "Ahlstrom" -"Jim Althoff", "Althoff, Jim", "Althoff" -"Kevin Altis", "Altis, Kevin", "Altis" -"Chris Angelico", "Angelico, Chris", "Angelico" -"Philipp Angerer", "Angerer, Philipp", "Angerer" -"David Ascher", "Ascher, David", "Ascher" -"Peter Astrand", "Astrand, Peter", "Astrand" -"Carl Banks", "Banks, Carl", "Banks" -"Christopher Barker", "Barker, Christopher", "Barker" -"Paul Barrett", "Barrett, Paul", "Barrett" -"Facundo Batista", "Batista, Facundo", "Batista" -"Anthony Baxter", "Baxter, Anthony", "Baxter" -"Stefan Behnel", "Behnel, Stefan", "Behnel" -"Thomas Bellman", "Bellman, Thomas", "Bellman" -"Alexander Belopolsky", "Belopolsky, Alexander", "Belopolsky" -"Eli Bendersky", "Bendersky, Eli", "Bendersky" -"Cory Benfield", "Benfield, Cory", "Benfield" -"Steven Bethard", "Bethard, Steven", "Bethard" -"Stéphane Bidoul", "Bidoul, Stéphane", "Bidoul" -"Stefano Borini", "Borini, Stefano", "Borini" -"Georg Brandl", "Brandl, Georg", "Brandl" -"Erik M. Bray", "Bray, Erik M.", "Bray" -"Gerald Britton", "Britton, Gerald", "Britton" -"Oleg Broytman", "Broytman, Oleg", "Broytman" -"Benoit Bryon", "Bryon, Benoit", "Bryon" -"Brandt Bucher", "Bucher, Brandt", "Bucher" -"Brett Cannon", "Cannon, Brett", "Cannon" -"Justin Cappos", "Cappos, Justin", "Cappos" -"Josiah Carlson", "Carlson, Josiah", "Carlson" -"W Isaac Carroll", "Carroll, W Isaac", "Carroll" -"Matt Chisholm", "Chisholm, Matt", "Chisholm" -"Nick Coghlan", "Coghlan, Nick", "Coghlan" -"Dave Cole", "Cole, Dave", "Cole" -"Robert Collins", "Collins, Robert", "Collins" -"Paul Colomiets", "Colomiets, Paul", "Colomiets" -"Mario Corchero", "Corchero, Mario", "Corchero" -"Christopher A. 
Craig", "Craig, Christopher A.", "Craig" -"Laura Creighton", "Creighton, Laura", "Creighton" -"Steven D'Aprano", "D'Aprano, Steven", "D'Aprano" -"Kushal Das", "Das, Kushal", "Das" -"Ned Deily", "Deily, Ned", "Deily" -"Tim Delaney", "Delaney, Tim", "Delaney" -"Lois Anne DeLong", "DeLong, Lois Anne", "DeLong" -"Jeroen Demeyer", "Demeyer, Jeroen", "Demeyer" -"Vladimir Diaz", "Diaz, Vladimir", "Diaz" -"Jack Diederich", "Diederich, Jack", "Diederich" -"Steve Dower", "Dower, Steve", "Dower" -"Walter Dörwald", "Dörwald, Walter", "Dörwald" -"Fred L. Drake, Jr.", "Drake, Fred L., Jr.", "Drake" -"Michael P. Dubner", "Dubner, Michael P.", "Dubner" -"Paul F. Dubois", "Dubois, Paul F.", "Dubois" -"Ernest W. Durbin III", "Durbin, Ernest W., III", "Durbin" -"P.J. Eby", "Eby, P.J.", "Eby" -"Phillip J. Eby", "Eby, Phillip J.", "Eby" -"Tal Einat", "Einat, Tal", "Einat" -"Micah Elliott", "Elliott, Micah", "Elliott" -"Jeff Epler", "Epler, Jeff", "Epler" -"David Eppstein", "Eppstein, David", "Eppstein" -"Clark C. Evans", "Evans, Clark C.", "Evans" -"Gregory Ewing", "Ewing, Gregory", "Ewing" -"Greg Ewing", "Ewing, Greg", "Ewing" -"Martijn Faassen", "Faassen, Martijn", "Faassen" -"Ben Finney", "Finney, Ben", "Finney" -"Michael Foord", "Foord, Michael", "Foord" -"Ethan Furman", "Furman, Ethan", "Furman" -"Pablo Galindo", "Galindo, Pablo", "Galindo" -"Paul Ganssle", "Ganssle, Paul", "Ganssle" -"Alex Gaynor", "Gaynor, Alex", "Gaynor" -"Pradyun Gedam", "Gedam, Pradyun", "Gedam" -"Damien George", "George, Damien", "George" -"Frédéric B. Giacometti", "Giacometti, Frédéric B.", "Giacometti" -"Scott Gilbert", "Gilbert, Scott", "Gilbert" -"Ryan Gonzalez", "Gonzalez, Ryan", "Gonzalez" -"David Goodger", "Goodger, David", "Goodger" -"Grant Griffin", "Griffin, Grant", "Griffin" -"Mark E. 
Haase", "Haase, Mark E.", "Haase" -"Mark Hammond", "Hammond, Mark", "Hammond" -"Peter Harris", "Harris, Peter", "Harris" -"Larry Hastings", "Hastings, Larry", "Hastings" -"Christian Heimes", "Heimes, Christian", "Heimes" -"Thomas Heller", "Heller, Thomas", "Heller" -"Doug Hellmann", "Hellmann, Doug", "Hellmann" -"Magnus Lie Hetland", "Hetland, Magnus Lie", "Hetland" -"Raymond Hettinger", "Hettinger, Raymond", "Hettinger" -"Neil Hodgson", "Hodgson, Neil", "Hodgson" -"Daniel Holth", "Holth, Daniel", "Holth" -"Philip House", "House, Philip", "House" -"Laurens Van Houtven", "Van Houtven, Laurens", "Houtven" -"Ben Hoyt", "Hoyt, Ben", "Hoyt" -"Miro Hrončok", "Hrončok, Miro", "Hrončok" -"Michael Hudson", "Hudson, Michael", "Hudson" -"Jeremy Hylton", "Hylton, Jeremy", "Hylton" -"Inada Naoki", "Inada, Naoki", "Inada" -"Dustin Ingram", "Ingram, Dustin", "Ingram" -"Atsuo Ishimoto", "Ishimoto, Atsuo", "Ishimoto" -"Jack Jansen", "Jansen, Jack", "Jansen" -"Chris Jerdonek", "Jerdonek, Chris", "Jerdonek" -"Joseph Jevnik", "Jevnik, Joseph", "Jevnik" -"Jim J. Jewett", "Jewett, Jim J.", "Jewett" -"Jim Jewett", "Jewett, Jim", "Jewett" -"Ewa Jodlowska", "Jodlowska, Ewa", "Jodlowska" -"Richard Jones", "Jones, Richard", "Jones" -"Konstantin Kashin", "Kashin, Konstantin", "Kashin" -"Reid Kleckner", "Kleckner, Reid", "Kleckner" -"Thomas Kluyver", "Kluyver, Thomas", "Kluyver" -"Stepan Koltsov", "Koltsov, Stepan", "Koltsov" -"Stefan Krah", "Krah, Stefan", "Krah" -"Sebastian Kreft", "Kreft, Sebastian", "Kreft" -"Holger Krekel", "Krekel, Holger", "Krekel" -"A.M. 
Kuchling", "Kuchling, A.M.", "Kuchling" -"Trishank Karthik Kuppusamy", "Kuppusamy, Trishank Karthik", "Kuppusamy" -"Robert Kuska", "Kuska, Robert", "Kuska" -"Joshua Landau", "Landau, Joshua", "Landau" -"Łukasz Langa", "Langa, Łukasz", "Langa" -"Michael Lee", "Lee, Michael", "Lee" -"Jukka Lehtosalo", "Lehtosalo, Jukka", "Lehtosalo" -"Marc-André Lemburg", "Lemburg, Marc-André", "Lemburg" -"Ivan Levkivskyi", "Levkivskyi, Ivan", "Levkivskyi" -"Gregory Lielens", "Lielens, Gregory", "Lielens" -"Björn Lindqvist", "Lindqvist, Björn", "Lindqvist" -"Joshua Lock", "Lock, Joshua", "Lock" -"Tony Lownds", "Lownds, Tony", "Lownds" -"Martin von Löwis", "von Löwis, Martin", "von Löwis" -"Martin v. Löwis", "\v. Löwis, Martin", "\v. Löwis" -"Mariatta", "Mariatta", "Mariatta" -"Alex Martelli", "Martelli, Alex", "Martelli" -"Joseph Martinot-Lagarde", "Martinot-Lagarde, Joseph", "Martinot-Lagarde" -"Lino Mastrodomenico", "Mastrodomenico, Lino", "Mastrodomenico" -"Patrick Maupin", "Maupin, Patrick", "Maupin" -"Andrew McClelland", "McClelland, Andrew", "McClelland" -"Charles R. McCreary", "McCreary, Charles R.", "McCreary" -"Chris McDonough", "McDonough, Chris", "McDonough" -"Robert T. McGibbon", "McGibbon, Robert T.", "McGibbon" -"Gordon McMillan", "McMillan, Gordon", "McMillan" -"Andrew McNamara", "McNamara, Andrew", "McNamara" -"Ezio Melotti", "Melotti, Ezio", "Melotti" -"Mark Mendoza", "Mendoza, Mark", "Mendoza" -"Markus Meskanen", "Meskanen, Markus", "Meskanen" -"Mike Meyer", "Meyer, Mike", "Meyer" -"Carl Meyer", "Meyer, Carl", "Meyer" -"Trent Mick", "Mick, Trent", "Mick" -"Mike G. 
Miller", "Miller, Mike G.", "Miller" -"Skip Montanaro", "Montanaro, Skip", "Montanaro" -"Peter Moody", "Moody, Peter", "Moody" -"Marina Moore", "Moore, Marina", "Moore" -"Paul Moore", "Moore, Paul", "Moore" -"R David Murray", "Murray, R David", "Murray" -"Charles-François Natali", "Natali, Charles-François", "Natali" -"Lysandros Nikolaou", "Nikolaou, Lysandros", "Nikolaou" -"Jesse Noller", "Noller, Jesse", "Noller" -"Ben North", "North, Ben", "North" -"Neal Norwitz", "Norwitz, Neal", "Norwitz" -"Dirkjan Ochtman", "Ochtman, Dirkjan", "Ochtman" -"Travis Oliphant", "Oliphant, Travis", "Oliphant" -"Jason Orendorff", "Orendorff, Jason", "Orendorff" -"Tomáš Orsava", "Orsava, Tomáš", "Orsava" -"Richard Oudkerk", "Oudkerk, Richard", "Oudkerk" -"Ronald Oussoren", "Oussoren, Ronald", "Oussoren" -"Julien Palard", "Palard, Julien", "Palard" -"Samuele Pedroni", "Pedroni, Samuele", "Pedroni" -"Berker Peksag", "Peksag, Berker", "Peksag" -"Michel Pelletier", "Pelletier, Michel", "Pelletier" -"Tim Peters", "Peters, Tim", "Peters" -"Benjamin Peterson", "Peterson, Benjamin", "Peterson" -"Jason Petrone", "Petrone, Jason", "Petrone" -"Antoine Pitrou", "Pitrou, Antoine", "Pitrou" -"Marcel Plch", "Plch, Marcel", "Plch" -"James Polley", "Polley, James", "Polley" -"Philippe PRADOS", "Prados, Philippe", "Prados" -"Elvis Pranskevichus", "Pranskevichus, Elvis", "Pranskevichus" -"Paul Prescod", "Prescod, Paul", "Prescod" -"(James) Eric Pruitt", "Pruitt, (James) Eric", "Pruitt" -"Lukas Puehringer", "Puehringer, Lukas", "Puehringer" -"Brian Quinlan", "Quinlan, Brian", "Quinlan" -"Terry Reedy", "Reedy, Terry", "Reedy" -"Lennart Regebro", "Regebro, Lennart", "Regebro" -"Sean Reifschneider", "Reifschneider, Sean", "Reifschneider" -"Christian R. 
Reis", "Reis, Christian R.", "Reis" -"Jonathan Riehl", "Riehl, Jonathan", "Riehl" -"Lisa Roach", "Roach, Lisa", "Roach" -"Andre Roberge", "Roberge, Andre", "Roberge" -"Armin Ronacher", "Ronacher, Armin", "Ronacher" -"Guido van Rossum", "van Rossum, Guido (GvR)", "GvR" -"Just van Rossum", "van Rossum, Just (JvR)", "JvR" -"Todd Rovito", "Rovito, Todd", "Rovito" -"Lie Ryan", "Ryan, Lie", "Ryan" -"Vinay Sajip", "Sajip, Vinay", "Sajip" -"Pablo Galindo Salgado", "Salgado, Pablo Galindo", "Salgado" -"Neil Schemenauer", "Schemenauer, Neil", "Schemenauer" -"Peter Schneider-Kamp", "Schneider-Kamp, Peter", "Schneider-Kamp" -"Ed Schofield", "Schofield, Ed", "Schofield" -"Yury Selivanov", "Selivanov, Yury", "Selivanov" -"Jiwon Seo", "Seo, Jiwon", "Seo" -"Mark Shannon", "Shannon, Mark", "Shannon" -"Cameron Simpson", "Simpson, Cameron", "Simpson" -"Greg Slodkowicz", "Slodkowicz, Greg", "Slodkowicz" -"Nathaniel J. Smith", "Smith, Nathaniel J.", "Smith" -"Gregory P. Smith", "Smith, Gregory P.", "Smith" -"Kevin D. Smith", "Smith, Kevin D.", "Smith" -"Ethan Smith", "Smith, Ethan", "Smith" -"Nathaniel Smith", "Smith, Nathaniel", "Smith" -"Eric V. Smith", "Smith, Eric V.", "Smith" -"Eric Snow", "Snow, Eric", "Snow" -"Calvin Spealman", "Spealman, Calvin", "Spealman" -"Kerrick Staley", "Staley, Kerrick", "Staley" -"Greg Stein", "Stein, Greg", "Stein" -"Victor Stinner", "Stinner, Victor", "Stinner" -"Serhiy Storchaka", "Storchaka, Serhiy", "Storchaka" -"Donald Stufft", "Stufft, Donald", "Stufft" -"Daniel Stutzbach", "Stutzbach, Daniel", "Stutzbach" -"Michael J. 
Sullivan", "Sullivan, Michael J.", "Sullivan" -"Roman Suzi", "Suzi, Roman", "Suzi" -"Dennis Sweeney", "Sweeney, Dennis", "Sweeney" -"Talin", "Talin", "Talin" -"Steven Taschuk", "Taschuk, Steven", "Taschuk" -"Batuhan Taskaya", "Taskaya, Batuhan", "Taskaya" -"Martin Teichmann", "Teichmann, Martin", "Teichmann" -"The Python core team and community", "The Python core team and community", "The Python core team and community" -"Geoffrey Thomas", "Thomas, Geoffrey", "Thomas" -"Oren Tirosh", "Tirosh, Oren", "Tirosh" -"Stephen J. Turnbull", "Turnbull, Stephen J.", "Turnbull" -"Daniel Urban", "Urban, Daniel", "Urban" -"Eric N. Vander Weele", "Vander Weele, Eric N.", "Vander Weele" -"Till Varoquaux", "Varoquaux, Till", "Varoquaux" -"Alexandre Vassalotti", "Vassalotti, Alexandre", "Vassalotti" -"Mike Verdone", "Verdone, Mike", "Verdone" -"Dino Viehland", "Viehland, Dino", "Viehland" -"Petr Viktorin", "Viktorin, Petr", "Viktorin" -"Zachary Ware", "Ware, Zachary", "Ware" -"Gregory R. Warnes", "Warnes, Gregory R.", "Warnes" -"Barry Warsaw", "Warsaw, Barry", "Warsaw" -"Terence Way", "Way, Terence", "Way" -"Cliff Wells", "Wells, Cliff", "Wells" -"Jervis Whitley", "Whitley, Jervis", "Whitley" -"Mark Williams", "Williams, Mark", "Williams" -"Carol Willing", "Willing, Carol", "Willing" -"Greg Wilson", "Wilson, Greg", "Wilson" -"Collin Winter", "Winter, Collin", "Winter" -"Thomas Wouters", "Wouters, Thomas", "Wouters" -"Masayuki Yamamoto", "Yamamoto, Masayuki", "Yamamoto" -"Jeffrey Yasskin", "Yasskin, Jeffrey", "Yasskin" -"Ka-Ping Yee", "Yee, Ka-Ping", "Yee" -"Moshe Zadka", "Zadka, Moshe", "Zadka" -"Koos Zevenhoven", "Zevenhoven, Koos", "Zevenhoven" -"Huaiyu Zhu", "Zhu, Huaiyu", "Zhu" -"Shannon Zhu", "Zhu, Shannon", "Zhu" -"Tarek Ziadé", "Ziadé, Tarek", "Ziadé" \ No newline at end of file +Full Name, Surname First, Name Reference +Ernest W. 
Durbin III, "Durbin, Ernest W., III", Durbin +Inada Naoki, "Inada, Naoki", Inada \ No newline at end of file diff --git a/genpepindex.py b/genpepindex.py index 820176afc11..a2c89b093b6 100755 --- a/genpepindex.py +++ b/genpepindex.py @@ -34,13 +34,14 @@ def main(argv): else: path = argv[1] + # AUTHORS.csv is an exception file for PEP0 name parsing with open("AUTHORS.csv", "r", encoding="UTF8") as f: read = csv.DictReader(f, quotechar='"', skipinitialspace=True) - author_data = {} + author_exception_data = {} for line in read: - full_name = line.pop("Full Name").strip().strip("\"") - details = {k.strip().strip("\""): v.strip().strip("\"") for k, v in line.items()} - author_data[full_name] = details + full_name = line.pop("Full Name").strip() + details = {k.strip(): v.strip() for k, v in line.items()} + author_exception_data[full_name] = details peps = [] if os.path.isdir(path): @@ -53,7 +54,7 @@ def main(argv): if file_path.startswith("pep-") and file_path.endswith((".txt", "rst")): with codecs.open(abs_file_path, 'r', encoding='UTF-8') as pep_file: try: - pep = PEP(pep_file, author_data) + pep = PEP(pep_file, author_exception_data) if pep.number != int(file_path[4:-4]): raise PEPError('PEP number does not match file name', file_path, pep.number) @@ -66,7 +67,7 @@ def main(argv): peps.sort(key=attrgetter('number')) elif os.path.isfile(path): with open(path, 'r') as pep_file: - peps.append(PEP(pep_file, author_data)) + peps.append(PEP(pep_file, author_exception_data)) else: raise ValueError("argument must be a directory or file path") diff --git a/pep0/pep.py b/pep0/pep.py index fc7889bfc41..9007db44104 100644 --- a/pep0/pep.py +++ b/pep0/pep.py @@ -60,7 +60,7 @@ class Author(object): The author's email address.
""" - def __init__(self, author_and_email_tuple, authors_lookup): + def __init__(self, author_and_email_tuple, authors_exceptions): """Parse the name and email address of an author.""" self.first = self.last = '' @@ -68,10 +68,40 @@ def __init__(self, author_and_email_tuple, authors_lookup): self.first_last = name.strip() self.email = email.lower() - name_dict = authors_lookup[self.first_last] - - self.last_first = name_dict["Surname First"] - self.nick = name_dict["Name Reference"] + name_dict = authors_exceptions.get(self.first_last) + if name_dict: + self.last_first = name_dict["Surname First"] + self.nick = name_dict["Name Reference"] + else: + self.set_name_parts() + + def set_name_parts(self): + name_dict = self._parse_name(self.first_last) + suffix = name_dict.get("suffix") + if "name" in name_dict: + self.last_first = name_dict["name"] + self.nick = name_dict["name"] + else: + self.first = name_dict["forename"].rstrip() + self.last = name_dict["surname"] + if self.last[1:2] == ".": + # Add an escape to avoid docutils turning `v.` into `22.`. + self.last = "\\" + self.last + self.last_first = ", ".join([self.last, self.first]) + self.nick = self.last + + if suffix: + self.last_first += f", {suffix}" + + if self.last == "van Rossum": + # Special case for our beloved BDFL. :) + if self.first == "Guido": + self.nick = "GvR" + elif self.first == "Just": + self.nick = "JvR" + else: + raise ValueError(f"unknown van Rossum ({self.first_last})!") + self.last_first += f" ({self.nick})" def __hash__(self): return hash(self.first_last) @@ -91,6 +121,65 @@ def sort_by(self): base = self.last.lower() return unicodedata.normalize('NFKD', base).encode('ASCII', 'ignore') + @staticmethod + def _parse_name(full_name): + """Decompose a full name into parts. + + If a mononym (e.g., 'Aahz') then return the full name. If there are + suffixes in the name (e.g. ', Jr.' or 'III'), then find and extract + them.
If there is a middle initial followed by a full stop, then + combine the following words into a surname (e.g. N. Vander Weele). If + there is a leading, lowercase portion to the last name (e.g. 'van' or + 'von') then include it in the surname. Raises ValueError if empty. + + """ + possible_suffixes = ["Jr", "Jr.", "II", "III"] + special_cases = ["The Python core team and community"] + + if full_name in special_cases: + return {"name": full_name} + + suffix_partition = full_name.partition(",") + pre_suffix = suffix_partition[0].strip() + suffix = suffix_partition[2].strip() + + name_parts = pre_suffix.split() + num_parts = len(name_parts) + name = {"suffix": suffix} + + if num_parts == 0: + raise ValueError("Name is empty!") + elif num_parts == 1: + name.update(name=name_parts[0]) + elif num_parts == 2: + name.update(forename=name_parts[0], surname=name_parts[1]) + elif num_parts > 2: + # handles III etc. + if name_parts[-1] in possible_suffixes: + new_suffix = " ".join([*name_parts[-1:], suffix]).strip() + name_parts.pop(-1) + name.update(suffix=new_suffix) + + # handles von, van, v. etc. + if name_parts[-2].islower(): + forename = " ".join(name_parts[:-2]) + surname = " ".join(name_parts[-2:]) + name.update(forename=forename, surname=surname) + + # handles double surnames after a middle initial (e.g. N.
Vander Weele) + elif any(s.endswith(".") for s in name_parts): + split_position = [i for i, x in enumerate(name_parts) if x.endswith(".")][-1] + 1 + forename = " ".join(name_parts[:split_position]) + surname = " ".join(name_parts[split_position:]) + name.update(forename=forename, surname=surname) + + else: + forename = " ".join(name_parts[:-1]) + surname = " ".join(name_parts[-1:]) + name.update(forename=forename, surname=surname) + + return name + class PEP(object): @@ -135,7 +224,7 @@ class PEP(object): u"Rejected", u"Withdrawn", u"Deferred", u"Final", u"Active", u"Draft", u"Superseded") - def __init__(self, pep_file, author_lookup: dict): + def __init__(self, pep_file, author_exceptions: dict): """Init object from an open PEP file object.""" # Parse the headers. self.filename = pep_file @@ -203,7 +292,7 @@ def __init__(self, pep_file, author_lookup: dict): if len(authors_and_emails) < 1: raise PEPError("no authors found", pep_file.name, self.number) - self.authors = [Author(author_email, author_lookup) for author_email in authors_and_emails] + self.authors = [Author(author_email, author_exceptions) for author_email in authors_and_emails] def _parse_author(self, data): """Return a list of author names and emails.""" From ee33701d55dfa199b28df27bf4279e09d80e4902 Mon Sep 17 00:00:00 2001 From: AA Turner <9087854+AA-Turner@users.noreply.github.com> Date: Wed, 17 Jun 2020 19:54:01 +0100 Subject: [PATCH 7/9] Move more special cases to exceptions file --- AUTHORS.csv | 5 ++++- pep0/pep.py | 14 -------------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/AUTHORS.csv b/AUTHORS.csv index 44f9ca161c1..c89e021b708 100644 --- a/AUTHORS.csv +++ b/AUTHORS.csv @@ -1,3 +1,6 @@ Full Name, Surname First, Name Reference Ernest W.
Durbin III, "Durbin, Ernest W., III", Durbin -Inada Naoki, "Inada, Naoki", Inada \ No newline at end of file +Inada Naoki, "Inada, Naoki", Inada +Guido van Rossum, "van Rossum, Guido (GvR)", GvR +Just van Rossum, "van Rossum, Just (JvR)", JvR +The Python core team and community, The Python core team and community, The Python core team and community diff --git a/pep0/pep.py b/pep0/pep.py index 9007db44104..a55e3d750ff 100644 --- a/pep0/pep.py +++ b/pep0/pep.py @@ -93,16 +93,6 @@ def set_name_parts(self): if suffix: self.last_first += f", {suffix}" - if self.last == "van Rossum": - # Special case for our beloved BDFL. :) - if self.first == "Guido": - self.nick = "GvR" - elif self.first == "Just": - self.nick = "JvR" - else: - raise ValueError(f"unknown van Rossum ({self.first_last})!") - self.last_first += f" ({self.nick})" - def __hash__(self): return hash(self.first_last) @@ -134,10 +124,6 @@ def _parse_name(full_name): """ possible_suffixes = ["Jr", "Jr.", "II", "III"] - special_cases = ["The Python core team and community"] - - if full_name in special_cases: - return {"name": full_name} suffix_partition = full_name.partition(",") pre_suffix = suffix_partition[0].strip() From efdaf15a8a5377955a01ef1dca8b50dc840305e2 Mon Sep 17 00:00:00 2001 From: AA Turner <9087854+AA-Turner@users.noreply.github.com> Date: Sun, 21 Jun 2020 20:11:01 +0100 Subject: [PATCH 8/9] python-dev nickname --- AUTHORS.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AUTHORS.csv b/AUTHORS.csv index c89e021b708..dd4bd9b4796 100644 --- a/AUTHORS.csv +++ b/AUTHORS.csv @@ -3,4 +3,4 @@ Ernest W. 
Durbin III, "Durbin, Ernest W., III", Durbin Inada Naoki, "Inada, Naoki", Inada Guido van Rossum, "van Rossum, Guido (GvR)", GvR Just van Rossum, "van Rossum, Just (JvR)", JvR -The Python core team and community, The Python core team and community, The Python core team and community +The Python core team and community, The Python core team and community, python-dev From 8f9db05d3d7aafec0a97c1e13384f25145297be0 Mon Sep 17 00:00:00 2001 From: AA Turner <9087854+AA-Turner@users.noreply.github.com> Date: Sun, 21 Jun 2020 20:50:21 +0100 Subject: [PATCH 9/9] Add duplicate names and de-duping logic --- AUTHORS.csv | 5 +++++ pep0/output.py | 7 ++++--- pep0/pep.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/AUTHORS.csv b/AUTHORS.csv index dd4bd9b4796..1c521f7779c 100644 --- a/AUTHORS.csv +++ b/AUTHORS.csv @@ -4,3 +4,8 @@ Inada Naoki, "Inada, Naoki", Inada Guido van Rossum, "van Rossum, Guido (GvR)", GvR Just van Rossum, "van Rossum, Just (JvR)", JvR The Python core team and community, The Python core team and community, python-dev +P.J. Eby, "Eby, Phillip J.", Eby +Greg Ewing, "Ewing, Gregory", Ewing +Jim Jewett, "Jewett, Jim J.", Jewett +Nathaniel Smith, "Smith, Nathaniel J.", Smith +Martin v. Löwis, "von Löwis, Martin", von Löwis diff --git a/pep0/output.py b/pep0/output.py index 10024c221b8..bdd0ec4d90b 100644 --- a/pep0/output.py +++ b/pep0/output.py @@ -5,6 +5,7 @@ import sys import unicodedata +from itertools import groupby from operator import attrgetter from . 
import constants @@ -124,9 +125,9 @@ def verify_email_addresses(peps): def sort_authors(authors_dict): - authors_list = list(authors_dict.keys()) - authors_list.sort(key=attrgetter('sort_by')) - return authors_list + authors_list = sorted(authors_dict.keys(), key=attrgetter("sort_by", "last_first")) + unique_authors = [next(a) for k, a in groupby(authors_list, key=attrgetter("last_first"))] + return unique_authors def normalized_last_first(name): return len(unicodedata.normalize('NFC', name.last_first)) diff --git a/pep0/pep.py b/pep0/pep.py index a55e3d750ff..fc2850b445e 100644 --- a/pep0/pep.py +++ b/pep0/pep.py @@ -71,7 +71,7 @@ def __init__(self, author_and_email_tuple, authors_exceptions): name_dict = authors_exceptions.get(self.first_last) if name_dict: self.last_first = name_dict["Surname First"] - self.nick = name_dict["Name Reference"] + self.nick = self.last = name_dict["Name Reference"] else: self.set_name_parts()