Skip to content

Commit

Permalink
Fix parsing of author information
Browse files Browse the repository at this point in the history
Instead of relying on regular expressions, this patch uses the standard
library's `email.utils.parseaddr()` function to parse an RFC-822-compliant
email address string into its name and address parts.

This should also resolve issues with special characters in the name part; see
issues python-poetry#370 and python-poetry#798.

python-poetry#370
python-poetry#798
  • Loading branch information
yggi49 committed Apr 16, 2019
1 parent 5a2a3e6 commit 024ebfa
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 19 deletions.
7 changes: 3 additions & 4 deletions poetry/console/commands/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import List
from typing import Tuple

from poetry.utils.helpers import parse_author
from .command import Command
from .env_command import EnvCommand

Expand Down Expand Up @@ -294,15 +295,13 @@ def _format_requirements(self, requirements): # type: (List[str]) -> dict
return requires

def _validate_author(self, author, default):
from poetry.packages.package import AUTHOR_REGEX

author = author or default

if author in ["n", "no"]:
return

m = AUTHOR_REGEX.match(author)
if not m:
name, email = parse_author(author)
if not name:
raise ValueError(
"Invalid author string. Must be in the format: "
"John Smith <[email protected]>"
Expand Down
9 changes: 2 additions & 7 deletions poetry/masonry/builders/builder.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import os
import re
import shutil
import tempfile

Expand All @@ -16,15 +15,14 @@
from poetry.utils._compat import glob
from poetry.utils._compat import lru_cache
from poetry.utils._compat import to_str
from poetry.utils.helpers import parse_author
from poetry.vcs import get_vcs

from ..metadata import Metadata
from ..utils.module import Module
from ..utils.package_include import PackageInclude


AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,\w\d'’\"()]+) <(?P<email>.+?)>$")

METADATA_BASE = """\
Metadata-Version: 2.1
Name: {name}
Expand Down Expand Up @@ -228,10 +226,7 @@ def convert_entry_points(self): # type: () -> dict

@classmethod
def convert_author(cls, author):  # type: (str) -> dict
    """Split an author string into a ``name``/``email`` mapping.

    :param author: the author string, e.g. a name optionally followed by
        an address in angle brackets.
    :return: a dict with the keys ``"name"`` and ``"email"`` holding the
        two values returned by ``parse_author``.
    """
    # Rely on parse_author alone: the former AUTHOR_REGEX match raised
    # AttributeError on non-matching input (``m`` was None) and its result
    # was overwritten by the parse_author call anyway.
    name, email = parse_author(author)

    return {"name": name, "email": email}

Expand Down
10 changes: 2 additions & 8 deletions poetry/packages/package.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import copy
import re

from contextlib import contextmanager
from typing import Union
Expand All @@ -10,7 +9,7 @@
from poetry.spdx import license_by_id
from poetry.spdx import License
from poetry.utils._compat import Path
from poetry.utils.helpers import canonicalize_name
from poetry.utils.helpers import canonicalize_name, parse_author
from poetry.version.markers import AnyMarker
from poetry.version.markers import parse_marker

Expand All @@ -22,8 +21,6 @@
from .utils.utils import convert_markers
from .utils.utils import create_nested_marker

AUTHOR_REGEX = re.compile(r"(?u)^(?P<name>[- .,\w\d'’\"()]+)(?: <(?P<email>.+?)>)?$")


class Package(object):

Expand Down Expand Up @@ -141,10 +138,7 @@ def _get_author(self): # type: () -> dict
if not self._authors:
return {"name": None, "email": None}

m = AUTHOR_REGEX.match(self._authors[0])

name = m.group("name")
email = m.group("email")
name, email = parse_author(self._authors[0])

return {"name": name, "email": email}

Expand Down
19 changes: 19 additions & 0 deletions poetry/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import shutil
import stat
import tempfile
from email.utils import parseaddr

from contextlib import contextmanager
from typing import List
Expand Down Expand Up @@ -100,3 +101,21 @@ def _on_rm_error(func, path, exc_info):

def safe_rmtree(path):
    """Delete the directory tree at ``path`` using ``shutil.rmtree``.

    Failures during removal are not ignored; they are handed to
    ``_on_rm_error`` as the ``onerror`` callback.
    """
    shutil.rmtree(path, ignore_errors=False, onerror=_on_rm_error)


def parse_author(address):  # type: (str) -> tuple
    """Parse name and address parts from an email address string.

    .. note::

        If the input string does not contain an `@` character, it is
        assumed that it represents only a name without an email address.

    :param address: the email address string to parse.
    :return: a 2-tuple with the parsed name and email address. If a
        part is missing, ``None`` will be returned in its place.
    """
    # Normalise surrounding whitespace up front so the name-only branch
    # behaves like the parseaddr() branch, which strips it as well.
    address = address.strip()

    if "@" not in address:
        # An empty (or whitespace-only) string has no name part either;
        # honour the documented contract and report it as None.
        return (address or None, None)

    name, email = parseaddr(address)

    # parseaddr() signals a missing part with an empty string.
    return (name or None, email or None)
8 changes: 8 additions & 0 deletions tests/packages/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,11 @@ def test_package_authors():
package.authors.insert(0, "John Doe")
assert package.author_name == "John Doe"
assert package.author_email is None


def test_package_authors_with_fancy_unicode():
    """A non-ASCII author name is split from its email address."""
    package = Package("foo", "0.1.0")

    # The address literal had been replaced by a redaction placeholder
    # without an "@", which makes the whole string parse as a name; an
    # example.com address stands in for the unrecoverable original.
    package.authors.append("my·fancy·company <my-fancy-company@example.com>")
    assert package.author_name == "my·fancy·company"
    assert package.author_email == "my-fancy-company@example.com"
52 changes: 52 additions & 0 deletions tests/utils/test_helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

from poetry.utils.helpers import get_http_basic_auth
from poetry.utils.helpers import parse_requires
from poetry.utils.helpers import parse_author


def test_parse_requires():
Expand Down Expand Up @@ -66,3 +70,51 @@ def test_get_http_basic_auth_without_password(config):

def test_get_http_basic_auth_missing(config):
assert get_http_basic_auth(config, "foo") is None


def test_parse_author_simple_name_and_email():
    """A plain ``Name <address>`` string yields both parts."""
    # The redacted placeholder address contained no "@", so parse_author
    # returned the whole string as the name; use an example.com address.
    name, email = parse_author("John Doe <john.doe@example.com>")
    assert name == "John Doe"
    assert email == "john.doe@example.com"


def test_parse_author_simple_name_only():
    """A bare name comes back unchanged, with no email part."""
    parsed_name, parsed_email = parse_author("John Doe")
    assert parsed_email is None
    assert parsed_name == "John Doe"


def test_parse_author_ascii_specialchars_name_and_email():
    """ASCII special characters in the name part are preserved."""
    # The redacted placeholder address contained no "@", so parse_author
    # returned the whole string as the name; use an example.com address.
    name, email = parse_author("R&D <researchanddevelopment@example.com>")
    assert name == "R&D"
    assert email == "researchanddevelopment@example.com"


def test_parse_author_ascii_specialchars_name_only():
    """A name-only string with ASCII special characters is kept as-is."""
    parsed_name, parsed_email = parse_author("R&D")
    assert parsed_email is None
    assert parsed_name == "R&D"


def test_parse_author_unicode_name_and_email():
    """Non-ASCII characters in the name part are preserved."""
    # The redacted placeholder address contained no "@", so parse_author
    # returned the whole string as the name; use an example.com address.
    name, email = parse_author("my·fancy·corp <my-fancy-corp@example.com>")
    assert name == "my·fancy·corp"
    assert email == "my-fancy-corp@example.com"


def test_parse_author_unicode_name_only():
    """A name-only string with non-ASCII characters is kept as-is."""
    parsed_name, parsed_email = parse_author("my·fancy·corp")
    assert parsed_email is None
    assert parsed_name == "my·fancy·corp"


def test_parse_author_email_only_with_angular_brackets():
    """A bracketed address without a name yields ``None`` for the name."""
    # The redacted placeholder address contained no "@", so parse_author
    # returned the whole string as the name; use an example.com address.
    name, email = parse_author("<john.doe@example.com>")
    assert name is None
    assert email == "john.doe@example.com"


def test_parse_author_email_only_without_angular_brackets():
    """A bare address without a name yields ``None`` for the name."""
    # The redacted placeholder address contained no "@", so parse_author
    # returned the whole string as the name; use an example.com address.
    name, email = parse_author("john.doe@example.com")
    assert name is None
    assert email == "john.doe@example.com"

0 comments on commit 024ebfa

Please sign in to comment.