Skip to content

[3.9] bpo-43882: Remove newlines and tabs early, including from the query and fragment. #25853

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 3, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions Lib/test/test_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -614,32 +614,40 @@ def test_urlsplit_attributes(self):

def test_urlsplit_remove_unsafe_bytes(self):
# Remove ASCII tabs and newlines from input
url = "http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
url = "http\t://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest also adding a \r in the middle of the netloc for good measure, e.g. www.python\r.org.

p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.netloc, "www.python.org")
self.assertEqual(p.path, "/javascript:alert('msg')/")
self.assertEqual(p.query, "")
self.assertEqual(p.fragment, "frag")
self.assertEqual(p.query, "query=something")
self.assertEqual(p.fragment, "fragment")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
self.assertEqual(p.hostname, "www.python.org")
self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/#frag")
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")

# Remove ASCII tabs and newlines from input as bytes.
url = b"http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
url = b"http\t://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, b"http")
self.assertEqual(p.netloc, b"www.python.org")
self.assertEqual(p.path, b"/javascript:alert('msg')/")
self.assertEqual(p.query, b"")
self.assertEqual(p.fragment, b"frag")
self.assertEqual(p.query, b"query=something")
self.assertEqual(p.fragment, b"fragment")
self.assertEqual(p.username, None)
self.assertEqual(p.password, None)
self.assertEqual(p.hostname, b"www.python.org")
self.assertEqual(p.port, None)
self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/#frag")
self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")

# with scheme as cache-key
url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
scheme = "ht\ntp"
for _ in range(2):
p = urllib.parse.urlsplit(url)
self.assertEqual(p.scheme, "http")
self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")

def test_attributes_bad_port(self):
"""Check handling of invalid ports."""
Expand Down
8 changes: 5 additions & 3 deletions Lib/urllib/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
"""

url, scheme, _coerce_result = _coerce_args(url, scheme)

for b in _UNSAFE_URL_BYTES_TO_REMOVE:
url = url.replace(b, "")
scheme = scheme.replace(b, "")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to typeshed, the scheme argument to urlparse is Optional. However, with this change, passing None for scheme results in an error like:

AttributeError: 'NoneType' object has no attribute 'replace'

I'm not sure whether typeshed is wrong or whether this line should be guarded with `if scheme is not None`, but we should definitely fix one or the other, and perhaps improve the documentation for this argument.

Thanks to @branchvincent for doing the digging and reporting this issue to me.


allow_fragments = bool(allow_fragments)
key = url, scheme, allow_fragments, type(url), type(scheme)
cached = _parse_cache.get(key, None)
Expand All @@ -472,9 +477,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
else:
scheme, url = url[:i].lower(), url[i+1:]

for b in _UNSAFE_URL_BYTES_TO_REMOVE:
url = url.replace(b, "")

if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
if (('[' in netloc and ']' not in netloc) or
Expand Down