Skip to content

Commit

Permalink
x1.125 speed up
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Nov 17, 2022
1 parent 7152d0b commit 9daca87
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 21 deletions.
2 changes: 1 addition & 1 deletion tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ def test_safe_url_performance(url):
# presumably due to caching by urllib.
number = 1 # TODO: Increase? How much?
# Make sure the new implementation is at most this number of times as slow.
multiplier = 45 # TODO: Lower as close to 1 as possible.
multiplier = 40 # TODO: Lower as close to 1 as possible.

time1 = timeit(
f"safe_url({url!r})", "from w3lib.url import safe_url", number=number
Expand Down
2 changes: 2 additions & 0 deletions w3lib/_rfc5892.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@


def _check_contextj_rules(label: str) -> None:
if label.isascii():
return
for i, code_point in enumerate(label):
value = ord(code_point)
if not intranges_contain(value, codepoint_classes["CONTEXTJ"]):
Expand Down
19 changes: 0 additions & 19 deletions w3lib/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,25 +432,6 @@ def _starts_with_windows_drive_letter(input: str) -> bool:
)


_ASCII_URL_CODE_POINTS = _ASCII_ALPHANUMERIC + "!$&'()*+,-./:;=?@_~"


# https://url.spec.whatwg.org/commit-snapshots/a46cb9188a48c2c9d80ba32a9b1891652d6b4900/#url-code-points
def _is_url_code_point(code_point: str) -> bool:
    """Return whether *code_point* is a URL code point.

    A code point qualifies when it is listed in ``_ASCII_URL_CODE_POINTS``,
    or when its ordinal lies in the inclusive range U+00A0..U+10FFFD and
    neither ``_is_surrogate_code_point_id`` nor
    ``_is_noncharacter_code_point_id`` reports it.
    """
    # Cheap membership test first: covers the allowed ASCII characters.
    if code_point in _ASCII_URL_CODE_POINTS:
        return True
    value = ord(code_point)
    # Everything below U+00A0 that was not matched above is rejected, as is
    # anything beyond U+10FFFD.
    if not 0xA0 <= value <= 0x10FFFD:
        return False
    # Surrogates are checked before noncharacters, same order as before.
    return not (
        _is_surrogate_code_point_id(value)
        or _is_noncharacter_code_point_id(value)
    )


# https://url.spec.whatwg.org/commit-snapshots/a46cb9188a48c2c9d80ba32a9b1891652d6b4900/#double-dot-path-segment
def _is_double_dot_path_segment(input: str) -> bool:
return input in (
Expand Down
2 changes: 1 addition & 1 deletion w3lib/_utr46.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def _process(


def _convert_label(label: str) -> str:
if not any(ord(code_point) >= 0x80 for code_point in label):
if label.isascii():
return label
return f"xn--{label.encode('punycode').decode()}"

Expand Down

0 comments on commit 9daca87

Please sign in to comment.