Add conditional string encoding based on urllib3 major version
nateprewitt committed Jul 18, 2024
commit 4e38364 (parent: f8aa36b)
Showing 3 changed files with 40 additions and 18 deletions.
12 changes: 12 additions & 0 deletions src/requests/compat.py
@@ -10,6 +10,18 @@
 import importlib
 import sys
 
+# -------
+# urllib3
+# -------
+from urllib3 import __version__ as urllib3_version
+
+# Detect which major version of urllib3 is being used.
+try:
+    is_urllib3_2 = int(urllib3_version.split(".")[0]) == 2
+except (TypeError, AttributeError):
+    # If we can't discern a version, prefer old functionality.
+    is_urllib3_2 = False
+
 # -------------------
 # Character Detection
 # -------------------
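The detection logic is self-contained, so here is a standalone sketch of how it evaluates (illustrative only, mirroring the hunk above):

    from urllib3 import __version__ as urllib3_version

    # "2.2.2".split(".")[0] -> "2" -> 2, so is_urllib3_2 becomes True;
    # "1.26.19" yields 1, so it stays False.
    try:
        is_urllib3_2 = int(urllib3_version.split(".")[0]) == 2
    except (TypeError, AttributeError):
        # A missing or non-string version falls back to the 1.x behavior.
        is_urllib3_2 = False

    print(urllib3_version, is_urllib3_2)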
5 changes: 4 additions & 1 deletion src/requests/utils.py
@@ -38,6 +38,7 @@
     getproxies,
     getproxies_environment,
     integer_types,
+    is_urllib3_2,
 )
 from .compat import parse_http_list as _parse_list_header
 from .compat import (
@@ -136,7 +137,9 @@ def super_len(o):
     total_length = None
     current_position = 0
 
-    if isinstance(o, str):
+    if is_urllib3_2 and isinstance(o, str):
+        # urllib3 2.x treats all strings as utf-8 instead
+        # of latin-1 (iso-8859-1) like http.client.
         o = o.encode("utf-8")
 
     if hasattr(o, "__len__"):
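The guard matters because Content-Length must count bytes on the wire, while len() on a str counts code points. A minimal illustration of the mismatch the encode avoids (not part of the commit):

    s = "résumé ☃"
    print(len(s))                   # 8 characters
    print(len(s.encode("utf-8")))   # 12 bytes: each é is 2 bytes, ☃ is 3

Under urllib3 1.x, string bodies go through http.client's latin-1 encoding, so the old character-count behavior is preserved there.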
41 changes: 24 additions & 17 deletions tests/test_requests.py
@@ -25,6 +25,7 @@
     builtin_str,
     cookielib,
     getproxies,
+    is_urllib3_2,
     urlparse,
 )
 from requests.cookies import cookiejar_from_dict, morsel_to_cookie
@@ -1810,23 +1811,6 @@ def test_autoset_header_values_are_native(self, httpbin):
 
         assert p.headers["Content-Length"] == length
 
-    def test_content_length_for_bytes_data(self, httpbin):
-        data = "This is a string containing multi-byte UTF-8 ☃️"
-        encoded_data = data.encode("utf-8")
-        length = str(len(encoded_data))
-        req = requests.Request("POST", httpbin("post"), data=encoded_data)
-        p = req.prepare()
-
-        assert p.headers["Content-Length"] == length
-
-    def test_content_length_for_string_data_counts_bytes(self, httpbin):
-        data = "This is a string containing multi-byte UTF-8 ☃️"
-        length = str(len(data.encode("utf-8")))
-        req = requests.Request("POST", httpbin("post"), data=data)
-        p = req.prepare()
-
-        assert p.headers["Content-Length"] == length
-
     def test_nonhttp_schemes_dont_check_URLs(self):
         test_urls = (
             "data:image/gif;base64,R0lGODlhAQABAHAAACH5BAUAAAAALAAAAAABAAEAAAICRAEAOw==",
@@ -2966,6 +2950,29 @@ def response_handler(sock):
     assert client_cert is not None
 
 
+def test_content_length_for_bytes_data(httpbin):
+    data = "This is a string containing multi-byte UTF-8 ☃️"
+    encoded_data = data.encode("utf-8")
+    length = str(len(encoded_data))
+    req = requests.Request("POST", httpbin("post"), data=encoded_data)
+    p = req.prepare()
+
+    assert p.headers["Content-Length"] == length
+
+
+@pytest.mark.skipif(
+    not is_urllib3_2,
+    reason="urllib3 2.x encodes all strings to utf-8, urllib3 1.x uses latin-1",
+)
+def test_content_length_for_string_data_counts_bytes(httpbin):
+    data = "This is a string containing multi-byte UTF-8 ☃️"
+    length = str(len(data.encode("utf-8")))
+    req = requests.Request("POST", httpbin("post"), data=data)
+    p = req.prepare()
+
+    assert p.headers["Content-Length"] == length
+
+
 def test_json_decode_errors_are_serializable_deserializable():
     json_decode_error = requests.exceptions.JSONDecodeError(
         "Extra data",
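Taken together, the relocated tests assert that a prepared request sizes its body in bytes. A hedged sketch of the behavior with urllib3 2.x installed (the example.com URL is illustrative; the tests themselves use httpbin):

    import requests

    data = "multi-byte UTF-8 ☃️"
    req = requests.Request("POST", "https://example.com/post", data=data)
    p = req.prepare()

    # Under urllib3 2.x, super_len() encodes the str to UTF-8 before
    # measuring, so Content-Length reflects the byte count:
    assert p.headers["Content-Length"] == str(len(data.encode("utf-8")))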
