Skip to content

Commit 4589ed4

Browse files
authored
Merge pull request #6311 from hroncok/i6054
Fix utils.encoding.auto_decode() LookupError with invalid encodings
2 parents 729404d + d48475d commit 4589ed4

File tree

3 files changed, with 28 additions and 9 deletions.

Diff for: news/6054.bugfix

+4
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,4 @@
1+
Fix ``utils.encoding.auto_decode()`` ``LookupError`` with invalid encodings.
2+
``utils.encoding.auto_decode()`` was broken when decoding Big Endian BOM
3+
byte-strings on Little Endian or vice versa.
4+

Diff for: src/pip/_internal/utils/encoding.py

+7 -7
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,13 @@
99
from typing import List, Tuple, Text
1010

1111
BOMS = [
12-
(codecs.BOM_UTF8, 'utf8'),
13-
(codecs.BOM_UTF16, 'utf16'),
14-
(codecs.BOM_UTF16_BE, 'utf16-be'),
15-
(codecs.BOM_UTF16_LE, 'utf16-le'),
16-
(codecs.BOM_UTF32, 'utf32'),
17-
(codecs.BOM_UTF32_BE, 'utf32-be'),
18-
(codecs.BOM_UTF32_LE, 'utf32-le'),
12+
(codecs.BOM_UTF8, 'utf-8'),
13+
(codecs.BOM_UTF16, 'utf-16'),
14+
(codecs.BOM_UTF16_BE, 'utf-16-be'),
15+
(codecs.BOM_UTF16_LE, 'utf-16-le'),
16+
(codecs.BOM_UTF32, 'utf-32'),
17+
(codecs.BOM_UTF32_BE, 'utf-32-be'),
18+
(codecs.BOM_UTF32_LE, 'utf-32-le'),
1919
] # type: List[Tuple[bytes, Text]]
2020

2121
ENCODING_RE = re.compile(br'coding[:=]\s*([-\w.]+)')

Diff for: tests/unit/test_utils.py

+17 -2
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
util tests
55
66
"""
7+
import codecs
78
import itertools
89
import os
910
import shutil
@@ -20,7 +21,7 @@
2021
from pip._internal.exceptions import (
2122
HashMismatch, HashMissing, InstallationError, UnsupportedPythonVersion,
2223
)
23-
from pip._internal.utils.encoding import auto_decode
24+
from pip._internal.utils.encoding import BOMS, auto_decode
2425
from pip._internal.utils.glibc import check_glibc_version
2526
from pip._internal.utils.hashes import Hashes, MissingHashes
2627
from pip._internal.utils.misc import (
@@ -462,11 +463,20 @@ def test_non_zero(self):
462463
class TestEncoding(object):
463464
"""Tests for pip._internal.utils.encoding"""
464465

465-
def test_auto_decode_utf16_le(self):
466+
def test_auto_decode_utf_16_le(self):
466467
data = (
467468
b'\xff\xfeD\x00j\x00a\x00n\x00g\x00o\x00=\x00'
468469
b'=\x001\x00.\x004\x00.\x002\x00'
469470
)
471+
assert data.startswith(codecs.BOM_UTF16_LE)
472+
assert auto_decode(data) == "Django==1.4.2"
473+
474+
def test_auto_decode_utf_16_be(self):
475+
data = (
476+
b'\xfe\xff\x00D\x00j\x00a\x00n\x00g\x00o\x00='
477+
b'\x00=\x001\x00.\x004\x00.\x002'
478+
)
479+
assert data.startswith(codecs.BOM_UTF16_BE)
470480
assert auto_decode(data) == "Django==1.4.2"
471481

472482
def test_auto_decode_no_bom(self):
@@ -486,6 +496,11 @@ def test_auto_decode_no_preferred_encoding(self):
486496
ret = auto_decode(data.encode(sys.getdefaultencoding()))
487497
assert ret == data
488498

499+
@pytest.mark.parametrize('encoding', [encoding for bom, encoding in BOMS])
500+
def test_all_encodings_are_valid(self, encoding):
501+
# we really only care that there is no LookupError
502+
assert ''.encode(encoding).decode(encoding) == ''
503+
489504

490505
class TestTempDirectory(object):
491506

0 commit comments

Comments
 (0)