Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whatsnew/fragments/7661.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix a crash that happened when parsing files with an unexpected encoding starting with 'utf', like ``utf13``.

Closes #7661
2 changes: 1 addition & 1 deletion pylint/checkers/unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def _normalize_codec_name(codec: str) -> str:

def _remove_bom(encoded: bytes, encoding: str) -> bytes:
"""Remove the bom if given from a line."""
if not encoding.startswith("utf"):
if encoding not in UNICODE_BOMS:
return encoded
bom = UNICODE_BOMS[encoding]
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since all UNICODE_BOMS keys are utf... encodings, it's redundant to check whether the encoding starts with "utf". Just checking that the encoding is in UNICODE_BOMS is sufficient, and it prevents a KeyError on the lookup that follows.

if encoded.startswith(bom):
Expand Down
1 change: 1 addition & 0 deletions tests/regrtest_data/encoding/bad_missing_num.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# -*- encoding: utf -*-
1 change: 1 addition & 0 deletions tests/regrtest_data/encoding/bad_wrong_num.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# -*- encoding: utf-9 -*-
1 change: 1 addition & 0 deletions tests/regrtest_data/encoding/good.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# -*- encoding: utf-8 -*-
21 changes: 20 additions & 1 deletion tests/test_self.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,19 @@ def _clean_paths(output: str) -> str:
output = re.sub(CLEAN_PATH, "", output, flags=re.MULTILINE)
return output.replace("\\", "/")

def _test_output(self, args: list[str], expected_output: str) -> None:
def _test_output(
self, args: list[str], expected_output: str, unexpected_output: str = ""
) -> None:
out = StringIO()
args = _add_rcfile_default_pylintrc(args)
self._run_pylint(args, out=out)
actual_output = self._clean_paths(out.getvalue())
expected_output = self._clean_paths(expected_output)
assert expected_output.strip() in actual_output.strip()

if unexpected_output:
assert unexpected_output.strip() not in actual_output.strip()

def _test_output_file(
self, args: list[str], filename: LocalPath, expected_output: str
) -> None:
Expand Down Expand Up @@ -1196,6 +1201,20 @@ def test_syntax_error_invalid_encoding(self) -> None:
expected_output = "unknown encoding"
self._test_output([module, "-E"], expected_output=expected_output)

@pytest.mark.parametrize(
"module_name,expected_output",
[
("good.py", ""),
("bad_wrong_num.py", "(syntax-error)"),
("bad_missing_num.py", "(bad-file-encoding)"),
],
)
def test_encoding(self, module_name: str, expected_output: str) -> None:
path = join(HERE, "regrtest_data", "encoding", module_name)
self._test_output(
[path], expected_output=expected_output, unexpected_output="(astroid-error)"
)


class TestCallbackOptions:
"""Test for all callback options we support."""
Expand Down