diff --git a/doc/whatsnew/fragments/7661.bugfix b/doc/whatsnew/fragments/7661.bugfix new file mode 100644 index 0000000000..2e58c861bb --- /dev/null +++ b/doc/whatsnew/fragments/7661.bugfix @@ -0,0 +1,3 @@ +Fix a crash that happened when parsing files that declare an unexpected encoding whose name starts with 'utf', such as ``utf13``. + +Closes #7661 diff --git a/pylint/checkers/unicode.py b/pylint/checkers/unicode.py index b5123ef17c..35a0cd7fce 100644 --- a/pylint/checkers/unicode.py +++ b/pylint/checkers/unicode.py @@ -218,7 +218,7 @@ def _normalize_codec_name(codec: str) -> str: def _remove_bom(encoded: bytes, encoding: str) -> bytes: """Remove the bom if given from a line.""" - if not encoding.startswith("utf"): + if encoding not in UNICODE_BOMS: return encoded bom = UNICODE_BOMS[encoding] if encoded.startswith(bom): diff --git a/tests/regrtest_data/encoding/bad_missing_num.py b/tests/regrtest_data/encoding/bad_missing_num.py new file mode 100644 index 0000000000..a43139838d --- /dev/null +++ b/tests/regrtest_data/encoding/bad_missing_num.py @@ -0,0 +1 @@ +# -*- encoding: utf -*- diff --git a/tests/regrtest_data/encoding/bad_wrong_num.py b/tests/regrtest_data/encoding/bad_wrong_num.py new file mode 100644 index 0000000000..5c6bfe7868 --- /dev/null +++ b/tests/regrtest_data/encoding/bad_wrong_num.py @@ -0,0 +1 @@ +# -*- encoding: utf-9 -*- diff --git a/tests/regrtest_data/encoding/good.py b/tests/regrtest_data/encoding/good.py new file mode 100644 index 0000000000..dae354a675 --- /dev/null +++ b/tests/regrtest_data/encoding/good.py @@ -0,0 +1 @@ +# -*- encoding: utf-8 -*- diff --git a/tests/test_self.py b/tests/test_self.py index 632022668a..65b76f6612 100644 --- a/tests/test_self.py +++ b/tests/test_self.py @@ -148,7 +148,9 @@ def _clean_paths(output: str) -> str: output = re.sub(CLEAN_PATH, "", output, flags=re.MULTILINE) return output.replace("\\", "/") - def _test_output(self, args: list[str], expected_output: str) -> None: + def _test_output( + self, args: list[str], 
expected_output: str, unexpected_output: str = "" + ) -> None: out = StringIO() args = _add_rcfile_default_pylintrc(args) self._run_pylint(args, out=out) @@ -156,6 +158,9 @@ def _test_output(self, args: list[str], expected_output: str) -> None: expected_output = self._clean_paths(expected_output) assert expected_output.strip() in actual_output.strip() + if unexpected_output: + assert unexpected_output.strip() not in actual_output.strip() + def _test_output_file( self, args: list[str], filename: LocalPath, expected_output: str ) -> None: @@ -1196,6 +1201,20 @@ def test_syntax_error_invalid_encoding(self) -> None: expected_output = "unknown encoding" self._test_output([module, "-E"], expected_output=expected_output) + @pytest.mark.parametrize( + "module_name,expected_output", + [ + ("good.py", ""), + ("bad_wrong_num.py", "(syntax-error)"), + ("bad_missing_num.py", "(bad-file-encoding)"), + ], + ) + def test_encoding(self, module_name: str, expected_output: str) -> None: + path = join(HERE, "regrtest_data", "encoding", module_name) + self._test_output( + [path], expected_output=expected_output, unexpected_output="(astroid-error)" + ) + class TestCallbackOptions: """Test for all callback options we support."""