From 6965e0a0cd4fca7f48c5444e7ac8b3dbaf2fbc2d Mon Sep 17 00:00:00 2001 From: clavedeluna Date: Sat, 22 Oct 2022 14:18:15 -0300 Subject: [PATCH 1/3] fix astroid-error for parsing module encoding --- doc/whatsnew/fragments/7661.bugfix | 3 +++ pylint/checkers/unicode.py | 2 +- .../regrtest_data/encoding/bad_missing_num.py | 1 + tests/regrtest_data/encoding/bad_wrong_num.py | 1 + tests/regrtest_data/encoding/good.py | 1 + tests/test_self.py | 21 ++++++++++++++++++- 6 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 doc/whatsnew/fragments/7661.bugfix create mode 100644 tests/regrtest_data/encoding/bad_missing_num.py create mode 100644 tests/regrtest_data/encoding/bad_wrong_num.py create mode 100644 tests/regrtest_data/encoding/good.py diff --git a/doc/whatsnew/fragments/7661.bugfix b/doc/whatsnew/fragments/7661.bugfix new file mode 100644 index 0000000000..6a2c596fa7 --- /dev/null +++ b/doc/whatsnew/fragments/7661.bugfix @@ -0,0 +1,3 @@ +Fix bug that unsafely retrieved a value from a dict and led to an ``astroid-error`` when parsing a module. + +Closes #7661 diff --git a/pylint/checkers/unicode.py b/pylint/checkers/unicode.py index b5123ef17c..35a0cd7fce 100644 --- a/pylint/checkers/unicode.py +++ b/pylint/checkers/unicode.py @@ -218,7 +218,7 @@ def _normalize_codec_name(codec: str) -> str: def _remove_bom(encoded: bytes, encoding: str) -> bytes: """Remove the bom if given from a line.""" - if not encoding.startswith("utf"): + if encoding not in UNICODE_BOMS: return encoded bom = UNICODE_BOMS[encoding] if encoded.startswith(bom): diff --git a/tests/regrtest_data/encoding/bad_missing_num.py b/tests/regrtest_data/encoding/bad_missing_num.py new file mode 100644 index 0000000000..a43139838d --- /dev/null +++ b/tests/regrtest_data/encoding/bad_missing_num.py @@ -0,0 +1 @@ +# -*- encoding: utf -*- diff --git a/tests/regrtest_data/encoding/bad_wrong_num.py b/tests/regrtest_data/encoding/bad_wrong_num.py new file mode 100644 index 0000000000..5c6bfe7868 --- /dev/null +++ b/tests/regrtest_data/encoding/bad_wrong_num.py @@ -0,0 +1 @@ +# -*- encoding: utf-9 -*- diff --git a/tests/regrtest_data/encoding/good.py b/tests/regrtest_data/encoding/good.py new file mode 100644 index 0000000000..dae354a675 --- /dev/null +++ b/tests/regrtest_data/encoding/good.py @@ -0,0 +1 @@ +# -*- encoding: utf-8 -*- diff --git a/tests/test_self.py b/tests/test_self.py index 632022668a..65b76f6612 100644 --- a/tests/test_self.py +++ b/tests/test_self.py @@ -148,7 +148,9 @@ def _clean_paths(output: str) -> str: output = re.sub(CLEAN_PATH, "", output, flags=re.MULTILINE) return output.replace("\\", "/") - def _test_output(self, args: list[str], expected_output: str) -> None: + def _test_output( + self, args: list[str], expected_output: str, unexpected_output: str = "" + ) -> None: out = StringIO() args = _add_rcfile_default_pylintrc(args) self._run_pylint(args, out=out) @@ -156,6 +158,9 @@ def _test_output(self, args: list[str], expected_output: str) -> None: expected_output = self._clean_paths(expected_output) assert expected_output.strip() in actual_output.strip() + if unexpected_output: + assert unexpected_output.strip() not in actual_output.strip() + def _test_output_file( self, args: list[str], filename: LocalPath, expected_output: str ) -> None: @@ -1196,6 +1201,20 @@ def test_syntax_error_invalid_encoding(self) -> None: expected_output = "unknown encoding" self._test_output([module, "-E"], expected_output=expected_output) + @pytest.mark.parametrize( + "module_name,expected_output", + [ + ("good.py", ""), + ("bad_wrong_num.py", "(syntax-error)"), + ("bad_missing_num.py", "(bad-file-encoding)"), + ], + ) + def test_encoding(self, module_name: str, expected_output: str) -> None: + path = join(HERE, "regrtest_data", "encoding", module_name) + self._test_output( + [path], expected_output=expected_output, unexpected_output="(astroid-error)" + ) + class TestCallbackOptions: """Test for all callback options we support.""" From 28055b21bb36687143b71f1aea8b0cd111255f60 Mon Sep 17 00:00:00 2001 From: Dani Alcala <112832187+clavedeluna@users.noreply.github.com> Date: Sun, 23 Oct 2022 08:41:30 -0300 Subject: [PATCH 2/3] Update doc/whatsnew/fragments/7661.bugfix Co-authored-by: Pierre Sassoulas --- doc/whatsnew/fragments/7661.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whatsnew/fragments/7661.bugfix b/doc/whatsnew/fragments/7661.bugfix index 6a2c596fa7..5349ce7ee6 100644 --- a/doc/whatsnew/fragments/7661.bugfix +++ b/doc/whatsnew/fragments/7661.bugfix @@ -1,3 +1,3 @@ -Fix bug that unsafely retrieved a value from a dict and led to an ``astroid-error`` when parsing a module. +Fix crash that happened when parsing files with unexpected encoding starting with 'utf' like ``utf13`` Closes #7661 From 416bc99f0160a02ebda5d762eaa4ff179bed2617 Mon Sep 17 00:00:00 2001 From: Pierre Sassoulas Date: Sun, 23 Oct 2022 14:52:35 +0200 Subject: [PATCH 3/3] Update doc/whatsnew/fragments/7661.bugfix --- doc/whatsnew/fragments/7661.bugfix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whatsnew/fragments/7661.bugfix b/doc/whatsnew/fragments/7661.bugfix index 5349ce7ee6..2e58c861bb 100644 --- a/doc/whatsnew/fragments/7661.bugfix +++ b/doc/whatsnew/fragments/7661.bugfix @@ -1,3 +1,3 @@ -Fix crash that happened when parsing files with unexpected encoding starting with 'utf' like ``utf13`` +Fix crash that happened when parsing files with unexpected encoding starting with 'utf' like ``utf13``. Closes #7661