From 489b68f39944645a664d067bdabe7678568a4396 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 15 Jun 2025 07:19:29 +0100 Subject: [PATCH 1/2] Deprecate support for source encodings other than UTF-8 --- CHANGES.rst | 3 +++ doc/usage/configuration.rst | 3 +++ doc/usage/restructuredtext/basics.rst | 7 +++---- doc/usage/restructuredtext/directives.rst | 2 +- sphinx/config.py | 14 ++++++++++++++ tests/test_config/test_config.py | 15 +++++++++++++-- 6 files changed, 37 insertions(+), 7 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 4920b4c7736..47eb18fc8a6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -23,6 +23,9 @@ Deprecated Patch by Adam Turner. * #13644: Deprecate the :py:attr:`!Parser.config` and :py:attr:`!env` attributes. Patch by Adam Turner. +* #13665: Deprecate support for non-UTF-8 source encodings, + scheduled for removal in Sphinx 10. + Patch by Adam Turner. Features added -------------- diff --git a/doc/usage/configuration.rst b/doc/usage/configuration.rst index 7cdf462c4ba..b2f40f76a43 100644 --- a/doc/usage/configuration.rst +++ b/doc/usage/configuration.rst @@ -1157,6 +1157,9 @@ Options for source files The recommended encoding is ``'utf-8-sig'``. .. versionadded:: 0.5 + .. deprecated:: 8.3 + Support for source encodings other than UTF-8 is deprecated. + Sphinx 10 will only support UTF-8 files. .. confval:: source_suffix :type: :code-py:`dict[str, str] | Sequence[str] | str` diff --git a/doc/usage/restructuredtext/basics.rst b/doc/usage/restructuredtext/basics.rst index ea61b80fc85..f6e23763b72 100644 --- a/doc/usage/restructuredtext/basics.rst +++ b/doc/usage/restructuredtext/basics.rst @@ -646,10 +646,9 @@ configurations: Source encoding --------------- -Since the easiest way to include special characters like em dashes or copyright -signs in reStructuredText is to directly write them as Unicode characters, one has to -specify an encoding. 
Sphinx assumes source files to be encoded in UTF-8 by -default; you can change this with the :confval:`source_encoding` config value. +Sphinx supports source files that are encoded in UTF-8. +This means that the full range of Unicode_ characters may be used +directly in reStructuredText. Gotchas diff --git a/doc/usage/restructuredtext/directives.rst b/doc/usage/restructuredtext/directives.rst index f882f33ba3e..5845a6ab717 100644 --- a/doc/usage/restructuredtext/directives.rst +++ b/doc/usage/restructuredtext/directives.rst @@ -971,7 +971,7 @@ __ https://pygments.org/docs/lexers :type: text Explicitly specify the encoding of the file. - This overwrites the default encoding (:confval:`source_encoding`). + This overrides the default encoding (UTF-8). For example: .. code-block:: rst diff --git a/sphinx/config.py b/sphinx/config.py index a43b6cc82d0..e878cd7a834 100644 --- a/sphinx/config.py +++ b/sphinx/config.py @@ -895,7 +895,21 @@ def check_master_doc( return changed +def deprecate_source_encoding(_app: Sphinx, config: Config) -> None: + """Warn on non-UTF-8 source_encoding.""" + # RemovedInSphinx10Warning + if config.source_encoding.lower() not in {'utf-8', 'utf-8-sig', 'utf8'}: + msg = _( + 'Support for source encodings other than UTF-8 ' + 'is deprecated and will be removed in Sphinx 10. ' + 'Please comment at https://github.com/sphinx-doc/sphinx/issues/13665 ' + 'if this causes a problem.' 
+ ) + logger.warning(msg) + + def setup(app: Sphinx) -> ExtensionMetadata: + app.connect('config-inited', deprecate_source_encoding, priority=790) app.connect('config-inited', convert_source_suffix, priority=800) app.connect('config-inited', convert_highlight_options, priority=800) app.connect('config-inited', init_numfig_format, priority=800) diff --git a/tests/test_config/test_config.py b/tests/test_config/test_config.py index b3392e654b2..857e1c28e34 100644 --- a/tests/test_config/test_config.py +++ b/tests/test_config/test_config.py @@ -19,14 +19,14 @@ ) from sphinx.deprecation import RemovedInSphinx90Warning from sphinx.errors import ConfigError, ExtensionError, VersionRequirementError +from sphinx.testing.util import SphinxTestApp from sphinx.util.tags import Tags if TYPE_CHECKING: from collections.abc import Iterable + from pathlib import Path from typing import TypeAlias - from sphinx.testing.util import SphinxTestApp - CircularList: TypeAlias = list[int | 'CircularList'] CircularDict: TypeAlias = dict[str, int | 'CircularDict'] @@ -811,3 +811,14 @@ def test_root_doc_and_master_doc_are_synchronized() -> None: c.root_doc = '1234' assert c.master_doc == '1234' assert c.root_doc == c.master_doc + + +def test_source_encoding_deprecation(tmp_path: Path) -> None: + (tmp_path / 'conf.py').touch() + app = SphinxTestApp( + buildername='dummy', + srcdir=tmp_path, + confoverrides={'source_encoding': 'latin-1'}, + ) + expected = 'Support for source encodings other than UTF-8 is deprecated and will be removed' + assert expected in app.warning.getvalue() From f235f736dc39d5f3fcf6fd5677a61aeb92e3e4fa Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+aa-turner@users.noreply.github.com> Date: Sun, 15 Jun 2025 07:23:45 +0100 Subject: [PATCH 2/2] fixup! 
Deprecate support for source encodings other than UTF-8 --- doc/usage/restructuredtext/basics.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/usage/restructuredtext/basics.rst b/doc/usage/restructuredtext/basics.rst index f6e23763b72..8f408f45e38 100644 --- a/doc/usage/restructuredtext/basics.rst +++ b/doc/usage/restructuredtext/basics.rst @@ -647,9 +647,11 @@ Source encoding --------------- Sphinx supports source files that are encoded in UTF-8. -This means that the full range of Unicode_ characters may be used +This means that the full range of Unicode__ characters may be used directly in reStructuredText. +__ https://www.unicode.org/ + Gotchas -------