From ef4c5ceef72bae4b1d74117f57d580b1426e15f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 9 Sep 2024 09:31:49 +0200 Subject: [PATCH 1/3] ruff: pyupgrade to 3.8 ruff check --target-version py38 --select UP --fix . --- debug-info.py | 1 - doc/conf.py | 3 +-- html5lib/__init__.py | 1 - html5lib/_ihatexml.py | 3 +-- html5lib/_inputstream.py | 9 ++++----- html5lib/_tokenizer.py | 3 +-- html5lib/_trie/__init__.py | 1 - html5lib/_trie/_base.py | 3 +-- html5lib/_trie/py.py | 1 - html5lib/_utils.py | 7 +++---- html5lib/constants.py | 1 - html5lib/filters/alphabeticalattributes.py | 1 - html5lib/filters/base.py | 3 +-- html5lib/filters/inject_meta_charset.py | 1 - html5lib/filters/lint.py | 1 - html5lib/filters/optionaltags.py | 1 - html5lib/filters/sanitizer.py | 1 - html5lib/filters/whitespace.py | 1 - html5lib/html5parser.py | 5 ++--- html5lib/serializer.py | 3 +-- html5lib/tests/__init__.py | 1 - html5lib/tests/conftest.py | 5 ++--- html5lib/tests/sanitizer.py | 1 - html5lib/tests/support.py | 3 +-- html5lib/tests/test_alphabeticalattributes.py | 1 - html5lib/tests/test_encoding.py | 5 ++--- html5lib/tests/test_meta.py | 6 +----- html5lib/tests/test_optionaltags_filter.py | 1 - html5lib/tests/test_parser2.py | 1 - html5lib/tests/test_sanitizer.py | 1 - html5lib/tests/test_serializer.py | 1 - html5lib/tests/test_stream.py | 7 +++---- html5lib/tests/test_tokenizer2.py | 1 - html5lib/tests/test_treeadapters.py | 1 - html5lib/tests/test_treewalkers.py | 5 ++--- html5lib/tests/test_whitespace_filter.py | 1 - html5lib/tests/tokenizer.py | 3 +-- html5lib/tests/tokenizertotree.py | 3 +-- html5lib/tests/tree_construction.py | 1 - html5lib/treeadapters/__init__.py | 1 - html5lib/treeadapters/genshi.py | 1 - html5lib/treeadapters/sax.py | 1 - html5lib/treebuilders/__init__.py | 1 - html5lib/treebuilders/base.py | 5 ++--- html5lib/treebuilders/dom.py | 3 +-- html5lib/treebuilders/etree.py | 1 - html5lib/treebuilders/etree_lxml.py | 7 +++---- 
html5lib/treewalkers/__init__.py | 1 - html5lib/treewalkers/base.py | 3 +-- html5lib/treewalkers/dom.py | 1 - html5lib/treewalkers/etree.py | 1 - html5lib/treewalkers/etree_lxml.py | 7 +++---- html5lib/treewalkers/genshi.py | 1 - parse.py | 2 +- setup.py | 1 - toxver.py | 4 ---- 56 files changed, 39 insertions(+), 100 deletions(-) diff --git a/debug-info.py b/debug-info.py index b47b8ebf..7e1b6fd0 100644 --- a/debug-info.py +++ b/debug-info.py @@ -1,4 +1,3 @@ -from __future__ import print_function, unicode_literals import platform import sys diff --git a/doc/conf.py b/doc/conf.py index d5a1e863..66defcce 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # html5lib documentation build configuration file, created by # sphinx-quickstart on Wed May 8 00:04:49 2013. @@ -100,7 +99,7 @@ } -class CExtMock(object): +class CExtMock: """Required for autodoc on readthedocs.org where you cannot build C extensions.""" def __init__(self, *args, **kwargs): pass diff --git a/html5lib/__init__.py b/html5lib/__init__.py index 7b854f99..d2c68855 100644 --- a/html5lib/__init__.py +++ b/html5lib/__init__.py @@ -20,7 +20,6 @@ * :func:`~.serializer.serialize` """ -from __future__ import absolute_import, division, unicode_literals from .html5parser import HTMLParser, parse, parseFragment from .treebuilders import getTreeBuilder diff --git a/html5lib/_ihatexml.py b/html5lib/_ihatexml.py index d725eabd..f5b6e1f4 100644 --- a/html5lib/_ihatexml.py +++ b/html5lib/_ihatexml.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import re import warnings @@ -181,7 +180,7 @@ def escapeRegexp(string): nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") -class InfosetFilter(object): +class InfosetFilter: replacementRegexp = re.compile(r"U[\dA-F]{5,5}") def __init__(self, diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index a93b5a4e..54c5c498 100644 --- 
a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from six.moves import http_client, urllib @@ -48,7 +47,7 @@ charsUntilRegEx = {} -class BufferedStream(object): +class BufferedStream: """Buffering for streams that do not have buffering of their own The buffer is implemented as a list of chunks on the assumption that @@ -145,7 +144,7 @@ def HTMLInputStream(source, **kwargs): return HTMLBinaryInputStream(source, **kwargs) -class HTMLUnicodeInputStream(object): +class HTMLUnicodeInputStream: """Provides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing @@ -673,7 +672,7 @@ def jumpTo(self, bytes): return True -class EncodingParser(object): +class EncodingParser: """Mini parser for detecting character encoding from meta elements""" def __init__(self, data): @@ -861,7 +860,7 @@ def getAttribute(self): attrValue.append(c) -class ContentAttrParser(object): +class ContentAttrParser: def __init__(self, data): assert isinstance(data, bytes) self.data = data diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index 4748a197..782310ec 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import unichr as chr @@ -24,7 +23,7 @@ attributeMap = OrderedDict -class HTMLTokenizer(object): +class HTMLTokenizer: """ This class takes care of tokenizing HTML. 
* self.currentToken diff --git a/html5lib/_trie/__init__.py b/html5lib/_trie/__init__.py index 07bad5d3..df8912a0 100644 --- a/html5lib/_trie/__init__.py +++ b/html5lib/_trie/__init__.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from .py import Trie diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py index 6b71975f..fe2d02e5 100644 --- a/html5lib/_trie/_base.py +++ b/html5lib/_trie/_base.py @@ -1,9 +1,8 @@ -from __future__ import absolute_import, division, unicode_literals try: from collections.abc import Mapping except ImportError: # Python 2.7 - from collections import Mapping + from collections.abc import Mapping class Trie(Mapping): diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py index c2ba3da7..92f6f861 100644 --- a/html5lib/_trie/py.py +++ b/html5lib/_trie/py.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from bisect import bisect_left diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 7e23ee57..1c229d0f 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -1,11 +1,10 @@ -from __future__ import absolute_import, division, unicode_literals from types import ModuleType try: from collections.abc import Mapping except ImportError: - from collections import Mapping + from collections.abc import Mapping from six import text_type, PY3 @@ -13,7 +12,7 @@ import xml.etree.ElementTree as default_etree else: try: - import xml.etree.cElementTree as default_etree + import xml.etree.ElementTree as default_etree except ImportError: import xml.etree.ElementTree as default_etree @@ -122,7 +121,7 @@ def moduleFactoryFactory(factory): moduleCache = {} def moduleFactory(baseModule, *args, **kwargs): - if isinstance(ModuleType.__name__, type("")): + if isinstance(ModuleType.__name__, str): name = "_%s_factory" % baseModule.__name__ else: name = b"_%s_factory" % baseModule.__name__ diff --git a/html5lib/constants.py b/html5lib/constants.py 
index 2fa4146d..a4b1efa1 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import string diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py index 5ba926e3..c0be95b2 100644 --- a/html5lib/filters/alphabeticalattributes.py +++ b/html5lib/filters/alphabeticalattributes.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from . import base diff --git a/html5lib/filters/base.py b/html5lib/filters/base.py index c7dbaed0..6d6639e6 100644 --- a/html5lib/filters/base.py +++ b/html5lib/filters/base.py @@ -1,7 +1,6 @@ -from __future__ import absolute_import, division, unicode_literals -class Filter(object): +class Filter: def __init__(self, source): self.source = source diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py index aefb5c84..c8dc57b8 100644 --- a/html5lib/filters/inject_meta_charset.py +++ b/html5lib/filters/inject_meta_charset.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from . import base diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index acd4d7a2..cd7a6a43 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py index 4a865012..a44b2a00 100644 --- a/html5lib/filters/optionaltags.py +++ b/html5lib/filters/optionaltags.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from . import base diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index ea2c5dd3..2dc4583d 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -6,7 +6,6 @@ if Bleach is unsuitable for your needs. 
""" -from __future__ import absolute_import, division, unicode_literals import re import warnings diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py index 0d12584b..ab40ef5a 100644 --- a/html5lib/filters/whitespace.py +++ b/html5lib/filters/whitespace.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import re diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index b3c206d1..8ab005ba 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import viewkeys from . import _inputstream @@ -69,7 +68,7 @@ def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElemen return p.parseFragment(doc, container=container, **kwargs) -class HTMLParser(object): +class HTMLParser: """HTML parser Generates a tree structure from a stream of (possibly malformed) HTML. @@ -397,7 +396,7 @@ def parseRCDataRawtext(self, token, contentType): self.phase = self.phases["text"] -class Phase(object): +class Phase: """Base class for helper object that implements each phase of processing """ __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") diff --git a/html5lib/serializer.py b/html5lib/serializer.py index a171ac1c..34f1b7e3 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type import re @@ -101,7 +100,7 @@ def serialize(input, tree="etree", encoding=None, **serializer_opts): return s.render(walker(input), encoding) -class HTMLSerializer(object): +class HTMLSerializer: # attribute quoting options quote_attr_values = "legacy" # be secure by default diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py index b8ce2de3..e69de29b 100644 --- a/html5lib/tests/__init__.py +++ b/html5lib/tests/__init__.py @@ -1 +0,0 @@ -from __future__ import absolute_import, 
division, unicode_literals diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py index fffeb50c..de9b1572 100644 --- a/html5lib/tests/conftest.py +++ b/html5lib/tests/conftest.py @@ -1,4 +1,3 @@ -from __future__ import print_function import os.path import sys @@ -54,7 +53,7 @@ def pytest_configure(config): # Check for optional requirements req_file = os.path.join(_root, "requirements-optional.txt") if os.path.exists(req_file): - with open(req_file, "r") as fp: + with open(req_file) as fp: for line in fp: if (line.strip() and not (line.startswith("-r") or @@ -79,7 +78,7 @@ def pytest_configure(config): import xml.etree.ElementTree as ElementTree try: - import xml.etree.cElementTree as cElementTree + import xml.etree.ElementTree as cElementTree except ImportError: msgs.append("cElementTree unable to be imported") else: diff --git a/html5lib/tests/sanitizer.py b/html5lib/tests/sanitizer.py index 16e53868..93ad4f52 100644 --- a/html5lib/tests/sanitizer.py +++ b/html5lib/tests/sanitizer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import codecs import json diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 1bd0ccc1..3a6f37c2 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=wrong-import-position @@ -86,7 +85,7 @@ def __getitem__(self, key): return dict.get(self, key, self.default) -class TestData(object): +class TestData: def __init__(self, filename, newTestHeading="data", encoding="utf8"): if encoding is None: self.f = open(filename, mode="rb") diff --git a/html5lib/tests/test_alphabeticalattributes.py b/html5lib/tests/test_alphabeticalattributes.py index 7d5b8e0f..87beb8f1 100644 --- a/html5lib/tests/test_alphabeticalattributes.py +++ b/html5lib/tests/test_alphabeticalattributes.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from 
collections import OrderedDict diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 47c4814a..10b666da 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import os @@ -9,7 +8,7 @@ def test_basic_prescan_length(): - data = "Caf\u00E9".encode('utf-8') + data = "Caf\u00E9".encode() pad = 1024 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 1024 # Sanity @@ -18,7 +17,7 @@ def test_basic_prescan_length(): def test_parser_reparse(): - data = "Caf\u00E9".encode('utf-8') + data = "Caf\u00E9".encode() pad = 10240 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 10240 # Sanity diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py index e02268aa..aa7e35e2 100644 --- a/html5lib/tests/test_meta.py +++ b/html5lib/tests/test_meta.py @@ -1,10 +1,6 @@ -from __future__ import absolute_import, division, unicode_literals import six -try: - from unittest.mock import Mock -except ImportError: - from mock import Mock +from unittest.mock import Mock from . 
import support diff --git a/html5lib/tests/test_optionaltags_filter.py b/html5lib/tests/test_optionaltags_filter.py index cd282149..180a109e 100644 --- a/html5lib/tests/test_optionaltags_filter.py +++ b/html5lib/tests/test_optionaltags_filter.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from html5lib.filters.optionaltags import Filter diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 6b464bea..f30595b4 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import PY2, text_type diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index 499310b6..562ee7fa 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import warnings diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index a2be0be5..5c225790 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import os import json diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index efe9b472..7dce2b1d 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from . 
import support # noqa @@ -105,7 +104,7 @@ def test_char_ascii(): def test_char_utf8(): - stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8') + stream = HTMLInputStream('\u2018'.encode(), override_encoding='utf-8') assert stream.charEncoding[0].name == 'utf-8' assert stream.char() == '\u2018' @@ -186,7 +185,7 @@ def test_python_issue_20007(): Make sure we have a work-around for Python bug #20007 http://bugs.python.org/issue20007 """ - class FakeSocket(object): + class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") @@ -205,7 +204,7 @@ def test_python_issue_20007_b(): if six.PY2: return - class FakeSocket(object): + class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py index 158d847a..f8a74eee 100644 --- a/html5lib/tests/test_tokenizer2.py +++ b/html5lib/tests/test_tokenizer2.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import io diff --git a/html5lib/tests/test_treeadapters.py b/html5lib/tests/test_treeadapters.py index 95e56c00..3af383c3 100644 --- a/html5lib/tests/test_treeadapters.py +++ b/html5lib/tests/test_treeadapters.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from . 
import support # noqa diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 780ca964..89e20dab 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import itertools import sys @@ -74,11 +73,11 @@ def param_treewalker_six_mix(): # fragment but not using the u'' syntax nor importing unicode_literals sm_tests = [ ('Example', - [(str('class'), str('test123'))], + [('class', 'test123')], '\n class="test123"\n href="http://example.com"\n "Example"'), ('', - [(str('rel'), str('alternate'))], + [('rel', 'alternate')], '\n href="http://example.com/cow"\n rel="alternate"\n "Example"') ] diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py index e9da6140..d4e4e3be 100644 --- a/html5lib/tests/test_whitespace_filter.py +++ b/html5lib/tests/test_whitespace_filter.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from html5lib.filters.whitespace import Filter from html5lib.constants import spaceCharacters diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index b49d2e6e..9ba19b16 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import codecs import json @@ -12,7 +11,7 @@ from html5lib import constants, _utils -class TokenizerTestParser(object): +class TokenizerTestParser: def __init__(self, initialState, lastStartTag=None): self.tokenizer = HTMLTokenizer self._state = initialState diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py index 42463f32..6c0b4f77 100644 --- a/html5lib/tests/tokenizertotree.py +++ b/html5lib/tests/tokenizertotree.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import sys import os @@ -25,7 +24,7 @@ def main(out_path): def 
run_file(filename, out_path): try: - tests_data = json.load(open(filename, "r")) + tests_data = json.load(open(filename)) except ValueError: sys.stderr.write("Failed to load %s\n" % filename) return diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py index 363b48c2..e2381754 100644 --- a/html5lib/tests/tree_construction.py +++ b/html5lib/tests/tree_construction.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import itertools import re diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py index dfeb0ba5..1444fc9a 100644 --- a/html5lib/treeadapters/__init__.py +++ b/html5lib/treeadapters/__init__.py @@ -16,7 +16,6 @@ genshi_tree = genshi.to_genshi(TreeWalker(tree)) """ -from __future__ import absolute_import, division, unicode_literals from . import sax diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py index 61d5fb6a..b0b29ed3 100644 --- a/html5lib/treeadapters/genshi.py +++ b/html5lib/treeadapters/genshi.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from genshi.core import QName, Attrs from genshi.core import START, END, TEXT, COMMENT, DOCTYPE diff --git a/html5lib/treeadapters/sax.py b/html5lib/treeadapters/sax.py index f4ccea5a..ead1a5c4 100644 --- a/html5lib/treeadapters/sax.py +++ b/html5lib/treeadapters/sax.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from xml.sax.xmlreader import AttributesNSImpl diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py index d44447ea..90aad5fb 100644 --- a/html5lib/treebuilders/__init__.py +++ b/html5lib/treebuilders/__init__.py @@ -29,7 +29,6 @@ """ -from __future__ import absolute_import, division, unicode_literals from .._utils import default_etree diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index 020d7e15..125ed82c 100644 --- 
a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from ..constants import scopingElements, tableInsertModeElements, namespaces @@ -20,7 +19,7 @@ } -class Node(object): +class Node: """Represents an item in the tree""" def __init__(self, name): """Creates a Node @@ -144,7 +143,7 @@ def nodesEqual(self, node1, node2): return True -class TreeBuilder(object): +class TreeBuilder: """Base treebuilder implementation * documentClass - the class to use for the bottommost node of a document diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py index d8b53004..09b217c4 100644 --- a/html5lib/treebuilders/dom.py +++ b/html5lib/treebuilders/dom.py @@ -1,10 +1,9 @@ -from __future__ import absolute_import, division, unicode_literals try: from collections.abc import MutableMapping except ImportError: # Python 2.7 - from collections import MutableMapping + from collections.abc import MutableMapping from xml.dom import minidom, Node import weakref diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 0b745081..bd20b957 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access from six import text_type diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index e73de61a..bc2d779e 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -9,7 +9,6 @@ When any of these things occur, we emit a DataLossWarning """ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access import warnings @@ -19,7 +18,7 @@ try: from collections.abc import MutableMapping except ImportError: - from collections import MutableMapping + from collections.abc import MutableMapping from . 
import base from ..constants import DataLossWarning @@ -37,14 +36,14 @@ comment_type = etree.Comment("asd").tag -class DocumentType(object): +class DocumentType: def __init__(self, name, publicId, systemId): self.name = name self.publicId = publicId self.systemId = systemId -class Document(object): +class Document: def __init__(self): self._elementTree = None self._childNodes = [] diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py index b2d3aac3..b78d6f46 100644 --- a/html5lib/treewalkers/__init__.py +++ b/html5lib/treewalkers/__init__.py @@ -8,7 +8,6 @@ returns an iterator which generates tokens. """ -from __future__ import absolute_import, division, unicode_literals from .. import constants from .._utils import default_etree diff --git a/html5lib/treewalkers/base.py b/html5lib/treewalkers/base.py index 80c474c4..7ee75d81 100644 --- a/html5lib/treewalkers/base.py +++ b/html5lib/treewalkers/base.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from xml.dom import Node from ..constants import namespaces, voidElements, spaceCharacters @@ -17,7 +16,7 @@ spaceCharacters = "".join(spaceCharacters) -class TreeWalker(object): +class TreeWalker: """Walks a tree yielding tokens Tokens are dicts that all have a ``type`` field specifying the type of the diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py index b0c89b00..85e12505 100644 --- a/html5lib/treewalkers/dom.py +++ b/html5lib/treewalkers/dom.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from xml.dom import Node diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 411a1d45..ef5e914c 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from collections import OrderedDict import re diff --git a/html5lib/treewalkers/etree_lxml.py 
b/html5lib/treewalkers/etree_lxml.py index a614ac5b..af6c260d 100644 --- a/html5lib/treewalkers/etree_lxml.py +++ b/html5lib/treewalkers/etree_lxml.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from collections import OrderedDict @@ -20,7 +19,7 @@ def ensure_str(s): return s.decode("ascii", "strict") -class Root(object): +class Root: def __init__(self, et): self.elementtree = et self.children = [] @@ -58,7 +57,7 @@ def __len__(self): return 1 -class Doctype(object): +class Doctype: def __init__(self, root_node, name, public_id, system_id): self.root_node = root_node self.name = name @@ -81,7 +80,7 @@ def getnext(self): return None -class FragmentWrapper(object): +class FragmentWrapper: def __init__(self, fragment_root, obj): self.root_node = fragment_root self.obj = obj diff --git a/html5lib/treewalkers/genshi.py b/html5lib/treewalkers/genshi.py index 7483be27..78f22fd3 100644 --- a/html5lib/treewalkers/genshi.py +++ b/html5lib/treewalkers/genshi.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from genshi.core import QName from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT diff --git a/parse.py b/parse.py index e6806b46..14bbe99a 100755 --- a/parse.py +++ b/parse.py @@ -42,7 +42,7 @@ def parse(): try: # Try opening from file system f = open(f, "rb") - except IOError as e: + except OSError as e: sys.stderr.write("Unable to open file: %s\n" % e) sys.exit(1) except IndexError: diff --git a/setup.py b/setup.py index 30ee0575..5f3dc186 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -from __future__ import print_function import ast import codecs diff --git a/toxver.py b/toxver.py index 68eb71ec..b082a345 100755 --- a/toxver.py +++ b/toxver.py @@ -20,10 +20,6 @@ """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals import sys From 
fb864a9e4ea393c3bd863de3f1c62275ce94f622 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 9 Sep 2024 10:18:02 +0200 Subject: [PATCH 2/3] manually remove fallback imports --- html5lib/_trie/_base.py | 5 +---- html5lib/_utils.py | 5 +---- html5lib/treebuilders/dom.py | 5 +---- html5lib/treebuilders/etree_lxml.py | 5 +---- 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py index fe2d02e5..63927ee4 100644 --- a/html5lib/_trie/_base.py +++ b/html5lib/_trie/_base.py @@ -1,8 +1,5 @@ -try: - from collections.abc import Mapping -except ImportError: # Python 2.7 - from collections.abc import Mapping +from collections.abc import Mapping class Trie(Mapping): diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 1c229d0f..2e74c07f 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -1,10 +1,7 @@ from types import ModuleType -try: - from collections.abc import Mapping -except ImportError: - from collections.abc import Mapping +from collections.abc import Mapping from six import text_type, PY3 diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py index 09b217c4..bc56c708 100644 --- a/html5lib/treebuilders/dom.py +++ b/html5lib/treebuilders/dom.py @@ -1,9 +1,6 @@ -try: - from collections.abc import MutableMapping -except ImportError: # Python 2.7 - from collections.abc import MutableMapping +from collections.abc import MutableMapping from xml.dom import minidom, Node import weakref diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index bc2d779e..3e88d76e 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -15,10 +15,7 @@ import re import sys -try: - from collections.abc import MutableMapping -except ImportError: - from collections.abc import MutableMapping +from collections.abc import MutableMapping from . 
import base from ..constants import DataLossWarning From 29d3072ee1f982df582b88b4e120453cbbefd37e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 9 Sep 2024 09:44:00 +0200 Subject: [PATCH 3/3] only support pythons that are not EOL (https://endoflife.date/python) Even debian oldstable has python 3.9. For internet-facing libraries it is not secure for contributors to install unsupported python versions in order to test them. Reducing the number of python versions will make maintenance and testing easier. --- .appveyor.yml | 29 ----------------------------- .github/workflows/python-tox.yml | 3 --- README.rst | 16 ++-------------- html5lib/html5parser.py | 4 ++-- setup.py | 6 +----- tox.ini | 2 +- toxver.py | 7 ------- 7 files changed, 6 insertions(+), 61 deletions(-) delete mode 100644 .appveyor.yml diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index e6f7bf48..00000000 --- a/.appveyor.yml +++ /dev/null @@ -1,29 +0,0 @@ -# appveyor.yml - https://www.appveyor.com/docs/lang/python -# https://www.appveyor.com/docs/windows-images-software/#visual-studio-2022 ---- -image: Visual Studio 2022 -environment: - matrix: - - PY_PYTHON: 2.7 - TOXENV: py27-base - - PY_PYTHON: 2.7 - TOXENV: py27-optional - - PY_PYTHON: 3.7 - TOXENV: py37-base - - PY_PYTHON: 3.7 - TOXENV: py37-optional - -install: - - git submodule update --init --recursive - - py --list - - py -VV - - py -m pip install --upgrade pip - - py -m pip install tox - -build: off - -test_script: - - py -m tox - -after_test: - - py debug-info.py diff --git a/.github/workflows/python-tox.yml b/.github/workflows/python-tox.yml index 5ed83175..0912abb3 100644 --- a/.github/workflows/python-tox.yml +++ b/.github/workflows/python-tox.yml @@ -12,9 +12,6 @@ jobs: os: [ubuntu-latest, windows-latest] deps: [base, optional] include: - - python: "pypy-2.7" - os: ubuntu-latest - deps: base - python:
"pypy-3.10" os: ubuntu-latest deps: base diff --git a/README.rst b/README.rst index 6a623a43..befc7aaa 100644 --- a/README.rst +++ b/README.rst @@ -29,7 +29,7 @@ or: By default, the ``document`` will be an ``xml.etree`` element instance. Whenever possible, html5lib chooses the accelerated ``ElementTree`` -implementation (i.e. ``xml.etree.cElementTree`` on Python 2.x). +implementation. Two other tree types are supported: ``xml.dom.minidom`` and ``lxml.etree``. To use an alternative format, specify the name of @@ -41,18 +41,6 @@ a treebuilder: with open("mydocument.html", "rb") as f: lxml_etree_document = html5lib.parse(f, treebuilder="lxml") -When using with ``urllib2`` (Python 2), the charset from HTTP should be -pass into html5lib as follows: - -.. code-block:: python - - from contextlib import closing - from urllib2 import urlopen - import html5lib - - with closing(urlopen("http://example.com/")) as f: - document = html5lib.parse(f, transport_encoding=f.info().getparam("charset")) - When using with ``urllib.request`` (Python 3), the charset from HTTP should be pass into html5lib as follows: @@ -90,7 +78,7 @@ More documentation is available at https://html5lib.readthedocs.io/. Installation ------------ -html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install: +html5lib works on CPython 3.8+ and PyPy. To install: .. 
code-block:: bash diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 8ab005ba..3fe78b6b 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -427,7 +427,7 @@ def processSpaceCharacters(self, token): def processStartTag(self, token): # Note the caching is done here rather than BoundMethodDispatcher as doing it there # requires a circular reference to the Phase, and this ends up with a significant - # (CPython 2.7, 3.8) GC cost when parsing many short inputs + # (CPython 3.8) GC cost when parsing many short inputs name = token["name"] # In Py2, using `in` is quicker in general than try/except KeyError # In Py3, `in` is quicker when there are few cache hits (typically short inputs) @@ -454,7 +454,7 @@ def startTagHtml(self, token): def processEndTag(self, token): # Note the caching is done here rather than BoundMethodDispatcher as doing it there # requires a circular reference to the Phase, and this ends up with a significant - # (CPython 2.7, 3.8) GC cost when parsing many short inputs + # (CPython 3.8) GC cost when parsing many short inputs name = token["name"] # In Py2, using `in` is quicker in general than try/except KeyError # In Py3, `in` is quicker when there are few cache hits (typically short inputs) diff --git a/setup.py b/setup.py index 5f3dc186..afab2904 100644 --- a/setup.py +++ b/setup.py @@ -63,11 +63,7 @@ def default_environment(): 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', @@ -109,7 +105,7 @@ def default_environment(): 'six>=1.9', 'webencodings>=0.5.1', ], - python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, 
!=3.4.*", + python_requires=">=3.8", extras_require={ # A conditional extra will only install these items when the extra is # requested and the condition matches. diff --git a/tox.ini b/tox.ini index fb228e96..94a78542 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{27,35,36,37,38,39,310,311,py,py3}-{base,optional,oldest} +envlist = py{38,39,310,311,py,py3}-{base,optional,oldest} [testenv] deps = diff --git a/toxver.py b/toxver.py index b082a345..950dc083 100755 --- a/toxver.py +++ b/toxver.py @@ -12,9 +12,6 @@ $ toxver.py pypy-3.8 base TOXENV=pypy3-base - $ toxver.py 2.7 oldest - TOXENV=py27-oldest - $ toxver.py ~3.12.0-0 optional TOXENV=py312-optional @@ -31,10 +28,6 @@ def main(argv): deps = argv[2] - if argv[1].startswith("pypy-2"): - print("TOXENV=pypy-" + deps) - return 0 - if argv[1].startswith("pypy-3"): print("TOXENV=pypy3-" + deps) return 0