diff --git a/.appveyor.yml b/.appveyor.yml deleted file mode 100644 index e6f7bf48..00000000 --- a/.appveyor.yml +++ /dev/null @@ -1,29 +0,0 @@ -# appveyor.yml - https://www.appveyor.com/docs/lang/python -# https://www.appveyor.com/docs/windows-images-software/#visual-studio-2022 ---- -image: Visual Studio 2022 -environment: - matrix: - - PY_PYTHON: 2.7 - TOXENV: py27-base - - PY_PYTHON: 2.7 - TOXENV: py27-optional - - PY_PYTHON: 3.7 - TOXENV: py37-base - - PY_PYTHON: 3.7 - TOXENV: py37-optional - -install: - - git submodule update --init --recursive - - py --list - - py -VV - - py -m pip install --upgrade pip - - py -m pip install tox - -build: off - -test_script: - - py -m tox - -after_test: - - py debug-info.py diff --git a/.github/workflows/python-tox.yml b/.github/workflows/python-tox.yml index 5ed83175..0912abb3 100644 --- a/.github/workflows/python-tox.yml +++ b/.github/workflows/python-tox.yml @@ -12,9 +12,6 @@ jobs: os: [ubuntu-latest, windows-latest] deps: [base, optional] include: - - python: "pypy-2.7" - os: ubuntu-latest - deps: base - python: "pypy-3.10" os: ubuntu-latest deps: base diff --git a/README.rst b/README.rst index 6a623a43..befc7aaa 100644 --- a/README.rst +++ b/README.rst @@ -29,7 +29,7 @@ or: By default, the ``document`` will be an ``xml.etree`` element instance. Whenever possible, html5lib chooses the accelerated ``ElementTree`` -implementation (i.e. ``xml.etree.cElementTree`` on Python 2.x). +implementation. Two other tree types are supported: ``xml.dom.minidom`` and ``lxml.etree``. To use an alternative format, specify the name of @@ -41,18 +41,6 @@ a treebuilder: with open("mydocument.html", "rb") as f: lxml_etree_document = html5lib.parse(f, treebuilder="lxml") -When using with ``urllib2`` (Python 2), the charset from HTTP should be -pass into html5lib as follows: - -.. code-block:: python - - from contextlib import closing - from urllib2 import urlopen - import html5lib - - with closing(urlopen("http://example.com/")) as f: - document = html5lib.parse(f, transport_encoding=f.info().getparam("charset")) - When using with ``urllib.request`` (Python 3), the charset from HTTP should be pass into html5lib as follows: @@ -90,7 +78,7 @@ More documentation is available at https://html5lib.readthedocs.io/. Installation ------------ -html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install: +html5lib works on CPython 3.8+ and PyPy. To install: .. code-block:: bash diff --git a/debug-info.py b/debug-info.py index b47b8ebf..7e1b6fd0 100644 --- a/debug-info.py +++ b/debug-info.py @@ -1,4 +1,3 @@ -from __future__ import print_function, unicode_literals import platform import sys diff --git a/doc/conf.py b/doc/conf.py index d5a1e863..66defcce 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # html5lib documentation build configuration file, created by # sphinx-quickstart on Wed May 8 00:04:49 2013. @@ -100,7 +99,7 @@ } -class CExtMock(object): +class CExtMock: """Required for autodoc on readthedocs.org where you cannot build C extensions.""" def __init__(self, *args, **kwargs): pass diff --git a/html5lib/__init__.py b/html5lib/__init__.py index 7b854f99..d2c68855 100644 --- a/html5lib/__init__.py +++ b/html5lib/__init__.py @@ -20,7 +20,6 @@ * :func:`~.serializer.serialize` """ -from __future__ import absolute_import, division, unicode_literals from .html5parser import HTMLParser, parse, parseFragment from .treebuilders import getTreeBuilder diff --git a/html5lib/_ihatexml.py b/html5lib/_ihatexml.py index d725eabd..f5b6e1f4 100644 --- a/html5lib/_ihatexml.py +++ b/html5lib/_ihatexml.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import re import warnings @@ -181,7 +180,7 @@ def escapeRegexp(string): nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") -class InfosetFilter(object): +class InfosetFilter: replacementRegexp = re.compile(r"U[\dA-F]{5,5}") def __init__(self, diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index a93b5a4e..54c5c498 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from six.moves import http_client, urllib @@ -48,7 +47,7 @@ charsUntilRegEx = {} -class BufferedStream(object): +class BufferedStream: """Buffering for streams that do not have buffering of their own The buffer is implemented as a list of chunks on the assumption that @@ -145,7 +144,7 @@ def HTMLInputStream(source, **kwargs): return HTMLBinaryInputStream(source, **kwargs) -class HTMLUnicodeInputStream(object): +class HTMLUnicodeInputStream: """Provides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing @@ -673,7 +672,7 @@ def jumpTo(self, bytes): return True -class EncodingParser(object): +class EncodingParser: """Mini parser for detecting character encoding from meta elements""" def __init__(self, data): @@ -861,7 +860,7 @@ def getAttribute(self): attrValue.append(c) -class ContentAttrParser(object): +class ContentAttrParser: def __init__(self, data): assert isinstance(data, bytes) self.data = data diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index 4748a197..782310ec 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import unichr as chr @@ -24,7 +23,7 @@ attributeMap = OrderedDict -class HTMLTokenizer(object): +class HTMLTokenizer: """ This class takes care of tokenizing HTML. * self.currentToken diff --git a/html5lib/_trie/__init__.py b/html5lib/_trie/__init__.py index 07bad5d3..df8912a0 100644 --- a/html5lib/_trie/__init__.py +++ b/html5lib/_trie/__init__.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from .py import Trie diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py index 6b71975f..63927ee4 100644 --- a/html5lib/_trie/_base.py +++ b/html5lib/_trie/_base.py @@ -1,9 +1,5 @@ -from __future__ import absolute_import, division, unicode_literals -try: - from collections.abc import Mapping -except ImportError: # Python 2.7 - from collections import Mapping +from collections.abc import Mapping class Trie(Mapping): diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py index c2ba3da7..92f6f861 100644 --- a/html5lib/_trie/py.py +++ b/html5lib/_trie/py.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from bisect import bisect_left diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 7e23ee57..2e74c07f 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -1,11 +1,7 @@ -from __future__ import absolute_import, division, unicode_literals from types import ModuleType -try: - from collections.abc import Mapping -except ImportError: - from collections import Mapping +from collections.abc import Mapping from six import text_type, PY3 @@ -13,7 +9,7 @@ import xml.etree.ElementTree as default_etree else: try: - import xml.etree.cElementTree as default_etree + import xml.etree.ElementTree as default_etree except ImportError: import xml.etree.ElementTree as default_etree @@ -122,7 +118,7 @@ def moduleFactoryFactory(factory): moduleCache = {} def moduleFactory(baseModule, *args, **kwargs): - if isinstance(ModuleType.__name__, type("")): + if isinstance(ModuleType.__name__, str): name = "_%s_factory" % baseModule.__name__ else: name = b"_%s_factory" % baseModule.__name__ diff --git a/html5lib/constants.py b/html5lib/constants.py index 2fa4146d..a4b1efa1 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import string diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py index 5ba926e3..c0be95b2 100644 --- a/html5lib/filters/alphabeticalattributes.py +++ b/html5lib/filters/alphabeticalattributes.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from . import base diff --git a/html5lib/filters/base.py b/html5lib/filters/base.py index c7dbaed0..6d6639e6 100644 --- a/html5lib/filters/base.py +++ b/html5lib/filters/base.py @@ -1,7 +1,6 @@ -from __future__ import absolute_import, division, unicode_literals -class Filter(object): +class Filter: def __init__(self, source): self.source = source diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py index aefb5c84..c8dc57b8 100644 --- a/html5lib/filters/inject_meta_charset.py +++ b/html5lib/filters/inject_meta_charset.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from . import base diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index acd4d7a2..cd7a6a43 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py index 4a865012..a44b2a00 100644 --- a/html5lib/filters/optionaltags.py +++ b/html5lib/filters/optionaltags.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from . import base diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index ea2c5dd3..2dc4583d 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -6,7 +6,6 @@ if Bleach is unsuitable for your needs. """ -from __future__ import absolute_import, division, unicode_literals import re import warnings diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py index 0d12584b..ab40ef5a 100644 --- a/html5lib/filters/whitespace.py +++ b/html5lib/filters/whitespace.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import re diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index b3c206d1..3fe78b6b 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import viewkeys from . import _inputstream @@ -69,7 +68,7 @@ def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElemen return p.parseFragment(doc, container=container, **kwargs) -class HTMLParser(object): +class HTMLParser: """HTML parser Generates a tree structure from a stream of (possibly malformed) HTML. @@ -397,7 +396,7 @@ def parseRCDataRawtext(self, token, contentType): self.phase = self.phases["text"] -class Phase(object): +class Phase: """Base class for helper object that implements each phase of processing """ __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") @@ -428,7 +427,7 @@ def processSpaceCharacters(self, token): def processStartTag(self, token): # Note the caching is done here rather than BoundMethodDispatcher as doing it there # requires a circular reference to the Phase, and this ends up with a significant - # (CPython 2.7, 3.8) GC cost when parsing many short inputs + # (CPython 3.8) GC cost when parsing many short inputs name = token["name"] # In Py2, using `in` is quicker in general than try/except KeyError # In Py3, `in` is quicker when there are few cache hits (typically short inputs) @@ -455,7 +454,7 @@ def startTagHtml(self, token): def processEndTag(self, token): # Note the caching is done here rather than BoundMethodDispatcher as doing it there # requires a circular reference to the Phase, and this ends up with a significant - # (CPython 2.7, 3.8) GC cost when parsing many short inputs + # (CPython 3.8) GC cost when parsing many short inputs name = token["name"] # In Py2, using `in` is quicker in general than try/except KeyError # In Py3, `in` is quicker when there are few cache hits (typically short inputs) diff --git a/html5lib/serializer.py b/html5lib/serializer.py index a171ac1c..34f1b7e3 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type import re @@ -101,7 +100,7 @@ def serialize(input, tree="etree", encoding=None, **serializer_opts): return s.render(walker(input), encoding) -class HTMLSerializer(object): +class HTMLSerializer: # attribute quoting options quote_attr_values = "legacy" # be secure by default diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py index b8ce2de3..e69de29b 100644 --- a/html5lib/tests/__init__.py +++ b/html5lib/tests/__init__.py @@ -1 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py index fffeb50c..de9b1572 100644 --- a/html5lib/tests/conftest.py +++ b/html5lib/tests/conftest.py @@ -1,4 +1,3 @@ -from __future__ import print_function import os.path import sys @@ -54,7 +53,7 @@ def pytest_configure(config): # Check for optional requirements req_file = os.path.join(_root, "requirements-optional.txt") if os.path.exists(req_file): - with open(req_file, "r") as fp: + with open(req_file) as fp: for line in fp: if (line.strip() and not (line.startswith("-r") or @@ -79,7 +78,7 @@ def pytest_configure(config): import xml.etree.ElementTree as ElementTree try: - import xml.etree.cElementTree as cElementTree + import xml.etree.ElementTree as cElementTree except ImportError: msgs.append("cElementTree unable to be imported") else: diff --git a/html5lib/tests/sanitizer.py b/html5lib/tests/sanitizer.py index 16e53868..93ad4f52 100644 --- a/html5lib/tests/sanitizer.py +++ b/html5lib/tests/sanitizer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import codecs import json diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 1bd0ccc1..3a6f37c2 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=wrong-import-position @@ -86,7 +85,7 @@ def __getitem__(self, key): return dict.get(self, key, self.default) -class TestData(object): +class TestData: def __init__(self, filename, newTestHeading="data", encoding="utf8"): if encoding is None: self.f = open(filename, mode="rb") diff --git a/html5lib/tests/test_alphabeticalattributes.py b/html5lib/tests/test_alphabeticalattributes.py index 7d5b8e0f..87beb8f1 100644 --- a/html5lib/tests/test_alphabeticalattributes.py +++ b/html5lib/tests/test_alphabeticalattributes.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from collections import OrderedDict diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 47c4814a..10b666da 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import os @@ -9,7 +8,7 @@ def test_basic_prescan_length(): - data = "Caf\u00E9".encode('utf-8') + data = "Caf\u00E9".encode() pad = 1024 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 1024 # Sanity @@ -18,7 +17,7 @@ def test_basic_prescan_length(): def test_parser_reparse(): - data = "Caf\u00E9".encode('utf-8') + data = "Caf\u00E9".encode() pad = 10240 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 10240 # Sanity diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py index e02268aa..aa7e35e2 100644 --- a/html5lib/tests/test_meta.py +++ b/html5lib/tests/test_meta.py @@ -1,10 +1,6 @@ -from __future__ import absolute_import, division, unicode_literals import six -try: - from unittest.mock import Mock -except ImportError: - from mock import Mock +from unittest.mock import Mock from . import support diff --git a/html5lib/tests/test_optionaltags_filter.py b/html5lib/tests/test_optionaltags_filter.py index cd282149..180a109e 100644 --- a/html5lib/tests/test_optionaltags_filter.py +++ b/html5lib/tests/test_optionaltags_filter.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from html5lib.filters.optionaltags import Filter diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 6b464bea..f30595b4 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import PY2, text_type diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index 499310b6..562ee7fa 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import warnings diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index a2be0be5..5c225790 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import os import json diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index efe9b472..7dce2b1d 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from . import support # noqa @@ -105,7 +104,7 @@ def test_char_ascii(): def test_char_utf8(): - stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8') + stream = HTMLInputStream('\u2018'.encode(), override_encoding='utf-8') assert stream.charEncoding[0].name == 'utf-8' assert stream.char() == '\u2018' @@ -186,7 +185,7 @@ def test_python_issue_20007(): Make sure we have a work-around for Python bug #20007 http://bugs.python.org/issue20007 """ - class FakeSocket(object): + class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") @@ -205,7 +204,7 @@ def test_python_issue_20007_b(): if six.PY2: return - class FakeSocket(object): + class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py index 158d847a..f8a74eee 100644 --- a/html5lib/tests/test_tokenizer2.py +++ b/html5lib/tests/test_tokenizer2.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import io diff --git a/html5lib/tests/test_treeadapters.py b/html5lib/tests/test_treeadapters.py index 95e56c00..3af383c3 100644 --- a/html5lib/tests/test_treeadapters.py +++ b/html5lib/tests/test_treeadapters.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from . import support # noqa diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 780ca964..89e20dab 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import itertools import sys @@ -74,11 +73,11 @@ def param_treewalker_six_mix(): # fragment but not using the u'' syntax nor importing unicode_literals sm_tests = [ ('Example', - [(str('class'), str('test123'))], + [('class', 'test123')], '\n class="test123"\n href="http://example.com"\n "Example"'), ('', - [(str('rel'), str('alternate'))], + [('rel', 'alternate')], '\n href="http://example.com/cow"\n rel="alternate"\n "Example"') ] diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py index e9da6140..d4e4e3be 100644 --- a/html5lib/tests/test_whitespace_filter.py +++ b/html5lib/tests/test_whitespace_filter.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from html5lib.filters.whitespace import Filter from html5lib.constants import spaceCharacters diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index b49d2e6e..9ba19b16 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import codecs import json @@ -12,7 +11,7 @@ from html5lib import constants, _utils -class TokenizerTestParser(object): +class TokenizerTestParser: def __init__(self, initialState, lastStartTag=None): self.tokenizer = HTMLTokenizer self._state = initialState diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py index 42463f32..6c0b4f77 100644 --- a/html5lib/tests/tokenizertotree.py +++ b/html5lib/tests/tokenizertotree.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import sys import os @@ -25,7 +24,7 @@ def main(out_path): def run_file(filename, out_path): try: - tests_data = json.load(open(filename, "r")) + tests_data = json.load(open(filename)) except ValueError: sys.stderr.write("Failed to load %s\n" % filename) return diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py index 363b48c2..e2381754 100644 --- a/html5lib/tests/tree_construction.py +++ b/html5lib/tests/tree_construction.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals import itertools import re diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py index dfeb0ba5..1444fc9a 100644 --- a/html5lib/treeadapters/__init__.py +++ b/html5lib/treeadapters/__init__.py @@ -16,7 +16,6 @@ genshi_tree = genshi.to_genshi(TreeWalker(tree)) """ -from __future__ import absolute_import, division, unicode_literals from . import sax diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py index 61d5fb6a..b0b29ed3 100644 --- a/html5lib/treeadapters/genshi.py +++ b/html5lib/treeadapters/genshi.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from genshi.core import QName, Attrs from genshi.core import START, END, TEXT, COMMENT, DOCTYPE diff --git a/html5lib/treeadapters/sax.py b/html5lib/treeadapters/sax.py index f4ccea5a..ead1a5c4 100644 --- a/html5lib/treeadapters/sax.py +++ b/html5lib/treeadapters/sax.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from xml.sax.xmlreader import AttributesNSImpl diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py index d44447ea..90aad5fb 100644 --- a/html5lib/treebuilders/__init__.py +++ b/html5lib/treebuilders/__init__.py @@ -29,7 +29,6 @@ """ -from __future__ import absolute_import, division, unicode_literals from .._utils import default_etree diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index 020d7e15..125ed82c 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from ..constants import scopingElements, tableInsertModeElements, namespaces @@ -20,7 +19,7 @@ } -class Node(object): +class Node: """Represents an item in the tree""" def __init__(self, name): """Creates a Node @@ -144,7 +143,7 @@ def nodesEqual(self, node1, node2): return True -class TreeBuilder(object): +class TreeBuilder: """Base treebuilder implementation * documentClass - the class to use for the bottommost node of a document diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py index d8b53004..bc56c708 100644 --- a/html5lib/treebuilders/dom.py +++ b/html5lib/treebuilders/dom.py @@ -1,10 +1,6 @@ -from __future__ import absolute_import, division, unicode_literals -try: - from collections.abc import MutableMapping -except ImportError: # Python 2.7 - from collections import MutableMapping +from collections.abc import MutableMapping from xml.dom import minidom, Node import weakref diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 0b745081..bd20b957 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access from six import text_type diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index e73de61a..3e88d76e 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -9,17 +9,13 @@ When any of these things occur, we emit a DataLossWarning """ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access import warnings import re import sys -try: - from collections.abc import MutableMapping -except ImportError: - from collections import MutableMapping +from collections.abc import MutableMapping from . import base from ..constants import DataLossWarning @@ -37,14 +33,14 @@ comment_type = etree.Comment("asd").tag -class DocumentType(object): +class DocumentType: def __init__(self, name, publicId, systemId): self.name = name self.publicId = publicId self.systemId = systemId -class Document(object): +class Document: def __init__(self): self._elementTree = None self._childNodes = [] diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py index b2d3aac3..b78d6f46 100644 --- a/html5lib/treewalkers/__init__.py +++ b/html5lib/treewalkers/__init__.py @@ -8,7 +8,6 @@ returns an iterator which generates tokens. """ -from __future__ import absolute_import, division, unicode_literals from .. import constants from .._utils import default_etree diff --git a/html5lib/treewalkers/base.py b/html5lib/treewalkers/base.py index 80c474c4..7ee75d81 100644 --- a/html5lib/treewalkers/base.py +++ b/html5lib/treewalkers/base.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from xml.dom import Node from ..constants import namespaces, voidElements, spaceCharacters @@ -17,7 +16,7 @@ spaceCharacters = "".join(spaceCharacters) -class TreeWalker(object): +class TreeWalker: """Walks a tree yielding tokens Tokens are dicts that all have a ``type`` field specifying the type of the diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py index b0c89b00..85e12505 100644 --- a/html5lib/treewalkers/dom.py +++ b/html5lib/treewalkers/dom.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from xml.dom import Node diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 411a1d45..ef5e914c 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from collections import OrderedDict import re diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py index a614ac5b..af6c260d 100644 --- a/html5lib/treewalkers/etree_lxml.py +++ b/html5lib/treewalkers/etree_lxml.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from collections import OrderedDict @@ -20,7 +19,7 @@ def ensure_str(s): return s.decode("ascii", "strict") -class Root(object): +class Root: def __init__(self, et): self.elementtree = et self.children = [] @@ -58,7 +57,7 @@ def __len__(self): return 1 -class Doctype(object): +class Doctype: def __init__(self, root_node, name, public_id, system_id): self.root_node = root_node self.name = name @@ -81,7 +80,7 @@ def getnext(self): return None -class FragmentWrapper(object): +class FragmentWrapper: def __init__(self, fragment_root, obj): self.root_node = fragment_root self.obj = obj diff --git a/html5lib/treewalkers/genshi.py b/html5lib/treewalkers/genshi.py index 7483be27..78f22fd3 100644 --- a/html5lib/treewalkers/genshi.py +++ b/html5lib/treewalkers/genshi.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from genshi.core import QName from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT diff --git a/parse.py b/parse.py index e6806b46..14bbe99a 100755 --- a/parse.py +++ b/parse.py @@ -42,7 +42,7 @@ def parse(): try: # Try opening from file system f = open(f, "rb") - except IOError as e: + except OSError as e: sys.stderr.write("Unable to open file: %s\n" % e) sys.exit(1) except IndexError: diff --git a/setup.py b/setup.py index 30ee0575..afab2904 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ -from __future__ import print_function import ast import codecs @@ -64,11 +63,7 @@ def default_environment(): 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', @@ -110,7 +105,7 @@ def default_environment(): 'six>=1.9', 'webencodings>=0.5.1', ], - python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*", + python_requires=">=3.8", extras_require={ # A conditional extra will only install these items when the extra is # requested and the condition matches. diff --git a/tox.ini b/tox.ini index fb228e96..94a78542 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{27,35,36,37,38,39,310,311,py,py3}-{base,optional,oldest} +envlist = py{38,39,310,311,py,py3}-{base,optional,oldest} [testenv] deps = diff --git a/toxver.py b/toxver.py index 68eb71ec..950dc083 100755 --- a/toxver.py +++ b/toxver.py @@ -12,18 +12,11 @@ $ toxver.py pypy-3.8 base TOXENV=pypy3-base - $ toxver.py 2.7 oldest - TOXENV=py27-oldest - $ toxver.py ~3.12.0-0 optional TOXENV=py312-optional """ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals import sys @@ -35,10 +28,6 @@ def main(argv): deps = argv[2] - if argv[1].startswith("pypy-2"): - print("TOXENV=pypy-" + deps) - return 0 - if argv[1].startswith("pypy-3"): print("TOXENV=pypy3-" + deps) return 0