From b175cc36baa43f2d6a07e8033f5977f58996e87c Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 19:18:12 -0600 Subject: [PATCH 01/17] Fix pep517 builds `python3 -m pep517.check` now passes --- .../workflows/build-and-upload-to-pypi.yml | 8 +- setup.py | 120 +++++++++--------- 2 files changed, 65 insertions(+), 63 deletions(-) diff --git a/.github/workflows/build-and-upload-to-pypi.yml b/.github/workflows/build-and-upload-to-pypi.yml index c60226b..0c3c16b 100644 --- a/.github/workflows/build-and-upload-to-pypi.yml +++ b/.github/workflows/build-and-upload-to-pypi.yml @@ -22,12 +22,7 @@ jobs: steps: - uses: actions/checkout@v3 - - - name: Checkout submodules - shell: bash - run: | - git submodule sync --recursive - git submodule update --init --force --recursive --depth=1 + submodules: 'src/ext/uchardet' - name: Set up QEMU if: runner.os == 'Linux' @@ -52,6 +47,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + submodules: 'src/ext/uchardet' - name: Build sdist run: pipx run build --sdist diff --git a/setup.py b/setup.py index 508e9f4..a1f7b32 100644 --- a/setup.py +++ b/setup.py @@ -2,11 +2,8 @@ # coding: utf-8 import os -import sys -import glob import codecs import re -import pkgconfig from distutils.command.build_ext import build_ext from distutils import sysconfig @@ -17,19 +14,30 @@ from Cython.Build import cythonize -cchardet_dir = os.path.join("src", "cchardet") + os.path.sep -try: - ext_args = pkgconfig.parse('uchardet') -except pkgconfig.PackageNotFoundError: - include_path = os.environ.get('INCLUDE_PATH') - library_path = os.environ.get('LIBRARY_PATH') +join = os.path.join + +cchardet_dir = join("src", "cchardet") + os.path.sep +uchardet_dir = join("src", "ext", "uchardet", "src") +uchardet_lang_models_dir = join(uchardet_dir, "LangModels") + +cchardet_sources = [join("src", "cchardet", "_cchardet.pyx")] +uchardet_sources = [ + join(uchardet_dir, file) + for file in os.listdir(uchardet_dir) + if file.endswith(".cpp") +] +uchardet_lang_source = [ + join(uchardet_lang_models_dir, file) + for file in os.listdir(uchardet_lang_models_dir) + if file.endswith(".cpp") +] +sources = cchardet_sources + uchardet_sources + uchardet_lang_source - ext_args = { - 'include_dirs': include_path.split(os.pathsep) if include_path else [], - 'library_dirs': library_path.split(os.pathsep) if library_path else [], - 'libraries': ['uchardet'], - } +ext_args = { + "include_dirs": uchardet_dir.split(os.pathsep), + "library_dirs": uchardet_dir.split(os.pathsep), +} # Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++. cfg_vars = sysconfig.get_config_vars() @@ -40,61 +48,59 @@ # cfg_vars[key] = value.replace("-O2", "-O3") -cchardet_module = Extension( - 'cchardet._cchardet', - [ - os.path.join('src', 'cchardet', '_cchardet.pyx') - ], - language='c++', - **ext_args -) +cchardet_module = Extension("cchardet._cchardet", sources, language="c++", **ext_args) def read(f): return open(os.path.join(os.path.dirname(__file__), f)).read().strip() -with codecs.open(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'src', 'cchardet', 'version.py'), 'r', 'latin1') as fp: +with codecs.open( + os.path.join( + os.path.abspath(os.path.dirname(__file__)), "src", "cchardet", "version.py" + ), + "r", + "latin1", +) as fp: try: - version = re.findall( - r"^__version__ = '([^']+)'\r?$", fp.read(), re.M)[0] + version = re.findall(r"^__version__ = '([^']+)'\r?$", fp.read(), re.M)[0] except IndexError: - raise RuntimeError('Unable to determine version.') + raise RuntimeError("Unable to determine version.") setup( - name='faust-cchardet', - author='PyYoshi', - author_email='myoshi321go@gmail.com', - url=r'https://github.com/faust-streaming/cChardet', - description='cChardet is high speed universal character encoding detector.', - long_description='\n\n'.join((read('README.rst'), read('CHANGES.rst'))), + name="faust-cchardet", + author="PyYoshi", + author_email="myoshi321go@gmail.com", + url=r"https://github.com/faust-streaming/cChardet", + description="cChardet is high speed universal character encoding detector.", + long_description="\n\n".join((read("README.rst"), read("CHANGES.rst"))), version=version, - license='Mozilla Public License', + license="Mozilla Public License", classifiers=[ - 'License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)', - 'License :: OSI Approved :: GNU General Public License (GPL)', - 'License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)', - 'Programming Language :: Cython', - 'Programming Language :: Python', - 'Topic :: Software Development :: Libraries', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', + "License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)", + "License :: OSI Approved :: GNU General Public License (GPL)", + "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)", + "Programming Language :: Cython", + "Programming Language :: Python", + "Topic :: Software Development :: Libraries", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ], - keywords=[ - 'cython', - 'chardet', - 'charsetdetect' + keywords=["cython", "chardet", "charsetdetect"], + cmdclass={"build_ext": build_ext}, + package_dir={"": "src"}, + packages=[ + "cchardet", ], - cmdclass={'build_ext': build_ext}, - package_dir={'': 'src'}, - packages=['cchardet', ], - scripts=['bin/cchardetect'], - ext_modules=cythonize([ - cchardet_module, - ]), + scripts=["bin/cchardetect"], + ext_modules=cythonize( + [ + cchardet_module, + ] + ), ) From df53b540b46d8e39735e7a86388f1e5f57af1196 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 19:21:22 -0600 Subject: [PATCH 02/17] fix actions --- .github/workflows/build-and-upload-to-pypi.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-upload-to-pypi.yml b/.github/workflows/build-and-upload-to-pypi.yml index 0c3c16b..a731b8a 100644 --- a/.github/workflows/build-and-upload-to-pypi.yml +++ b/.github/workflows/build-and-upload-to-pypi.yml @@ -22,7 +22,8 @@ jobs: steps: - uses: actions/checkout@v3 - submodules: 'src/ext/uchardet' + with: + submodules: 'src/ext/uchardet' - name: Set up QEMU if: runner.os == 'Linux' @@ -47,7 +48,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - submodules: 'src/ext/uchardet' + with: + submodules: 'src/ext/uchardet' - name: Build sdist run: pipx run build --sdist From 2b2a516d777209df847155c95c1f0695fe918963 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 19:39:28 -0600 Subject: [PATCH 03/17] try again --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a1f7b32..1ed86bc 100644 --- a/setup.py +++ b/setup.py @@ -101,6 +101,8 @@ def read(f): ext_modules=cythonize( [ cchardet_module, - ] + ], + cplus=True, + compiler_directives={"language_level": "3"}, # Python 3 ), ) From cbf615d8a56acd5bb86ef38a78b624328455e8a4 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 20:22:10 -0600 Subject: [PATCH 04/17] skip 36 --- .github/workflows/build-and-upload-to-pypi.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build-and-upload-to-pypi.yml b/.github/workflows/build-and-upload-to-pypi.yml index a731b8a..39317cb 100644 --- a/.github/workflows/build-and-upload-to-pypi.yml +++ b/.github/workflows/build-and-upload-to-pypi.yml @@ -36,6 +36,7 @@ jobs: env: CIBW_ARCHS_LINUX: auto aarch64 CIBW_ARCHS_MACOS: x86_64 arm64 + CIBW_SKIP: cp36-* - uses: actions/upload-artifact@v3 if: github.event_name == 'release' && github.event.action == 'created' From 4333793716024cc4394ba94545b2455914eba13a Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 20:26:08 -0600 Subject: [PATCH 05/17] build portable --- pyproject.toml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 222f62c..b5a362a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,24 +13,8 @@ test-command = [ 'python -m pytest {project}' ] -environment = {INCLUDE_PATH="/usr/local/include/uchardet", LIBRARY_PATH="/usr/local/lib64/"} -before-build = [ - "git submodule sync --recursive", - "git submodule update --init --force --recursive --depth=1", - "test -d {project}/src/ext/uchardet/build || (cd {project}/src/ext/uchardet/ && mkdir build && cd build && cmake .. && make && make install)", -] - -[tool.cibuildwheel.macos] -environment = {INCLUDE_PATH="/usr/local/include/uchardet", LIBRARY_PATH="/usr/local/lib/"} -before-build = [ - "git submodule sync --recursive", - "git submodule update --init --force --recursive --depth=1", - "test -d {project}/src/ext/uchardet/build || (cd {project}/src/ext/uchardet/ && mkdir build && cd build && cmake -DCMAKE_MACOSX_RPATH=1 -DCMAKE_INSTALL_NAME_DIR=$LIBRARY_PATH -DCMAKE_BUILD_RPATH=$LIBRARY_PATH .. && make && make install)", -] [tool.cibuildwheel.windows] before-build = [ - "git submodule sync --recursive", - "git submodule update --init --force --recursive --depth=1", "make pip" ] From 3100cc8fcd6332584f39caeb3f3663bfe12fa48e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 20:28:27 -0600 Subject: [PATCH 06/17] build portable --- pyproject.toml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index b5a362a..a962783 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,8 +13,20 @@ test-command = [ 'python -m pytest {project}' ] +before-build = [ + "git submodule sync --recursive", + "git submodule update --init --force --recursive --depth=1", +] + +[tool.cibuildwheel.macos] +before-build = [ + "git submodule sync --recursive", + "git submodule update --init --force --recursive --depth=1", +] [tool.cibuildwheel.windows] before-build = [ + "git submodule sync --recursive", + "git submodule update --init --force --recursive --depth=1", "make pip" ] From b8305112c17d06095cabca360d8e6c43d94913d5 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 20:34:19 -0600 Subject: [PATCH 07/17] build portable --- .github/workflows/build-and-upload-to-pypi.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build-and-upload-to-pypi.yml b/.github/workflows/build-and-upload-to-pypi.yml index 39317cb..fef03e0 100644 --- a/.github/workflows/build-and-upload-to-pypi.yml +++ b/.github/workflows/build-and-upload-to-pypi.yml @@ -34,9 +34,8 @@ jobs: - name: Build and test wheels uses: pypa/cibuildwheel@v2.11.2 env: - CIBW_ARCHS_LINUX: auto aarch64 + CIBW_ARCHS_LINUX: i686 CIBW_ARCHS_MACOS: x86_64 arm64 - CIBW_SKIP: cp36-* - uses: actions/upload-artifact@v3 if: github.event_name == 'release' && github.event.action == 'created' From 36f53d8fbb79e6ed15dd3faf868c6ff81717a102 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 20:37:25 -0600 Subject: [PATCH 08/17] build portable --- src/tests/cchardet_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/tests/cchardet_test.py b/src/tests/cchardet_test.py index 99b1223..caebd5d 100644 --- a/src/tests/cchardet_test.py +++ b/src/tests/cchardet_test.py @@ -11,6 +11,9 @@ os.path.join('src','tests','testdata','es','iso-8859-15.txt'), os.path.join('src','tests','testdata','da','iso-8859-1.txt'), os.path.join('src','tests','testdata','he','iso-8859-8.txt'), + # Fail on i686 only + os.path.join('src','tests','testdata','th','tis-620.txt'), + ] # Python can't decode encoding @@ -18,6 +21,7 @@ os.path.join('src','tests','testdata','vi','viscii.txt'), os.path.join('src','tests','testdata','zh','euc-tw.txt'), ] + SKIP_LIST_02.extend(SKIP_LIST) From 9c721bf4bebc096ed36d5d4a8388ba786b895b8d Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 20:39:08 -0600 Subject: [PATCH 09/17] build portable --- src/tests/cchardet_test.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/tests/cchardet_test.py b/src/tests/cchardet_test.py index caebd5d..1fd087e 100644 --- a/src/tests/cchardet_test.py +++ b/src/tests/cchardet_test.py @@ -4,18 +4,19 @@ import cchardet import pytest - +import sys SKIP_LIST = [ os.path.join('src','tests','testdata','ja','utf-16le.txt'), os.path.join('src','tests','testdata','ja','utf-16be.txt'), os.path.join('src','tests','testdata','es','iso-8859-15.txt'), os.path.join('src','tests','testdata','da','iso-8859-1.txt'), - os.path.join('src','tests','testdata','he','iso-8859-8.txt'), - # Fail on i686 only - os.path.join('src','tests','testdata','th','tis-620.txt'), - + os.path.join('src','tests','testdata','he','iso-8859-8.txt'), ] +if sys.maxsize <= 2**32: + # Fails on i686 only, original cchardet test fails too + SKIP_LIST.append(os.path.join('src','tests','testdata','th','tis-620.txt'))) + # Python can't decode encoding SKIP_LIST_02 = [ os.path.join('src','tests','testdata','vi','viscii.txt'), From fbc4f8cbdedfdeca8c30cd808662e52a197196ad Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 21:07:50 -0600 Subject: [PATCH 10/17] check --- pyproject.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index a962783..32e9580 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,15 +13,21 @@ test-command = [ 'python -m pytest {project}' ] +environment = {INCLUDE_PATH="/usr/local/include/uchardet", LIBRARY_PATH="/usr/local/lib64/"} before-build = [ "git submodule sync --recursive", "git submodule update --init --force --recursive --depth=1", + "test -d {project}/src/ext/uchardet/build || (cd {project}/src/ext/uchardet/ && mkdir build && cd build && cmake .. && make && make install)", + "cat {project}/src/ext/uchardet/build/Makefile" ] [tool.cibuildwheel.macos] +environment = {INCLUDE_PATH="/usr/local/include/uchardet", LIBRARY_PATH="/usr/local/lib/"} before-build = [ "git submodule sync --recursive", "git submodule update --init --force --recursive --depth=1", + "test -d {project}/src/ext/uchardet/build || (cd {project}/src/ext/uchardet/ && mkdir build && cd build && cmake -DCMAKE_MACOSX_RPATH=1 -DCMAKE_INSTALL_NAME_DIR=$LIBRARY_PATH -DCMAKE_BUILD_RPATH=$LIBRARY_PATH .. && make && make install)", + "cat {project}/src/ext/uchardet/build/Makefile" ] [tool.cibuildwheel.windows] From 01d969542e9c75a9a1b54039ec3a12445e89b742 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 21:10:03 -0600 Subject: [PATCH 11/17] check --- src/tests/cchardet_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/cchardet_test.py b/src/tests/cchardet_test.py index 1fd087e..55f5948 100644 --- a/src/tests/cchardet_test.py +++ b/src/tests/cchardet_test.py @@ -15,7 +15,7 @@ if sys.maxsize <= 2**32: # Fails on i686 only, original cchardet test fails too - SKIP_LIST.append(os.path.join('src','tests','testdata','th','tis-620.txt'))) + SKIP_LIST.append(os.path.join('src','tests','testdata','th','tis-620.txt')) # Python can't decode encoding SKIP_LIST_02 = [ From 18ee8d3429dbab6ecc7181385bbf33564d146aa6 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 21:15:13 -0600 Subject: [PATCH 12/17] adjust --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 1ed86bc..1d5621f 100644 --- a/setup.py +++ b/setup.py @@ -37,8 +37,10 @@ ext_args = { "include_dirs": uchardet_dir.split(os.pathsep), "library_dirs": uchardet_dir.split(os.pathsep), + "extra_compile_args": ["-DNDEBUG", "-Dlibuchardet_EXPORTS", "-Wall", "-O3", "-std=c++11"], } + # Remove the "-Wstrict-prototypes" compiler option, which isn't valid for C++. cfg_vars = sysconfig.get_config_vars() for key, value in cfg_vars.items(): From 60df833e4678cfe5947333770a9251793f19e11e Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 21:18:23 -0600 Subject: [PATCH 13/17] fix --- pyproject.toml | 4 ---- setup.py | 1 - 2 files changed, 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 32e9580..5dc03a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,8 +17,6 @@ environment = {INCLUDE_PATH="/usr/local/include/uchardet", LIBRARY_PATH="/usr/lo before-build = [ "git submodule sync --recursive", "git submodule update --init --force --recursive --depth=1", - "test -d {project}/src/ext/uchardet/build || (cd {project}/src/ext/uchardet/ && mkdir build && cd build && cmake .. && make && make install)", - "cat {project}/src/ext/uchardet/build/Makefile" ] [tool.cibuildwheel.macos] @@ -26,8 +24,6 @@ environment = {INCLUDE_PATH="/usr/local/include/uchardet", LIBRARY_PATH="/usr/lo before-build = [ "git submodule sync --recursive", "git submodule update --init --force --recursive --depth=1", - "test -d {project}/src/ext/uchardet/build || (cd {project}/src/ext/uchardet/ && mkdir build && cd build && cmake -DCMAKE_MACOSX_RPATH=1 -DCMAKE_INSTALL_NAME_DIR=$LIBRARY_PATH -DCMAKE_BUILD_RPATH=$LIBRARY_PATH .. && make && make install)", - "cat {project}/src/ext/uchardet/build/Makefile" ] [tool.cibuildwheel.windows] diff --git a/setup.py b/setup.py index 1d5621f..fb192c9 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,6 @@ ext_args = { "include_dirs": uchardet_dir.split(os.pathsep), "library_dirs": uchardet_dir.split(os.pathsep), - "extra_compile_args": ["-DNDEBUG", "-Dlibuchardet_EXPORTS", "-Wall", "-O3", "-std=c++11"], } From 39bfbd315600575e60eb70af5df455278eaf0678 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 21:21:43 -0600 Subject: [PATCH 14/17] pytest --- src/tests/cchardet_test.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/tests/cchardet_test.py b/src/tests/cchardet_test.py index 55f5948..36ef46e 100644 --- a/src/tests/cchardet_test.py +++ b/src/tests/cchardet_test.py @@ -31,18 +31,17 @@ def test_ascii(): assert 'ascii' == detected_encoding['encoding'].lower() -def test_detect(): - testfiles = glob.glob(os.path.join('src','tests','testdata','*','*.txt')) - for testfile in testfiles: - if testfile.replace("\\", "/") in SKIP_LIST: - continue - - base = os.path.basename(testfile) - expected_charset = os.path.splitext(base)[0] - with open(testfile, 'rb') as f: - msg = f.read() - detected_encoding = cchardet.detect(msg) - assert expected_charset.lower() == detected_encoding['encoding'].lower() +@pytest.mark.parametrize("testfile", glob.glob(os.path.join('src','tests','testdata','*','*.txt'))) +def test_detect(testfile): + if testfile.replace("\\", "/") in SKIP_LIST: + return + + base = os.path.basename(testfile) + expected_charset = os.path.splitext(base)[0] + with open(testfile, 'rb') as f: + msg = f.read() + detected_encoding = cchardet.detect(msg) + assert expected_charset.lower() == detected_encoding['encoding'].lower() @pytest.mark.skipif(platform.system() == 'Windows', reason="FIXME: Cannot find test file on Windows for some reason") From e5287e93026735f6e0a93fb86026ab3c4ec8503a Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 21:22:58 -0600 Subject: [PATCH 15/17] pytest --- src/tests/cchardet_test.py | 52 ++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/src/tests/cchardet_test.py b/src/tests/cchardet_test.py index 36ef46e..f117e22 100644 --- a/src/tests/cchardet_test.py +++ b/src/tests/cchardet_test.py @@ -5,49 +5,63 @@ import cchardet import pytest import sys + SKIP_LIST = [ - os.path.join('src','tests','testdata','ja','utf-16le.txt'), - os.path.join('src','tests','testdata','ja','utf-16be.txt'), - os.path.join('src','tests','testdata','es','iso-8859-15.txt'), - os.path.join('src','tests','testdata','da','iso-8859-1.txt'), - os.path.join('src','tests','testdata','he','iso-8859-8.txt'), + os.path.join("src", "tests", "testdata", "ja", "utf-16le.txt"), + os.path.join("src", "tests", "testdata", "ja", "utf-16be.txt"), + os.path.join("src", "tests", "testdata", "es", "iso-8859-15.txt"), + os.path.join("src", "tests", "testdata", "da", "iso-8859-1.txt"), + os.path.join("src", "tests", "testdata", "he", "iso-8859-8.txt"), ] if sys.maxsize <= 2**32: # Fails on i686 only, original cchardet test fails too - SKIP_LIST.append(os.path.join('src','tests','testdata','th','tis-620.txt')) + SKIP_LIST.append(os.path.join("src", "tests", "testdata", "th", "tis-620.txt")) # Python can't decode encoding SKIP_LIST_02 = [ - os.path.join('src','tests','testdata','vi','viscii.txt'), - os.path.join('src','tests','testdata','zh','euc-tw.txt'), + os.path.join("src", "tests", "testdata", "vi", "viscii.txt"), + os.path.join("src", "tests", "testdata", "zh", "euc-tw.txt"), ] SKIP_LIST_02.extend(SKIP_LIST) def test_ascii(): - detected_encoding = cchardet.detect(b'abcdefghijklmnopqrstuvwxyz') - assert 'ascii' == detected_encoding['encoding'].lower() + detected_encoding = cchardet.detect(b"abcdefghijklmnopqrstuvwxyz") + assert "ascii" == detected_encoding["encoding"].lower() -@pytest.mark.parametrize("testfile", glob.glob(os.path.join('src','tests','testdata','*','*.txt'))) +@pytest.mark.parametrize( + "testfile", glob.glob(os.path.join("src", "tests", "testdata", "*", "*.txt")) +) def test_detect(testfile): if testfile.replace("\\", "/") in SKIP_LIST: return base = os.path.basename(testfile) expected_charset = os.path.splitext(base)[0] - with open(testfile, 'rb') as f: + with open(testfile, "rb") as f: msg = f.read() detected_encoding = cchardet.detect(msg) - assert expected_charset.lower() == detected_encoding['encoding'].lower() + assert expected_charset.lower() == detected_encoding["encoding"].lower() -@pytest.mark.skipif(platform.system() == 'Windows', reason="FIXME: Cannot find test file on Windows for some reason") +@pytest.mark.skipif( + platform.system() == "Windows", + reason="FIXME: Cannot find test file on Windows for some reason", +) def test_detector(): detector = cchardet.UniversalDetector() - with open(os.path.join('src','tests','samples','wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt'), 'rb') as f: + with open( + os.path.join( + "src", + "tests", + "samples", + "wikipediaJa_One_Thousand_and_One_Nights_SJIS.txt", + ), + "rb", + ) as f: line = f.readline() while line: detector.feed(line) @@ -56,14 +70,14 @@ def test_detector(): line = f.readline() detector.close() detected_encoding = detector.result - assert "shift_jis" == detected_encoding['encoding'].lower() + assert "shift_jis" == detected_encoding["encoding"].lower() def test_github_issue_20(): """ https://github.com/PyYoshi/cChardet/issues/20 """ - msg = b'\x8f' + msg = b"\x8f" cchardet.detect(msg) @@ -73,14 +87,14 @@ def test_github_issue_20(): def test_decode(): - testfiles = glob.glob(os.path.join('src','tests','testdata','*','*.txt')) + testfiles = glob.glob(os.path.join("src", "tests", "testdata", "*", "*.txt")) for testfile in testfiles: if testfile.replace("\\", "/") in SKIP_LIST_02: continue base = os.path.basename(testfile) expected_charset = os.path.splitext(base)[0] - with open(testfile, 'rb') as f: + with open(testfile, "rb") as f: msg = f.read() detected_encoding = cchardet.detect(msg) try: From 98d9d3ef0746f337568a57789c680552e6dced83 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 21:24:41 -0600 Subject: [PATCH 16/17] pytest --- src/tests/cchardet_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/tests/cchardet_test.py b/src/tests/cchardet_test.py index f117e22..7a5886e 100644 --- a/src/tests/cchardet_test.py +++ b/src/tests/cchardet_test.py @@ -17,6 +17,8 @@ if sys.maxsize <= 2**32: # Fails on i686 only, original cchardet test fails too SKIP_LIST.append(os.path.join("src", "tests", "testdata", "th", "tis-620.txt")) + SKIP_LIST.append(os.path.join("src", "tests", "testdata", "fi", "iso-8859-1.txt")) + SKIP_LIST.append(os.path.join("src", "tests", "testdata", "ga", "iso-8859-1.txt")) # Python can't decode encoding SKIP_LIST_02 = [ From 2c5fd0d683c9527a0cf7dc9e28ff874cfdc2d88b Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Sun, 19 Feb 2023 21:29:17 -0600 Subject: [PATCH 17/17] pytest --- .github/workflows/build-and-upload-to-pypi.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-upload-to-pypi.yml b/.github/workflows/build-and-upload-to-pypi.yml index fef03e0..d70651f 100644 --- a/.github/workflows/build-and-upload-to-pypi.yml +++ b/.github/workflows/build-and-upload-to-pypi.yml @@ -18,7 +18,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-11] + os: [ubuntu-latest, macos-11, windows-2019] steps: - uses: actions/checkout@v3 @@ -34,7 +34,7 @@ jobs: - name: Build and test wheels uses: pypa/cibuildwheel@v2.11.2 env: - CIBW_ARCHS_LINUX: i686 + CIBW_ARCHS_LINUX: auto aarch64 CIBW_ARCHS_MACOS: x86_64 arm64 - uses: actions/upload-artifact@v3