From b4793163a79344c34dd02b66f36d00b290a88d31 Mon Sep 17 00:00:00 2001 From: KOLANICH Date: Tue, 15 Dec 2020 13:03:17 +0300 Subject: [PATCH] Initial commit --- .ci/aptPackagesToInstall.txt | 0 .ci/beforeBuild.sh | 7 +++ .ci/pythonPackagesToInstallFromGit.txt | 2 + .editorconfig | 12 +++++ .github/.templateMarker | 1 + .github/dependabot.yml | 8 +++ .github/workflows/CI.yml | 15 ++++++ .gitignore | 15 ++++++ .gitlab-ci.yml | 51 +++++++++++++++++++ Code_Of_Conduct.md | 1 + MANIFEST.in | 4 ++ ReadMe.md | 8 +++ UNLICENSE | 24 +++++++++ glossary/__init__.py | 66 ++++++++++++++++++++++++ glossary/kaitai/__init__.py | 0 glossary/kaitai/glossary_index.py | 70 ++++++++++++++++++++++++++ pyproject.toml | 51 +++++++++++++++++++ tests/tests.py | 26 ++++++++++ 18 files changed, 361 insertions(+) create mode 100644 .ci/aptPackagesToInstall.txt create mode 100755 .ci/beforeBuild.sh create mode 100644 .ci/pythonPackagesToInstallFromGit.txt create mode 100644 .editorconfig create mode 100644 .github/.templateMarker create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/CI.yml create mode 100644 .gitignore create mode 100644 .gitlab-ci.yml create mode 100644 Code_Of_Conduct.md create mode 100644 MANIFEST.in create mode 100644 ReadMe.md create mode 100644 UNLICENSE create mode 100644 glossary/__init__.py create mode 100644 glossary/kaitai/__init__.py create mode 100644 glossary/kaitai/glossary_index.py create mode 100644 pyproject.toml create mode 100644 tests/tests.py diff --git a/.ci/aptPackagesToInstall.txt b/.ci/aptPackagesToInstall.txt new file mode 100644 index 0000000..e69de29 diff --git a/.ci/beforeBuild.sh b/.ci/beforeBuild.sh new file mode 100755 index 0000000..ae4baec --- /dev/null +++ b/.ci/beforeBuild.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +curl -L https://github.com/KOLANICH-libs/glossary.py/files/7643704/glossary.zip > glossary.zip; +unzip glossary.zip; +#sudo dpkg -i glossary_0.1.1_amd64.deb; +curl -L 
https://raw.githubusercontent.com/waltonseymour/glossary/master/MOCK_DATA.csv > tests/MOCK_DATA.csv; +bash -c "cd ./tests; glossary index ./MOCK_DATA.csv"; diff --git a/.ci/pythonPackagesToInstallFromGit.txt b/.ci/pythonPackagesToInstallFromGit.txt new file mode 100644 index 0000000..98d6fa3 --- /dev/null +++ b/.ci/pythonPackagesToInstallFromGit.txt @@ -0,0 +1,2 @@ +https://github.com/prebuilder/fsutilz.py.git +https://github.com/kaitai-io/kaitai_struct_python_runtime.git diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..c9162b9 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,12 @@ +root = true + +[*] +charset = utf-8 +indent_style = tab +indent_size = 4 +insert_final_newline = true +end_of_line = lf + +[*.{yml,yaml}] +indent_style = space +indent_size = 2 diff --git a/.github/.templateMarker b/.github/.templateMarker new file mode 100644 index 0000000..5e3a3e0 --- /dev/null +++ b/.github/.templateMarker @@ -0,0 +1 @@ +KOLANICH/python_project_boilerplate.py diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..89ff339 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,8 @@ +version: 2 +updates: + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + allow: + - dependency-type: "all" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 0000000..7fe33b3 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,15 @@ +name: CI +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + build: + runs-on: ubuntu-22.04 + steps: + - name: typical python workflow + uses: KOLANICH-GHActions/typical-python-workflow@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..26a159d --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ +/tests/.glossary +/tests/MOCK_DATA.csv +/kaitai_struct_formats + +__pycache__ +*.pyc +*.pyo +/*.egg-info +*.srctrlbm 
+*.srctrldb +build +dist +.eggs +monkeytype.sqlite3 +/.ipynb_checkpoints diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..def2847 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,51 @@ +image: registry.gitlab.com/kolanich-subgroups/docker-images/fixed_python:latest + +variables: + DOCKER_DRIVER: overlay2 + SAST_ANALYZER_IMAGE_TAG: latest + SAST_DISABLE_DIND: "true" + SAST_CONFIDENCE_LEVEL: 5 + CODECLIMATE_VERSION: latest + +include: + - template: SAST.gitlab-ci.yml + - template: Code-Quality.gitlab-ci.yml + - template: License-Management.gitlab-ci.yml + +build: + tags: + - shared + - linux + stage: build + variables: + GIT_DEPTH: "1" + PYTHONUSERBASE: ${CI_PROJECT_DIR}/python_user_packages + + before_script: + - export PATH="$PATH:$PYTHONUSERBASE/bin" # don't move into `variables` + - apt-get update + # todo: + #- apt-get -y install + #- pip3 install --upgrade + #- python3 ./fix_python_modules_paths.py + + script: + - python3 -m build -nw bdist_wheel + - mv ./dist/*.whl ./dist/glossary-0.CI-py3-none-any.whl + - pip3 install --upgrade ./dist/*.whl + - coverage run --source=glossary -m --branch pytest --junitxml=./rspec.xml ./tests/test.py + - coverage report -m + - coverage xml + + coverage: "/^TOTAL(?:\\s+\\d+){4}\\s+(\\d+%).+/" + + cache: + paths: + - $PYTHONUSERBASE + + artifacts: + paths: + - dist + reports: + junit: ./rspec.xml + cobertura: ./coverage.xml diff --git a/Code_Of_Conduct.md b/Code_Of_Conduct.md new file mode 100644 index 0000000..bcaa2bf --- /dev/null +++ b/Code_Of_Conduct.md @@ -0,0 +1 @@ +No codes of conduct! 
\ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..20f0fa8 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,4 @@ +include UNLICENSE +include *.md +include tests +include .editorconfig diff --git a/ReadMe.md b/ReadMe.md new file mode 100644 index 0000000..4d51928 --- /dev/null +++ b/ReadMe.md @@ -0,0 +1,8 @@ +glossary.py [![Unlicensed work](https://raw.githubusercontent.com/unlicense/unlicense.org/master/static/favicon.png)](https://unlicense.org/) +=========== +[wheel (GHA via `nightly.link`)](https://nightly.link/KOLANICH-libs/glossary.py/workflows/CI/master/glossary-0.CI-py3-none-any.whl) +[![GitHub Actions](https://github.com/KOLANICH-libs/glossary.py/workflows/CI/badge.svg)](https://github.com/KOLANICH-libs/glossary.py/actions/) +[![Libraries.io Status](https://img.shields.io/librariesio/github/KOLANICH-libs/glossary.py.svg)](https://libraries.io/github/KOLANICH-libs/glossary.py) +[![Code style: antiflash](https://img.shields.io/badge/code%20style-antiflash-FFF.svg)](https://github.com/KOLANICH-tools/antiflash.py) + +A pure Python implementation of a reader for the index files produced by the [`glossary`](https://github.com/waltonseymour/glossary) tool. diff --git a/UNLICENSE b/UNLICENSE new file mode 100644 index 0000000..efb9808 --- /dev/null +++ b/UNLICENSE @@ -0,0 +1,24 @@ +This is free and unencumbered software released into the public domain. + +Anyone is free to copy, modify, publish, use, compile, sell, or +distribute this software, either in source code form or as a compiled +binary, for any purpose, commercial or non-commercial, and by any +means. + +In jurisdictions that recognize copyright laws, the author or authors +of this software dedicate any and all copyright interest in the +software to the public domain. We make this dedication for the benefit +of the public at large and to the detriment of our heirs and +successors.
class Glossary:
	"""Context manager that iterates the indexed lines of a flat file.

	The index is looked up in ``indexDir`` (by default the ``.glossary``
	directory next to the data file) and consists of two files:

	* ``top_index.bin`` - parsed eagerly into a list of record descriptors;
	* ``index.bin`` - kept mapped while the context is active, because each
	  descriptor resolves its record from it lazily.

	The object is only usable between ``__enter__`` and ``__exit__``.
	"""

	__slots__ = ("path", "indexDir", "iF", "gIF", "map", "index")

	DEFAULT_SUBDIR = ".glossary"
	DEFAULT_METAINDEX_FILE_NAME = "top_index.bin"
	DEFAULT_INDEX_FILE_NAME = "index.bin"

	def __init__(self, path: Path, indexDir: typing.Optional[Path] = None) -> None:
		"""Remember the locations; nothing is opened until ``__enter__``.

		:param path: the indexed data file.
		:param indexDir: directory holding the index files; defaults to
			``path.parent / DEFAULT_SUBDIR``.
		"""
		self.path = path
		if indexDir is None:
			indexDir = path.parent / self.__class__.DEFAULT_SUBDIR
		self.indexDir = indexDir
		self.iF = None  # mapping of index.bin
		self.gIF = None  # mapping of top_index.bin (only open while parsing)
		self.map = None  # mapping of the data file
		self.index = None  # parsed GlossaryIndex

	def __exit__(self, *args, **kwargs) -> None:
		"""Drop the parsed index and release every mapping that is still open."""
		self.index = None
		for slot in ("iF", "gIF", "map"):
			resource = getattr(self, slot)
			if resource is not None:
				# Clear the slot first so that a failing close cannot lead to
				# a second close of the same handle later on.
				setattr(self, slot, None)
				resource.__exit__(*args, **kwargs)

	def parseMetaIndex(self) -> None:
		"""Parse ``top_index.bin`` into ``self.index``.

		``self.iF`` must already be open: it is handed to the parser as the
		stream the records are later read from.
		"""
		globalIdxFile = self.indexDir / self.__class__.DEFAULT_METAINDEX_FILE_NAME
		self.gIF = MMap(globalIdxFile).__enter__()
		try:
			self.index = GlossaryIndex(kaitaistruct.KaitaiStream(self.iF), kaitaistruct.KaitaiStream(self.gIF))
		finally:
			# The descriptors are read eagerly by the constructor, so the meta
			# index mapping is not needed afterwards - and must not be left
			# open when parsing fails.
			self.gIF.__exit__(None, None, None)
			self.gIF = None

	def __enter__(self) -> "Glossary":
		"""Open the index and the data file and return ``self``."""
		try:
			idxFile = self.indexDir / self.__class__.DEFAULT_INDEX_FILE_NAME
			self.iF = MMap(idxFile).__enter__()
			self.parseMetaIndex()
			self.map = MMap(self.path).__enter__()
		except BaseException as ex:
			# `with` does not call __exit__ when __enter__ raises, so whatever
			# has been opened so far has to be released here.
			self.__exit__(type(ex), ex, ex.__traceback__)
			raise
		return self

	def getLine(self, rec_descriptor: "GlossaryIndex.RecordDescriptor") -> bytes:
		"""Return the data-file line a record descriptor points to.

		The line starts at ``rec_descriptor.record.offset`` and runs up to,
		but not including, the next ``\\n`` (or to the end of the data when
		there is none).

		NOTE(review): relies on ``self.map`` offering a bytes-like
		``find(sub, start)`` (as ``mmap.mmap`` and ``bytes`` do) - confirm for
		the object returned by ``fsutilz.MMap.__enter__``.
		"""
		start = rec_descriptor.record.offset
		# Search inside the mapping instead of first copying everything from
		# `start` to the end of the file for every single record.
		end = self.map.find(b"\n", start)
		if end < 0:
			return self.map[start:]
		return self.map[start:end]

	def __iter__(self) -> typing.Iterator[bytes]:
		"""Yield the line of every record, in index order."""
		for rec in self.index.records:
			yield self.getLine(rec)
    def __init__(self, index_io, _io, _parent=None, _root=None):
        # `_io` is the stream the record descriptors are parsed from;
        # `index_io` is a second stream that descriptors seek into on demand
        # (see RecordDescriptor.record).
        self._io = _io
        self._parent = _parent
        self._root = _root if _root else self
        self.index_io = index_io
        self._read()

    def _read(self):
        # Consume `_io` completely: one RecordDescriptor after another until EOF.
        self.records = []
        i = 0  # counter emitted by the generator; not used for anything
        while not self._io.is_eof():
            self.records.append(GlossaryIndex.RecordDescriptor(self._io, self, self._root))
            i += 1


    class RecordDescriptor(KaitaiStruct):
        # A pair of u8le integers (`size`, `offset`) locating one Record
        # inside the root's `index_io` stream.
        def __init__(self, _io, _parent=None, _root=None):
            self._io = _io
            self._parent = _parent
            self._root = _root if _root else self
            self._read()

        def _read(self):
            self.size = self._io.read_u8le()  # passed to Record as the key length
            self.offset = self._io.read_u8le()  # position of the Record in index_io

        class Record(KaitaiStruct):
            # A key of `size` bytes followed by a u8le `offset`.
            def __init__(self, size, _io, _parent=None, _root=None):
                self._io = _io
                self._parent = _parent
                self._root = _root if _root else self
                self.size = size
                self._read()

            def _read(self):
                self.key = self._io.read_bytes(self.size)
                self.offset = self._io.read_u8le()


        @property
        def record(self):
            # Lazy instance: parsed on first access and cached in `_m_record`.
            if hasattr(self, '_m_record'):
                return self._m_record if hasattr(self, '_m_record') else None

            # Parse from the root's index stream at `self.offset`, then put the
            # stream position back where it was.
            io = self._root.index_io
            _pos = io.pos()
            io.seek(self.offset)
            self._m_record = GlossaryIndex.RecordDescriptor.Record(self.size, io, self, self._root)
            io.seek(_pos)
            return self._m_record if hasattr(self, '_m_record') else None
class Tests(unittest.TestCase):
	"""Smoke tests that read the mock data set through its glossary index."""

	def testReading(self):
		"""Every indexed record must resolve to a single, decodable CSV line."""
		count = 0
		with Glossary(thisDir / "MOCK_DATA.csv") as p:
			for line in p:
				# getLine() cuts at the first line break, so none may remain
				self.assertNotIn(b"\n", line)
				rows = list(csv.reader([line.decode("utf-8")]))
				self.assertEqual(len(rows), 1)
				count += 1
		# an index that yields nothing would otherwise pass silently
		self.assertGreater(count, 0)