Skip to content
This repository has been archived by the owner on Oct 10, 2023. It is now read-only.

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
KOLANICH committed Oct 10, 2023
0 parents commit f7f65f9
Show file tree
Hide file tree
Showing 19 changed files with 413 additions and 0 deletions.
1 change: 1 addition & 0 deletions .ci/aptPackagesToInstall.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
libblast
Empty file.
12 changes: 12 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
root = true

[*]
charset = utf-8
indent_style = tab
indent_size = 4
insert_final_newline = true
end_of_line = lf

[*.{yml,yaml}]
indent_style = space
indent_size = 2
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.imploded filter=lfs diff=lfs merge=lfs -text
*.decomp filter=lfs diff=lfs merge=lfs -text
1 change: 1 addition & 0 deletions .github/.templateMarker
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
KOLANICH/python_project_boilerplate.py
8 changes: 8 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
allow:
- dependency-type: "all"
15 changes: 15 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name: CI
on:
push:
branches: [master]
pull_request:
branches: [master]

jobs:
build:
runs-on: ubuntu-22.04
steps:
- name: typical python workflow
uses: KOLANICH-GHActions/typical-python-workflow@master
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
__pycache__
*.pyc
*.pyo
/*.egg-info
*.srctrlbm
*.srctrldb
build
dist
.eggs
monkeytype.sqlite3
/.ipynb_checkpoints
51 changes: 51 additions & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
image: registry.gitlab.com/kolanich-subgroups/docker-images/fixed_python:latest

variables:
DOCKER_DRIVER: overlay2
SAST_ANALYZER_IMAGE_TAG: latest
SAST_DISABLE_DIND: "true"
SAST_CONFIDENCE_LEVEL: 5
CODECLIMATE_VERSION: latest

include:
- template: SAST.gitlab-ci.yml
- template: Code-Quality.gitlab-ci.yml
- template: License-Management.gitlab-ci.yml

# Builds the wheel, installs it, runs the test suite under coverage and publishes
# the wheel plus the JUnit and coverage reports as job artifacts.
build:
  tags:
    - shared
    - linux
  stage: build
  variables:
    GIT_DEPTH: "1"
    PYTHONUSERBASE: ${CI_PROJECT_DIR}/python_user_packages

  before_script:
    - export PATH="$PATH:$PYTHONUSERBASE/bin" # don't move into `variables`
    - apt-get update
    # todo:
    #- apt-get -y install
    #- pip3 install --upgrade
    #- python3 ./fix_python_modules_paths.py

  script:
    - python3 -m build -nw bdist_wheel
    # The wheel is renamed to a stable file name so that the artifact link in ReadMe.md keeps working.
    - mv ./dist/*.whl ./dist/pkexplode-0.CI-py3-none-any.whl
    - pip3 install --upgrade ./dist/*.whl
    # Coverage has to be measured for this package (`pkexplode`), not for its sibling `pkimplode`.
    - coverage run --source=pkexplode -m --branch pytest --junitxml=./rspec.xml ./tests/test.py
    - coverage report -m
    - coverage xml

  coverage: /^TOTAL(?:\s+\d+){4}\s+(\d+%).+/

  cache:
    paths:
      - $PYTHONUSERBASE

  artifacts:
    paths:
      - dist
    reports:
      junit: ./rspec.xml
      # `reports:cobertura` was removed from GitLab CI; `coverage_report` is its replacement.
      coverage_report:
        coverage_format: cobertura
        path: ./coverage.xml
5 changes: 5 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[submodule "tests/testDataset"]
path = tests/testDataset
url = https://github.com/implode-compression-impls/implode_test_files.git
branch = merged
shallow = true
1 change: 1 addition & 0 deletions Code_Of_Conduct.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
No codes of conduct!
22 changes: 22 additions & 0 deletions License.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
The MIT License (MIT)
=====================

Copyright © `1999-2013` `Ladislav Zezula`

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
4 changes: 4 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
include UNLICENSE
include *.md
include tests
include .editorconfig
49 changes: 49 additions & 0 deletions ReadMe.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
pkexplode.py
============
~~[wheel (GitLab)](https://gitlab.com/KOLANICH/pkexplode.py/-/jobs/artifacts/master/raw/dist/pkexplode-0.CI-py3-none-any.whl?job=build)~~
~~[wheel (GHA via `nightly.link`)](https://nightly.link/implode-compression-impls/pkexplode.py/workflows/CI/master/pkexplode-0.CI-py3-none-any.whl)~~
~~![GitLab Build Status](https://gitlab.com/KOLANICH/pkexplode.py/badges/master/pipeline.svg)~~
~~![GitLab Coverage](https://gitlab.com/KOLANICH/pkexplode.py/badges/master/coverage.svg)~~
~~[![Coveralls Coverage](https://img.shields.io/coveralls/implode-compression-impls/pkexplode.py.svg)](https://coveralls.io/r/implode-compression-impls/pkexplode.py)~~
~~[![GitHub Actions](https://github.com/implode-compression-impls/pkexplode.py/workflows/CI/badge.svg)](https://github.com/implode-compression-impls/pkexplode.py/actions/)~~
[![Libraries.io Status](https://img.shields.io/librariesio/github/implode-compression-impls/pkexplode.py.svg)](https://libraries.io/github/implode-compression-impls/pkexplode.py)
[![Code style: antiflash](https://img.shields.io/badge/code%20style-antiflash-FFF.svg)](https://codeberg.org/KOLANICH-tools/antiflash.py)
[![License](https://img.shields.io/github/license/implode-compression-impls/pkexplode.py.svg)](./License.md)

These are free and Open-Source ctypes-based bindings to [`libexplode`](https://codeberg.org/implode-compression-impls/libexplode), which is [a ripped out part](https://github.com/ladislav-zezula/StormLib/blob/master/src/pklib/explode.c) of [`pklib`](https://github.com/ladislav-zezula/StormLib/tree/master/src/pklib), which is a Free Open-Source implementation of the PKWare Data Compression Library (DCL) compression format, which itself was ripped out of [`StormLib`](https://github.com/ladislav-zezula/StormLib), all of which are by [Ladislav Zezula](https://github.com/ladislav-zezula).

Alternatively you can use:

* [`pkblast.py`](https://codeberg.org/implode-compression-impls/pkblast.py), a wrapper to [Mark @madler Adler](https://github.com/madler)'s [libblast](https://github.com/madler/zlib/tree/master/contrib/blast) - another free and open-source implementation of PKWare DCL decompressor.

* [`pwexplode`](https://github.com/Schallaven/pwexplode) - a pure-python impl. ⚠️⚠️⚠️ WARNING [![GPL-3.0-or-later](https://www.gnu.org/graphics/gplv3-or-later.svg)](https://github.com/Schallaven/pwexplode/blob/master/LICENSE) ⚠️⚠️⚠️

You may also be interested in the compression counterpart, [`pkimplode.py`](https://codeberg.org/implode-compression-impls/pkimplode.py).

Benefits of CTypes-based impl:

* Supports python versions other than CPython
* No need to recompile python module after python version upgrade

Drawbacks:
* performance and overhead may be worse, than in the case of a cext.

Installation
------------

In order to make it work you need a package with `libexplode` itself installed into your system using your distro package manager. If your distro doesn't provide one, you can build it yourself using CMake CPack from the sources [by the link](https://codeberg.org/implode-compression-impls/libexplode). You will get 3 packages, one with the headers and another one with the shared library. Only the one with the lib is mandatory.

Usage
-----

The package contains multiple functions. They have names matching the regular expression `^decompress(Stream|Bytes(Whole|Chunked))To(Stream|Bytes)$`.

The first subgroup describes the type of input argument, the second subgroup describes the type of output.
* If input is `Bytes`, then you need to choose one of:
* `Whole`, which means that the lib gets a pointer to the whole array with compressed data. This is considered to be **the optimal input format**.
* `Chunked` (which means the data are in reality processed by `decompressStreamTo$3`), which was created mainly for convenience of testing.
* Otherwise it is an object acting like a stream. In this case you can also provide `chunkSize`, because streams are processed in chunks. The larger the chunk, the fewer chunks there are in the stream and so the less overhead is spent on calling the callbacks, but the more memory is needed to store the chunk.

The second subgroup describes the type of the result.
* The internal type of the result is always a `Stream`. This is considered to be **the optimal output format**. It is because we don't know the size of output ahead of time, so have to use streams.
* `Bytes` are only for your convenience and just wrap the `decompress$1ToStream` with a context with `BytesIO`.
71 changes: 71 additions & 0 deletions pkexplode/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import typing
from collections.abc import ByteString
from io import BytesIO, IOBase
from mmap import mmap
from warnings import warn
from zlib import crc32 as crc32_zlib

from pklib_base import PklibError

from .ctypes import _decompressStream

# Public API of the package. Every name listed here must be defined in this module:
# a missing name makes `from pkexplode import *` fail with `AttributeError`.
# `decompressBytesWholeToStream` used to be listed, but no such function is defined here.
__all__ = (
	"decompressStreamToStream",
	"decompressStreamToBytes",
	"decompressBytesChunkedToStream",
	"decompressBytesChunkedToBytes",
	"decompress",
)


def crc32(data: ByteString, value: int = 0) -> int:
	"""Returns the bitwise complement of `zlib.crc32(data, value)`, truncated to 32 bits."""
	plainChecksum = crc32_zlib(data, value)
	complemented = ~plainChecksum
	return complemented & 0xFFFFFFFF


def decompressStreamToStream(inputStream: IOBase, outputStream: IOBase) -> None:
	"""Streaming decompression: the compressed data are read from `inputStream` (the first arg),
	the decompressed data are written into `outputStream` (the second arg).

	Raises `Exception` wrapping `PklibError(code)` when the underlying routine returns a nonzero code."""

	status = _decompressStream(inputStream, outputStream)
	if not status:
		return

	raise Exception(PklibError(status))


def decompressBytesChunkedToStream(compressed: ByteString, outputStream: IOBase) -> None:
	"""Decompresses the bytes-like `compressed` into `outputStream`.

	The input is wrapped into a `BytesIO` and processed by `decompressStreamToStream`,
	so nothing is returned and errors are raised the same way as there."""
	with BytesIO(compressed) as inputStream:
		return decompressStreamToStream(inputStream, outputStream)


def decompressBytesChunkedToBytes(compressed: ByteString) -> bytes:
	"""Decompresses the bytes-like `compressed` and returns the decompressed data as `bytes`.

	The output is collected in a temporary `BytesIO` filled by `decompressBytesChunkedToStream`."""
	with BytesIO() as outputStream:
		decompressBytesChunkedToStream(compressed, outputStream)
		# the value must be taken before the `with` block closes the buffer
		return outputStream.getvalue()


def decompressStreamToBytes(inputStream: IOBase) -> bytes:
	"""Decompresses the data read from `inputStream` and returns the decompressed data as `bytes`.

	The output is collected in a temporary `BytesIO` filled by `decompressStreamToStream`."""
	with BytesIO() as outputStream:
		decompressStreamToStream(inputStream, outputStream)
		# the value must be taken before the `with` block closes the buffer
		return outputStream.getvalue()


# Dispatch table for `decompress`. The index is `isOutputBytes << 1 | int(isInputBytes)`:
# bit 1 is set when the result has to be returned as `bytes`, bit 0 is set when the input is bytes-like.
_functionsUseCaseMapping = (
	decompressStreamToStream,  # 0b00: stream in, stream out
	decompressBytesChunkedToStream,  # 0b01: bytes in, stream out
	decompressStreamToBytes,  # 0b10: stream in, bytes out
	decompressBytesChunkedToBytes,  # 0b11: bytes in, bytes out
)


def decompress(compressed: typing.Union[ByteString, IOBase], outputStream: typing.Optional[IOBase] = None) -> typing.Optional[bytes]:
	"""A convenience function. It is better to use the more specialized ones since they have less overhead.
	`compressed` can be either a stream, or `bytes`-like stuff (a `mmap` is treated as `bytes`-like).
	If `outputStream` is None, the decompressed data are returned as `bytes`.
	If `outputStream` is a stream, the decompressed data are written into it and `None` is returned.
	Errors are raised by the specialized function the call is dispatched to."""

	isOutputBytes = outputStream is None
	isInputBytes = isinstance(compressed, (ByteString, mmap))
	# index into the dispatch table: bit 1 - bytes are returned, bit 0 - input is bytes-like
	selector = isOutputBytes << 1 | int(isInputBytes)
	func = _functionsUseCaseMapping[selector]
	argz = [compressed]
	if not isOutputBytes:
		# only the `...ToStream` variants accept the output stream
		argz.append(outputStream)
	#_efficiencyDeprecationMessage(decompress, func)
	return func(*argz)
67 changes: 67 additions & 0 deletions pkexplode/ctypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import ctypes

from pklib_base import CommonSizeConstants, LUTSizeConstants, ReadFunT, WriteFunT, _initLibrary, getStreamCallbacks

__all__ = ("TDcmpStruct", "sizeConstants", "_decompressStream")


# `(name, value)` pairs handed to `_initLibrary` below. The names are the ones read as attributes
# of `sizeConstants` in `_getFieldsForInternalStateStructure` to size the arrays of the state structure.
# NOTE(review): `internalStructSize` has no value here (`None`); presumably `_initLibrary` fills it in - confirm in `pklib_base`.
specializedSizeConstantsFields = (
	("internalStructSize", None),
	("IN_BUFF_SIZE", 2048),
	("CODES_SIZE", 256),
	("OFFSS_SIZE", 256),
	("OFFSS_SIZE1", 128),
	("CH_BITS_ASC_SIZE", 256),
	("LENS_SIZES", 16),
)


def _getFieldsForInternalStateStructure(commonSizes: CommonSizeConstants, lutSizes: LUTSizeConstants, sizeConstants):
	"""Returns the `(name, ctypes type)` pairs describing the fields of the decompressor state structure.

	The array lengths are taken from `commonSizes`, `lutSizes` and `sizeConstants`.
	The order of the pairs is the order of the fields, so it must not be changed."""

	ulong = ctypes.c_ulong

	def ubytes(count):
		# an array type of `count` unsigned bytes
		return ctypes.c_ubyte * int(count)

	return (
		("offs0000", ulong),
		("ctype", ulong),
		("outputPos", ulong),
		("dsize_bits", ulong),
		("dsize_mask", ulong),
		("bit_buff", ulong),
		("extra_bits", ulong),
		("in_pos", ctypes.c_uint),
		("in_bytes", ulong),
		("param", ctypes.POINTER(None)),
		("read_buf", ReadFunT),
		("write_buf", WriteFunT),
		# ("out_buff", ctypes.c_ubyte * int(commonSizes.OUT_BUFF_SIZE)),
		("out_buff", ubytes(commonSizes.BUFF_SIZE)),
		("in_buff", ubytes(sizeConstants.IN_BUFF_SIZE)),
		("DistPosCodes", ubytes(sizeConstants.CODES_SIZE)),
		("LengthCodes", ubytes(sizeConstants.CODES_SIZE)),
		("offs2C34", ubytes(sizeConstants.OFFSS_SIZE)),
		("offs2D34", ubytes(sizeConstants.OFFSS_SIZE)),
		("offs2E34", ubytes(sizeConstants.OFFSS_SIZE1)),
		("offs2EB4", ubytes(sizeConstants.OFFSS_SIZE)),
		("ChBitsAsc", ubytes(sizeConstants.CH_BITS_ASC_SIZE)),
		("DistBits", ubytes(lutSizes.DIST_SIZES)),
		("LenBits", ubytes(sizeConstants.LENS_SIZES)),
		("ExLenBits", ubytes(sizeConstants.LENS_SIZES)),
		("LenBase", ctypes.c_ushort * int(sizeConstants.LENS_SIZES)),
	)


# Placeholder: the name must already exist when `explode` is defined, because its annotation
# `ctypes.POINTER(TDcmpStruct)` is evaluated at definition time. It is rebound below to the value
# returned by `_initLibrary`.
TDcmpStruct = None


# Thin wrapper forwarding its arguments to `lib.explode`. `lib` is bound only after this definition
# (by the `_initLibrary` call below), so the wrapper can be called only after the module has been initialized.
# NOTE(review): this function itself is passed to `_initLibrary`; presumably its name and annotations are used
# to set up the prototype of the native function - confirm in `pklib_base` before changing the signature.
def explode(read_buf: ReadFunT, write_buf: WriteFunT, work_buf: ctypes.POINTER(TDcmpStruct), arbitraryData: ctypes.POINTER(None)) -> ctypes.c_uint:
	return lib.explode(read_buf, write_buf, work_buf, arbitraryData)


# Binds the library handle, the state structure type (looked up under the name "TDcmpStruct")
# and the size constants used by the rest of the module.
lib, TDcmpStruct, sizeConstants = _initLibrary(explode, "TDcmpStruct", specializedSizeConstantsFields, _getFieldsForInternalStateStructure)


def _decompressStream(inputStream, outputStream) -> int:
	"""Runs `explode` over the callbacks made by `getStreamCallbacks` for the two streams, with a fresh
	`TDcmpStruct` as the work buffer and no arbitrary data. Returns whatever `explode` returns."""
	workBuffer = TDcmpStruct()
	callbacks = getStreamCallbacks(inputStream, outputStream)
	workBufferRef = ctypes.byref(workBuffer)

	return explode(*callbacks, work_buf=workBufferRef, arbitraryData=None)
44 changes: 44 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
[build-system]
# `setuptools_scm` is needed at build time because the version is declared dynamic
# (`dynamic = ["version"]` in `[project]`) and a `[tool.setuptools_scm]` table is present.
requires = ["setuptools>=61.2.0", "wheel", "setuptools_scm[toml]>=3.4.3"]
build-backend = "setuptools.build_meta"

# Core metadata of the distribution.
[project]
name = "pkexplode"
authors = [{ name = "KOLANICH" }]
description = "Python bindings to libexplode which is a ripped out part of pkglib which is a Free Open-Source implementation of PKWare Data Compression Library (DCL) compression format, which itself was ripped out of StormLib by Ladislav Zezula."
readme = "ReadMe.md"
keywords = [
	"blast",
	"PKWare",
	"bindings",
	"compression",
	"DCL",
	"implode",
]
license = { text = "MIT" }
classifiers = [
	"Programming Language :: Python",
	"Programming Language :: Python :: 3",
	"Development Status :: 4 - Beta",
	"Environment :: Other Environment",
	"Intended Audience :: Developers",
	"License :: OSI Approved",
	"License :: OSI Approved :: MIT License",
	"Operating System :: OS Independent",
	"Topic :: Software Development :: Libraries :: Python Modules",
]
requires-python = ">=3.4"
dependencies = [
	"pklib_base", # @ git+https://codeberg.org/implode-compression-impls/pklib_base.py.git
]
# The version is not written here; it is provided at build time.
dynamic = ["version"]

[project.urls]
Homepage = "https://codeberg.org/implode-compression-impls/pkexplode.py"

[project.optional-dependencies]
test = [
	"fileTestSuite", # @ git+https://codeberg.org/fileTestSuite/fileTestSuite.py
]

[tool.setuptools]
zip-safe = true
include-package-data = false

# Packages are discovered automatically; namespace packages are not considered.
[tool.setuptools.packages.find]
namespaces = false

[tool.setuptools_scm]
1 change: 1 addition & 0 deletions tests/testDataset
Submodule testDataset added at f05352
Loading

0 comments on commit f7f65f9

Please sign in to comment.