Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add bitshuffle #986

Merged
merged 10 commits into from
Jul 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions filters/H5Zbitshuffle/LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
H5Zbitshuffle - Julia wrapping of bitshuffle HDF5 Filter for improving
compression of typed binary data.

Copyright (c) Australian Nuclear Science and Technology Organisation
2022

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
13 changes: 13 additions & 0 deletions filters/H5Zbitshuffle/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
name = "H5Zbitshuffle"
uuid = "51b4e782-877f-4ccf-958a-27bf628210da"
authors = ["James.Hester <[email protected]>"]
version = "0.1.0"

[deps]
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
bitshuffle_jll = "228fe19c-1b83-5282-a626-13744502a320"

[compat]
HDF5 = "0.16"
bitshuffle_jll = "0.4.2"
julia = "1.6"
6 changes: 6 additions & 0 deletions filters/H5Zbitshuffle/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# H5Zbitshuffle.jl

Implements the bitshuffle filter for [HDF5.jl](https://github.com/JuliaIO/HDF5.jl) in Julia,
with optional integrated lz4 and zstd (de)compression.

This implements [HDF5 filter ID 32008](https://portal.hdfgroup.org/display/support/Filters#Filters-32008)
284 changes: 284 additions & 0 deletions filters/H5Zbitshuffle/src/H5Zbitshuffle.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
#==
jamesrhester marked this conversation as resolved.
Show resolved Hide resolved
Julia code wrapping the bitshuffle filter for HDF5. A rough translation of
bshuf_h5filter.c by Kiyoshi Masui, see
https://github.com/kiyo-masui/bitshuffle.
==#
"""
The bitshuffle filter for HDF5. See https://portal.hdfgroup.org/display/support/Filters#Filters-32008
and https://github.com/kiyo-masui/bitshuffle for details.
"""
module H5Zbitshuffle

using bitshuffle_jll

using HDF5.API
import HDF5.Filters: Filter, filterid, register_filter, filtername, filter_func, filter_cfunc, set_local_func, set_local_cfunc

export BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD, BitshuffleFilter, H5Z_filter_bitshuffle

# From bshuf_h5filter.h

# Compression selectors. These are the values accepted in the fifth filter
# parameter (`cd_values[5]`); any other value there means "bitshuffle only".
const BSHUF_H5_COMPRESS_LZ4 = 2
const BSHUF_H5_COMPRESS_ZSTD = 3
# Numeric filter identifier under which this filter is registered.
const H5Z_FILTER_BITSHUFFLE = API.H5Z_filter_t(32008)

# Version numbers written into the first two filter parameters by
# `bitshuffle_set_local`. BSHUF_VERSION_POINT is not referenced elsewhere in
# this module.
const BSHUF_VERSION_MAJOR = 0
const BSHUF_VERSION_MINOR = 4
const BSHUF_VERSION_POINT = 2

# Human-readable name returned by `filtername(BitshuffleFilter)`.
const bitshuffle_name = "HDF5 bitshuffle filter; see https://github.com/kiyo-masui/bitshuffle"

# Set filter arguments

"""
    bitshuffle_set_local(dcpl, htype, space) -> API.herr_t

"Set local" callback of the bitshuffle filter.

Reads the filter parameters currently stored for `H5Z_FILTER_BITSHUFFLE` in the
property list `dcpl`, overwrites the first three slots with the bitshuffle
major version, minor version and the size of the datatype `htype`, validates
the user-supplied block size (slot 4, must be a multiple of 8) and compression
selector (slot 5, must be `0`, `BSHUF_H5_COMPRESS_LZ4` or
`BSHUF_H5_COMPRESS_ZSTD`), and stores the result back into `dcpl`.

`space` is accepted to match the callback signature but is not used.

Returns `API.herr_t(1)` on success and `API.herr_t(-1)` on any failure. This
function is handed to HDF5 as a C callback, so exceptions are caught and
converted to the failure code rather than being allowed to unwind through C
frames.
"""
function bitshuffle_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t)
    try
        bs_flags = Ref{Cuint}()
        bs_values = Vector{Cuint}(undef, 8)
        bs_nelements = Ref{Csize_t}(length(bs_values))

        # On return `bs_nelements[]` holds the number of values actually stored.
        API.h5p_get_filter_by_id(
            dcpl, H5Z_FILTER_BITSHUFFLE, bs_flags, bs_nelements, bs_values, 0, C_NULL, C_NULL
        )

        @debug "Initial filter info" bs_flags bs_values bs_nelements

        # The first three slots are owned by the filter, not by the user.
        bs_values[1] = BSHUF_VERSION_MAJOR
        bs_values[2] = BSHUF_VERSION_MINOR

        elem_size = API.h5t_get_size(htype)

        @debug "Element size for $htype reported as $elem_size"

        if elem_size <= 0
            return API.herr_t(-1)
        end

        bs_values[3] = elem_size
        nelements = bs_nelements[]

        # Check user-supplied values. The block size is unsigned, so only the
        # multiple-of-8 requirement needs testing.
        if nelements > 3 && bs_values[4] % 8 != 0
            return API.herr_t(-1)
        end

        if nelements > 4 &&
            !(bs_values[5] in (0, BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD))
            return API.herr_t(-1)
        end

        @debug "Final values" bs_values

        API.h5p_modify_filter(dcpl, H5Z_FILTER_BITSHUFFLE, bs_flags[], nelements, bs_values)

        return API.herr_t(1)
    catch e
        # The API wrappers (and the Cuint conversion above) may throw; report
        # the problem and signal failure to HDF5 through the return code.
        @error "Non-fatal H5 bitshuffle plugin error: " exception = (e, catch_backtrace())
        return API.herr_t(-1)
    end
end

"""
    H5Z_filter_bitshuffle(flags, cd_nelmts, cd_values, nbytes, buf_size, buf) -> Csize_t

Filter callback for the bitshuffle filter; a rough translation of
`bshuf_h5filter.c`.

When `flags & API.H5Z_FLAG_REVERSE` is set the chunk is decoded (decompressed
if a compressor is selected, then bit-unshuffled); otherwise it is encoded
(bit-shuffled, then optionally compressed with LZ4 or Zstd).

# Arguments
- `flags`: filter flags; only `API.H5Z_FLAG_REVERSE` is inspected.
- `cd_nelmts`: number of entries readable from `cd_values`; at least 3 are
  required.
- `cd_values`: filter parameters in the order major version, minor version,
  element size, block size, compression selector, compression level. Entries
  beyond `cd_nelmts` are never read and default to zero.
- `nbytes`: number of valid bytes in the input buffer.
- `buf_size`: on success receives the allocated size of the new buffer.
- `buf`: on entry points to the input buffer; on success the old buffer is
  released with `Libc.free` and replaced by a buffer from `Libc.malloc`.

# Returns
The number of valid bytes in the output buffer, or `0` on failure, in which
case `buf` and `buf_size` are left unchanged.

Compressed chunks start with a 12-byte header: the uncompressed size as a
big-endian 64-bit integer followed by the block size in bytes as a big-endian
32-bit integer.
"""
function H5Z_filter_bitshuffle(flags::Cuint, cd_nelmts::Csize_t,
                               cd_values::Ptr{Cuint}, nbytes::Csize_t,
                               buf_size::Ptr{Csize_t}, buf::Ptr{Ptr{Cvoid}})::Csize_t
    in_buf = unsafe_load(buf) # in_buf is *void
    out_buf = C_NULL
    nbytes_out = Csize_t(0)

    try # mop up errors at end
        @debug "nelmts" cd_nelmts

        if cd_nelmts < 3
            error("bitshuffle_h5plugin: Not enough elements provided to bitshuffle filter")
        end

        # Get needed information. Optional entries are only read when present
        # so that a short parameter array is never read out of bounds.
        major = unsafe_load(cd_values, 1)
        minor = unsafe_load(cd_values, 2)
        elem_size = Csize_t(unsafe_load(cd_values, 3))
        block_size = cd_nelmts > 3 ? Csize_t(unsafe_load(cd_values, 4)) : Csize_t(0)
        compress_flag = cd_nelmts > 4 ? unsafe_load(cd_values, 5) : Cuint(0)
        comp_lvl = cd_nelmts > 5 ? unsafe_load(cd_values, 6) : Cuint(0)

        @debug "Major,minor:" major minor
        @debug "element size, compress_level, compress_flag" elem_size comp_lvl compress_flag

        if elem_size == 0
            error("bitshuffle_h5plugin: Element size must be non-zero")
        end

        if block_size == 0
            block_size = ccall((:bshuf_default_block_size, libbitshuffle), Csize_t,
                               (Csize_t,), elem_size)
        end

        decoding = (flags & API.H5Z_FLAG_REVERSE) != 0
        # compress_flag is 0 when cd_nelmts <= 4, so this also covers the
        # "no compression parameter supplied" case.
        compressing = compress_flag in (BSHUF_H5_COMPRESS_LZ4, BSHUF_H5_COMPRESS_ZSTD)

        # Work out buffer sizes

        if compressing
            if decoding # unshuffle and decompress
                if nbytes < 12
                    error("bitshuffle_h5plugin: Compressed chunk of $nbytes bytes is shorter than its 12-byte header")
                end

                # First 8 bytes is number of uncompressed bytes
                nbytes_uncomp = Csize_t(ccall((:bshuf_read_uint64_BE, libbitshuffle), UInt64,
                                              (Ptr{Cvoid},), in_buf))

                # Next 4 bytes are the block size in bytes; convert to elements
                block_size = Csize_t(ccall((:bshuf_read_uint32_BE, libbitshuffle), UInt32,
                                           (Ptr{Cvoid},), in_buf + 8)) ÷ elem_size

                in_buf += 12
                buf_size_out = nbytes_uncomp
            else # shuffle and compress
                nbytes_uncomp = nbytes
                bound = if compress_flag == BSHUF_H5_COMPRESS_LZ4
                    ccall((:bshuf_compress_lz4_bound, libbitshuffle), Csize_t,
                          (Csize_t, Csize_t, Csize_t),
                          nbytes_uncomp ÷ elem_size, elem_size, block_size)
                else
                    ccall((:bshuf_compress_zstd_bound, libbitshuffle), Csize_t,
                          (Csize_t, Csize_t, Csize_t),
                          nbytes_uncomp ÷ elem_size, elem_size, block_size)
                end
                buf_size_out = bound + Csize_t(12) # room for the header
            end
        else # No compression required
            nbytes_uncomp = nbytes
            buf_size_out = nbytes
        end

        if nbytes_uncomp % elem_size != 0
            error("bitshuffle_h5plugin: Uncompressed size $nbytes_uncomp is not a multiple of $elem_size")
        end

        nelem = nbytes_uncomp ÷ elem_size

        out_buf = Libc.malloc(buf_size_out)
        if out_buf == C_NULL
            error("bitshuffle_h5plugin: Cannot allocate memory for outbuf during decompression")
        end

        # Now perform the (de)compression and/or (un)shuffle

        if compressing
            if decoding # unshuffle and decompress
                err = if compress_flag == BSHUF_H5_COMPRESS_LZ4
                    ccall((:bshuf_decompress_lz4, libbitshuffle), Int64,
                          (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
                          in_buf, out_buf, nelem, elem_size, block_size)
                else
                    ccall((:bshuf_decompress_zstd, libbitshuffle), Int64,
                          (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
                          in_buf, out_buf, nelem, elem_size, block_size)
                end
            else # shuffle and compress
                ccall((:bshuf_write_uint64_BE, libbitshuffle), Cvoid,
                      (Ptr{Cvoid}, UInt64), out_buf, nbytes_uncomp)
                ccall((:bshuf_write_uint32_BE, libbitshuffle), Cvoid,
                      (Ptr{Cvoid}, UInt32), out_buf + 8, block_size * elem_size)

                err = if compress_flag == BSHUF_H5_COMPRESS_LZ4
                    ccall((:bshuf_compress_lz4, libbitshuffle), Int64,
                          (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
                          in_buf, out_buf + 12, nelem, elem_size, block_size)
                else
                    # The Zstd entry point takes the compression level as a
                    # trailing C `int`.
                    ccall((:bshuf_compress_zstd, libbitshuffle), Int64,
                          (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t, Cint),
                          in_buf, out_buf + 12, nelem, elem_size, block_size,
                          comp_lvl % Cint)
                end
            end
        else # just the shuffle thanks
            err = if decoding
                ccall((:bshuf_bitunshuffle, libbitshuffle), Int64,
                      (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
                      in_buf, out_buf, nelem, elem_size, block_size)
            else
                ccall((:bshuf_bitshuffle, libbitshuffle), Int64,
                      (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t, Csize_t, Csize_t),
                      in_buf, out_buf, nelem, elem_size, block_size)
            end
        end

        # And wrap it up

        if err < 0
            error("h5plugin_bitshuffle: Error in bitshuffle with code $err")
        end

        produced = if !compressing
            nbytes
        elseif decoding
            nbytes_uncomp
        else
            Csize_t(err) + Csize_t(12) # compressed payload plus header
        end

        # Hand the new buffer to HDF5 and release the old one.
        Libc.free(unsafe_load(buf))
        unsafe_store!(buf, out_buf)
        unsafe_store!(buf_size, Csize_t(buf_size_out))
        out_buf = C_NULL # ownership transferred; must not be freed below
        nbytes_out = produced

    catch e

        # On failure, return 0 and change no arguments

        nbytes_out = Csize_t(0)
        @error "Non-fatal H5 bitshuffle plugin error: " exception = (e, catch_backtrace())

    finally
        if out_buf != C_NULL
            Libc.free(out_buf)
        end
    end

    return nbytes_out
end

# Filter registration

# All information for the filter

# Parameter set describing one bitshuffle filter instance. The fields are listed
# in the same order in which `H5Z_filter_bitshuffle` reads its `cd_values`
# (presumably HDF5.Filters passes the fields in declaration order - confirm
# against HDF5.Filters).
struct BitshuffleFilter <: Filter
# bitshuffle major/minor version; overwritten by `bitshuffle_set_local`
major::Cuint
minor::Cuint
# element size in bytes; filled in from the datatype by `bitshuffle_set_local`
typesize::Cuint
# block size; 0 selects the bitshuffle default, otherwise must be a multiple of 8
blocksize::Cuint
# 0 (none), BSHUF_H5_COMPRESS_LZ4 or BSHUF_H5_COMPRESS_ZSTD
compression::Cuint
comp_level::Cuint # compression level, Zstd only
end

"""
    BitshuffleFilter(; blocksize=0, compressor=:none, comp_level=0)

Construct a bitshuffle filter description.

# Keywords
- `blocksize`: bitshuffle block size; `0` selects the default bitshuffle
  blocksize.
- `compressor`: `:none` for shuffling only, or `:lz4` / `:zstd` to add
  compression after shuffling. Any other value throws an `ArgumentError`.
- `comp_level`: compression level, used for `:zstd` only and ignored for
  `:lz4`.
"""
function BitshuffleFilter(; blocksize=0, compressor=:none, comp_level=0)
    compcode = if compressor == :lz4
        BSHUF_H5_COMPRESS_LZ4
    elseif compressor == :zstd
        BSHUF_H5_COMPRESS_ZSTD
    elseif compressor == :none
        0
    else
        throw(ArgumentError("Invalid bitshuffle compression $compressor"))
    end
    # The element size (third field) is left at zero here; it is filled in
    # later by `bitshuffle_set_local`.
    return BitshuffleFilter(
        BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR, 0, blocksize, compcode, comp_level
    )
end

# HDF5.Filters hooks describing the bitshuffle filter: its numeric id, display
# name, and the Julia functions / C-callable pointers for the "set local" and
# filter callbacks.
filterid(::Type{BitshuffleFilter}) = H5Z_FILTER_BITSHUFFLE
filtername(::Type{BitshuffleFilter}) = bitshuffle_name
set_local_func(::Type{BitshuffleFilter}) = bitshuffle_set_local
set_local_cfunc(::Type{BitshuffleFilter}) = @cfunction(bitshuffle_set_local, API.herr_t,
                                                       (API.hid_t, API.hid_t, API.hid_t))
# Extends the imported `HDF5.Filters.filter_func`; the previous spelling
# `filterfunc` only created an unrelated module-local function.
filter_func(::Type{BitshuffleFilter}) = H5Z_filter_bitshuffle
# Old spelling retained so that any existing caller keeps working.
filterfunc(::Type{BitshuffleFilter}) = H5Z_filter_bitshuffle
filter_cfunc(::Type{BitshuffleFilter}) = @cfunction(H5Z_filter_bitshuffle, Csize_t,
                                                    (Cuint, Csize_t, Ptr{Cuint}, Csize_t,
                                                     Ptr{Csize_t}, Ptr{Ptr{Cvoid}}))

# Register the filter with HDF5 every time the module is loaded.
function __init__()
    register_filter(BitshuffleFilter)
end

end # module
31 changes: 30 additions & 1 deletion test/filter.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ using HDF5.Filters
using Test
using H5Zblosc, H5Zlz4, H5Zbzip2, H5Zzstd

@static if VERSION >= v"1.6" using H5Zbitshuffle end

using HDF5.Filters: ExternalFilter, isavailable, isencoderenabled, isdecoderenabled

@testset "filter" begin
Expand Down Expand Up @@ -42,7 +44,7 @@ compressionFilters = Dict(
"blosc" => BloscFilter,
"bzip2" => Bzip2Filter,
"lz4" => Lz4Filter,
"zstd" => ZstdFilter
"zstd" => ZstdFilter,
)

for (name, filter) in compressionFilters
Expand All @@ -65,8 +67,35 @@ ds = create_dataset(
f, "blosc_bitshuffle", datatype(data), dataspace(data),
chunk=(100,100), filters=BloscFilter(shuffle=H5Zblosc.BITSHUFFLE)
)

write(ds, data)

# Create three chunked datasets that use the standalone bitshuffle filter -
# with LZ4, with Zstd at level 5, and with no compressor - and write `data`
# to each. `f` and `data` are taken from the enclosing scope, which is not
# fully visible here (presumably the open test file and the test array).
# NOTE(review): `ds` is also assigned in the surrounding code, so these
# assignments likely update that outer variable - confirm if that matters.
function extra_bitshuffle()

# bitshuffle followed by LZ4 compression
ds = create_dataset(
f, "bitshuffle_lz4", datatype(data), dataspace(data),
chunk=(100,100), filters=BitshuffleFilter(compressor=:lz4)
)

write(ds, data)

# bitshuffle followed by Zstd compression at level 5
ds = create_dataset(
f, "bitshuffle_zstd", datatype(data), dataspace(data),
chunk=(100,100), filters=BitshuffleFilter(compressor=:zstd,comp_level=5)
)

write(ds, data)

# bitshuffle only (constructor defaults: no compressor)
ds = create_dataset(
f, "bitshuffle_plain", datatype(data), dataspace(data),
chunk=(100,100), filters=BitshuffleFilter()
)

write(ds, data)
end

@static VERSION >= v"1.6" ? extra_bitshuffle() : nothing

# Close and re-open file for reading
close(f)
f = h5open(fn)
Expand Down
3 changes: 3 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Pkg.develop(PackageSpec(path=joinpath(filter_path, "H5Zblosc")))
Pkg.develop(PackageSpec(path=joinpath(filter_path, "H5Zbzip2")))
Pkg.develop(PackageSpec(path=joinpath(filter_path, "H5Zlz4")))
Pkg.develop(PackageSpec(path=joinpath(filter_path, "H5Zzstd")))
@static if VERSION >= v"1.6"
Pkg.develop(PackageSpec(path=joinpath(filter_path, "H5Zbitshuffle")))
end

@info "libhdf5 v$(HDF5.API.h5_get_libversion())"

Expand Down