From cfa5a2401e168396ad05750c24a3446a43cf9b25 Mon Sep 17 00:00:00 2001 From: Simon Byrne Date: Tue, 14 Jun 2022 14:50:46 -0700 Subject: [PATCH] move dataspace-related functions to new file (#961) * move dataspace-related functions to new file * Apply suggestions from code review Co-authored-by: Mustafa M * more changes * rearrange types Co-authored-by: Mustafa M --- docs/make.jl | 1 + docs/src/dataspaces.md | 7 ++ src/HDF5.jl | 149 +-------------------------- src/attributes.jl | 16 +-- src/datasets.jl | 5 +- src/dataspaces.jl | 223 +++++++++++++++++++++++++++++++++++++++++ src/types.jl | 16 ++- 7 files changed, 250 insertions(+), 167 deletions(-) create mode 100644 docs/src/dataspaces.md create mode 100644 src/dataspaces.jl diff --git a/docs/make.jl b/docs/make.jl index e73dc6125..9c5677775 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -32,6 +32,7 @@ makedocs(; pages=[ "Home" => "index.md", "Interface" => [ + "dataspaces.md", "dataset.md", "attributes.md", "properties.md", diff --git a/docs/src/dataspaces.md b/docs/src/dataspaces.md new file mode 100644 index 000000000..ae8f014be --- /dev/null +++ b/docs/src/dataspaces.md @@ -0,0 +1,7 @@ +# Dataspaces + +```@docs +HDF5.Dataspace +dataspace +HDF5.isnull +``` diff --git a/src/HDF5.jl b/src/HDF5.jl index ba7b31e7e..6e0e47da3 100644 --- a/src/HDF5.jl +++ b/src/HDF5.jl @@ -48,6 +48,7 @@ const ORDER = Ref(API.H5_ITER_INC) include("properties.jl") include("types.jl") include("typeconversions.jl") +include("dataspaces.jl") include("datasets.jl") include("attributes.jl") include("readwrite.jl") @@ -259,15 +260,6 @@ function Base.close(obj::Datatype) nothing end -function Base.close(obj::Dataspace) - if obj.id != -1 - if isvalid(obj) - API.h5s_close(obj) - end - obj.id = -1 - end - nothing -end """ ishdf5(name::AbstractString) @@ -479,99 +471,6 @@ datatype(dt::Datatype) = dt Base.sizeof(dtype::Datatype) = Int(API.h5t_get_size(dtype)) -# Get the dataspace of a dataset -dataspace(dset::Dataset) = 
Dataspace(API.h5d_get_space(checkvalid(dset))) -# The dataspace of a Dataspace is just the dataspace -dataspace(ds::Dataspace) = ds - -# Create a dataspace from in-memory types -dataspace(x::Union{T, Complex{T}}) where {T<:ScalarType} = Dataspace(API.h5s_create(API.H5S_SCALAR)) -dataspace(::AbstractString) = Dataspace(API.h5s_create(API.H5S_SCALAR)) - -function _dataspace(sz::Dims{N}, max_dims::Union{Dims{N}, Tuple{}}=()) where N - dims = API.hsize_t[sz[i] for i in N:-1:1] - if isempty(max_dims) - maxd = dims - else - # This allows max_dims to be specified as -1 without triggering an overflow - # exception due to the signed -> unsigned conversion. - maxd = API.hsize_t[API.hssize_t(max_dims[i]) % API.hsize_t for i in N:-1:1] - end - return Dataspace(API.h5s_create_simple(length(dims), dims, maxd)) -end -dataspace(A::AbstractArray{T,N}; max_dims::Union{Dims{N},Tuple{}} = ()) where {T,N} = _dataspace(size(A), max_dims) -# special array types -dataspace(v::VLen; max_dims::Union{Dims,Tuple{}}=()) = _dataspace(size(v.data), max_dims) -dataspace(A::EmptyArray) = Dataspace(API.h5s_create(API.H5S_NULL)) -dataspace(n::Nothing) = Dataspace(API.h5s_create(API.H5S_NULL)) -# for giving sizes explicitly -dataspace(sz::Dims{N}; max_dims::Union{Dims{N},Tuple{}}=()) where {N} = _dataspace(sz, max_dims) -dataspace(sz1::Int, sz2::Int, sz3::Int...; max_dims::Union{Dims,Tuple{}}=()) = _dataspace(tuple(sz1, sz2, sz3...), max_dims) - - -function Base.ndims(obj::Union{Dataspace,Dataset,Attribute}) - dspace = obj isa Dataspace ? checkvalid(obj) : dataspace(obj) - ret = API.h5s_get_simple_extent_ndims(dspace) - obj isa Dataspace || close(dspace) - return ret -end -function Base.size(obj::Union{Dataspace,Dataset,Attribute}) - dspace = obj isa Dataspace ? 
checkvalid(obj) : dataspace(obj) - h5_dims = API.h5s_get_simple_extent_dims(dspace, nothing) - N = length(h5_dims) - ret = ntuple(i -> @inbounds(Int(h5_dims[N-i+1])), N) - obj isa Dataspace || close(dspace) - return ret -end -function Base.size(obj::Union{Dataspace,Dataset,Attribute}, d::Integer) - d > 0 || throw(ArgumentError("invalid dimension d; must be positive integer")) - N = ndims(obj) - d > N && return 1 - dspace = obj isa Dataspace ? obj : dataspace(obj) - h5_dims = API.h5s_get_simple_extent_dims(dspace, nothing) - ret = @inbounds Int(h5_dims[N - d + 1]) - obj isa Dataspace || close(dspace) - return ret -end -function Base.length(obj::Union{Dataspace,Dataset,Attribute}) - isnull(obj) && return 0 - dspace = obj isa Dataspace ? obj : dataspace(obj) - h5_dims = API.h5s_get_simple_extent_dims(dspace, nothing) - ret = Int(prod(h5_dims)) - obj isa Dataspace || close(dspace) - return ret -end -Base.isempty(dspace::Union{Dataspace,Dataset,Attribute}) = length(dspace) == 0 - -""" - isnull(dspace::Union{HDF5.Dataspace, HDF5.Dataset, HDF5.Attribute}) - -Determines whether the given object has no size (consistent with the `API.H5S_NULL` dataspace). - -# Examples -```julia-repl -julia> HDF5.isnull(dataspace(HDF5.EmptyArray{Float64}())) -true - -julia> HDF5.isnull(dataspace((0,))) -false -``` -""" -function isnull(obj::Union{Dataspace,Dataset,Attribute}) - dspace = obj isa Dataspace ? 
checkvalid(obj) : dataspace(obj) - ret = API.h5s_get_simple_extent_type(dspace) == API.H5S_NULL - obj isa Dataspace || close(dspace) - return ret -end - - -function get_regular_hyperslab(dspace::Dataspace) - start, stride, count, block = API.h5s_get_regular_hyperslab(dspace) - N = length(start) - @inline rev(v) = ntuple(i -> @inbounds(Int(v[N-i+1])), N) - return rev(start), rev(stride), rev(count), rev(block) -end - """ start_swmr_write(h5::HDF5.File) @@ -606,52 +505,6 @@ end Base.getindex(parent::Union{File,Group}, r::Reference) = _deref(parent, r) Base.getindex(parent::Dataset, r::Reference) = _deref(parent, r) # defined separately to resolve ambiguity -function hyperslab(dspace::Dataspace, I::Union{AbstractRange{Int},Int}...) - local dsel_id - try - dims = size(dspace) - n_dims = length(dims) - if length(I) != n_dims - error("Wrong number of indices supplied, supplied length $(length(I)) but expected $(n_dims).") - end - dsel_id = API.h5s_copy(dspace) - dsel_start = Vector{API.hsize_t}(undef,n_dims) - dsel_stride = Vector{API.hsize_t}(undef,n_dims) - dsel_count = Vector{API.hsize_t}(undef,n_dims) - for k = 1:n_dims - index = I[n_dims-k+1] - if isa(index, Integer) - dsel_start[k] = index-1 - dsel_stride[k] = 1 - dsel_count[k] = 1 - elseif isa(index, AbstractRange) - dsel_start[k] = first(index)-1 - dsel_stride[k] = step(index) - dsel_count[k] = length(index) - else - error("index must be range or integer") - end - if dsel_start[k] < 0 || dsel_start[k]+(dsel_count[k]-1)*dsel_stride[k] >= dims[n_dims-k+1] - println(dsel_start) - println(dsel_stride) - println(dsel_count) - println(reverse(dims)) - error("index out of range") - end - end - API.h5s_select_hyperslab(dsel_id, API.H5S_SELECT_SET, dsel_start, dsel_stride, dsel_count, C_NULL) - finally - close(dspace) - end - Dataspace(dsel_id) -end - -function hyperslab(dset::Dataset, I::Union{AbstractRange{Int},Int}...) - dspace = dataspace(dset) - return hyperslab(dspace, I...) 
-end - - # end of high-level interface diff --git a/src/attributes.jl b/src/attributes.jl index 7c9c76c1b..f8fe742a8 100644 --- a/src/attributes.jl +++ b/src/attributes.jl @@ -1,5 +1,3 @@ -# mid-level API - """ HDF5.Attribute @@ -14,18 +12,8 @@ See also - [`write_attribute`](@ref) - [`delete_attribute`](@ref) """ -mutable struct Attribute - id::API.hid_t - file::File - - function Attribute(id, file) - dset = new(id, file) - finalizer(close, dset) - dset - end -end -Base.cconvert(::Type{API.hid_t}, attr::Attribute) = attr -Base.unsafe_convert(::Type{API.hid_t}, attr::Attribute) = attr.id +Attribute # defined in types.jl + function Base.close(obj::Attribute) if obj.id != -1 if obj.file.id != -1 && isvalid(obj) diff --git a/src/datasets.jl b/src/datasets.jl index 968c8675e..0f8e6bdbd 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -1,8 +1,11 @@ # Dataset defined in types.jl +# Get the dataspace of a dataset +dataspace(dset::Dataset) = Dataspace(API.h5d_get_space(checkvalid(dset))) + # Open Dataset -open_dataset(parent::Union{File,Group}, +open_dataset(parent::Union{File,Group}, name::AbstractString, dapl::DatasetAccessProperties=DatasetAccessProperties(), dxpl::DatasetTransferProperties=DatasetTransferProperties() diff --git a/src/dataspaces.jl b/src/dataspaces.jl new file mode 100644 index 000000000..21267b465 --- /dev/null +++ b/src/dataspaces.jl @@ -0,0 +1,223 @@ +""" + HDF5.Dataspace + +A dataspace defines the size and the shape of a dataset or an attribute. + +A dataspace is typically constructed by calling [`dataspace`](@ref). 
+ +The following functions have methods defined for `Dataspace` objects +- `==` +- `ndims` +- `size` +- `length` +- `isempty` +- [`isnull`](@ref) +""" +Dataspace # defined in types.jl + +Base.:(==)(dspace1::Dataspace, dspace2::Dataspace) = + API.h5s_extent_equal(checkvalid(dspace1), checkvalid(dspace2)) +Base.hash(dspace::Dataspace, h::UInt) = hash(dspace.id, hash(Dataspace, h)) +Base.copy(dspace::Dataspace) = Dataspace(API.h5s_copy(checkvalid(dspace))) + +function Base.close(obj::Dataspace) + if obj.id != -1 + if isvalid(obj) + API.h5s_close(obj) + end + obj.id = -1 + end + nothing +end + +""" + dataspace(obj::Union{Attribute, Dataset, Dataspace}) + +The [`Dataspace`](@ref) of `obj`. +""" +dataspace(ds::Dataspace) = ds + + +# Create a dataspace from in-memory types +""" + dataspace(data) + +The default `Dataspace` used for representing a Julia object `data`: + - strings or numbers: a scalar `Dataspace` + - arrays: a simple `Dataspace` + - `nothing` or an `EmptyArray`: a null dataspace +""" +dataspace(x::Union{T, Complex{T}}) where {T<:ScalarType} = Dataspace(API.h5s_create(API.H5S_SCALAR)) +dataspace(::AbstractString) = Dataspace(API.h5s_create(API.H5S_SCALAR)) + +function _dataspace(sz::Dims{N}, max_dims::Union{Dims{N}, Tuple{}}=()) where N + dims = API.hsize_t[sz[i] for i in N:-1:1] + if isempty(max_dims) + maxd = dims + else + # This allows max_dims to be specified as -1 without triggering an overflow + # exception due to the signed -> unsigned conversion. 
+ maxd = API.hsize_t[API.hssize_t(max_dims[i]) % API.hsize_t for i in N:-1:1] + end + return Dataspace(API.h5s_create_simple(length(dims), dims, maxd)) +end +dataspace(A::AbstractArray{T,N}; max_dims::Union{Dims{N},Tuple{}} = ()) where {T,N} = _dataspace(size(A), max_dims) +# special array types +dataspace(v::VLen; max_dims::Union{Dims,Tuple{}}=()) = _dataspace(size(v.data), max_dims) +dataspace(A::EmptyArray) = Dataspace(API.h5s_create(API.H5S_NULL)) +dataspace(n::Nothing) = Dataspace(API.h5s_create(API.H5S_NULL)) + +# for giving sizes explicitly +""" + dataspace(dims::Tuple; max_dims::Tuple=dims) + +Construct a simple `Dataspace` for the given dimensions `dims`. The maximum +dimensions `max_dims` specify the maximum possible size: `-1` can be used to +indicate unlimited dimensions. +""" +dataspace(sz::Dims{N}; max_dims::Union{Dims{N},Tuple{}}=()) where {N} = _dataspace(sz, max_dims) +dataspace(sz1::Int, sz2::Int, sz3::Int...; max_dims::Union{Dims,Tuple{}}=()) = _dataspace(tuple(sz1, sz2, sz3...), max_dims) + + +function Base.ndims(dspace::Dataspace) + API.h5s_get_simple_extent_ndims(checkvalid(dspace)) +end +function Base.size(dspace::Dataspace) + h5_dims = API.h5s_get_simple_extent_dims(checkvalid(dspace), nothing) + N = length(h5_dims) + return ntuple(i -> @inbounds(Int(h5_dims[N-i+1])), N) +end +function Base.size(dspace::Dataspace, d::Integer) + d > 0 || throw(ArgumentError("invalid dimension d; must be positive integer")) + N = ndims(dspace) + d > N && return 1 + h5_dims = API.h5s_get_simple_extent_dims(dspace, nothing) + return @inbounds Int(h5_dims[N - d + 1]) +end +function Base.length(dspace::Dataspace) + isnull(dspace) && return 0 + h5_dims = API.h5s_get_simple_extent_dims(checkvalid(dspace), nothing) + return Int(prod(h5_dims)) +end +Base.isempty(dspace::Dataspace) = length(dspace) == 0 + + +""" + isnull(dspace::Union{HDF5.Dataspace, HDF5.Dataset, HDF5.Attribute}) + +Determines whether the given object has no size (consistent with the `API.H5S_NULL`
dataspace). + +# Examples +```julia-repl +julia> HDF5.isnull(dataspace(HDF5.EmptyArray{Float64}())) +true + +julia> HDF5.isnull(dataspace((0,))) +false +``` +""" +function isnull(dspace::Dataspace) + return API.h5s_get_simple_extent_type(checkvalid(dspace)) == API.H5S_NULL +end + + +function get_regular_hyperslab(dspace::Dataspace) + start, stride, count, block = API.h5s_get_regular_hyperslab(dspace) + N = length(start) + @inline rev(v) = ntuple(i -> @inbounds(Int(v[N-i+1])), N) + return rev(start), rev(stride), rev(count), rev(block) +end + +function hyperslab(dspace::Dataspace, I::Union{AbstractRange{Int},Int}...) + dims = size(dspace) + n_dims = length(dims) + if length(I) != n_dims + error("Wrong number of indices supplied, supplied length $(length(I)) but expected $(n_dims).") + end + dsel_id = API.h5s_copy(dspace) + dsel_start = Vector{API.hsize_t}(undef,n_dims) + dsel_stride = Vector{API.hsize_t}(undef,n_dims) + dsel_count = Vector{API.hsize_t}(undef,n_dims) + for k = 1:n_dims + index = I[n_dims-k+1] + if isa(index, Integer) + dsel_start[k] = index-1 + dsel_stride[k] = 1 + dsel_count[k] = 1 + elseif isa(index, AbstractRange) + dsel_start[k] = first(index)-1 + dsel_stride[k] = step(index) + dsel_count[k] = length(index) + else + error("index must be range or integer") + end + if dsel_start[k] < 0 || dsel_start[k]+(dsel_count[k]-1)*dsel_stride[k] >= dims[n_dims-k+1] + println(dsel_start) + println(dsel_stride) + println(dsel_count) + println(reverse(dims)) + error("index out of range") + end + end + API.h5s_select_hyperslab(dsel_id, API.H5S_SELECT_SET, dsel_start, dsel_stride, dsel_count, C_NULL) + return Dataspace(dsel_id) +end + +# methods for Dataset/Attribute which operate on Dataspace +function Base.ndims(obj::Union{Dataset,Attribute}) + dspace = dataspace(obj) + try + return Base.ndims(dspace) + finally + close(dspace) + end +end +function Base.size(obj::Union{Dataset,Attribute}) + dspace = dataspace(obj) + try + return Base.size(dspace) + finally + 
close(dspace) + end +end +function Base.size(obj::Union{Dataset,Attribute}, d::Integer) + dspace = dataspace(obj) + try + return Base.size(dspace, d) + finally + close(dspace) + end +end +function Base.length(obj::Union{Dataset,Attribute}) + dspace = dataspace(obj) + try + return Base.length(dspace) + finally + close(dspace) + end +end +function Base.isempty(obj::Union{Dataset,Attribute}) + dspace = dataspace(obj) + try + return Base.isempty(dspace) + finally + close(dspace) + end +end +function isnull(obj::Union{Dataset,Attribute}) + dspace = dataspace(obj) + try + return isnull(dspace) + finally + close(dspace) + end +end + +function hyperslab(dset::Dataset, I::Union{AbstractRange{Int},Int}...) + dspace = dataspace(dset) + try + return hyperslab(dspace, I...) + finally + close(dspace) + end +end diff --git a/src/types.jl b/src/types.jl index 32d7ec5ab..af28f4014 100644 --- a/src/types.jl +++ b/src/types.jl @@ -113,11 +113,19 @@ mutable struct Dataspace end Base.cconvert(::Type{API.hid_t}, dspace::Dataspace) = dspace Base.unsafe_convert(::Type{API.hid_t}, dspace::Dataspace) = dspace.id -Base.:(==)(dspace1::Dataspace, dspace2::Dataspace) = API.h5s_extent_equal(checkvalid(dspace1), checkvalid(dspace2)) -Base.hash(dspace::Dataspace, h::UInt) = hash(dspace.id, hash(Dataspace, h)) -Base.copy(dspace::Dataspace) = Dataspace(API.h5s_copy(checkvalid(dspace))) -# Attribute defined in attributes.jl +mutable struct Attribute + id::API.hid_t + file::File + + function Attribute(id, file) + dset = new(id, file) + finalizer(close, dset) + dset + end +end +Base.cconvert(::Type{API.hid_t}, attr::Attribute) = attr +Base.unsafe_convert(::Type{API.hid_t}, attr::Attribute) = attr.id # High-level reference handler struct Reference