Skip to content

Commit

Permalink
Add copyto! and similar for Datasets (#937)
Browse files Browse the repository at this point in the history
* Add read!, copyto! and similar for Datasets

* Remove views code from this pull request

* Remove read!, refactor similar

* Fix normalize keyword, add to tests

* Use Dims

* Add handle hygiene to _generic_read

* Fix normalize keyword

* More normalize keyword fixes

* Fix normalize
  • Loading branch information
mkitti authored Jun 7, 2022
1 parent 6dd3b56 commit 976ec2a
Show file tree
Hide file tree
Showing 3 changed files with 267 additions and 42 deletions.
268 changes: 226 additions & 42 deletions src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -859,10 +859,32 @@ function Base.read(obj::DatasetOrAttribute, ::Type{String}, I...)
return val
end

"""
copyto!(output_buffer::AbstractArray{T}, obj::Union{DatasetOrAttribute}) where T
Copy [part of] a HDF5 dataset or attribute to a preallocated output buffer.
The output buffer must be convertible to a pointer and have a contiguous layout.
"""
function Base.copyto!(output_buffer::AbstractArray{T}, obj::DatasetOrAttribute, I...) where T
dtype = datatype(obj)
val = nothing
try
val = generic_read!(output_buffer, obj, dtype, T, I...)
finally
close(dtype)
end
return val
end

# Special handling for reading OPAQUE datasets and attributes
function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque})
# In-place variant for OPAQUE data: read into a caller-supplied byte matrix.
generic_read!(buf::Matrix{UInt8}, obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}) =
    generic_read(obj, filetype, Opaque, buf)
function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}, buf::Union{Matrix{UInt8}, Nothing} = nothing)
sz = size(obj)
buf = Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
if isnothing(buf)
buf = Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
end
if obj isa Dataset
read_dataset(obj, filetype, buf, obj.xfer)
else
Expand All @@ -880,11 +902,164 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque
end

# generic read function
# In-place read entry point: forwards the caller's buffer to the shared
# implementation, which validates its size against the selection.
generic_read!(
    buf::Union{AbstractMatrix{UInt8}, AbstractArray{T}},
    obj::DatasetOrAttribute,
    filetype::Datatype,
    ::Type{T},
    I...
) where T = _generic_read(obj, filetype, T, buf, I...)
# Allocating read entry point: passing `nothing` makes the shared
# implementation allocate the buffer itself.
generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I...) where T =
    _generic_read(obj, filetype, T, nothing, I...)
#=
    _generic_read(obj, filetype, ::Type{T}, buf, I...)

Shared implementation behind `generic_read` (allocating, `buf === nothing`) and
`generic_read!` (in-place, caller-supplied `buf`). Reads [part of] `obj` into a
buffer and normalizes variable-length/string types when required.
=#
function _generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T},
    buf::Union{AbstractMatrix{UInt8}, AbstractArray{T}, Nothing}, I...) where T

    # Buffer shape, scalar-ness, and the (possibly hyperslab-selected) dataspace.
    sz, scalar, dspace = _size_of_buffer(obj, I)

    if isempty(sz)
        # Empty selection: nothing to read.
        close(dspace)
        return EmptyArray{T}()
    end

    try
        if isnothing(buf)
            # Allocating path: Array{T}, or a UInt8 staging matrix for
            # normalized types (see _normalized_buffer).
            buf = _normalized_buffer(T, sz)
        else
            # In-place path: the caller's buffer must match the selection in bytes.
            sizeof(buf) != prod(sz)*sizeof(T) &&
                error("Provided array buffer of size, $(size(buf)), and element type, $(eltype(buf)), does not match the dataset of size, $sz, and type, $T")
        end
    catch err
        # Handle hygiene: do not leak the dataspace if sizing/allocation fails.
        close(dspace)
        rethrow(err)
    end

    memtype = _memtype(filetype, T)
    # With no index selection the file dataspace doubles as the memory space.
    memspace = isempty(I) ? dspace : dataspace(sz)
    # NOTE(review): when I is empty, memspace === dspace, so the finally block
    # below closes the same object twice — confirm Dataspace close is idempotent.

    try
        if obj isa Dataset
            API.h5d_read(obj, memtype, memspace, dspace, obj.xfer, buf)
        else
            API.h5a_read(obj, memtype, buf)
        end

        if do_normalize(T)
            # Convert the raw byte staging buffer into proper Julia values.
            out = reshape(normalize_types(T, buf), sz...)
        else
            out = buf
        end

        xfer_id = obj isa Dataset ? obj.xfer.id : API.H5P_DEFAULT
        # Release HDF5-allocated variable-length memory referenced by buf.
        do_reclaim(T) && API.h5d_vlen_reclaim(memtype, memspace, xfer_id, buf)

        if scalar
            return out[1]
        else
            return out
        end

    finally
        close(memtype)
        close(memspace)
        close(dspace)
    end
end


"""
similar(obj::DatasetOrAttribute, [::Type{T}], [dims::Integer...]; normalize = true)
Return a `Array{T}` or `Matrix{UInt8}` to that can contain [part of] the dataset.
The `normalize` keyword will normalize the buffer for string and array datatypes.
"""
function Base.similar(
obj::DatasetOrAttribute,
::Type{T},
dims::Dims;
normalize::Bool = true
) where T
filetype = datatype(obj)
try
return similar(obj, filetype, T, dims; normalize=normalize)
finally
close(filetype)
end
end
# Convenience overload: accept loose Integer dimensions and promote them to a
# Dims tuple before dispatching to the Dims method.
function Base.similar(
    obj::DatasetOrAttribute,
    ::Type{T},
    dims::Integer...;
    normalize::Bool = true
) where T
    return similar(obj, T, Int.(dims); normalize=normalize)
end

# Base.similar without specifying the Julia type: the element type is derived
# from the dataset's file datatype.
function Base.similar(obj::DatasetOrAttribute, dims::Dims; normalize::Bool = true)
    dt = datatype(obj)
    try
        return similar(obj, dt, get_jl_type(dt), dims; normalize=normalize)
    finally
        close(dt)
    end
end
# Integer-varargs convenience overload when no element type is given.
function Base.similar(obj::DatasetOrAttribute, dims::Integer...; normalize::Bool = true)
    return similar(obj, Int.(dims); normalize=normalize)
end

# Opaque types: the buffer is always a raw byte matrix, one column per element.
function Base.similar(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}; normalize::Bool = true)
    # The normalize keyword exists only for signature consistency; opaque data
    # is never normalized.
    nelem = prod(size(obj))
    return Matrix{UInt8}(undef, sizeof(filetype), nelem)
end

# Undocumented Base.similar signature allowing the file datatype to be specified.
function Base.similar(
    obj::DatasetOrAttribute,
    filetype::Datatype,
    ::Type{T},
    dims::Dims;
    normalize::Bool = true
) where T
    # The sizing helper expects indices, so build unit ranges from dims.
    indices = Base.OneTo.(dims)
    sz, _, dspace = _size_of_buffer(obj, indices)
    memtype = _memtype(filetype, T)
    try
        buf = _normalized_buffer(T, sz)
        # For normalized types, optionally present the staging bytes as values.
        return (normalize && do_normalize(T)) ? reshape(normalize_types(T, buf), sz) : buf
    finally
        close(dspace)
        close(memtype)
    end
end
# Integer-varargs form of the filetype-specified similar.
function Base.similar(
    obj::DatasetOrAttribute,
    filetype::Datatype,
    ::Type{T},
    dims::Integer...;
    normalize::Bool = true
) where T
    return similar(obj, filetype, T, Int.(dims); normalize=normalize)
end

# Utilities used in Base.similar implementation

#=
_memtype(filetype::Datatype, T)
This is a utility function originally from generic_read.
It gets the native memory type for the system based on filetype, and checks
if the size matches.
=#
@inline function _memtype(filetype::Datatype, ::Type{T}) where T
!isconcretetype(T) && error("type $T is not concrete")
!isempty(I) && obj isa Attribute && error("HDF5 attributes do not support hyperslab selections")

memtype = Datatype(API.h5t_get_native_type(filetype)) # padded layout in memory
# padded layout in memory
memtype = Datatype(API.h5t_get_native_type(filetype))

if sizeof(T) != sizeof(memtype)
error("""
Expand All @@ -894,11 +1069,37 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
""")
end

dspace = dataspace(obj)
return memtype
end

#=
_size_of_buffer(obj::DatasetOrAttribute, [I::Tuple, dspace::Dataspace])
This is a utility function originally from generic_read, but factored out.
The primary purpose is to determine the size and shape of the buffer to
create in order to hold the contents of a Dataset or Attribute.
# Arguments
* obj - A Dataset or Attribute
* I - (optional) indices, defaults to ()
* dspace - (optional) dataspace, defaults to dataspace(obj).
This argument will be consumed by hyperslab and returned.
# Returns
* `sz` the size of the selection
* `scalar`, which is true if the value should be read as a scalar.
* `dspace`, the dataspace (hyperslab-selected when indices were given)
=#
@inline function _size_of_buffer(
obj::DatasetOrAttribute,
I::Tuple = (),
dspace::Dataspace = dataspace(obj)
)
!isempty(I) && obj isa Attribute && error("HDF5 attributes do not support hyperslab selections")

stype = API.h5s_get_simple_extent_type(dspace)
stype == API.H5S_NULL && return EmptyArray{T}()

if !isempty(I)
if !isempty(I) && stype != API.H5S_NULL
indices = Base.to_indices(obj, I)
dspace = hyperslab(dspace, indices...)
end
Expand All @@ -907,16 +1108,32 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
if stype == API.H5S_SCALAR
sz = (1,)
scalar = true
elseif stype == API.H5S_NULL
sz = ()
# scalar = false
elseif isempty(I)
sz = size(dspace)
# scalar = false
else
# Determine the size by the length of non-Int indices
sz = map(length, filter(i -> !isa(i, Int), indices))
if isempty(sz)
# All indices are Int, so this is scalar
sz = (1,)
scalar = true
end
end

return sz, scalar, dspace
end

#=
_normalized_buffer(T, sz)
Return a Matrix{UInt8} for a normalized type or `Array{T}` for a regular type.
See `do_normalize` in typeconversions.jl.
=#
@inline function _normalized_buffer(::Type{T}, sz::NTuple{N, Int}) where {T, N}
if do_normalize(T)
# The entire dataset is read into in a buffer matrix where the first dimension at
# any stage of normalization is the bytes for a single element of type `T`, and
Expand All @@ -925,32 +1142,8 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
else
buf = Array{T}(undef, sz...)
end
memspace = isempty(I) ? dspace : dataspace(sz)

if obj isa Dataset
API.h5d_read(obj, memtype, memspace, dspace, obj.xfer, buf)
else
API.h5a_read(obj, memtype, buf)
end

if do_normalize(T)
out = reshape(normalize_types(T, buf), sz...)
else
out = buf
end

xfer_id = obj isa Dataset ? obj.xfer.id : API.H5P_DEFAULT
do_reclaim(T) && API.h5d_vlen_reclaim(memtype, memspace, xfer_id, buf)

close(memtype)
close(memspace)
close(dspace)

if scalar
return out[1]
else
return out
end
return buf
end

# Array constructor for datasets
Expand Down Expand Up @@ -1160,18 +1353,9 @@ function Base.setindex!(dset::Dataset, X::Array{T}, I::IndexType...) where T
end

filetype = datatype(dset)
memtype = Datatype(API.h5t_get_native_type(filetype)) # padded layout in memory
memtype = _memtype(filetype, eltype(X))
close(filetype)

elT = eltype(X)
if sizeof(elT) != sizeof(memtype)
error("""
Type size mismatch
sizeof($elT) = $(sizeof(elT))
sizeof($memtype) = $(sizeof(memtype))
""")
end

dspace = dataspace(dset)
stype = API.h5s_get_simple_extent_type(dspace)
stype == API.H5S_NULL && error("attempting to write to null dataspace")
Expand Down
39 changes: 39 additions & 0 deletions test/nonallocating.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
using HDF5
using Test

@testset "non-allocating methods" begin
fn = tempname()

data = rand(UInt16, 16, 16)

h5open(fn, "w") do h5f
h5f["data"] = data
end

h5open(fn, "r") do h5f
buffer = similar(h5f["data"])
copyto!(buffer, h5f["data"])
@test isequal(buffer, data)

# Consider making this a view later
v = h5f["data"][1:4, 1:4]

buffer = similar(v)
@test size(buffer) == (4,4)
copyto!(buffer, v)
@test isequal(buffer, @view(data[1:4, 1:4]))

@test size(similar(h5f["data"], Int16)) == size(h5f["data"])
@test size(similar(h5f["data"], 5,6)) == (5, 6)
@test size(similar(h5f["data"], Int16, 8,7)) == (8,7)
@test size(similar(h5f["data"], Int16, 8,7; normalize = false)) == (8,7)
@test_broken size(similar(h5f["data"], Int8, 8,7)) == (8,7)

@test size(similar(h5f["data"], (5,6))) == (5, 6)
@test size(similar(h5f["data"], Int16, (8,7))) == (8,7)
@test size(similar(h5f["data"], Int16, (8,7); normalize = false)) == (8,7)
@test size(similar(h5f["data"], Int16, 0x8,0x7; normalize = false)) == (8,7)
end

rm(fn)
end
2 changes: 2 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ include("filter.jl")
include("chunkstorage.jl")
@debug "fileio"
include("fileio.jl")
@debug "nonallocating"
include("nonallocating.jl")
@debug "filter test utils"
include("filters/FilterTestUtils.jl")

Expand Down

0 comments on commit 976ec2a

Please sign in to comment.