JuliaIO · musm · Jun 7, 2022 · May 13, 2022 · May 29, 2022 · Jun 6, 2022
diff --git a/src/HDF5.jl b/src/HDF5.jl
@@ -957,6 +957,26 @@ function Base.read(obj::DatasetOrAttribute, ::Type{T}, I...) where T
     return val
 end
 
+"""
+    read!(obj::DatasetOrAttribute, output_buffer::AbstractArray{T} [, I...]) where T
+
+Read [part of] a dataset or attribute into a preallocated output buffer.
+The output buffer must be convertible to a pointer and have a contiguous layout.
+
+The element type `T` of `output_buffer` is the type that is read. Indices `I...`
+select part of a dataset; attributes do not support such selections.
+"""
+function Base.read!(obj::DatasetOrAttribute, buf::AbstractArray{T}, I...) where T
+    dtype = datatype(obj)
+    try
+        return generic_read!(buf, obj, dtype, T, I...)
+    finally
+        # Runs on both the normal and the error path, so the datatype handle is
+        # released even when the read throws.
+        close(dtype)
+    end
+end
+
 # `Type{String}` does not have a definite size, so the generic_read does not accept
 # it even though it will return a `String`. This explicit overload allows that usage.
 function Base.read(obj::DatasetOrAttribute, ::Type{String}, I...)
@@ -968,10 +981,32 @@ function Base.read(obj::DatasetOrAttribute, ::Type{String}, I...)
     return val
 end
 
+"""
+    copyto!(output_buffer::AbstractArray{T}, obj::DatasetOrAttribute [, I...]) where T
+
+Copy [part of] an HDF5 dataset or attribute to a preallocated output buffer.
+The output buffer must be convertible to a pointer and have a contiguous layout.
+
+This forwards to `read!(obj, output_buffer, I...)`.
+"""
+function Base.copyto!(output_buffer::AbstractArray{T}, obj::DatasetOrAttribute, I...) where T
+    return Base.read!(obj, output_buffer, I...)
+end
+
 # Special handling for reading OPAQUE datasets and attributes
-function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque})
+function generic_read!(buf::Matrix{UInt8}, obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque})
+    # `generic_read` hands a supplied buffer to the low-level read as is, so accept
+    # only the exact shape it would otherwise allocate itself.
+    expected = (sizeof(filetype), prod(size(obj)))
+    size(buf) == expected ||
+        error("Provided buffer of size, $(size(buf)), does not match the expected size, $expected, of the opaque data")
+    return generic_read(obj, filetype, Opaque, buf)
+end
+function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque}, buf::Union{Matrix{UInt8}, Nothing} = nothing)
     sz  = size(obj)
-    buf = Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
+    if isnothing(buf)
+        buf = Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
+    end
     if obj isa Dataset
         read_dataset(obj, filetype, buf, obj.xfer)
     else
@@ -989,7 +1017,14 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque
 end
 
 # generic read function
+function generic_read!(buf::Union{AbstractMatrix{UInt8}, AbstractArray{T}}, obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I...) where T
+    return _generic_read(obj, filetype, T, buf, I...)  # forward the caller-supplied buffer
+end
 function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I...) where T
+    return _generic_read(obj, filetype, T, nothing, I...)  # `nothing`: `_generic_read` allocates the buffer
+end
+function _generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T},
+    buf::Union{AbstractMatrix{UInt8}, AbstractArray{T}, Nothing}, I...) where T
     !isconcretetype(T) && error("type $T is not concrete")
     !isempty(I) && obj isa Attribute && error("HDF5 attributes do not support hyperslab selections")
 
@@ -1026,13 +1061,18 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
         end
     end
 
-    if do_normalize(T)
-        # The entire dataset is read into in a buffer matrix where the first dimension at
-        # any stage of normalization is the bytes for a single element of type `T`, and
-        # the second dimension of the matrix runs through all elements.
-        buf = Matrix{UInt8}(undef, sizeof(T), prod(sz))
+    if isnothing(buf)
+        if do_normalize(T)
+            # The entire dataset is read into a buffer matrix where the first dimension at
+            # any stage of normalization is the bytes for a single element of type `T`, and
+            # the second dimension of the matrix runs through all elements.
+            buf = Matrix{UInt8}(undef, sizeof(T), prod(sz))
+        else
+            buf = Array{T}(undef, sz...)
+        end
     else
-        buf = Array{T}(undef, sz...)
+        sizeof(buf) != prod(sz)*sizeof(T) &&
+            error("Provided array buffer of size, $(size(buf)), and element type, $(eltype(buf)), does not match the dataset of size, $sz, and type, $T")
     end
     memspace = isempty(I) ? dspace : dataspace(sz)
 
@@ -1062,6 +1102,98 @@ function generic_read(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I.
     end
 end
 
+"""
+    similar(obj::DatasetOrAttribute, [::Type{T}], [I::Integer...])
+
+Return an `Array{T}` or `Matrix{UInt8}` that can contain [part of] the dataset.
+
+Without `T`, the element type is the one `get_jl_type` reports for the datatype of
+`obj`. Each dimension `n` in `I...` is treated as the index range `1:n`; attributes
+do not support such selections.
+"""
+function Base.similar(obj::DatasetOrAttribute, I::Integer...)
+    dtype = datatype(obj)
+    try
+        return similar(obj, dtype, get_jl_type(dtype), I...)
+    finally
+        # Release the datatype handle even when sizing the buffer throws.
+        close(dtype)
+    end
+end
+
+function Base.similar(obj::DatasetOrAttribute, ::Type{T}, I::Integer...) where T
+    dtype = datatype(obj)
+    try
+        return similar(obj, dtype, T, I...)
+    finally
+        close(dtype)
+    end
+end
+
+# Opaque data: a byte matrix with `sizeof(filetype)` rows and one column per element.
+function Base.similar(obj::DatasetOrAttribute, filetype::Datatype, ::Type{Opaque})
+    sz  = size(obj)
+    return Matrix{UInt8}(undef, sizeof(filetype), prod(sz))
+end
+
+# Duplicated from generic_read. TODO: Deduplicate
+function Base.similar(obj::DatasetOrAttribute, filetype::Datatype, ::Type{T}, I::Integer...) where T
+    !isconcretetype(T) && error("type $T is not concrete")
+    !isempty(I) && obj isa Attribute && error("HDF5 attributes do not support hyperslab selections")
+
+    # Treat each requested dimension `n` as the index range `1:n`.
+    I = Base.OneTo.(I)
+
+    memtype = Datatype(API.h5t_get_native_type(filetype))  # padded layout in memory
+    dspace = nothing
+    try
+        if sizeof(T) != sizeof(memtype)
+            error("""
+                  Type size mismatch
+                  sizeof($T) = $(sizeof(T))
+                  sizeof($memtype) = $(sizeof(memtype))
+                  """)
+        end
+
+        dspace = dataspace(obj)
+        stype = API.h5s_get_simple_extent_type(dspace)
+        stype == API.H5S_NULL && return EmptyArray{T}()
+
+        if !isempty(I)
+            indices = Base.to_indices(obj, I)
+            dspace = hyperslab(dspace, indices...)
+        end
+
+        if stype == API.H5S_SCALAR
+            sz = (1,)
+        elseif isempty(I)
+            sz = size(dspace)
+        else
+            sz = map(length, filter(i -> !isa(i, Int), indices))
+            if isempty(sz)
+                sz = (1,)
+            end
+        end
+
+        if do_normalize(T)
+            # Same buffer shape as generic_read uses for normalization: the first
+            # dimension holds the bytes of a single element of type `T`, and the
+            # second dimension runs through all elements.
+            # NOTE(review): `normalize_types` is applied to uninitialized bytes here;
+            # confirm that this is safe for every normalized type.
+            buf = Matrix{UInt8}(undef, sizeof(T), prod(sz))
+            return reshape(normalize_types(T, buf), sz...)
+        else
+            return Array{T}(undef, sz...)
+        end
+    finally
+        # Runs on the early `return`s and on errors as well, so neither handle is
+        # left open.
+        close(memtype)
+        isnothing(dspace) || close(dspace)
+    end
+end
+
 # Array constructor for datasets
 Array(x::Dataset) = read(x)
 

diff --git a/test/nonallocating.jl b/test/nonallocating.jl
@@ -0,0 +1,48 @@
+using HDF5
+using Test
+
+@testset "non-allocating methods" begin
+    fn = tempname()
+    data = rand(UInt16, 16, 16)
+
+    try
+        h5open(fn, "w") do h5f
+            h5f["data"] = data
+        end
+
+        h5open(fn, "r") do h5f
+            dset = h5f["data"]
+
+            buffer = similar(dset)
+            copyto!(buffer, dset)
+            @test buffer == data
+
+            # Overwrite the buffer first so the check proves that `read!` filled it.
+            buffer .= 0
+            read!(dset, buffer)
+            @test buffer == data
+
+            # Consider making this a view later
+            v = dset[1:4, 1:4]
+
+            buffer = similar(v)
+            @test size(buffer) == (4, 4)
+            copyto!(buffer, v)
+            @test buffer == @view(data[1:4, 1:4])
+
+            buffer .= 1
+            read!(dset, buffer, 1:4, 1:4)
+            @test buffer == @view(data[1:4, 1:4])
+
+            # A buffer whose byte size differs from the dataset must be rejected.
+            @test_throws ErrorException read!(dset, Vector{UInt16}(undef, 3))
+
+            @test size(similar(dset, Int16)) == size(dset)
+            @test size(similar(dset, 5, 6)) == (5, 6)
+            @test size(similar(dset, Int16, 8, 7)) == (8, 7)
+        end
+    finally
+        # Remove the temporary file even when a step above throws.
+        rm(fn; force=true)
+    end
+end
diff --git a/test/runtests.jl b/test/runtests.jl
@@ -50,6 +50,8 @@ include("filter.jl")
 include("chunkstorage.jl")
 @debug "fileio"
 include("fileio.jl")
+@debug "nonallocating"
+include("nonallocating.jl")
 @debug "filter test utils"
 include("filters/FilterTestUtils.jl")