Skip to content

Commit

Permalink
fix JuliaIO#742, avoid instantiating large tuples
Browse files Browse the repository at this point in the history
  • Loading branch information
jmert committed Dec 7, 2020
1 parent 099c6cd commit 02b1a3f
Show file tree
Hide file tree
Showing 2 changed files with 119 additions and 12 deletions.
64 changes: 55 additions & 9 deletions src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1279,16 +1279,24 @@ function Base.read(obj::DatasetOrAttribute, ::Type{T}, I...) where T
end
end

buf = Array{T}(undef, sz...)
memspace = dataspace(buf)
if do_normalize(T)
buf = Matrix{UInt8}(undef, sizeof(T), prod(sz))
else
buf = Array{T}(undef, sz...)
end
memspace = dataspace(sz)

if obj isa Dataset
h5d_read(obj, memtype, memspace, dspace, obj.xfer, buf)
else
h5a_read(obj, memtype, buf)
end

out = do_normalize(T) ? normalize_types.(buf) : buf
if do_normalize(T)
out = reshape(normalize_types(T, buf), sz...)
else
out = buf
end

xfer_id = obj isa Dataset ? obj.xfer.id : H5P_DEFAULT
do_reclaim(T) && h5d_vlen_reclaim(memtype, memspace, xfer_id, buf)
Expand Down Expand Up @@ -1370,12 +1378,50 @@ Base.getindex(parent::Union{File,Group}, r::Reference) = _deref(parent, r)
# Dereference `r` relative to a dataset by delegating to `_deref`.
# Defined separately to resolve ambiguity.
function Base.getindex(parent::Dataset, r::Reference)
    return _deref(parent, r)
end

# Convert special wrapper values to native Julia values.
# Anything without a dedicated method below is returned unchanged.
normalize_types(x) = x

# Named tuples: normalize every field value, keeping the field names `T`.
function normalize_types(x::NamedTuple{T}) where {T}
    fields = map(normalize_types, values(x))
    return NamedTuple{T}(fields)
end

# C string pointers become Julia strings.
function normalize_types(x::Cstring)
    return unsafe_string(x)
end

# Fixed-length strings: gather the stored bytes, build a String, then strip padding.
function normalize_types(x::FixedString)
    bytes = collect(x.data)
    return unpad(String(bytes), pad(x))
end

# Fixed-size arrays: gather the stored elements, restore the array shape, and normalize
# each element.
function normalize_types(x::FixedArray)
    elems = reshape(collect(x.data), size(x)...)
    return normalize_types.(elems)
end

# Variable-length arrays: wrap the pointed-to memory without taking ownership
# (`own=false`), copy it into a Julia-owned array, and normalize each element.
function normalize_types(x::VariableArray)
    ptr = convert(Ptr{eltype(x)}, x.p)
    borrowed = unsafe_wrap(Array, ptr, x.len, own=false)
    return normalize_types.(copy(borrowed))
end
# Decode a byte matrix into a vector of normalized `T` values: every column of `buf` is
# handed (as a view, without copying) to `_normalize_types` as the raw bytes of one
# element.
function normalize_types(::Type{T}, buf::AbstractMatrix{UInt8}) where {T}
    columns = axes(buf, 2)
    return [_normalize_types(T, view(buf, :, col)) for col in columns]
end

# High-level description which should always work: reinterpret the bytes of `buf` as
# values of type `T` and return the first one.
#
# `reinterpret` itself rejects buffers whose length is incompatible with `sizeof(T)`.
# The element access is deliberately left bounds-checked so that an empty buffer raises
# a `BoundsError` instead of silently reading out-of-bounds memory.
function _typed_load(::Type{T}, buf::AbstractVector{UInt8}) where {T}
    return reinterpret(T, buf)[1]
end
# Fast path for common concrete buffer types with simple (contiguous) layout, which
# should be nearly all cases: copy the leading `sizeof(T)` bytes of `buf` directly into
# a freshly allocated `Ref{T}` and return its value.
#
# Throws a `BoundsError` when `buf` holds fewer than `sizeof(T)` bytes, since the raw
# copy below performs no checking of its own.
function _typed_load(
    ::Type{T}, buf::V
) where {T, V <: Union{Vector{UInt8}, Base.FastContiguousSubArray{UInt8,1}}}
    nbytes = sizeof(T)
    length(buf) >= nbytes || throw(BoundsError(buf, nbytes))
    dest = Ref{T}()
    # Both objects must stay rooted while raw pointers into them are in use.
    GC.@preserve dest buf begin
        destptr = convert(Ptr{UInt8}, Base.unsafe_convert(Ptr{Cvoid}, dest))
        unsafe_copyto!(destptr, pointer(buf), nbytes)
    end
    return dest[]
end

# Fallback conversion: a type without a more specific method is loaded directly from
# the bytes of `buf` via `_typed_load`.
function _normalize_types(::Type{T}, buf::AbstractVector{UInt8}) where {T}
    return _typed_load(T, buf)
end
# Decode a named tuple field by field. Each field is read from its own byte range of
# `buf` (starting at `fieldoffset` and spanning `sizeof` of the field type) and is
# normalized recursively; the results are reassembled under the original names `K`.
function _normalize_types(::Type{T}, buf::AbstractVector{UInt8}) where {K, T <: NamedTuple{K}}
    function load_field(idx)
        fieldT = fieldtype(T, idx)
        start = fieldoffset(T, idx) % Int
        bytes = view(buf, (start + 1):(start + sizeof(fieldT)))
        return _normalize_types(fieldT, bytes)
    end
    return NamedTuple{K}(ntuple(load_field, length(K)))
end
# Decode a variable-length array: load the `VariableArray` descriptor from `buf`, then
# view the memory it points to as a `sizeof(T) × len` byte matrix (wrapped without
# taking ownership). The elements are either normalized column by column or
# reinterpreted as `T` and copied into a Julia-owned vector.
function _normalize_types(::Type{V}, buf::AbstractVector{UInt8}) where {T, V <: VariableArray{T}}
    va = _typed_load(V, buf)
    rawptr = convert(Ptr{UInt8}, va.p)
    dims = (sizeof(T), Int(va.len))
    pbuf = unsafe_wrap(Array, rawptr, dims, own = false)
    do_normalize(T) && return normalize_types(T, pbuf)
    return copy(vec(reinterpret(T, pbuf)))
end
# Decode a fixed-size array of shape `size(F)` from the bytes of `buf`. Element types
# that need no normalization are reinterpreted and copied in one go; otherwise `buf` is
# split into `sizeof(T)`-byte columns that are normalized individually.
function _normalize_types(::Type{F}, buf::AbstractVector{UInt8}) where {T, F <: FixedArray{T}}
    dims = size(F)
    if !do_normalize(T)
        return copy(reshape(reinterpret(T, buf), dims...))
    end
    elembytes = reshape(buf, sizeof(T), :)
    return reshape(normalize_types(T, elembytes), dims...)
end
# Decode a C string: the bytes of `buf` hold a pointer, which is loaded and then read
# into a Julia `String` with `unsafe_string`.
function _normalize_types(::Type{Cstring}, buf::AbstractVector{UInt8})
    ptr = _typed_load(Ptr{UInt8}, buf)
    return unsafe_string(ptr)
end
# Decode a fixed-length string: turn the bytes of `buf` into a `String` and strip the
# padding indicated by `pad(T)`.
#
# `String(::Vector{UInt8})` takes ownership of its argument and truncates it to zero
# length, so the bytes are copied first; this keeps the caller's buffer intact no matter
# which concrete vector type is passed in.
function _normalize_types(::Type{T}, buf::AbstractVector{UInt8}) where {T <: FixedString}
    return unpad(String(copy(buf)), pad(T))
end

# Whether values of a type have to be passed through `normalize_types`.
# Types without a more specific method do not.
function do_normalize(::Type{T}) where {T}
    return false
end

# A named tuple needs normalization as soon as any one of its field types does.
function do_normalize(::Type{NamedTuple{T,U}}) where {U,T}
    for idx in 1:fieldcount(U)
        do_normalize(fieldtype(U, idx)) && return true
    end
    return false
end
Expand Down
67 changes: 64 additions & 3 deletions test/plain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1055,6 +1055,67 @@ dset = HDF5.create_external_dataset(hfile, "ext", fn_external, Int, (10,20))

end

# length for FixedString: a value built from the 4 bytes of b"test" must report length 4
fix = HDF5.FixedString{4,0}((b"test"...,))
@test length(fix) == 4
@testset "FixedStrings and FixedArrays" begin
    # properties for FixedString
    fix = HDF5.FixedString{4,0}((b"test"...,))
    @test length(typeof(fix)) == 4
    @test length(fix) == 4
    @test HDF5.pad(typeof(fix)) == 0
    @test HDF5.pad(fix) == 0
    # issue #742, large fixed strings are readable
    mktemp() do path, io
        close(io)
        ref = join('a':'z') ^ 1000
        # The do-block form of `h5open` closes the file handle on exit, so nothing is
        # left open when `mktemp` removes the temporary file.
        h5open(path, "w") do fid
            # long string serialized as FixedString
            fid["longstring"] = ref

            # compound datatype containing a FixedString
            compound_dtype = HDF5.Datatype(HDF5.h5t_create(HDF5.H5T_COMPOUND, sizeof(Int64) + sizeof(ref)))
            HDF5.h5t_insert(compound_dtype, "n", 0, datatype(Int64))
            HDF5.h5t_insert(compound_dtype, "a", sizeof(Int64), datatype(ref))
            c = create_dataset(fid, "compoundlongstring", compound_dtype, dataspace(()))
            # normally this is done with a `struct name; n::Int64; a::NTuple{N,Char}; end`, but
            # we need to not actually instantiate the NTuple.
            buf = IOBuffer()
            write(buf, Int64(9), ref)
            # Abort (rather than merely record a failure) if the hand-built buffer does
            # not match the datatype size, since it is written as raw bytes next.
            @assert position(buf) == sizeof(compound_dtype)
            write_dataset(c, compound_dtype, take!(buf))

            # Test reading without stalling
            d = fid["longstring"]
            T = HDF5.get_jl_type(d)
            @test T <: HDF5.FixedString
            @test length(T) == length(ref)
            @test read(d) == ref

            T = HDF5.get_jl_type(c)
            @test T <: NamedTuple
            @test fieldnames(T) == (:n, :a)
            @test read(c) == (n = 9, a = ref)
        end
    end

    # properties for FixedArray
    fix = HDF5.FixedArray{Float64,(2,2),4}((1, 2, 3, 4))
    @test size(typeof(fix)) == (2, 2)
    @test size(fix) == (2, 2)
    @test eltype(typeof(fix)) == Float64
    @test eltype(fix) == Float64
    # large fixed arrays are readable
    mktemp() do path, io
        close(io)
        ref = rand(Float64, 3000)
        t = HDF5.Datatype(HDF5.h5t_array_create(datatype(Float64), ndims(ref), collect(size(ref))))
        scalarspace = dataspace(())

        # As above, the do-block form ensures the file is closed before it is deleted.
        h5open(path, "w") do fid
            d = create_dataset(fid, "longnums", t, scalarspace)
            write_dataset(d, t, ref)

            T = HDF5.get_jl_type(d)
            @test T <: HDF5.FixedArray
            @test size(T) == size(ref)
            @test eltype(T) == eltype(ref)
            @test read(d) == ref
        end
    end
end

0 comments on commit 02b1a3f

Please sign in to comment.