Skip to content

Commit

Permalink
fix JuliaIO#742, avoid instantiating large tuples
Browse files Browse the repository at this point in the history
  • Loading branch information
jmert committed Nov 19, 2020
1 parent e31b731 commit 0b6e115
Show file tree
Hide file tree
Showing 2 changed files with 92 additions and 12 deletions.
57 changes: 48 additions & 9 deletions src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1181,16 +1181,25 @@ function Base.read(obj::DatasetOrAttribute, ::Type{T}, I...) where T
end
end

buf = Array{T}(undef, sz...)
memspace = dataspace(buf)
sz = Int.(sz)
if do_normalize(T)
buf = Array{UInt8}(undef, sizeof(T), prod(sz))
else
buf = Array{T}(undef, sz...)
end
memspace = dataspace(sz)

if obj isa Dataset
h5d_read(obj, memtype, memspace, dspace, obj.xfer, buf)
else
h5a_read(obj, memtype, buf)
end

out = do_normalize(T) ? normalize_types.(buf) : buf
if do_normalize(T)
out = reshape(normalize_types(T, buf), sz...)
else
out = buf
end

xfer_id = obj isa Dataset ? obj.xfer.id : H5P_DEFAULT
do_reclaim(T) && h5d_vlen_reclaim(memtype.id, memspace.id, xfer_id, buf)
Expand Down Expand Up @@ -1272,12 +1281,42 @@ Base.getindex(parent::Union{File,Group}, r::Reference) = _deref(parent, r)
Base.getindex(parent::Dataset, r::Reference) = _deref(parent, r) # defined separately to resolve ambiguity

# convert special types to native julia types
# Fallback: a value with no more specific method is returned unchanged.
function normalize_types(x)
    return x
end
# Named tuples keep their field names `T`; every field value is normalized in turn.
function normalize_types(x::NamedTuple{T}) where {T}
    converted = map(normalize_types, values(x))
    return NamedTuple{T}(converted)
end
# A `Cstring` is turned into a Julia `String` via `unsafe_string`.
function normalize_types(x::Cstring)
    return unsafe_string(x)
end
# FixedString: collect the `data` field, build a `String` from it, then strip
# padding with `unpad` according to `pad(x)`.
function normalize_types(x::FixedString)
    bytes = collect(x.data)
    return unpad(String(bytes), pad(x))
end
# FixedArray: collect the `data` field, reshape it to `size(x)`, and normalize
# each element.
function normalize_types(x::FixedArray)
    elements = reshape(collect(x.data), size(x)...)
    return normalize_types.(elements)
end
# VariableArray: wrap the memory at `x.p` (without taking ownership) as `x.len`
# elements of `eltype(x)`, copy it, and normalize each element of the copy.
function normalize_types(x::VariableArray)
    ptr = convert(Ptr{eltype(x)}, x.p)
    wrapped = unsafe_wrap(Array, ptr, x.len, own=false)
    return normalize_types.(copy(wrapped))
end
# Normalize a raw buffer element by element.
#
# The first dimension of `buf` is treated as the storage of a single element of
# type `T`; the remaining dimensions index the elements. Each slice
# `view(buf, :, idx)` is passed to `_normalize_types(T, ...)`, and the results are
# collected into an array shaped like the trailing dimensions of `buf`.
function normalize_types(::Type{T}, buf) where {T}
    trailing = Base.tail(size(buf))
    return [_normalize_types(T, view(buf, :, idx)) for idx in CartesianIndices(trailing)]
end
# Read one value of type `T` from the start of `buf`'s memory.
#
# `buf` is kept rooted with `GC.@preserve` for the duration of the raw pointer
# load. No bounds check is performed here.
function _typed_load(::Type{T}, buf) where {T}
    return GC.@preserve buf unsafe_load(convert(Ptr{T}, pointer(buf)))
end

# Fallback for types without a more specific method: load the value of type `T`
# directly from the bytes in `buf`.
function _normalize_types(::Type{T}, buf) where {T}
    return _typed_load(T, buf)
end
# Convert the raw bytes of one compound element (`buf`) into a `NamedTuple` with
# field names `K`, normalizing every field independently.
#
# Each field is handled through its own byte range: it starts `fieldoffset(T, ii)`
# bytes past the start of `buf` and is `sizeof(fieldtype(T, ii))` bytes long. The
# complete `T` value is never loaded, so large inner tuples are not instantiated
# just to count the fields.
function _normalize_types(::Type{T}, buf) where {K, T <: NamedTuple{K}}
    fields = GC.@preserve buf begin
        # Qualified so the call resolves whether or not `unsafe_convert` is imported.
        base = Base.unsafe_convert(Ptr{UInt8}, buf)
        ntuple(fieldcount(T)) do ii
            elT = fieldtype(T, ii)
            # Non-owning wrapper over this field's bytes; only used while `buf`
            # is preserved.
            sub = unsafe_wrap(Array, base + fieldoffset(T, ii), sizeof(elT), own = false)
            return _normalize_types(elT, sub)
        end
    end
    return NamedTuple{K}(fields)
end
# Normalize a variable-length array element stored in `buf`.
#
# The `V` value is loaded from `buf`; the memory at its `p` field is then wrapped
# (not copied, not owned) as a `sizeof(T)`-by-`len` byte matrix and passed on to
# `normalize_types(T, ...)`.
function _normalize_types(::Type{V}, buf) where {T, V <: VariableArray{T}}
    header = _typed_load(V, buf)
    data_ptr = convert(Ptr{UInt8}, header.p)
    dims = (sizeof(T), Int(header.len))
    payload = unsafe_wrap(Array, data_ptr, dims, own = false)
    return normalize_types(T, payload)
end
# Normalize a fixed-size array element stored in `buf`.
#
# `buf` is reshaped so the first dimension spans the bytes of one `T` and the
# remaining dimensions follow `size(F)`; the reshaped buffer is then normalized
# element by element through `normalize_types(T, ...)`.
function _normalize_types(::Type{F}, buf) where {T, F <: FixedArray{T}}
    elbytes = sizeof(T)
    shaped = reshape(buf, elbytes, size(F)...)
    return normalize_types(T, shaped)
end
# `Cstring`: reinterpret the bytes in `buf` as `Ptr{UInt8}` values, take the first
# one, and build a `String` from the memory it points to with `unsafe_string`.
function _normalize_types(::Type{Cstring}, buf)
    address = reinterpret(Ptr{UInt8}, buf)[1]
    return unsafe_string(address)
end
# `FixedString`: build a `String` from the bytes in `buf`, then strip padding with
# `unpad` according to `pad(T)`.
function _normalize_types(::Type{T}, buf) where {T <: FixedString}
    raw = String(buf)
    return unpad(raw, pad(T))
end

# Fallback: types without a more specific method report `false`.
function do_normalize(::Type{T}) where {T}
    return false
end
# A named tuple type reports `true` as soon as any of its field types does.
function do_normalize(::Type{NamedTuple{T,U}}) where {U,T}
    return any(do_normalize(fieldtype(U, idx)) for idx in 1:fieldcount(U))
end
Expand Down
47 changes: 44 additions & 3 deletions test/plain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,47 @@ dset = HDF5.create_external_dataset(hfile, "ext", fn_external, Int, (10,20))

end

# a FixedString built from the four bytes of "test" reports length 4
fix = HDF5.FixedString{4,0}(Tuple(b"test"))
@test 4 == length(fix)
@testset "FixedStrings and FixedArrays" begin
    # properties for FixedString: length and pad are queryable on both the type
    # and an instance
    fix = HDF5.FixedString{4,0}((b"test"...,))
    @test length(typeof(fix)) == 4
    @test length(fix) == 4
    @test HDF5.pad(typeof(fix)) == 0
    @test HDF5.pad(fix) == 0
    # issue #742, large fixed strings are readable
    mktemp() do path, io
        close(io)
        ref = join('a':'z') ^ 1000
        # The do-block form closes the file even when a test throws, so no handle
        # is left open when `mktemp` cleans up the temporary file.
        h5open(path, "w") do fid
            fid["longstring"] = ref

            d = fid["longstring"]
            T = HDF5.get_jl_type(d)
            @test T <: HDF5.FixedString
            @test length(T) == length(ref)
            @test read(d) == ref
        end
    end

    # properties for FixedArray: size and eltype are queryable on both the type
    # and an instance
    fix = HDF5.FixedArray{Float64,(2,2),4}((1, 2, 3, 4))
    @test size(typeof(fix)) == (2, 2)
    @test size(fix) == (2, 2)
    @test eltype(typeof(fix)) == Float64
    @test eltype(fix) == Float64
    # large fixed arrays are readable
    mktemp() do path, io
        close(io)
        ref = rand(Float64, 3000)
        t = HDF5.Datatype(HDF5.h5t_array_create(datatype(Float64), ndims(ref), collect(size(ref))))
        scalarspace = dataspace(())

        # Same pattern as above: the file is closed when the block exits.
        h5open(path, "w") do fid
            d = create_dataset(fid, "longnums", t, scalarspace)
            write_dataset(d, t, ref)

            T = HDF5.get_jl_type(d)
            @test T <: HDF5.FixedArray
            @test size(T) == size(ref)
            @test eltype(T) == eltype(ref)
            @test read(d) == ref
        end
    end
end

0 comments on commit 0b6e115

Please sign in to comment.