
fix chunking bug identified by @jakebolewski
stevengj committed Nov 19, 2014
commit 3c4e6df (1 parent: d531c08)
Showing 3 changed files with 26 additions and 16 deletions.
src/JLD.jl: 22 additions & 15 deletions
@@ -7,7 +7,7 @@ using HDF5
 # Add methods to...
 import HDF5: close, dump, exists, file, getindex, setindex!, g_create, g_open, o_delete, name, names, read, size, write,
     HDF5ReferenceObj, HDF5BitsKind, ismmappable, readmmap
-import Base: length, endof, show, done, next, start, delete!
+import Base: length, endof, show, done, next, start, delete!, sizeof

 # .jld files written before v"0.4.0-dev+1419" might have Uint32 instead of UInt32 as the typename string.
 # See julia issue #8907
@@ -37,6 +37,7 @@ immutable JldDatatype
     dtype::HDF5Datatype
     index::Int
 end
+sizeof(T::JldDatatype) = sizeof(T.dtype)

 immutable JldWriteSession
     persist::Vector{Any} # To hold objects that should not be garbage-collected
@@ -471,33 +472,34 @@ write(parent::Union(JldFile, JldGroup), name::ByteString,
 # Pick whether to use compact or default storage based on data size
 const COMPACT_PROPERTIES = p_create(HDF5.H5P_DATASET_CREATE)
 HDF5.h5p_set_layout(COMPACT_PROPERTIES.id, HDF5.H5D_COMPACT)
-function dset_create_properties(parent, sz::Int, obj)
-    sz <= 8192 && return COMPACT_PROPERTIES
-    if iscompressed(parent)
-        chunk = HDF5.heuristic_chunk(obj)
-        if !isempty(chunk)
-            p = p_create(HDF5.H5P_DATASET_CREATE)
-            p["chunk"] = chunk
-            p["blosc"] = 5
-            return p
-        end
+function dset_create_properties(parent, sz::Int, obj, chunk=Int[])
+    sz <= 8192 && return COMPACT_PROPERTIES, false
+    if iscompressed(parent) && !isempty(chunk)
+        p = p_create(HDF5.H5P_DATASET_CREATE)
+        p["chunk"] = chunk
+        p["blosc"] = 5
+        return p, true
+    else
+        return HDF5.DEFAULT_PROPERTIES, false
     end
-    return HDF5.DEFAULT_PROPERTIES
 end

 # Write "basic" types
 function _write{T<:Union(HDF5BitsKind, ByteString)}(parent::Union(JldFile, JldGroup),
                                                     name::ByteString,
                                                     data::Union(T, Array{T}),
                                                     wsession::JldWriteSession)
-    dset, dtype = d_create(parent.plain, bytestring(name), data, HDF5._link_properties(name), dset_create_properties(parent, sizeof(data), data))
+    chunk = T <: ByteString ? Int[] : HDF5.heuristic_chunk(data)
+    dprop, dprop_close = dset_create_properties(parent, sizeof(data), data, chunk)
+    dset, dtype = d_create(parent.plain, bytestring(name), data, HDF5._link_properties(name), dprop)
     try
         # Write the attribute
         isa(data, Array) && isempty(data) && a_write(dset, "dims", [size(data)...])
         # Write the data
         HDF5.writearray(dset, dtype.id, data)
     finally
         close(dtype)
+        dprop_close && close(dprop)
     end
     dset
 end
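Note: dset_create_properties now returns a (properties, close-flag) pair instead of a bare property list. The flag is false for the shared COMPACT_PROPERTIES and DEFAULT_PROPERTIES objects (presumably so callers never close those shared constants) and true only for a freshly created chunked/blosc property list, which the caller must close. A minimal sketch of the calling pattern, with parent and data assumed to be in scope as in the _write methods in this diff:

    chunk = HDF5.heuristic_chunk(data)                 # Int[] disables chunking
    dprop, dprop_close = dset_create_properties(parent, sizeof(data), data, chunk)
    try
        # ... create the dataset with dprop and write the data ...
    finally
        dprop_close && close(dprop)                    # close only property lists created for this write
    end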
@@ -511,8 +513,10 @@ function _write{T}(parent::Union(JldFile, JldGroup),
     buf = h5convert_array(f, data, dtype, wsession)
     dims = convert(Array{HDF5.Hsize, 1}, [reverse(size(data))...])
     dspace = dataspace(data)
+    chunk = HDF5.heuristic_chunk(dtype, size(data))
+    dprop, dprop_close = dset_create_properties(parent, sizeof(buf),buf, chunk)
     try
-        dset = d_create(parent.plain, path, dtype.dtype, dspace, HDF5._link_properties(path), dset_create_properties(parent, sizeof(buf), buf))
+        dset = d_create(parent.plain, path, dtype.dtype, dspace, HDF5._link_properties(path), dprop)
         if dtype == JLD_REF_TYPE
             a_write(dset, "julia eltype", full_typename(f, T))
         end
@@ -523,6 +527,7 @@ function _write{T}(parent::Union(JldFile, JldGroup),
         end
         return dset
     finally
+        dprop_close && close(dprop)
         close(dspace)
     end
 end
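Note: this hunk is the substance of the chunking fix for general (possibly reference-typed) data. Previously the chunk shape was guessed inside dset_create_properties from the converted write buffer; now it is computed up front from the JLD datatype and the array's logical dimensions, which is what the dataset's dataspace is built from. A simplified before/after sketch reusing the names from the hunk above:

    buf = h5convert_array(f, data, dtype, wsession)   # converted write buffer

    # before: chunk guessed from the buffer inside dset_create_properties
    # dprop = dset_create_properties(parent, sizeof(buf), buf)

    # after: chunk derived from the JLD datatype and the real array shape
    chunk = HDF5.heuristic_chunk(dtype, size(data))
    dprop, dprop_close = dset_create_properties(parent, sizeof(buf), buf, chunk)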
@@ -657,11 +662,13 @@ function write_compound(parent::Union(JldFile, JldGroup), name::ByteString,
     h5convert!(pointer(buf), file(parent), s, wsession)

     dspace = HDF5Dataspace(HDF5.h5s_create(HDF5.H5S_SCALAR))
+    dprop, dprop_close = dset_create_properties(parent, length(buf), buf)
     try
-        dset = HDF5.d_create(parent.plain, name, dtype.dtype, dspace, HDF5._link_properties(name), dset_create_properties(parent, length(buf), buf))
+        dset = HDF5.d_create(parent.plain, name, dtype.dtype, dspace, HDF5._link_properties(name), dprop)
         HDF5.writearray(dset, dtype.dtype.id, buf)
         return dset
     finally
+        dprop_close && close(dprop)
         close(dspace)
     end
 end
src/plain.jl: 1 addition & 1 deletion
@@ -515,7 +515,7 @@ include("blosc_filter.jl")
 register_blosc()

 # heuristic chunk layout (return empty array to disable chunking)
-function heuristic_chunk(T::Type, shape)
+function heuristic_chunk(T, shape)
     Ts = sizeof(T)
     sz = prod(shape)
     sz == 0 && return Int[] # never return a zero-size chunk
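Note: relaxing the first argument from T::Type to plain T lets heuristic_chunk accept a JldDatatype as well as a Julia type, which works because sizeof(::JldDatatype) is defined earlier in this commit to forward to the wrapped HDF5 datatype. The remainder of heuristic_chunk is unchanged and not shown here. A short usage sketch (jld_dtype and data are hypothetical placeholders):

    HDF5.heuristic_chunk(Float64, (1000, 1000))   # element size via sizeof(Float64)
    HDF5.heuristic_chunk(jld_dtype, size(data))   # now allowed: sizeof(jld_dtype) == sizeof(jld_dtype.dtype)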
test/jld.jl: 3 additions & 0 deletions
@@ -184,6 +184,7 @@ nonearr = Array(Union(), 5)

 # some data big enough to ensure that compression is used:
 Abig = kron(eye(10), rand(20,20))
+Bbig = Any[i for i=1:3000]
 Sbig = "A test string "^1000

 iseq(x,y) = isequal(x,y)
@@ -337,6 +338,7 @@ for compress in (true,false)
     @write fid none
     @write fid nonearr
     @write fid Abig
+    @write fid Bbig
     @write fid Sbig
     # Make sure we can create groups (i.e., use HDF5 features)
     g = g_create(fid, "mygroup")
@@ -449,6 +451,7 @@ for compress in (true,false)
     @check fidr none
     @check fidr nonearr
     @check fidr Abig
+    @check fidr Bbig
     @check fidr Sbig

     x1 = read(fidr, "group1/x")
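Note: Bbig is a heterogeneous array large enough to be stored as a chunked, compressed reference dataset, i.e. the code path exercised by this fix. A self-contained round trip in the spirit of the new test (a sketch; the compress keyword is the same option toggled by the `for compress in (true,false)` loops above, and the filename is arbitrary):

    using JLD

    Bbig = Any[i for i = 1:3000]            # large Array{Any} -> written as HDF5 references

    jldopen("bbig_test.jld", "w", compress=true) do file
        write(file, "Bbig", Bbig)
    end

    Bread = jldopen("bbig_test.jld", "r") do file
        read(file, "Bbig")
    end

    @assert isequal(Bread, Bbig)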
