From ee41aa65ebc4a6ce9f9ef026e8d944bfc7a80f6c Mon Sep 17 00:00:00 2001 From: "Steven G. Johnson" Date: Wed, 19 Nov 2014 17:01:52 -0500 Subject: [PATCH] fix chunking bug identified by @jakebolewski --- src/JLD.jl | 26 +++++++++++++------------- src/plain.jl | 2 +- test/jld.jl | 3 +++ 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/src/JLD.jl b/src/JLD.jl index d36b2b413..2f68877d0 100644 --- a/src/JLD.jl +++ b/src/JLD.jl @@ -7,7 +7,7 @@ using HDF5 # Add methods to... import HDF5: close, dump, exists, file, getindex, setindex!, g_create, g_open, o_delete, name, names, read, size, write, HDF5ReferenceObj, HDF5BitsKind, ismmappable, readmmap -import Base: length, endof, show, done, next, start, delete! +import Base: length, endof, show, done, next, start, delete!, sizeof # .jld files written before v"0.4.0-dev+1419" might have Uint32 instead of UInt32 as the typename string. # See julia issue #8907 @@ -37,6 +37,7 @@ immutable JldDatatype dtype::HDF5Datatype index::Int end +sizeof(T::JldDatatype) = sizeof(T.dtype) immutable JldWriteSession persist::Vector{Any} # To hold objects that should not be garbage-collected @@ -471,18 +472,16 @@ write(parent::Union(JldFile, JldGroup), name::ByteString, # Pick whether to use compact or default storage based on data size const COMPACT_PROPERTIES = p_create(HDF5.H5P_DATASET_CREATE) HDF5.h5p_set_layout(COMPACT_PROPERTIES.id, HDF5.H5D_COMPACT) -function dset_create_properties(parent, sz::Int, obj) +function dset_create_properties(parent, sz::Int, obj, chunk=Int[]) sz <= 8192 && return COMPACT_PROPERTIES - if iscompressed(parent) - chunk = HDF5.heuristic_chunk(obj) - if !isempty(chunk) - p = p_create(HDF5.H5P_DATASET_CREATE) - p["chunk"] = chunk - p["blosc"] = 5 - return p - end + if iscompressed(parent) && !isempty(chunk) + p = p_create(HDF5.H5P_DATASET_CREATE) + p["chunk"] = chunk + p["blosc"] = 5 + return p + else + return HDF5.DEFAULT_PROPERTIES end - return HDF5.DEFAULT_PROPERTIES end # Write "basic" types @@ -490,7 +489,7 @@ function _write{T<:Union(HDF5BitsKind, ByteString)}(parent::Union(JldFile, JldGr name::ByteString, data::Union(T, Array{T}), wsession::JldWriteSession) - dset, dtype = d_create(parent.plain, bytestring(name), data, HDF5._link_properties(name), dset_create_properties(parent, sizeof(data), data)) + dset, dtype = d_create(parent.plain, bytestring(name), data, HDF5._link_properties(name), dset_create_properties(parent, sizeof(data), data, HDF5.heuristic_chunk(data))) try # Write the attribute isa(data, Array) && isempty(data) && a_write(dset, "dims", [size(data)...]) @@ -511,8 +510,9 @@ function _write{T}(parent::Union(JldFile, JldGroup), buf = h5convert_array(f, data, dtype, wsession) dims = convert(Array{HDF5.Hsize, 1}, [reverse(size(data))...]) dspace = dataspace(data) + chunk = HDF5.heuristic_chunk(dtype, size(data)) try - dset = d_create(parent.plain, path, dtype.dtype, dspace, HDF5._link_properties(path), dset_create_properties(parent, sizeof(buf), buf)) + dset = d_create(parent.plain, path, dtype.dtype, dspace, HDF5._link_properties(path), dset_create_properties(parent, sizeof(buf), buf, chunk)) if dtype == JLD_REF_TYPE a_write(dset, "julia eltype", full_typename(f, T)) end diff --git a/src/plain.jl b/src/plain.jl index 0a8cf7a8c..8cdc69f9d 100644 --- a/src/plain.jl +++ b/src/plain.jl @@ -515,7 +515,7 @@ include("blosc_filter.jl") register_blosc() # heuristic chunk layout (return empty array to disable chunking) -function heuristic_chunk(T::Type, shape) +function heuristic_chunk(T, shape) Ts = sizeof(T) sz = prod(shape) sz == 0 && return Int[] # never return a zero-size chunk diff --git a/test/jld.jl b/test/jld.jl index afb2acca4..2a07b9dd5 100644 --- a/test/jld.jl +++ b/test/jld.jl @@ -184,6 +184,7 @@ nonearr = Array(Union(), 5) # some data big enough to ensure that compression is used: Abig = kron(eye(10), rand(20,20)) +Bbig = Any[i for i=1:3000] Sbig = "A test string "^1000 iseq(x,y) = isequal(x,y) @@ -337,6 +338,7 @@ for compress in (true,false) @write fid none @write fid nonearr @write fid Abig + @write fid Bbig @write fid Sbig # Make sure we can create groups (i.e., use HDF5 features) g = g_create(fid, "mygroup") @@ -449,6 +451,7 @@ for compress in (true,false) @check fidr none @check fidr nonearr @check fidr Abig + @check fidr Bbig @check fidr Sbig x1 = read(fidr, "group1/x")