fix chunking bug identified by @jakebolewski
stevengj committed Nov 19, 2014
1 parent d531c08 commit ee41aa6
Showing 3 changed files with 17 additions and 14 deletions.
src/JLD.jl (13 additions, 13 deletions)
@@ -7,7 +7,7 @@ using HDF5
 # Add methods to...
 import HDF5: close, dump, exists, file, getindex, setindex!, g_create, g_open, o_delete, name, names, read, size, write,
              HDF5ReferenceObj, HDF5BitsKind, ismmappable, readmmap
-import Base: length, endof, show, done, next, start, delete!
+import Base: length, endof, show, done, next, start, delete!, sizeof
 
 # .jld files written before v"0.4.0-dev+1419" might have Uint32 instead of UInt32 as the typename string.
 # See julia issue #8907
@@ -37,6 +37,7 @@ immutable JldDatatype
     dtype::HDF5Datatype
     index::Int
 end
+sizeof(T::JldDatatype) = sizeof(T.dtype)
 
 immutable JldWriteSession
     persist::Vector{Any} # To hold objects that should not be garbage-collected
@@ -471,26 +472,24 @@ write(parent::Union(JldFile, JldGroup), name::ByteString,
 # Pick whether to use compact or default storage based on data size
 const COMPACT_PROPERTIES = p_create(HDF5.H5P_DATASET_CREATE)
 HDF5.h5p_set_layout(COMPACT_PROPERTIES.id, HDF5.H5D_COMPACT)
-function dset_create_properties(parent, sz::Int, obj)
+function dset_create_properties(parent, sz::Int, obj, chunk=Int[])
     sz <= 8192 && return COMPACT_PROPERTIES
-    if iscompressed(parent)
-        chunk = HDF5.heuristic_chunk(obj)
-        if !isempty(chunk)
-            p = p_create(HDF5.H5P_DATASET_CREATE)
-            p["chunk"] = chunk
-            p["blosc"] = 5
-            return p
-        end
+    if iscompressed(parent) && !isempty(chunk)
+        p = p_create(HDF5.H5P_DATASET_CREATE)
+        p["chunk"] = chunk
+        p["blosc"] = 5
+        return p
+    else
+        return HDF5.DEFAULT_PROPERTIES
     end
-    return HDF5.DEFAULT_PROPERTIES
 end

 # Write "basic" types
 function _write{T<:Union(HDF5BitsKind, ByteString)}(parent::Union(JldFile, JldGroup),
                                                     name::ByteString,
                                                     data::Union(T, Array{T}),
                                                     wsession::JldWriteSession)
-    dset, dtype = d_create(parent.plain, bytestring(name), data, HDF5._link_properties(name), dset_create_properties(parent, sizeof(data), data))
+    dset, dtype = d_create(parent.plain, bytestring(name), data, HDF5._link_properties(name), dset_create_properties(parent, sizeof(data), data, HDF5.heuristic_chunk(data)))
     try
         # Write the attribute
         isa(data, Array) && isempty(data) && a_write(dset, "dims", [size(data)...])
@@ -511,8 +510,9 @@ function _write{T}(parent::Union(JldFile, JldGroup),
     buf = h5convert_array(f, data, dtype, wsession)
     dims = convert(Array{HDF5.Hsize, 1}, [reverse(size(data))...])
     dspace = dataspace(data)
+    chunk = HDF5.heuristic_chunk(dtype, size(data))
     try
-        dset = d_create(parent.plain, path, dtype.dtype, dspace, HDF5._link_properties(path), dset_create_properties(parent, sizeof(buf), buf))
+        dset = d_create(parent.plain, path, dtype.dtype, dspace, HDF5._link_properties(path), dset_create_properties(parent, sizeof(buf), buf, chunk))
         if dtype == JLD_REF_TYPE
             a_write(dset, "julia eltype", full_typename(f, T))
         end
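Taken together, the src/JLD.jl changes move the chunk-size decision out of dset_create_properties and into its callers: the bits-type/string writer derives the chunk from the data itself, while the general writer derives it from the on-disk JldDatatype and the array's dimensions. A minimal usage sketch of the new signature (the variables parent, data, buf, and dtype are illustrative stand-ins, not code from the commit):

    # Bits-type or string array: the heuristic can inspect the data directly.
    chunk = HDF5.heuristic_chunk(data)
    props = dset_create_properties(parent, sizeof(data), data, chunk)

    # General case: what gets written is a converted buffer, so the chunk is
    # computed from the JLD datatype and the array shape instead.
    chunk = HDF5.heuristic_chunk(dtype, size(data))   # dtype::JldDatatype
    props = dset_create_properties(parent, sizeof(buf), buf, chunk)

With an empty chunk (the default), the function now falls back to HDF5.DEFAULT_PROPERTIES even when the parent file is compressed.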
src/plain.jl (1 addition, 1 deletion)
@@ -515,7 +515,7 @@ include("blosc_filter.jl")
 register_blosc()
 
 # heuristic chunk layout (return empty array to disable chunking)
-function heuristic_chunk(T::Type, shape)
+function heuristic_chunk(T, shape)
     Ts = sizeof(T)
     sz = prod(shape)
     sz == 0 && return Int[] # never return a zero-size chunk
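The one-character change in src/plain.jl drops the ::Type restriction so that heuristic_chunk only requires its first argument to support sizeof; combined with the new sizeof(T::JldDatatype) method above, it can now be called with a JldDatatype, as src/JLD.jl does. A rough sketch of the two call forms (the shapes and the jld_dtype variable are made up for illustration):

    HDF5.heuristic_chunk(Float64, (2000, 2000))   # a plain bits type, as before
    HDF5.heuristic_chunk(jld_dtype, (3000,))      # jld_dtype::JldDatatype, sized via its wrapped HDF5Datatype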
test/jld.jl (3 additions, 0 deletions)
@@ -184,6 +184,7 @@ nonearr = Array(Union(), 5)

 # some data big enough to ensure that compression is used:
 Abig = kron(eye(10), rand(20,20))
+Bbig = Any[i for i=1:3000]
 Sbig = "A test string "^1000
 
 iseq(x,y) = isequal(x,y)
@@ -337,6 +338,7 @@ for compress in (true,false)
     @write fid none
     @write fid nonearr
     @write fid Abig
+    @write fid Bbig
     @write fid Sbig
     # Make sure we can create groups (i.e., use HDF5 features)
     g = g_create(fid, "mygroup")
@@ -449,6 +451,7 @@ for compress in (true,false)
     @check fidr none
     @check fidr nonearr
     @check fidr Abig
+    @check fidr Bbig
     @check fidr Sbig
 
     x1 = read(fidr, "group1/x")
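The new Bbig test case is an Any array large enough to take the compressed, reference-backed write path that the chunking fix targets. A hedged round-trip sketch of roughly what the @write/@check pair exercises (the file name and the compress keyword follow the test loop's apparent usage and are assumptions, not copied from the test file):

    using HDF5, JLD
    fn = joinpath(tempdir(), "chunkfix.jld")
    Bbig = Any[i for i=1:3000]            # eltype Any, so elements are written via HDF5 references
    fid = jldopen(fn, "w", compress=true)
    write(fid, "Bbig", Bbig)
    close(fid)
    fidr = jldopen(fn, "r")
    @assert isequal(read(fidr, "Bbig"), Bbig)
    close(fidr)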
