Skip to content

Commit

Permalink
More perf tweaks
Browse files Browse the repository at this point in the history
- Use H5Pset_create_intermediate_group to create intermediate groups,
  instead of doing this ourselves.
- Store small datasets in compact format.
- Avoid looking up created datasets to create references to them.

Also implement a few more libhdf5 functions, most of which don't seem
to make a difference to performance.
  • Loading branch information
simonster committed Aug 22, 2014
1 parent 7dcf9fd commit 6714b2e
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 70 deletions.
89 changes: 49 additions & 40 deletions src/JLD.jl
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ type JldFile <: HDF5.DataFile
writeheader::Bool=false, mmaparrays::Bool=false)
f = new(plain, version, toclose, writeheader, mmaparrays,
Dict{HDF5Datatype,Type}(), Dict{Type,HDF5Datatype}(),
Dict{HDF5ReferenceObj,WeakRef}(), Array(ByteString, 0))
Dict{HDF5ReferenceObj,WeakRef}(), ByteString[])
if toclose
finalizer(f, close)
end
Expand Down Expand Up @@ -126,6 +126,7 @@ function jldopen(filename::String, rd::Bool, wr::Bool, cr::Bool, tr::Bool, ff::B
end
version = version_current
pa = p_create(HDF5.H5P_FILE_ACCESS)
# HDF5.h5p_set_libver_bounds(pa, HDF5.H5F_LIBVER_18, HDF5.H5F_LIBVER_18)
try
pa["fclose_degree"] = HDF5.H5F_CLOSE_STRONG
if cr && (tr || !isfile(filename))
Expand Down Expand Up @@ -444,47 +445,56 @@ end

### Writing ###

# Public entry point for writing `data` under `name`: hand off to the `_write`
# method chosen by dispatch on `data`, then close whatever that method returns.
function write(parent::Union(JldFile, JldGroup), name::ByteString,
               data, wsession::JldWriteSession=JldWriteSession())
    dset = _write(parent, name, data, wsession)
    close(dset)
end

# Pick whether to use compact or default storage based on data size
const COMPACT_PROPERTIES = p_create(HDF5.H5P_DATASET_CREATE)
HDF5.h5p_set_layout(COMPACT_PROPERTIES.id, HDF5.H5D_COMPACT)
# Return the dataset-creation property list to use for a payload of `sz` bytes:
# the compact-layout list up to and including 8192, the default list above that.
function dset_create_properties(sz::Int)
    if sz > 8192
        return HDF5.DEFAULT_PROPERTIES
    end
    return COMPACT_PROPERTIES
end

# Write "basic" types
function write{T<:Union(HDF5BitsKind, ByteString)}(parent::Union(JldFile, JldGroup), name::ByteString,
data::Union(T, Array{T}))
function _write{T<:Union(HDF5BitsKind, ByteString)}(parent::Union(JldFile, JldGroup),
name::ByteString,
data::Union(T, Array{T}),
wsession::JldWriteSession)
# Create the dataset
dset, dtype = d_create(parent.plain, name, data)
dset, dtype = d_create(parent.plain, bytestring(name), data, HDF5._link_properties(name),
dset_create_properties(sizeof(data)))
try
# Write the attribute
isa(data, Array) && isempty(data) && a_write(dset, "dims", [size(data)...])
# Write the data
HDF5.writearray(dset, dtype.id, data)
finally
close(dset)
close(dtype)
end
dset
end
write{T<:Union(HDF5BitsKind, ByteString)}(parent::Union(JldFile, JldGroup), name::ByteString,
data::Union(T, Array{T}), wsession::JldWriteSession) =
write(parent, name, data)

# General array types
function write{T}(parent::Union(JldFile, JldGroup), path::ByteString, data::Array{T},
wsession::JldWriteSession=JldWriteSession())
# Write an array whose element type is not a "basic" HDF5 type as a dataset at `path`.
# The elements are converted with `h5convert_array` to the field type obtained from
# `h5fieldtype`; the dataset is created with the link properties for `path` and with
# creation properties chosen from `sizeof(buf)`. The dataset handle is returned (the
# callers visible in this diff close it); the dataspace is closed in the `finally`.
function _write{T}(parent::Union(JldFile, JldGroup),
path::ByteString, data::Array{T},
wsession::JldWriteSession)
f = file(parent)
dtype = h5fieldtype(f, T, true)
buf = h5convert_array(f, data, dtype, wsession)
dims = convert(Array{HDF5.Hsize, 1}, [reverse(size(data))...])
dspace = dataspace(data)
try
# NOTE(review): the lines from here through `close(dset)` appear to be the pre-commit
# body shown by the diff; they are kept verbatim.
dset = d_create(parent.plain, path, dtype.dtype, dspace)
try
if dtype == JLD_REF_TYPE
a_write(dset, "julia eltype", full_typename(f, T))
end
if isempty(data) && ndims(data) != 0
a_write(dset, "dims", [size(data)...])
else
HDF5.writearray(dset, dtype.dtype.id, buf)
end
finally
close(dset)
# NOTE(review): the lines below appear to be the post-commit body.
dset = d_create(parent.plain, path, dtype.dtype, dspace, HDF5._link_properties(path),
dset_create_properties(sizeof(buf)))
if dtype == JLD_REF_TYPE
a_write(dset, "julia eltype", full_typename(f, T))
end
# Empty arrays record only their shape in a "dims" attribute. The guard has to match
# the `ndims(data) != 0` test of the body it replaces: with `!= 1`, an empty vector
# would skip the attribute and go to `writearray` instead.
if isempty(data) && ndims(data) != 0
a_write(dset, "dims", [size(data)...])
else
HDF5.writearray(dset, dtype.dtype.id, buf)
end
return dset
finally
close(dspace)
end
Expand Down Expand Up @@ -547,12 +557,12 @@ function write_ref(parent::JldFile, data, wsession::JldWriteSession)
# Write a new reference
gref = get_gref(parent)
name = @sprintf "%08d" (parent.nrefs += 1)
write(gref, name, data, wsession)
dset = _write(gref, name, data, wsession)

# Add reference to reference list
ref = HDF5ReferenceObj(gref.plain, name)
ref = HDF5ReferenceObj(HDF5.objinfo(dset).addr)
close(dset)
if !isa(data, Tuple) && typeof(data).mutable
parent.jlref[ref] = WeakRef(data)
wsession.h5ref[data] = ref
end
ref
Expand All @@ -561,8 +571,8 @@ write_ref(parent::JldGroup, data, wsession::JldWriteSession) =
write_ref(file(parent), data, wsession)

# Special case for associative, to rehash keys
function write(parent::Union(JldFile, JldGroup), name::ByteString, d::Associative,
wsession::JldWriteSession=JldWriteSession())
function _write(parent::Union(JldFile, JldGroup), name::ByteString,
d::Associative, wsession::JldWriteSession)
n = length(d)
K, V = eltype(d)
ks = Array(K, n)
Expand All @@ -572,12 +582,13 @@ function write(parent::Union(JldFile, JldGroup), name::ByteString, d::Associativ
ks[i+=1] = k
vs[i] = v
end
write(parent, name, AssociativeWrapper{K,V,typeof(d)}(ks, vs), wsession)
write_compound(parent, name, AssociativeWrapper{K,V,typeof(d)}(ks, vs), wsession)
end

# Expressions, drop line numbers
function write(parent::Union(JldFile, JldGroup), name::ByteString, ex::Expr,
wsession::JldWriteSession=JldWriteSession())
function _write(parent::Union(JldFile, JldGroup),
name::ByteString, ex::Expr,
wsession::JldWriteSession)
args = ex.args
# Discard "line" expressions
keep = trues(length(args))
Expand All @@ -592,11 +603,11 @@ function write(parent::Union(JldFile, JldGroup), name::ByteString, ex::Expr,
end

# Generic (tuples, immutables, and compound types)
write(parent::Union(JldFile, JldGroup), name::ByteString, s,
wsession::JldWriteSession=JldWriteSession()) =
_write(parent::Union(JldFile, JldGroup), name::ByteString, s,
wsession::JldWriteSession) =
write_compound(parent, name, s, wsession)
function write_compound(parent::Union(JldFile, JldGroup), name::ByteString, s,
wsession::JldWriteSession)
function write_compound(parent::Union(JldFile, JldGroup), name::ByteString,
s, wsession::JldWriteSession)
T = typeof(s)
f = file(parent)
dtype = h5type(f, T, true)
Expand All @@ -607,12 +618,10 @@ function write_compound(parent::Union(JldFile, JldGroup), name::ByteString, s,

dspace = HDF5Dataspace(HDF5.h5s_create(HDF5.H5S_SCALAR))
try
dset = HDF5.d_create(parent.plain, name, dtype.dtype, dspace)
try
HDF5.writearray(dset, dtype.dtype.id, buf)
finally
close(dset)
end
dset = HDF5.d_create(parent.plain, name, dtype.dtype, dspace, HDF5._link_properties(name),
dset_create_properties(length(buf)))
HDF5.writearray(dset, dtype.dtype.id, buf)
return dset
finally
close(dspace)
end
Expand Down
44 changes: 14 additions & 30 deletions src/plain.jl
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,10 @@ const H5T_NATIVE_INT64 = read_const(:H5T_NATIVE_INT64_g)
const H5T_NATIVE_UINT64 = read_const(:H5T_NATIVE_UINT64_g)
const H5T_NATIVE_FLOAT = read_const(:H5T_NATIVE_FLOAT_g)
const H5T_NATIVE_DOUBLE = read_const(:H5T_NATIVE_DOUBLE_g)

# Library versions
const H5F_LIBVER_EARLIEST = 0
const H5F_LIBVER_18 = 1
const H5F_LIBVER_LATEST = 1

## Conversion between Julia types and HDF5 atomic types
hdf5_type_id(::Type{Int8}) = H5T_NATIVE_INT8
Expand Down Expand Up @@ -689,30 +692,10 @@ function split1(path::ByteString)
end
end

# Create objects
# Resolve `path` relative to `parent`, creating any missing intermediate groups.
# Returns a tuple of (id of the group that will hold the leaf, leaf name).
# A path without '/' is returned unchanged together with `parent.id`.
function parents_create(parent::Union(HDF5File, HDF5Group), path::ByteString)
    if !('/' in path)
        return (parent.id, path)
    end
    parts = split(path, "/")
    # A leading '/' yields an empty first component; keep it as the root name.
    if isempty(parts[1])
        parts[1] = oftype(path, "/")
    end
    # Drop empty components (e.g. from doubled or trailing slashes).
    parts = filter(x -> !isempty(x), parts) # NOTE: performance bottleneck up to here; find a better way
    # Walk every component except the last, creating groups that do not exist yet.
    for component in parts[1:end-1]
        gstr = ascii(component)
        exists(parent, gstr) || g_create(parent, gstr)
        parent = g_open(parent, gstr)
    end
    return (parent.id, bytestring(parts[end]))
end

function g_create(parent::Union(HDF5File, HDF5Group), path::ByteString,
lcpl::HDF5Properties=_link_properties(path),
dcpl::HDF5Properties=DEFAULT_PROPERTIES)
(parent_id, leafpath) = parents_create(checkvalid(parent), path)
HDF5Group(h5g_create(parent_id, leafpath, lcpl.id, dcpl.id), file(parent))
HDF5Group(h5g_create(checkvalid(parent).id, path, lcpl.id, dcpl.id), file(parent))
end
function g_create(f::Function, parent::Union(HDF5File, HDF5Group), args...)
g = g_create(parent, args...)
Expand All @@ -727,9 +710,8 @@ function d_create(parent::Union(HDF5File, HDF5Group), path::ByteString, dtype::H
dspace::HDF5Dataspace, lcpl::HDF5Properties=_link_properties(path),
dcpl::HDF5Properties=DEFAULT_PROPERTIES,
dapl::HDF5Properties=DEFAULT_PROPERTIES)
(parent_id, leafpath) = parents_create(checkvalid(parent), path)
HDF5Dataset(h5d_create(parent_id, leafpath, dtype.id, dspace.id, lcpl.id, dcpl.id, dapl.id),
file(parent))
HDF5Dataset(h5d_create(checkvalid(parent).id, path, dtype.id, dspace.id, lcpl.id,
dcpl.id, dapl.id), file(parent))
end

# Setting dset creation properties with name/value pairs
Expand All @@ -746,10 +728,7 @@ function d_create(parent::Union(HDF5File, HDF5Group), path::ByteString, dtype::H
end
p[thisname] = pv[i+1]
end
lcpl = p_create(H5P_LINK_CREATE)
h5p_set_char_encoding(lcpl.id, cset(typeof(path)))
(parent_id, leafpath) = parents_create(checkvalid(parent), path)
HDF5Dataset(h5d_create(parent_id, leafpath, dtype.id, dspace.id, lcpl, p.id, H5P_DEFAULT), file(parent))
HDF5Dataset(h5d_create(parent, path, dtype.id, dspace.id, _link_properties(path), p.id, H5P_DEFAULT), file(parent))
end
d_create(parent::Union(HDF5File, HDF5Group), path::ByteString, dtype::HDF5Datatype, dspace_dims::Dims, prop1::ASCIIString, val1, pv...) = d_create(checkvalid(parent), path, dtype, dataspace(dspace_dims), prop1, val1, pv...)
d_create(parent::Union(HDF5File, HDF5Group), path::ByteString, dtype::HDF5Datatype, dspace_dims::(Dims,Dims), prop1::ASCIIString, val1, pv...) = d_create(checkvalid(parent), path, dtype, dataspace(dspace_dims[1], max_dims=dspace_dims[2]), prop1, val1, pv...)
Expand Down Expand Up @@ -1327,7 +1306,7 @@ read(attr::HDF5Attributes, name::ByteString) = a_read(attr.parent, name)
function iscontiguous(obj::HDF5Dataset)
prop = h5d_get_create_plist(checkvalid(obj).id)
try
h5p_get_layout(prop) != H5D_CHUNKED
h5p_get_layout(prop) == H5D_CONTIGUOUS
finally
h5p_close(prop)
end
Expand Down Expand Up @@ -1817,10 +1796,13 @@ for (jlname, h5name, outtype, argtypes, argsyms, msg) in
(:h5p_get_userblock, :H5Pget_userblock, Herr, (Hid, Ptr{Hsize}), (:plist_id, :len), "Error getting userblock"),
(:h5p_set_char_encoding, :H5Pset_char_encoding, Herr, (Hid, Cint), (:plist_id, :encoding), "Error setting char encoding"),
(:h5p_set_chunk, :H5Pset_chunk, Herr, (Hid, Cint, Ptr{Hsize}), (:plist_id, :ndims, :dims), "Error setting chunk size"),
(:h5p_set_create_intermediate_group, :H5Pset_create_intermediate_group, Herr, (Hid, Cuint), (:plist_id, :setting), "Error setting create intermediate group"),
(:h5p_set_external, :H5Pset_external, Herr, (Hid, Ptr{Uint8}, Int, Csize_t), (:plist_id, :name, :offset, :size), "Error setting external property"),
(:h5p_set_fclose_degree, :H5Pset_fclose_degree, Herr, (Hid, Cint), (:plist_id, :fc_degree), "Error setting close degree"),
(:h5p_set_deflate, :H5Pset_deflate, Herr, (Hid, Cuint), (:plist_id, :setting), "Error setting compression method and level (deflate)"),
(:h5p_set_layout, :H5Pset_layout, Herr, (Hid, Cint), (:plist_id, :setting), "Error setting layout"),
(:h5p_set_libver_bounds, :H5Pset_libver_bounds, Herr, (Hid, Cint, Cint), (:fapl_id, :libver_low, :libver_high), "Error setting library version bounds"),
(:h5p_set_local_heap_size_hint, :H5Pset_local_heap_size_hint, Herr, (Hid, Cuint), (:fapl_id, :size_hint), "Error setting local heap size hint"),
(:h5p_set_userblock, :H5Pset_userblock, Herr, (Hid, Hsize), (:plist_id, :len), "Error setting userblock"),
(:h5s_close, :H5Sclose, Herr, (Hid,), (:space_id,), "Error closing dataspace"),
(:h5s_select_hyperslab, :H5Sselect_hyperslab, Herr, (Hid, Cint, Ptr{Hsize}, Ptr{Hsize}, Ptr{Hsize}, Ptr{Hsize}), (:dspace_id, :seloper, :start, :stride, :count, :block), "Error selecting hyperslab"),
Expand Down Expand Up @@ -2085,9 +2067,11 @@ end

# Shared link-creation property lists, one per string type. Each list has its
# character encoding set from `cset` of that string type and has
# `h5p_set_create_intermediate_group` applied with setting 1, so that (per the
# commit message) intermediate groups are created by the library.
const ASCII_LINK_PROPERTIES = p_create(H5P_LINK_CREATE)
h5p_set_char_encoding(ASCII_LINK_PROPERTIES.id, cset(ASCIIString))
h5p_set_create_intermediate_group(ASCII_LINK_PROPERTIES.id, 1)
# `_link_properties(path)` returns the shared list matching the string type of `path`.
_link_properties(path::ASCIIString) = ASCII_LINK_PROPERTIES
const UTF8_LINK_PROPERTIES = p_create(H5P_LINK_CREATE)
h5p_set_char_encoding(UTF8_LINK_PROPERTIES.id, cset(UTF8String))
h5p_set_create_intermediate_group(UTF8_LINK_PROPERTIES.id, 1)
_link_properties(path::UTF8String) = UTF8_LINK_PROPERTIES
# Property-list wrapper around H5P_DEFAULT; used as the default for the `dcpl`/`dapl`
# arguments of `g_create` and `d_create`.
const DEFAULT_PROPERTIES = HDF5Properties(H5P_DEFAULT)

Expand Down

0 comments on commit 6714b2e

Please sign in to comment.