Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Demo of custom serialization #191

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 54 additions & 48 deletions src/JLD.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ using HDF5, Compat
# Add methods to...
import HDF5: close, dump, exists, file, getindex, setindex!, g_create, g_open, o_delete, name, names, read, size, write,
HDF5ReferenceObj, HDF5BitsKind, ismmappable, readmmap
import Base: length, endof, show, done, next, start, delete!, sizeof
import Base: convert, length, endof, show, done, next, start, delete!, sizeof

# .jld files written before v"0.4.0-dev+1419" might have Uint32 instead of UInt32 as the typename string.
# See julia issue #8907
Expand Down Expand Up @@ -103,14 +103,6 @@ end
# Print the message reported when the type stored in a file does not match
# the type of the same name that is currently loaded.
function show(io::IO, e::TypeMismatchException)
    print(io, "stored type $(e.typename) does not match currently loaded type")
end

# Wrapper for associative collections.
# We write this instead of the associative itself to avoid dependence on the
# Julia hash function. Keys and values are stored as two vectors filled in
# lockstep (keys[i] belongs with values[i]); the type parameter T records the
# type of the associative that was wrapped.
immutable AssociativeWrapper{K,V,T<:Associative}
keys::Vector{K}
values::Vector{V}
end

include("jld_types.jl")

# A JldFile is its own file object, so `file` returns its argument unchanged.
function file(x::JldFile)
    return x
end
Expand Down Expand Up @@ -317,7 +309,7 @@ function read(parent::Union(JldFile, JldGroup), name::ByteString)
finally
close(obj)
end
val
readas(val)
end
# Convenience method: accept the dataset name as a Symbol by converting it to
# a byte string and delegating to the string-keyed `read` method.
function read(parent::Union(JldFile,JldGroup), name::Symbol)
    key = bytestring(string(name))
    return read(parent, key)
end

Expand Down Expand Up @@ -356,24 +348,7 @@ read_scalar{T<:BitsKindOrByteString}(obj::JldDataset, dtype::HDF5Datatype, ::Typ
# Read a scalar of an arbitrary Julia type T.
# The raw bytes of the dataset are read into a byte buffer sized by
# sizeof(dtype); `jlconvert` then builds the Julia value from a pointer to
# that buffer, and the result is passed through `after_read`.
function read_scalar(obj::JldDataset, dtype::HDF5Datatype, T::Type)
buf = Array(Uint8, sizeof(dtype))
HDF5.readarray(obj.plain, dtype.id, buf)
return after_read(jlconvert(T, file(obj), pointer(buf)))
end

# Default post-read hook: the value needs no fix-up and is returned unchanged.
function after_read(x)
    return x
end

# Special case for associative, to rehash keys
function after_read{K,V,T}(x::AssociativeWrapper{K,V,T})
ret = T()
keys = x.keys
values = x.values
n = length(keys)
if applicable(sizehint!, ret, n)
sizehint!(ret, n)
end
for i = 1:n
ret[keys[i]] = values[i]
end
ret
readas(jlconvert(T, file(obj), pointer(buf)))
end

## Arrays
Expand All @@ -382,10 +357,11 @@ end
function read_array(obj::JldDataset, dtype::HDF5Datatype, dspace_id::HDF5.Hid, dsel_id::HDF5.Hid,
dims::(Int...)=convert((Int...), HDF5.h5s_get_simple_extent_dims(dspace_id)[1]))
if HDF5.h5t_get_class(dtype) == HDF5.H5T_REFERENCE
return read_refs(obj, refarray_eltype(obj), dspace_id, dsel_id, dims)
val = read_refs(obj, refarray_eltype(obj), dspace_id, dsel_id, dims)
else
return read_vals(obj, dtype, jldatatype(file(obj), dtype), dspace_id, dsel_id, dims)
val = read_vals(obj, dtype, jldatatype(file(obj), dtype), dspace_id, dsel_id, dims)
end
readas(val)
end

# Arrays of basic HDF5 kinds
Expand Down Expand Up @@ -476,14 +452,14 @@ function read_ref(f::JldFile, ref::HDF5ReferenceObj)
end

f.jlref[ref] = WeakRef(data)
data
readas(data)
end

### Writing ###

write(parent::Union(JldFile, JldGroup), name::ByteString,
data, wsession::JldWriteSession=JldWriteSession(); kargs...) =
close(_write(parent, name, data, wsession; kargs...))
close(_write(parent, name, writeas(data), wsession; kargs...))

# Pick whether to use compact or default storage based on data size
function dset_create_properties(parent, sz::Int, obj, chunk=Int[]; mmap = false)
Expand Down Expand Up @@ -618,7 +594,7 @@ function write_ref(parent::JldFile, data, wsession::JldWriteSession)
# Write a new reference
gref = get_gref(parent)
name = @sprintf "%08d" (parent.nrefs += 1)
dset = _write(gref, name, data, wsession)
dset = _write(gref, name, writeas(data), wsession)

# Add reference to reference list
ref = HDF5ReferenceObj(HDF5.objinfo(dset).addr)
Expand All @@ -631,21 +607,6 @@ end
# Writing a reference from a group is forwarded to `file(parent)`, so
# references are always written through the JldFile method.
function write_ref(parent::JldGroup, data, wsession::JldWriteSession)
    return write_ref(file(parent), data, wsession)
end

# Special case for associative collections, so that keys are rehashed on read:
# the keys and values are copied into two plain vectors and written as an
# AssociativeWrapper compound instead of the associative itself.
# Keyword arguments are accepted but not forwarded to `write_compound`.
function _write(parent::Union(JldFile, JldGroup), name::ByteString,
                d::Associative, wsession::JldWriteSession; kargs...)
    K, V = eltype(d)
    nentries = length(d)
    ks = Array(K, nentries)
    vs = Array(V, nentries)
    idx = 0
    for (k, v) in d
        idx += 1
        ks[idx] = k
        vs[idx] = v
    end
    wrapped = AssociativeWrapper{K,V,typeof(d)}(ks, vs)
    return write_compound(parent, name, wrapped, wsession)
end

# Expressions, drop line numbers
function _write(parent::Union(JldFile, JldGroup),
name::ByteString, ex::Expr,
Expand Down Expand Up @@ -745,6 +706,51 @@ end

# The length of a file or group is the number of names it contains.
function length(x::Union(JldFile, JldGroup))
    return length(names(x))
end

### Custom serialization

# Hook applied to a value after it has been read from a file.
# The fallback returns the value unchanged; methods for specific types can
# return a different object in its place.
function readas(x)
    return x
end
# Hook applied to a value before it is written to a file.
# The fallback returns the value unchanged; methods for specific types can
# substitute the object that should be stored instead.
function writeas(x)
    return x
end

# Wrapper for associative collections.
# We write this instead of the associative itself to avoid dependence on the
# Julia hash function. Keys and values are stored as two vectors filled in
# lockstep (keys[i] belongs with values[i]); the type parameter T records the
# associative type that is rebuilt with T() when the wrapper is converted back.
immutable AssociativeWrapper{K,V,T<:Associative}
keys::Vector{K}
values::Vector{V}
end

# Reading an AssociativeWrapper yields an associative of the recorded type T,
# rebuilt through `convert`.
function readas{K,V,T}(x::AssociativeWrapper{K,V,T})
    return convert(T, x)
end
# Every associative is written as an AssociativeWrapper whose third type
# parameter is the concrete type of `x`.
function writeas{T<:Associative}(x::T)
    (KT, VT) = eltype(x)
    wrappertype = AssociativeWrapper{KT,VT,T}
    return convert(wrappertype, x)
end

# Rebuild an associative of type T from its wrapper: start from an empty T()
# and insert the stored key/value pairs one by one, in stored order.
function convert{K,V,T<:Associative}(::Type{T}, x::AssociativeWrapper{K,V,T})
    ks = x.keys
    vs = x.values
    npairs = length(ks)
    result = T()
    # Pre-size the container only when sizehint! has a method for it
    applicable(sizehint!, result, npairs) && sizehint!(result, npairs)
    for i = 1:npairs
        result[ks[i]] = vs[i]
    end
    return result
end

# Convert the associative `d` into an AssociativeWrapper{K,V,T}.
# The keys and values of `d` are copied, in iteration order, into a
# Vector{K} and a Vector{V} of length(d); ks[i] and vs[i] always come from
# the same entry of `d`.
function convert{K,V,T}(::Type{AssociativeWrapper{K,V,T}}, d::Associative)
    n = length(d)
    ks = Array(K, n)
    vs = Array(V, n)
    i = 0
    for (k, v) in d
        i += 1
        ks[i] = k
        vs[i] = v
    end
    # Return exactly the wrapper type that was requested. Tagging the result
    # with typeof(d) instead would hand back a value whose type differs from
    # the conversion target whenever T != typeof(d).
    return AssociativeWrapper{K,V,T}(ks, vs)
end

### Converting attribute strings to Julia types

# A bare Symbol is always accepted as a type expression.
function is_valid_type_ex(s::Symbol)
    return true
end
Expand Down
4 changes: 2 additions & 2 deletions src/jld_types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ function h5type(parent::JldFile, T::ANY, commit::Bool)
end

# Normal objects
function _gen_jlconvert_type(typeinfo::JldTypeInfo, T::ANY)
function _gen_jlconvert_type(typeinfo::JldTypeInfo, T::ANY, fsym = :jlconvert)
ex = Expr(:block)
args = ex.args
for i = 1:length(typeinfo.dtypes)
Expand All @@ -410,7 +410,7 @@ function _gen_jlconvert_type(typeinfo::JldTypeInfo, T::ANY)
push!(args, :(out.$(T.names[i]) = jlconvert($(T.types[i]), file, ptr+$h5offset)))
end
end
@eval function jlconvert(::Type{$T}, file::JldFile, ptr::Ptr)
@eval function $fsym(::Type{$T}, file::JldFile, ptr::Ptr)
out = ccall(:jl_new_struct_uninit, Any, (Any,), $T)::$T
$ex
out
Expand Down
77 changes: 77 additions & 0 deletions test/custom_serialization.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
module MyTypes

# `==` is extended below, so it has to be imported explicitly. Without the
# import the definitions would not add methods to Base's `==`, which is the
# function used by `length(v) == 5` in the constructor and by callers that
# compare MyType / MyContainer values from outside this module.
import Base: ==

export MyType, MyContainer

## Objects we want to save
# `data` in MyType always has length 5 (enforced by the inner constructor),
# and that invariant is the basis for a more efficient serialization.
immutable MyType{T}
    data::Vector{T}
    id::Int

    # Reject any vector whose length is not 5
    function MyType(v::Vector{T}, id::Integer)
        length(v) == 5 || error("All vectors must be of length 5")
        new(v, id)
    end
end
# Outer constructor: take T from the element type of the vector
MyType{T}(v::Vector{T}, id::Integer) = MyType{T}(v, id)
Base.eltype{T}(::Type{MyType{T}}) = T
# Two MyType values are equal when both their data and their ids are equal
==(a::MyType, b::MyType) = a.data == b.data && a.id == b.id

# A vector of MyType objects that share the element type T
immutable MyContainer{T}
    objs::Vector{MyType{T}}
end
Base.eltype{T}(::Type{MyContainer{T}}) = T
# Two containers are equal when they hold the same number of objects and the
# objects are pairwise equal, position by position
==(a::MyContainer, b::MyContainer) =
    length(a.objs) == length(b.objs) &&
    all(i->a.objs[i]==b.objs[i], 1:length(a.objs))

end


### Here are the definitions needed to implement the custom serialization
# If you prefer, you could include these definitions in the MyTypes module
module MySerializer

using HDF5, JLD, MyTypes

## Defining the serialization format
# The data vectors of all objects in a MyContainer are stored as the columns
# of one matrix (column i holds the data of object i), with the matching ids
# kept in a separate vector.
type MyContainerSerializer{T}
    data::Matrix{T}
    ids::Vector{Int}
end
MyContainerSerializer{T}(data::Matrix{T},ids) = MyContainerSerializer{T}(data, ids)
Base.eltype{T}(::Type{MyContainerSerializer{T}}) = T
Base.eltype{T}(::MyContainerSerializer{T}) = T

# Applied after reading: rebuild the MyContainer from the stored matrix and
# ids. The comprehension is given an explicit element type so the result is a
# Vector{MyType{T}} regardless of what type inference concludes, including
# when there are no objects at all.
function JLD.readas{T}(serdata::MyContainerSerializer{T})
    n = length(serdata.ids)
    objs = MyType{T}[MyType(serdata.data[:,i], serdata.ids[i]) for i = 1:n]
    return MyContainer(objs)
end

# Applied before writing: pack a MyContainer into the serialization format.
function JLD.writeas{T}(data::MyContainer{T})
    n = length(data.objs)
    # Explicit element type: the ids are always Ints, even for an empty container
    ids = Int[obj.id for obj in data.objs]
    # 5 rows because every MyType data vector has length 5
    vectors = Array(T, 5, n)
    for i = 1:n
        vectors[:,i] = data.objs[i].data
    end
    return MyContainerSerializer(vectors, ids)
end

end # MySerializer



using MyTypes, JLD, Base.Test

# Build a container holding two objects and write it to a JLD file in the
# temporary directory
obj1 = MyType(rand(5), 2)
obj2 = MyType(rand(5), 17)
container = MyContainer([obj1,obj2])
filename = joinpath(tempdir(), "customserializer.jld")
jldopen(filename, "w") do file
write(file, "mydata", container)
end

# Reopen the file, inspect the stored datatype, and read the container back;
# the value of the do-block (the result of `read`) becomes container_r
container_r = jldopen(filename) do file
obj = file["mydata"]
dtype = JLD.datatype(obj.plain)
# The dataset must be stored in the custom serialization format, not as a MyContainer
@test JLD.jldatatype(JLD.file(obj), dtype) === MySerializer.MyContainerSerializer{Float64}
read(file, "mydata")
end

# The round trip must reproduce the original container
@test container_r == container
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ include("readremote.jl")
include("extend_test.jl")
include("gc.jl")
include("require.jl")
include("custom_serialization.jl")
# Run the DataFrames test only when Pkg.installed reports something other
# than `nothing` for that package. `nothing` is a singleton, so it is compared
# by identity (!==) rather than through a user-extensible `==` method.
if Pkg.installed("DataFrames") !== nothing
    include("jld_dataframe.jl")
end