Skip to content

Commit

Permalink
add header to serialization format. fixes #22888 (#23722)
Browse files Browse the repository at this point in the history
add header to serialization format. fixes #22888
  • Loading branch information
JeffBezanson authored Sep 18, 2017
1 parent e96478e commit 68d3305
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 9 deletions.
62 changes: 55 additions & 7 deletions base/serialize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ SerializationState(io::IO) = SerializationState{typeof(io)}(io)
# types AbstractSerializer and Serializer # defined in dict.jl

const n_int_literals = 33
const n_reserved_slots = 12
const n_reserved_slots = 25
const n_reserved_tags = 12

const TAGS = Any[
Symbol, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128,
Expand All @@ -48,6 +49,8 @@ const TAGS = Any[
Symbol, # OBJECT_TAG
Symbol, # REF_OBJECT_TAG
Symbol, # FULL_GLOBALREF_TAG
Symbol, # HEADER_TAG
fill(Symbol, n_reserved_tags)...,

(), Bool, Any, Bottom, Core.TypeofBottom, Type, svec(), Tuple{}, false, true, nothing,
:Any, :Array, :TypeVar, :Box, :Tuple, :Ptr, :return, :call, Symbol("::"), :Function,
Expand All @@ -64,9 +67,9 @@ const TAGS = Any[
(Int64(0):Int64(n_int_literals-1))...
]

@assert length(TAGS) <= 255
@assert length(TAGS) == 255

const ser_version = 6 # do not make changes without bumping the version #!
const ser_version = 7 # do not make changes without bumping the version #!

const NTAGS = length(TAGS)

Expand Down Expand Up @@ -120,6 +123,7 @@ const WRAPPER_DATATYPE_TAG = Int32(o0+11)
const OBJECT_TAG = Int32(o0+12)
const REF_OBJECT_TAG = Int32(o0+13)
const FULL_GLOBALREF_TAG = Int32(o0+14)
const HEADER_TAG = Int32(o0+15)

writetag(s::IO, tag) = write(s, UInt8(tag))

Expand Down Expand Up @@ -638,15 +642,54 @@ function serialize_any(s::AbstractSerializer, @nospecialize(x))
end

"""
serialize(stream, value)
Serializer.writeheader(s::AbstractSerializer)
Write an identifying header to the specified serializer. The header consists of
8 bytes as follows:
| Offset | Description |
|:-------|:------------------------------------------------|
| 0 | tag byte (0x37) |
| 1-2 | signature bytes "JL" |
| 3 | protocol version |
| 4 | bits 0-1: endianness: 0 = little, 1 = big |
| 4 | bits 2-3: platform: 0 = 32-bit, 1 = 64-bit |
| 5-7 | reserved |
"""
function writeheader(s::AbstractSerializer)
io = s.io
writetag(io, HEADER_TAG)
write(io, "JL") # magic bytes
write(io, UInt8(ser_version))
endianness = (ENDIAN_BOM == 0x04030201 ? 0 :
ENDIAN_BOM == 0x01020304 ? 1 :
error("unsupported endianness in serializer"))
machine = (sizeof(Int) == 4 ? 0 :
sizeof(Int) == 8 ? 1 :
error("unsupported word size in serializer"))
write(io, UInt8(endianness) | (UInt8(machine) << 2))
write(io, b"\x00\x00\x00") # 3 reserved bytes
nothing
end

"""
serialize(stream::IO, value)
Write an arbitrary value to a stream in an opaque format, such that it can be read back by
[`deserialize`](@ref). The read-back value will be as identical as possible to the original. In
general, this process will not work if the reading and writing are done by different
[`deserialize`](@ref). The read-back value will be as identical as possible to the original.
In general, this process will not work if the reading and writing are done by different
versions of Julia, or an instance of Julia with a different system image. `Ptr` values are
serialized as all-zero bit patterns (`NULL`).
An 8-byte identifying header is written to the stream first. To avoid writing the header,
construct a `SerializationState` and use it as the first argument to `serialize` instead.
See also [`Serializer.writeheader`](@ref).
"""
serialize(s::IO, x) = serialize(SerializationState(s), x)
function serialize(s::IO, x)
ss = SerializationState(s)
writeheader(ss)
serialize(ss, x)
end

## deserializing values ##

Expand Down Expand Up @@ -745,6 +788,11 @@ function handle_deserialize(s::AbstractSerializer, b::Int32)
return s.table[Int(id)]
elseif b == LONGSYMBOL_TAG
return deserialize_symbol(s, Int(read(s.io, Int32)::Int32))
elseif b == HEADER_TAG
for _ = 1:7
read(s.io, UInt8)
end
return deserialize(s)
end
t = desertag(b)
if t.mutable && length(t.types) > 0 # manual specialization of fieldcount
Expand Down
1 change: 1 addition & 0 deletions doc/src/stdlib/io-network.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Base.isreadable
Base.isopen
Base.Serializer.serialize
Base.Serializer.deserialize
Base.Serializer.writeheader
Base.Grisu.print_shortest
Base.fd
Base.redirect_stdout
Expand Down
18 changes: 16 additions & 2 deletions test/serialize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ using Base.Test

# Check that serializer hasn't gone out-of-frame
@test Serializer.sertag(Symbol) == 1
@test Serializer.sertag(()) == 55
@test Serializer.sertag(false) == 63
@test Serializer.sertag(()) == 68
@test Serializer.sertag(false) == 76

function create_serialization_stream(f::Function)
s = IOBuffer()
Expand Down Expand Up @@ -480,3 +480,17 @@ let x = T20324[T20324(1) for i = 1:2]
@test isa(y,Vector{T20324})
@test y == x
end

# serializer header
let io = IOBuffer()
serialize(io, ())
seekstart(io)
b = read(io)
@test b[1] == Serializer.HEADER_TAG
@test b[2:3] == b"JL"
@test b[4] == Serializer.ser_version
@test (b[5] & 0x3) == (ENDIAN_BOM == 0x01020304)
@test ((b[5] & 0xc)>>2) == (sizeof(Int) == 8)
@test (b[5] & 0xf0) == 0
@test all(b[6:8] .== 0)
end

0 comments on commit 68d3305

Please sign in to comment.