diff --git a/src/Filters.jl b/src/Filters.jl index 2666bdfc..381a30a0 100644 --- a/src/Filters.jl +++ b/src/Filters.jl @@ -51,8 +51,46 @@ function zencode(ain,::VLenArrayFilter) take!(b) end -JSON.lower(::VLenArrayFilter{T}) where T = Dict("id"=>"vlen-array","dtype"=> typestr(T) ) +JSON.lower(::VLenArrayFilter{T}) where T = Dict("id"=>"vlen-array","dtype"=> typestr(eltype(T)) ) -getfilter(::Type{<:VLenArrayFilter}, f) = VLenArrayFilter{typestr(f["dtype"])}() +getfilter(::Type{<:VLenArrayFilter}, f) = VLenArrayFilter{Vector{typestr(f["dtype"])}}() -filterdict = Dict("vlen-array"=>VLenArrayFilter) \ No newline at end of file +""" + VLenUTF8Filter + +Encodes and decodes variable-length UTF-8 strings +""" +struct VLenUTF8Filter <: Filter{String,UInt8} end + +function zdecode(ain, ::VLenUTF8Filter) + arbuf = UInt8[] + f = IOBuffer(ain) + nitems = read(f, UInt32) + out = Array{String}(undef,nitems) + for i=1:nitems + len1 = read(f,UInt32) + resize!(arbuf,len1) + read!(f,arbuf) + out[i] = String(arbuf) + end + close(f) + out +end + +#Encodes Array of Strings ain into bytes +function zencode(ain,::VLenUTF8Filter) + b = IOBuffer() + nitems = length(ain) + write(b,UInt32(nitems)) + for a in ain + write(b, UInt32(sizeof(a))) + write(b, a) + end + take!(b) +end + +JSON.lower(::VLenUTF8Filter) = Dict("id"=>"vlen-utf8","dtype"=> "|O" ) + +getfilter(::Type{<:VLenUTF8Filter}, f) = VLenUTF8Filter() + +filterdict = Dict("vlen-array"=>VLenArrayFilter, "vlen-utf8"=>VLenUTF8Filter) \ No newline at end of file diff --git a/src/ZArray.jl b/src/ZArray.jl index 894dfeec..d5e63088 100644 --- a/src/ZArray.jl +++ b/src/ZArray.jl @@ -21,8 +21,6 @@ Base.IndexStyle(::Type{<:SenMissArray})=Base.IndexLinear() # Struct representing a Zarr Array in Julia, note that # chunks(chunk size) and size are always in Julia column-major order -# Currently this is not an AbstractArray, because indexing single elements is -# would be really slow, although most AbstractArray interface functions are 
implemented struct ZArray{T, N, C<:Compressor, S<:AbstractStore} <: AbstractDiskArray{T,N} metadata::Metadata{T, N, C} storage::S @@ -72,6 +70,7 @@ storageratio(z::ZArray{<:Vector}) = "unknown" nobytes(z::ZArray) = length(z)*sizeof(eltype(z)) nobytes(z::ZArray{<:Vector}) = "unknown" +nobytes(z::ZArray{<:String}) = "unknown" zinfo(z::ZArray) = zinfo(stdout,z) function zinfo(io::IO,z::ZArray) @@ -128,11 +127,7 @@ function getchunkarray(z::ZArray{>:Missing}) inner = fill(z.metadata.fill_value, z.metadata.chunks) a = SenMissArray(inner,z.metadata.fill_value) end -_zero(T) = zero(T) -_zero(T::Type{<:MaxLengthString}) = T("") -_zero(T::Type{ASCIIChar}) = ASCIIChar(0) -_zero(::Type{<:Vector{T}}) where T = T[] -getchunkarray(z::ZArray) = fill(_zero(eltype(z)), z.metadata.chunks) +getchunkarray(z::ZArray) = Array{eltype(z)}(undef, z.metadata.chunks...) maybeinner(a::Array) = a maybeinner(a::SenMissArray) = a.x diff --git a/src/metadata.jl b/src/metadata.jl index 6568fb63..49b82ca8 100644 --- a/src/metadata.jl +++ b/src/metadata.jl @@ -96,7 +96,7 @@ function typestr(s::AbstractString, filterlist=nothing) if filterlist === nothing throw(ArgumentError("Object array can only be parsed when an appropriate filter is defined")) end - return Vector{sourcetype(first(filterlist))} + return sourcetype(first(filterlist)) end isempty(typesize) && throw((ArgumentError("$s is not a valid numpy typestr"))) tc, ts = first(typecode), parse(Int, typesize) diff --git a/test/runtests.jl b/test/runtests.jl index 7678d93a..cd66c984 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -228,10 +228,11 @@ end end @testset "ragged arrays" begin - z = zcreate(Vector{Float64},2,3) + z = zcreate(Vector{Float64},2,3,chunks=(1,1)) a, b, c, d = [1.0,2.0,3.0], [4.0,5.0],[2.0],[2.0,3.0] z[1,1] = a z[2,1:3] = [b,c,d] + z[1,2:3] = [[],[]] @test z[:,:] == reshape([a,b,[],c,[],d],2,3) @test storageratio(z) == "unknown" @test zinfo(z) === nothing