Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for read-only s3 virtual driver #1010

Merged
merged 16 commits into from
Nov 21, 2022
4 changes: 4 additions & 0 deletions docs/src/api_bindings.md
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ h5pl_size
- [`h5p_get_fapl_mpio32`](@ref h5p_get_fapl_mpio32)
- [`h5p_get_fapl_mpio64`](@ref h5p_get_fapl_mpio64)
- [`h5p_get_fapl_multi`](@ref h5p_get_fapl_multi)
- [`h5p_get_fapl_ros3`](@ref h5p_get_fapl_ros3)
- [`h5p_get_fapl_splitter`](@ref h5p_get_fapl_splitter)
- [`h5p_get_fclose_degree`](@ref h5p_get_fclose_degree)
- [`h5p_get_file_image`](@ref h5p_get_file_image)
Expand Down Expand Up @@ -579,6 +580,7 @@ h5pl_size
- [`h5p_set_fapl_mpio32`](@ref h5p_set_fapl_mpio32)
- [`h5p_set_fapl_mpio64`](@ref h5p_set_fapl_mpio64)
- [`h5p_set_fapl_multi`](@ref h5p_set_fapl_multi)
- [`h5p_set_fapl_ros3`](@ref h5p_set_fapl_ros3)
- [`h5p_set_fapl_sec2`](@ref h5p_set_fapl_sec2)
- [`h5p_set_fapl_split`](@ref h5p_set_fapl_split)
- [`h5p_set_fapl_splitter`](@ref h5p_set_fapl_splitter)
Expand Down Expand Up @@ -693,6 +695,7 @@ h5p_get_fapl_hdfs
h5p_get_fapl_mpio32
h5p_get_fapl_mpio64
h5p_get_fapl_multi
h5p_get_fapl_ros3
h5p_get_fapl_splitter
h5p_get_fclose_degree
h5p_get_file_image
Expand Down Expand Up @@ -795,6 +798,7 @@ h5p_set_fapl_log
h5p_set_fapl_mpio32
h5p_set_fapl_mpio64
h5p_set_fapl_multi
h5p_set_fapl_ros3
h5p_set_fapl_sec2
h5p_set_fapl_split
h5p_set_fapl_splitter
Expand Down
1 change: 1 addition & 0 deletions docs/src/interface/properties.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,5 +44,6 @@ CurrentModule = HDF5.Drivers
```@docs
Core
POSIX
ROS3
MPIO
```
2 changes: 2 additions & 0 deletions gen/api_defs.jl
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@
@bind h5p_get_fapl_mpio64(fapl_id::hid_t, comm::Ptr{Hmpih64}, info::Ptr{Hmpih64})::herr_t "Error getting MPIO properties"
@bind h5p_get_fapl_multi(fapl_id::hid_t, memb_map::Ptr{H5FD_mem_t}, memb_fapl::Ptr{hid_t}, memb_name::Ptr{Ptr{Cchar}}, memb_addr::Ptr{haddr_t}, relax::Ptr{hbool_t})::herr_t "Error in h5p_get_fapl_multi (not annotated)"
@bind h5p_get_fapl_splitter(fapl_id::hid_t, config_ptr::Ptr{H5FD_splitter_vfd_config_t})::herr_t "Error in h5p_get_fapl_splitter (not annotated)"
@bind h5p_get_fapl_ros3(fapl_id::hid_t, fa_out::Ptr{H5FD_ros3_fapl_t})::herr_t "Error in getting ros3 properties"
@bind h5p_get_fclose_degree(fapl_id::hid_t, fc_degree::Ref{Cint})::herr_t "Error getting close degree"
@bind h5p_get_file_image(fapl_id::hid_t, buf_ptr_ptr::Ptr{Ptr{Cvoid}}, buf_len_ptr::Ptr{Csize_t})::herr_t "Error in h5p_get_file_image (not annotated)"
@bind h5p_get_file_image_callbacks(fapl_id::hid_t, callbacks_ptr::Ptr{H5FD_file_image_callbacks_t})::herr_t "Error in h5p_get_file_image_callbacks (not annotated)"
Expand Down Expand Up @@ -382,6 +383,7 @@
@bind h5p_set_fapl_mpio64(fapl_id::hid_t, comm::Hmpih64, info::Hmpih64)::herr_t "Error setting MPIO properties"
@bind h5p_set_fapl_multi(fapl_id::hid_t, memb_map::Ptr{H5FD_mem_t}, memb_fapl::Ptr{hid_t}, memb_name::Ptr{Ptr{Cchar}}, memb_addr::Ptr{haddr_t}, relax::hbool_t)::herr_t "Error in h5p_set_fapl_multi (not annotated)"
@bind h5p_set_fapl_sec2(fapl_id::hid_t)::herr_t "Error setting Sec2 properties"
@bind h5p_set_fapl_ros3(fapl_id::hid_t, fa::Ptr{H5FD_ros3_fapl_t})::herr_t "Error in setting ros3 properties"
@bind h5p_set_fapl_split(fapl::hid_t, meta_ext::Ptr{Cchar}, meta_plist_id::hid_t, raw_ext::Ptr{Cchar}, raw_plist_id::hid_t)::herr_t "Error in h5p_set_fapl_split (not annotated)"
@bind h5p_set_fapl_splitter(fapl_id::hid_t, config_ptr::Ptr{H5FD_splitter_vfd_config_t})::herr_t "Error in h5p_set_fapl_splitter (not annotated)"
@bind h5p_set_fapl_stdio(fapl_id::hid_t)::herr_t "Error in h5p_set_fapl_stdio (not annotated)"
Expand Down
2 changes: 2 additions & 0 deletions gen/bind_generator.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ bind_exceptions[:h5p_set_fapl_mpio64] = :H5Pset_fapl_mpio
# have numbers at the end
bind_exceptions[:h5p_set_fletcher32] = :H5Pset_fletcher32
bind_exceptions[:h5p_set_fapl_sec2] = :H5Pset_fapl_sec2
bind_exceptions[:h5p_get_fapl_ros3] = :H5Pget_fapl_ros3
bind_exceptions[:h5p_set_fapl_ros3] = :H5Pset_fapl_ros3
# underscore separator not removed
bind_exceptions[:h5fd_core_init] = :H5FD_core_init
bind_exceptions[:h5fd_family_init] = :H5FD_family_init
Expand Down
8 changes: 8 additions & 0 deletions src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ include("highlevel.jl")

const libversion = API.h5_get_libversion()
const HAS_PARALLEL = Ref(false)
const HAS_ROS3 = Ref(false)

"""
has_parallel()
Expand All @@ -96,6 +97,13 @@ For the second condition to be true, MPI.jl must be imported before HDF5.jl.
"""
has_parallel() = HAS_PARALLEL[]

"""
has_ros3()

Returns `true` if the HDF5 libraries were compiled with ros3 support
"""
has_ros3() = HAS_ROS3[]

function __init__()
# HDF5.API.__init__() is run first
#
Expand Down
22 changes: 22 additions & 0 deletions src/api/functions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2366,6 +2366,17 @@ function h5p_get_fapl_splitter(fapl_id, config_ptr)
return nothing
end

"""
h5p_get_fapl_ros3(fapl_id::hid_t, fa_out::Ptr{H5FD_ros3_fapl_t})

See `libhdf5` documentation for [`H5Pget_fapl_ros3`](https://portal.hdfgroup.org/display/HDF5/H5P_GET_FAPL_ROS3).
"""
function h5p_get_fapl_ros3(fapl_id, fa_out)
var"#status#" = ccall((:H5Pget_fapl_ros3, libhdf5), herr_t, (hid_t, Ptr{H5FD_ros3_fapl_t}), fapl_id, fa_out)
var"#status#" < 0 && @h5error("Error in getting ros3 properties")
return nothing
end

"""
h5p_get_fclose_degree(fapl_id::hid_t, fc_degree::Ref{Cint})

Expand Down Expand Up @@ -3411,6 +3422,17 @@ function h5p_set_fapl_sec2(fapl_id)
return nothing
end

"""
h5p_set_fapl_ros3(fapl_id::hid_t, fa::Ptr{H5FD_ros3_fapl_t})

See `libhdf5` documentation for [`H5Pset_fapl_ros3`](https://portal.hdfgroup.org/display/HDF5/H5P_SET_FAPL_ROS3).
"""
function h5p_set_fapl_ros3(fapl_id, fa)
var"#status#" = ccall((:H5Pset_fapl_ros3, libhdf5), herr_t, (hid_t, Ptr{H5FD_ros3_fapl_t}), fapl_id, fa)
var"#status#" < 0 && @h5error("Error in setting ros3 properties")
return nothing
end

"""
h5p_set_fapl_split(fapl::hid_t, meta_ext::Ptr{Cchar}, meta_plist_id::hid_t, raw_ext::Ptr{Cchar}, raw_plist_id::hid_t)

Expand Down
8 changes: 8 additions & 0 deletions src/api/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,14 @@ struct H5FD_hdfs_fapl_t
stream_buffer_size::Int32
end

struct H5FD_ros3_fapl_t
version::Int32
authenticate::hbool_t
aws_region::Cstring
secret_id::Cstring
secret_key::Cstring
end

struct H5FD_splitter_vfd_config_t
magic::Int32
version::Cuint
Expand Down
4 changes: 3 additions & 1 deletion src/drivers/drivers.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module Drivers

export POSIX
export POSIX, ROS3

import ..API
import ..HDF5: HDF5, Properties, h5doc
Expand Down Expand Up @@ -87,10 +87,12 @@ function __init__()

# Check whether the libhdf5 was compiled with parallel support.
HDF5.HAS_PARALLEL[] = API._has_symbol(:H5Pset_fapl_mpio)
HDF5.HAS_ROS3[] = API._has_symbol(:H5Pset_fapl_ros3)

@require MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" (
HDF5.has_parallel() && include("mpio.jl")
)
end

include("ros3.jl")
end # module
45 changes: 45 additions & 0 deletions src/drivers/ros3.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""
ROS3()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update the docstring signature

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added additonal signatures below this line.


This is the read-only virtual driver that enables access to HDF5 objects stored in AWS S3
"""
struct ROS3 <: Driver
fa::API.H5FD_ros3_fapl_t
end

function ROS3(
version::Integer,
auth::Bool,
region::AbstractString,
id::AbstractString,
key::AbstractString
)
return (ROS3 ∘ API.H5FD_ros3_fapl_t)(
version,
auth,
Base.unsafe_convert(Cstring, region),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be a regular convert or does it have to be an unsafe_convert ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It needs to be an unsafe_convert:

julia> convert(Cstring, "hello")
ERROR: MethodError: Cannot `convert` an object of type String to an object of type Cstring
Closest candidates are:
  convert(::Type{Cstring}, ::Union{Ptr{Int8}, Ptr{Nothing}, Ptr{UInt8}}) at ~/src/julia-1.7.1/share/julia/base/c.jl:167
  convert(::Type{T}, ::T) where T at ~/src/julia-1.7.1/share/julia/base/essentials.jl:218
Stacktrace:
 [1] top-level scope
   @ REPL[55]:1

julia> Base.unsafe_convert(Cstring, "hello")
Cstring(0x00007fc5c5e06838)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. you need to first call cconvert(Cstring, region) (this converts to String, and ensures it is nul-terminated)
  2. you then need to keep a reference to the result of this whenever you pass it to a ccall

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably the best option is to add them as extra fields to the ROS3 struct?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't we just do unsafe_convert(Cstring, String(region))?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I missed the original point of this all. Please correct me if I'm still wrong in my thinking
So the reason we need this is because we use the ROS3 instance in further calls (specifically h5p_set_fapl_ros3 h5p_get_fapl_ros3) and those need to convert the outward driver struct ROS3 to the internal libhdf5 struct H5FD_ros3_fapl_t and right now there's a mismatch in that ROS3 doesn't agree with the layout of H5FD_ros3_fapl_t?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to convert the Julia struct to the C struct. Part of the issue is that the Julia struct and its Strings could be garbage collected. Meanwhile, the Cstring is an unmanaged pointer to a null terminated character array.

Anyways, it turns out that the C struct definition here is wrong. The correct definition is

https://github.com/HDFGroup/hdf5/blob/100b22e6c23c44a082fd69b8c05a63c7492083f7/src/H5FDros3.h#L82-L88

typedef struct H5FD_ros3_fapl_t {
    int32_t version;
    hbool_t authenticate;
    char    aws_region[H5FD_ROS3_MAX_REGION_LEN + 1];
    char    secret_id[H5FD_ROS3_MAX_SECRET_ID_LEN + 1];
    char    secret_key[H5FD_ROS3_MAX_SECRET_KEY_LEN + 1];
} H5FD_ros3_fapl_t;

In Julia, that would be

struct H5FD_ros3_fapl_t
    version::Int32
    authenticate::Bool
    aws_region::NTuple{H5FD_ROS3_MAX_REGION_LEN + 1,Cchar}
    secret_id ::NTuple{H5FD_ROS3_MAX_SECRET_ID_LEN + 1, Cchar}
    secret_key::NTuple{H5FD_ROS3_MAX_SECRET_KEY_LEN + 1, Cchar};
end

I've started to rewrite this. Also, I think we need to check our definition of hbool_t. Currently it is UInt32, but I think it probably should be UInt8.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's probably not the first time there has been a mismatch in definitions. Do you want to push your changes on this branch?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I will later today. I just started it on a different computer than I am on right now. I didn't quite get the tests to pass yet on the branch I was working on.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I quickly saw this comment on the hbool issue aldanor/hdf5-rust#28
but perhaps it doesn't affect us since we dont' use msvc

Base.unsafe_convert(Cstring, id),
Base.unsafe_convert(Cstring, key)
)
end

ROS3(region::AbstractString, id::AbstractString, key::AbstractString) =
ROS3(1, true, region, id, key)
ROS3() = ROS3(1, false, "", "", "")

function get_driver(fapl::Properties, ::Type{ROS3})
r_fa = Ref{H5FD_ros3_fapl_t}()
API.h5p_get_fapl_ros3(fapl, r_fa)
return ROS3(r_fa[])
end

function set_driver!(fapl::Properties, driver::ROS3)
HDF5.has_ros3() || error(
"HDF5.jl has no ros3 support." *
" Make sure that you're using ROS3-enabled HDF5 libraries"
)
HDF5.init!(fapl)
API.h5p_set_fapl_ros3(fapl, Ref(driver.fa))
DRIVERS[API.h5p_get_driver(fapl)] = ROS3
return nothing
end
8 changes: 5 additions & 3 deletions src/file.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ function h5open(
flag = swmr ? API.H5F_ACC_TRUNC | API.H5F_ACC_SWMR_WRITE : API.H5F_ACC_TRUNC
fid = API.h5f_create(filename, flag, fcpl, fapl)
else
ishdf5(filename) || error(
"unable to determine if $filename is accessible in the HDF5 format (file may not exist)"
)
gszep marked this conversation as resolved.
Show resolved Hide resolved
occursin(r"(s3a?|https?)://", filename) ||
ishdf5(filename) ||
error(
"unable to determine if $filename is accessible in the HDF5 format (file may not exist)"
)
if wr
flag = swmr ? API.H5F_ACC_RDWR | API.H5F_ACC_SWMR_WRITE : API.H5F_ACC_RDWR
else
Expand Down
13 changes: 13 additions & 0 deletions test/ros3.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
using HDF5
using Test

@testset "ros3" begin
@test HDF5.has_ros3()

h5open(
"http://s3.us-east-2.amazonaws.com/hdf5ros3/GMODO-SVM01.h5";
driver=HDF5.Drivers.ROS3()
) do f
@test keys(f) == ["All_Data", "Data_Products"]
end
end
4 changes: 4 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ end
include("mpio.jl")
end

if HDF5.has_ros3()
include("ros3.jl")
end

# Clean up after all resources
HDF5.API.h5_close()
end