-
Notifications
You must be signed in to change notification settings - Fork 5.5k
io: add io_uring wrapper #19339
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
io: add io_uring wrapper #19339
Changes from 17 commits
7b6ef8a
c0c60a0
60d32a7
39e4385
169be56
3a43748
735dcb0
4640a62
a050bb3
aaef114
57944d3
d7ac8f0
b27fabd
ea728dd
2930247
e588e1a
7fd6942
a1107e9
aa2ddae
9fd254f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| # DO NOT EDIT. This file is generated by tools/proto_format/proto_sync.py. | ||
|
|
||
| load("@envoy_api//bazel:api_build_system.bzl", "api_proto_package") | ||
|
|
||
| licenses(["notice"]) # Apache 2 | ||
|
|
||
| api_proto_package( | ||
| deps = ["@com_github_cncf_udpa//udpa/annotations:pkg"], | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| syntax = "proto3"; | ||
|
|
||
| package envoy.extensions.io.io_uring.v3; | ||
|
|
||
| import "google/protobuf/wrappers.proto"; | ||
|
|
||
| import "udpa/annotations/status.proto"; | ||
| import "validate/validate.proto"; | ||
|
|
||
| option java_package = "io.envoyproxy.envoy.extensions.io.io_uring.v3"; | ||
| option java_outer_classname = "IoUringProto"; | ||
| option java_multiple_files = true; | ||
| option go_package = "github.com/envoyproxy/go-control-plane/envoy/extensions/io/io_uring/v3;io_uringv3"; | ||
| option (udpa.annotations.file_status).package_version_status = ACTIVE; | ||
|
|
||
| // [#protodoc-title: ``io_uring`` configuration] | ||
|
|
||
| // Configuration for an IO interface that relies on Linux specific ``io_uring`` API. | ||
| message IoUring { | ||
| // The size of both submission and completion queues in queue entries. For heavily loaded | ||
| // processes 300 queue entries is a good enough value. If the load is not high and memory | ||
| // is a constraint then it's safe to have smaller queues. If not set, defaults to 300 | ||
| // queue entries. | ||
| google.protobuf.UInt32Value io_uring_size = 1 [(validate.rules).uint32 = {gte: 2}]; | ||
|
|
||
| // When this flag is specified, a kernel thread is created to perform submission queue | ||
| // polling. An ``io_uring`` instance configured in this way enables ``io_uring`` sockets to | ||
| // issue I/O without ever context switching into the kernel and with better latency. | ||
| // | ||
| // Please note that the polling kernel thread will waste CPU cycles after the ``io_uring`` | ||
| // instance becomes inactive for a grace period which is set to 1 second currently. The | ||
| // polling kernel thread will be started automatically as soon as the ``io_uring`` instance | ||
| // becomes active again. | ||
| bool use_submission_queue_polling = 2; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -112,6 +112,18 @@ REPOSITORY_LOCATIONS_SPEC = dict( | |
| release_date = "2021-06-03", | ||
| cpe = "N/A", | ||
| ), | ||
| com_github_axboe_liburing = dict( | ||
| project_name = "liburing", | ||
| project_desc = "C helpers to set up and tear down io_uring instances", | ||
| project_url = "https://github.com/axboe/liburing", | ||
| version = "2.1", | ||
| sha256 = "f1e0500cb3934b0b61c5020c3999a973c9c93b618faff1eba75aadc95bb03e07", | ||
| strip_prefix = "liburing-liburing-{version}", | ||
| urls = ["https://github.com/axboe/liburing/archive/liburing-{version}.tar.gz"], | ||
| use_category = ["dataplane_core", "controlplane"], | ||
| release_date = "2021-09-09", | ||
| cpe = "N/A", | ||
| ), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note #19082 (review) which seems to apply here as well. |
||
| # This dependency is built only when performance tracing is enabled with the | ||
| # option --define=perf_tracing=enabled. It's never built for releases. | ||
| com_github_google_perfetto = dict( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| load( | ||
| "//bazel:envoy_build_system.bzl", | ||
| "envoy_cc_library", | ||
| "envoy_package", | ||
| ) | ||
|
|
||
| licenses(["notice"]) # Apache 2 | ||
|
|
||
| envoy_package() | ||
|
|
||
| envoy_cc_library( | ||
| name = "io_uring_interface", | ||
| hdrs = [ | ||
| "io_uring.h", | ||
| ], | ||
| deps = [ | ||
| "//envoy/server:bootstrap_extension_config_interface", | ||
| "//source/common/network:address_lib", | ||
| ], | ||
| ) | ||
|
|
||
| envoy_cc_library( | ||
| name = "io_uring_impl_lib", | ||
| srcs = [ | ||
| "io_uring_impl.cc", | ||
| ], | ||
| hdrs = [ | ||
| "io_uring_impl.h", | ||
| ], | ||
| external_deps = ["uring"], | ||
| deps = [ | ||
| ":io_uring_interface", | ||
| "@envoy_api//envoy/extensions/io/io_uring/v3:pkg_cc_proto", | ||
| ], | ||
| ) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,141 @@ | ||
| #pragma once | ||
|
rojkov marked this conversation as resolved.
|
||
|
|
||
| #include "envoy/common/pure.h" | ||
| #include "envoy/server/bootstrap_extension_config.h" | ||
|
|
||
| #include "source/common/network/address_impl.h" | ||
|
|
||
| namespace Envoy { | ||
| namespace Io { | ||
|
|
||
| /** | ||
| * Callback invoked when iterating over entries in the completion queue. | ||
| * @param user_data is any data attached to an entry submitted to the submission | ||
| * queue. | ||
| * @param result is a return code of submitted system call. | ||
| */ | ||
| using CompletionCb = std::function<void(void* user_data, int32_t result)>; | ||
|
|
||
| enum class IoUringResult { Ok, Busy, Failed }; | ||
|
|
||
| /** | ||
| * Abstract wrapper around `io_uring`. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the threading model assumed by IoUring? Which methods can be called from any thread and which can only be called from the IoRing thread?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. io_uring docs don't state any requirement regarding thread-safety, though they probably should. In the case of Envoy, every IoUring is coupled with one libevent loop (it doesn't run its own) which is rarely posted to from threads other than the one running it. So, I assumed that making IoUring thread local and calling its methods only from the thread it lives in simplifies everything. But perhaps it makes more sense to extend Dispatcher to own IoUrings.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It may make sense for the dispatcher to own the io_urings eventually instead of providing a getOrCreate factory method. We can get to that as development continues.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From examples I believe that uring is thread safe. However, conceptually there should be only one thread emptying the queue. But adding more I/O operations (preparing) into the ring can be done from any thread. |
||
| */ | ||
| class IoUring { | ||
| public: | ||
| virtual ~IoUring() = default; | ||
|
|
||
| /** | ||
| * Registers an eventfd file descriptor for the ring and returns it. | ||
| * It can be used for integration with event loops. | ||
| */ | ||
| virtual os_fd_t registerEventfd() PURE; | ||
|
|
||
| /** | ||
| * Resets the eventfd file descriptor for the ring. | ||
| */ | ||
| virtual void unregisterEventfd() PURE; | ||
|
|
||
| /** | ||
| * Returns true if an eventfd file descriptor is registered with the ring. | ||
| */ | ||
| virtual bool isEventfdRegistered() const PURE; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: comment
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added |
||
|
|
||
| /** | ||
| * Iterates over entries in the completion queue, calls the given callback for | ||
| * every entry and marks them consumed. | ||
| */ | ||
| virtual void forEveryCompletion(CompletionCb completion_cb) PURE; | ||
|
|
||
| /** | ||
| * Prepares an accept system call and puts it into the submission queue. | ||
| * Returns IoUringResult::Failed in case the submission queue is full already | ||
| * and IoUringResult::Ok otherwise. | ||
| */ | ||
| virtual IoUringResult prepareAccept(os_fd_t fd, struct sockaddr* remote_addr, | ||
| socklen_t* remote_addr_len, void* user_data) PURE; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Use of void* in the interface seems a bit too low level. What is the ultimate purpose of this interface? Implement an alternate dispatcher.h implementation? I wonder if use of an user_data interface would improve the usability of the resulting class as it would allow for the removal of the CompletionCb argument to forEveryCompletion
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In particular, this interface hides the io_uring syscalls and makes unit testing of the new iohandle (not in this PR) a bit easier. Originally I used a custom type containing the network iohandle instead of void*, but in the case of block I/O a different type might be needed. Probably this void* in the interface could be avoided with a template. I haven't thought of the possibility of implementing an alternative Dispatcher, because its interface implies the file readiness model (i.e. Dispatcher::createFileEvent()), whereas io_uring is all about completion of system operations. CompletionCb is an analog of FileReadyCb, but the former isn't called for a known file descriptor. A system operation and its context (including the arguments) are parts of a completion event.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's ok to use void* for now; we may revisit this later as our understanding of this module improves. I'd like to better understand how io_uring and dispatcher should interact. I could see some threads being exclusively one or the other. I'm not sure about how they would coexist in a thread while avoiding starvation.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see no problem with multiple io_urings coexisting in one dispatcher: all updates in their completion queues are reflected in their respective event notification file descriptors ( I have rewritten the tests to run the callbacks through Dispatcher::run(). Though I fail to come up with a simple enough API for Dispatcher to accommodate multiple io_urings if we go this way. If we assume only one io_uring per Dispatcher then extending Dispatcher with something like For the case of multiple io_urings using IoUringId = uint32_t;
class DispatcherImpl {
public:
...
IoUringId addIoUring(const IoUringFactory& factory);
void registerCompletionEvent(IoUringId id, CompletionCb cb);
IoUring& getIoUring(IoUringId id);
...
private:
std::vector<IoUring> io_urings_;
}; |
||
|
|
||
| /** | ||
| * Prepares a connect system call and puts it into the submission queue. | ||
| * Returns IoUringResult::Failed in case the submission queue is full already | ||
| * and IoUringResult::Ok otherwise. | ||
| */ | ||
| virtual IoUringResult prepareConnect(os_fd_t fd, | ||
| const Network::Address::InstanceConstSharedPtr& address, | ||
| void* user_data) PURE; | ||
|
|
||
| /** | ||
| * Prepares a readv system call and puts it into the submission queue. | ||
| * Returns IoUringResult::Failed in case the submission queue is full already | ||
| * and IoUringResult::Ok otherwise. | ||
| */ | ||
| virtual IoUringResult prepareReadv(os_fd_t fd, const struct iovec* iovecs, unsigned nr_vecs, | ||
| off_t offset, void* user_data) PURE; | ||
|
|
||
| /** | ||
| * Prepares a writev system call and puts it into the submission queue. | ||
| * Returns IoUringResult::Failed in case the submission queue is full already | ||
| * and IoUringResult::Ok otherwise. | ||
| */ | ||
| virtual IoUringResult prepareWritev(os_fd_t fd, const struct iovec* iovecs, unsigned nr_vecs, | ||
| off_t offset, void* user_data) PURE; | ||
|
|
||
| /** | ||
| * Prepares a close system call and puts it into the submission queue. | ||
| * Returns IoUringResult::Failed in case the submission queue is full already | ||
| * and IoUringResult::Ok otherwise. | ||
| */ | ||
| virtual IoUringResult prepareClose(os_fd_t fd, void* user_data) PURE; | ||
|
|
||
| /** | ||
| * Submits the entries in the submission queue to the kernel using the | ||
| * `io_uring_enter()` system call. | ||
| * Returns IoUringResult::Ok in case of success and may return | ||
| * IoUringResult::Busy if we over commit the number of requests. In the latter | ||
| * case the application should drain the completion queue by handling some completions | ||
| * with the forEveryCompletion() method and try again. | ||
| */ | ||
| virtual IoUringResult submit() PURE; | ||
| }; | ||
|
|
||
| /** | ||
| * Abstract factory for IoUring wrappers. | ||
| */ | ||
| class IoUringFactory { | ||
| public: | ||
| virtual ~IoUringFactory() = default; | ||
|
|
||
| /** | ||
| * Returns an instance of IoUring and creates it if needed for the current | ||
| * thread. | ||
| */ | ||
| virtual IoUring& getOrCreate() const PURE; | ||
| }; | ||
|
|
||
| /** | ||
| * Class to be derived by all IoUringFactory implementations. | ||
| * | ||
| * It acts both as an IoUringFactory and as a BootstrapExtensionFactory. The | ||
| * latter is used, on the one hand, to configure and initialize the factory, on | ||
| * the other, for IoUringFactory lookup by leveraging the FactoryRegistry. As | ||
| * required for all bootstrap extensions, all derived classes should register | ||
| * via the REGISTER_FACTORY() macro as BootstrapExtensionFactory. | ||
| * | ||
| * IoUringFactory instances can be retrieved using the factory name, i.e., | ||
| * string returned by name() function implemented by all classes that derive | ||
| * IoUringFactoryBase, via Io::ioUringFactory(). | ||
| */ | ||
| class IoUringFactoryBase : public IoUringFactory, | ||
| public Server::Configuration::BootstrapExtensionFactory {}; | ||
|
|
||
| /* | ||
| * Lookup IoUringFactory instance by name. | ||
| */ | ||
| static inline const IoUringFactory* ioUringFactory(std::string name) { | ||
| auto factory = | ||
| Registry::FactoryRegistry<Server::Configuration::BootstrapExtensionFactory>::getFactory(name); | ||
| return dynamic_cast<IoUringFactory*>(factory); | ||
| } | ||
|
|
||
| } // namespace Io | ||
| } // namespace Envoy | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why isn't this in the bootstrap extension hierarchy? Looking below, I think this is what it is. This hierarchy is introduced in https://github.com/envoyproxy/envoy/pull/19467/files; it seems our bootstrap extensions have been scattered ad hoc so far :(