Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
a9ca9f5
io: add iohandle backed with io_uring
rojkov Sep 3, 2021
5ccddde
update dependency
rojkov Nov 23, 2021
bf57019
add metadata to the extension
rojkov Nov 23, 2021
aa6158d
close valid fd in destructor
rojkov Nov 25, 2021
4d1d16b
use constexpr
rojkov Nov 25, 2021
bb8cce1
move Linux specific code to io_uring_impl.cc
rojkov Nov 25, 2021
ff5494e
Merge remote-tracking branch 'upstream/main' into io-uring
rojkov Nov 26, 2021
a4e7d33
Merge remote-tracking branch 'upstream/main' into io-uring
rojkov Nov 29, 2021
bc06800
fix windows builds by including IPPROTO_MPTCP definition
rojkov Nov 30, 2021
8e7bd74
update proto doc strings
rojkov Dec 9, 2021
0227a48
allocate separate buffers for every read op
rojkov Dec 10, 2021
980d34a
Merge remote-tracking branch 'upstream/main' into io-uring
rojkov Dec 10, 2021
e743fc2
fmt
rojkov Dec 10, 2021
23fc1ba
drop redundant constness
rojkov Dec 14, 2021
d269dae
Decouple IoUring wrapper from io_handle
rojkov Dec 14, 2021
5b80a6e
Move io_uring wrapper to a separate reusable package
rojkov Dec 15, 2021
620a532
add initial test for IoUring
rojkov Dec 15, 2021
6c41f50
handle errors more gracefully
rojkov Dec 20, 2021
a02ad71
Let upper layers handle errors
rojkov Dec 21, 2021
f5708d6
Add doc strings
rojkov Dec 22, 2021
b54ba51
try with newer kernel of ubuntu-20.04
rojkov Dec 29, 2021
c26d6f6
spelling
rojkov Dec 29, 2021
e30687e
Merge remote-tracking branch 'upstream/main' into io-uring
rojkov Dec 29, 2021
63d0b78
Fix merge from main
rojkov Dec 29, 2021
15b2730
run privileged docker as suggested by VM image maintainers
rojkov Dec 29, 2021
268997b
Revert "run privileged docker as suggested by VM image maintainers"
rojkov Feb 11, 2022
814dbb2
Revert "try with newer kernel of ubuntu-20.04"
rojkov Feb 11, 2022
19c23f8
address review comments
rojkov Jan 7, 2022
4684321
rename getOrCreateUring
rojkov Jan 11, 2022
451af2f
avoid using epoll calls directly in tests
rojkov Jan 17, 2022
6bc1821
Turn IoRingFactory into bootstrap extension
rojkov Jan 31, 2022
dc4151e
adapt to new IoUringFactory as a bootstrap extension
rojkov Jan 31, 2022
9d365c7
use Envoy::ThreadLocal API
rojkov Jan 31, 2022
25f1924
sync with io-uring-wrapper branch
rojkov Feb 9, 2022
89faf66
make IoUring compatible with Envoy::ThreadLocal
rojkov Feb 9, 2022
5ffae83
adapt iohandle to the new IoUring
rojkov Feb 9, 2022
7dfad5b
fix address sanitizer CI check
rojkov Feb 10, 2022
0ffeeee
Merge remote-tracking branch 'upstream/main' into io-uring-v2
rojkov Feb 11, 2022
0aabb0e
drop bootstrap extension
rojkov Mar 23, 2022
2e363f7
adapt to new IoUringFactory which isn't extension
rojkov Mar 23, 2022
e6cea15
use ThreadLocal::TypedSlot
rojkov Mar 23, 2022
e0a1ee0
Merge remote-tracking branch 'upstream/main' into io-uring-v2
rojkov Mar 23, 2022
4aef5c9
fix build
rojkov Mar 24, 2022
e86974a
Merge remote-tracking branch 'upstream/main' into io-uring
rojkov Apr 11, 2022
c8233aa
Merge remote-tracking branch 'upstream/main' into io-uring
rojkov Apr 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,8 @@ extensions/filters/http/oauth2 @rgs1 @derekargueta @snowp
/*/extensions/matching/common_inputs/environment @snowp @donyu
# user space socket pair, event, connection and listener
/*/extensions/io_socket/user_space @lambdai @antoniovicente
# io_uring-based IO socket
/*/extensions/io_socket/io_uring @rojkov @antoniovicente
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's chat over Slack about CODEOWNERS. I want to make sure I can satisfy this extension's ownership needs if my name is listed here :)

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Matt suggested the code should be in the core. Then CODEOWNERS will remain untouched :)

/*/extensions/bootstrap/internal_listener @lambdai @adisuissa
# Default UUID4 request ID extension
/*/extensions/request_id/uuid @mattklein123 @alyssawilk
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
syntax = "proto3";

package envoy.extensions.network.socket_interface.v3;

import "google/protobuf/wrappers.proto";

import "udpa/annotations/status.proto";
import "validate/validate.proto";

option java_package = "io.envoyproxy.envoy.extensions.network.socket_interface.v3";
option java_outer_classname = "IoUringSocketInterfaceProto";
option java_multiple_files = true;
option go_package = "github.com/envoyproxy/go-control-plane/envoy/extensions/network/socket_interface/v3;socket_interfacev3";
option (udpa.annotations.file_status).package_version_status = ACTIVE;

// [#protodoc-title: ``io_uring`` Socket Interface configuration]

// Configuration for a socket interface that relies on Linux specific ``io_uring`` API to create
// sockets.
message IoUringSocketInterface {
// The size of the read buffer. Must be at least 4096. If not set, defaults to 8192.
google.protobuf.UInt32Value read_buffer_size = 1 [(validate.rules).uint32 = {gte: 4096}];
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

8192 seems a bit small.

Also, are there any restrictions on this being a multiple of 4096?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not aware of any limit. I don't run real life clusters and simply decided to start with two pages.


// The size of both submission and completion queues in queue entries. For heavily loaded
// processes 300 queue entries is a good enough value. If the load is not high and memory
// is a constraint then it's safe to have smaller queues. If not set, defaults to 300
// queue entries.
google.protobuf.UInt32Value io_uring_size = 2 [(validate.rules).uint32 = {gte: 16}];

// When this flag is specified, a kernel thread is created to perform submission queue
// polling. An ``io_uring`` instance configured in this way enables ``io_uring`` sockets to
// issue I/O without ever context switching into the kernel and with better latency.
//
// Please note that after the ``io_uring`` instance becomes inactive, the polling kernel thread
// keeps polling, and thus consuming CPU cycles, for a grace period which is currently set to
// 1 second. The polling kernel thread is started again automatically as soon as the
// ``io_uring`` instance becomes active again.
bool use_submission_queue_polling = 3;
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any performance notes / tradeoffs that should be explained here?

}
1 change: 1 addition & 0 deletions docs/root/api-v3/bootstrap/bootstrap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Bootstrap
:maxdepth: 2

../config/bootstrap/v3/bootstrap.proto
../extensions/network/socket_interface/v3/io_uring_socket_interface.proto
../config/metrics/v3/stats.proto
../config/metrics/v3/metrics_service.proto
../config/overload/v3/overload.proto
Expand Down
26 changes: 26 additions & 0 deletions docs/root/configuration/other_features/io_uring.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
.. _config_sock_interface_io_uring:

io_uring Socket Interface
=========================

* :ref:`v3 API reference <envoy_v3_api_msg_extensions.network.socket_interface.v3.IoUringSocketInterface>`

.. attention::

The io_uring socket interface extension is experimental and is currently under active development.

io_uring is an asynchronous I/O API implemented in the Linux kernel.
This socket interface uses `liburing <https://github.com/axboe/liburing>`_ to integrate io_uring
with Envoy.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be helpful to add some info about performance/latency/cpu/memory/etc tradeoffs that we expect to have when this extension is complete. Or at least some mention of the potential which justifies the development of this extension.


Example configuration
---------------------

.. code-block:: yaml

bootstrap_extensions:
- name: envoy.extensions.io_socket.io_uring
typed_config:
"@type": type.googleapis.com/envoy.extensions.network.socket_interface.v3.IoUringSocketInterface
default_socket_interface: "envoy.extensions.network.socket_interface.io_uring"

1 change: 1 addition & 0 deletions docs/root/configuration/other_features/other_features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ Other features
:maxdepth: 2

internal_listener
io_uring
rate_limit
vcl
wasm
Expand Down
1 change: 1 addition & 0 deletions docs/root/faq/windows/win_not_supported_features.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ The most notable features that are not supported on Windows are:
* :ref:`Hot restart <arch_overview_hot_restart>`
* :ref:`Signed Exchange Filter <config_http_filters_sxg>`
* :ref:`VCL Socket Interface <config_sock_interface_vcl>`
* :ref:`io_uring Socket Interface <config_sock_interface_io_uring>`

There are certain Envoy features that require newer versions of Windows. These features explicitly document the required version.

Expand Down
1 change: 0 additions & 1 deletion source/common/event/file_event_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
#include <cstdint>

#include "source/common/common/assert.h"
#include "source/common/event/dispatcher_impl.h"

#include "event2/event.h"

Expand Down
1 change: 1 addition & 0 deletions source/extensions/extensions_build_config.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ EXTENSIONS = {

"envoy.io_socket.user_space": "//source/extensions/io_socket/user_space:config",
"envoy.bootstrap.internal_listener": "//source/extensions/bootstrap/internal_listener:config",
"envoy.io_socket.io_uring": "//source/extensions/io_socket/io_uring:config",

#
# TLS peer certification validators
Expand Down
5 changes: 5 additions & 0 deletions source/extensions/extensions_metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,11 @@ envoy.internal_redirect_predicates.safe_cross_scheme:
- envoy.internal_redirect_predicates
security_posture: robust_to_untrusted_downstream_and_upstream
status: stable
envoy.io_socket.io_uring:
categories:
- envoy.io_socket
security_posture: unknown
status: wip
envoy.io_socket.user_space:
categories:
- envoy.io_socket
Expand Down
43 changes: 43 additions & 0 deletions source/extensions/io_socket/io_uring/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
load(
"//bazel:envoy_build_system.bzl",
"envoy_cc_extension",
"envoy_cc_library",
"envoy_extension_package",
)

licenses(["notice"]) # Apache 2

envoy_extension_package()

# Extension target for the io_uring socket interface (config.cc / config.h).
# The io_uring implementation library is only added to the deps when building for Linux;
# on every other platform the select() contributes nothing.
# NOTE(review): config.cc includes source/common/io/io_uring_impl.h unconditionally, so this
# target presumably only builds on Linux - confirm how other platforms exclude it.
envoy_cc_extension(
name = "config",
srcs = ["config.cc"],
hdrs = ["config.h"],
deps = [
":io_handle_impl_lib",
"//source/common/network:socket_interface_lib",
"@envoy_api//envoy/extensions/network/socket_interface/v3:pkg_cc_proto",
] + select({
"//bazel:linux": ["//source/common/io:io_uring_impl_lib"],
"//conditions:default": [],
}),
)

# Library with the socket IO handle implementation (io_handle_impl.cc / io_handle_impl.h) that
# the ":config" target instantiates. It depends only on the io_uring interface target, not on
# the Linux-specific io_uring implementation library.
envoy_cc_library(
name = "io_handle_impl_lib",
srcs = [
"io_handle_impl.cc",
],
hdrs = [
"io_handle_impl.h",
],
deps = [
"//envoy/event:dispatcher_interface",
"//envoy/network:io_handle_interface",
"//source/common/api:os_sys_calls_lib",
"//source/common/buffer:buffer_lib",
"//source/common/io:io_uring_interface",
"//source/common/network:address_lib",
"//source/common/network:io_socket_error_lib",
],
)
123 changes: 123 additions & 0 deletions source/extensions/io_socket/io_uring/config.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#include "source/extensions/io_socket/io_uring/config.h"

#include "envoy/common/platform.h"
#include "envoy/extensions/network/socket_interface/v3/io_uring_socket_interface.pb.validate.h"

#include "source/common/api/os_sys_calls_impl.h"
#include "source/common/io/io_uring_impl.h"
#include "source/extensions/io_socket/io_uring/io_handle_impl.h"

namespace Envoy {
namespace Extensions {
namespace IoSocket {
namespace IoUring {

namespace {

// Fallback values used by createBootstrapExtension() when the corresponding wrapped fields of
// the IoUringSocketInterface proto are not set. They match the defaults documented in the
// proto: 300 queue entries and a read buffer size of 8192.
constexpr uint32_t DefaultIoUringSize = 300;
constexpr uint32_t DefaultReadBufferSize = 8192;

} // namespace

// Server::BootstrapExtension: forwards the server-initialized notification to the io_uring
// factory this extension was constructed with.
void SocketInterfaceExtension::onServerInitialized() {
  factory_.onServerInitialized();
}

// Creates a new OS socket for the requested socket type and address family and wraps it in an
// io_uring based IO handle.
//
// @param socket_type Stream selects SOCK_STREAM, anything else selects SOCK_DGRAM.
// @param addr_type Ip or Pipe; any other address type panics.
// @param version IP version, only consulted when addr_type is Ip.
// @param socket_v6only forwarded unchanged to the created IO handle.
// @param options when mptcp_enabled_ is set the socket is created with IPPROTO_MPTCP.
// @return the IO handle owning the freshly created socket. A socket(2) failure is fatal.
Network::IoHandlePtr
SocketInterfaceImpl::socket(Network::Socket::Type socket_type, Network::Address::Type addr_type,
                            Network::Address::IpVersion version, bool socket_v6only,
                            const Network::SocketCreationOptions& options) const {
  // MPTCP is only meaningful for stream sockets over IP.
  int protocol = 0;
  if (options.mptcp_enabled_) {
    ASSERT(socket_type == Network::Socket::Type::Stream);
    ASSERT(addr_type == Network::Address::Type::Ip);
    protocol = IPPROTO_MPTCP;
  }

  const int flags = (socket_type == Network::Socket::Type::Stream) ? SOCK_STREAM : SOCK_DGRAM;

  // Map the address type (and, for IP, the version) onto the socket domain.
  int domain;
  if (addr_type == Network::Address::Type::Pipe) {
    domain = AF_UNIX;
  } else if (addr_type == Network::Address::Type::Ip) {
    const bool is_v6 = (version == Network::Address::IpVersion::v6);
    ASSERT(is_v6 || version == Network::Address::IpVersion::v4);
    domain = is_v6 ? AF_INET6 : AF_INET;
  } else {
    PANIC("not implemented");
  }

  const Api::SysCallSocketResult sock =
      Api::OsSysCallsSingleton::get().socket(domain, flags, protocol);
  RELEASE_ASSERT(SOCKET_VALID(sock.return_value_),
                 fmt::format("socket(2) failed, got error: {}", errorDetails(sock.errno_)));

  // The factory is created by createBootstrapExtension(), which has to run first.
  ASSERT(io_uring_factory_ != nullptr);
  return std::make_unique<IoUringSocketHandleImpl>(read_buffer_size_, *io_uring_factory_,
                                                   sock.return_value_, socket_v6only, domain);
}

// Creates an io_uring based IO handle suitable for the given address.
//
// The address type and IP version are taken from `addr` (non-IP addresses are treated as v4
// for the purpose of the call below). For IPv6 addresses the IPV6_V6ONLY socket option is set
// to the value reported by the address; failing to set it is fatal.
Network::IoHandlePtr
SocketInterfaceImpl::socket(Network::Socket::Type socket_type,
                            const Network::Address::InstanceConstSharedPtr addr,
                            const Network::SocketCreationOptions& options) const {
  const Network::Address::IpVersion ip_version =
      addr->ip() ? addr->ip()->version() : Network::Address::IpVersion::v4;
  const bool is_ipv6 = addr->type() == Network::Address::Type::Ip &&
                       ip_version == Network::Address::IpVersion::v6;
  const int v6only = is_ipv6 ? addr->ip()->ipv6()->v6only() : 0;

  Network::IoHandlePtr io_handle =
      SocketInterfaceImpl::socket(socket_type, addr->type(), ip_version, v6only, options);
  if (!is_ipv6) {
    return io_handle;
  }

  // Setting IPV6_V6ONLY restricts the IPv6 socket to IPv6 connections only.
  const Api::SysCallIntResult set_result = io_handle->setOption(
      IPPROTO_IPV6, IPV6_V6ONLY, reinterpret_cast<const char*>(&v6only), sizeof(v6only));
  RELEASE_ASSERT(!SOCKET_FAILURE(set_result.return_value_), "");
  return io_handle;
}

bool SocketInterfaceImpl::ipFamilySupported(int domain) {
Api::OsSysCalls& os_sys_calls = Api::OsSysCallsSingleton::get();
const Api::SysCallSocketResult result = os_sys_calls.socket(domain, SOCK_STREAM, 0);
if (SOCKET_VALID(result.return_value_)) {
RELEASE_ASSERT(
os_sys_calls.close(result.return_value_).return_value_ == 0,
fmt::format("Fail to close fd: response code {}", errorDetails(result.return_value_)));
}
return SOCKET_VALID(result.return_value_);
}

// Server::Configuration::BootstrapExtensionFactory
//
// Validates `message` as an IoUringSocketInterface config, stores the read buffer size,
// creates the io_uring factory and returns the bootstrap extension wired to both this socket
// interface and that factory. Unset wrapped fields fall back to DefaultReadBufferSize and
// DefaultIoUringSize.
//
// NOTE(review): every call replaces io_uring_factory_, while a previously returned
// SocketInterfaceExtension keeps a reference to the old factory - this presumably relies on
// being invoked at most once per process; confirm.
Server::BootstrapExtensionPtr SocketInterfaceImpl::createBootstrapExtension(
    const Protobuf::Message& message, Server::Configuration::ServerFactoryContext& context) {
  // The downcast is requested as a const reference; bind it by reference as well so the
  // validated proto is not copied.
  const auto& config = MessageUtil::downcastAndValidate<
      const envoy::extensions::network::socket_interface::v3::IoUringSocketInterface&>(
      message, context.messageValidationContext().staticValidationVisitor());
  read_buffer_size_ =
      PROTOBUF_GET_WRAPPED_OR_DEFAULT(config, read_buffer_size, DefaultReadBufferSize);
  io_uring_factory_ = std::make_unique<Io::IoUringFactoryImpl>(
      PROTOBUF_GET_WRAPPED_OR_DEFAULT(config, io_uring_size, DefaultIoUringSize),
      config.use_submission_queue_polling(), context.threadLocal());
  return std::make_unique<SocketInterfaceExtension>(*this, *io_uring_factory_);
}

// Returns a default-constructed IoUringSocketInterface message, i.e. the config proto type
// accepted by createBootstrapExtension().
ProtobufTypes::MessagePtr SocketInterfaceImpl::createEmptyConfigProto() {
  using ConfigProto = envoy::extensions::network::socket_interface::v3::IoUringSocketInterface;
  return std::make_unique<ConfigProto>();
}

REGISTER_FACTORY(SocketInterfaceImpl, Server::Configuration::BootstrapExtensionFactory);

} // namespace IoUring
} // namespace IoSocket
} // namespace Extensions
} // namespace Envoy
60 changes: 60 additions & 0 deletions source/extensions/io_socket/io_uring/config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#pragma once

#include "envoy/extensions/network/socket_interface/v3/io_uring_socket_interface.pb.h"

#include "source/common/network/socket_interface.h"

namespace Envoy {

namespace Io {
class IoUringFactory;
} // namespace Io

namespace Extensions {
namespace IoSocket {
namespace IoUring {

/**
 * Bootstrap extension returned by SocketInterfaceImpl::createBootstrapExtension(). Besides
 * wrapping the socket interface it relays the server-initialized callback to the io_uring
 * factory.
 */
class SocketInterfaceExtension : public Network::SocketInterfaceExtension {
public:
  /**
   * @param sock_interface the socket interface handed to the base extension.
   * @param factory the io_uring factory to notify; held by reference, so it must outlive this
   *        extension.
   */
  SocketInterfaceExtension(Network::SocketInterface& sock_interface, Io::IoUringFactory& factory)
      : Network::SocketInterfaceExtension(sock_interface), factory_(factory) {}

  // Server::BootstrapExtension
  void onServerInitialized() override;

protected:
  // Not owned.
  Io::IoUringFactory& factory_;
};

/**
 * Socket interface that hands out io_uring based IO handles. It is also the bootstrap
 * extension factory which configures itself from the IoUringSocketInterface proto.
 */
class SocketInterfaceImpl : public Network::SocketInterfaceBase {
public:
  // SocketInterface
  Network::IoHandlePtr socket(Network::Socket::Type socket_type, Network::Address::Type addr_type,
                              Network::Address::IpVersion version, bool socket_v6only,
                              const Network::SocketCreationOptions& options) const override;
  Network::IoHandlePtr socket(Network::Socket::Type socket_type,
                              const Network::Address::InstanceConstSharedPtr addr,
                              const Network::SocketCreationOptions& options) const override;
  bool ipFamilySupported(int domain) override;

  // Server::Configuration::BootstrapExtensionFactory
  Server::BootstrapExtensionPtr
  createBootstrapExtension(const Protobuf::Message& message,
                           Server::Configuration::ServerFactoryContext& context) override;

  ProtobufTypes::MessagePtr createEmptyConfigProto() override;
  std::string name() const override {
    return "envoy.extensions.network.socket_interface.io_uring";
  }

private:
  // Read buffer size passed to every created IO handle. Assigned in
  // createBootstrapExtension(); explicitly zero until then so it never holds an indeterminate
  // value.
  uint32_t read_buffer_size_{0};
  // Created in createBootstrapExtension(); null until then.
  std::unique_ptr<Io::IoUringFactory> io_uring_factory_;
};

DECLARE_FACTORY(SocketInterfaceImpl);

} // namespace IoUring
} // namespace IoSocket
} // namespace Extensions
} // namespace Envoy
Loading