From 52a462c473f11f1f6a8c7dd151911038d55fac16 Mon Sep 17 00:00:00 2001 From: Marten Richter Date: Sat, 12 Mar 2022 16:55:22 +0000 Subject: [PATCH] libuv replace epoll and macos inspired by benbenz --- CMakeLists.txt | 12 +- .../quic/platform/impl/quic_epoll_clock.cc | 4 +- .../net/quic/platform/impl/quic_epoll_clock.h | 10 +- .../net/quic/platform/impl/quic_epoll_impl.h | 10 +- .../net/quic/platform/impl/quic_iovec_impl.h | 2 +- .../impl/simple_libuv_epoll_server.cc | 829 +++++++++++++ .../platform/impl/simple_libuv_epoll_server.h | 1058 +++++++++++++++++ .../quiche_platform_impl/quic_mutex_impl.h | 2 +- src/http3server.cc | 18 +- 9 files changed, 1921 insertions(+), 24 deletions(-) create mode 100644 platform/net/quic/platform/impl/simple_libuv_epoll_server.cc create mode 100644 platform/net/quic/platform/impl/simple_libuv_epoll_server.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 2472678b..5c68acd2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ project (webtransport) set(CMAKE_POSITION_INDEPENDENT_CODE TRUE) -set(CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD 17) set(BUILD_TESTING OFF) @@ -69,6 +69,8 @@ platform/net/quic/platform/impl/quic_server_stats_impl.h platform/net/quic/platform/impl/quic_stack_trace_impl.h platform/net/quic/platform/impl/quic_epoll_clock.h platform/net/quic/platform/impl/quic_epoll_clock.cc +platform/net/quic/platform/impl/simple_libuv_epoll_server.cc +platform/net/quic/platform/impl/simple_libuv_epoll_server.h #protofiles #${PROTO_SRCS} #${CMAKE_CURRENT_BINARY_DIR}/third_party/quiche/quic/core/proto/cached_network_parameters.pb.h @@ -791,9 +793,11 @@ set_property(TARGET gquiche PROPERTY CXX_STANDARD 17) target_include_directories(gquiche PUBLIC third_party/quiche PUBLIC third_party/boringssl/src/include +PUBLIC third_party/abseil-cpp PUBLIC platform PUBLIC . PUBLIC ${CMAKE_CURRENT_BINARY_DIR} +PUBLIC ${CMAKE_JS_INC} ) target_link_libraries(gquiche absl::base @@ -806,6 +810,10 @@ absl::synchronization protobuf ssl crypto) +if(APPLE) + target_compile_definitions (gquiche PRIVATE __APPLE_USE_RFC_3542) +endif() + # google quiche build parameters end @@ -830,7 +838,7 @@ target_include_directories(${PROJECT_NAME} PRIVATE ${NODE_ADDON_API_DIR}) -add_definitions(-DNAPI_VERSION=3) +#add_definitions(-DNAPI_VERSION=3) diff --git a/platform/net/quic/platform/impl/quic_epoll_clock.cc b/platform/net/quic/platform/impl/quic_epoll_clock.cc index 82c91453..373467cf 100644 --- a/platform/net/quic/platform/impl/quic_epoll_clock.cc +++ b/platform/net/quic/platform/impl/quic_epoll_clock.cc @@ -2,11 +2,11 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "net/quic/platform/impl/quic_epoll_clock.h" -#include "epoll_server/simple_epoll_server.h" +#include "net/quic/platform/impl/simple_libuv_epoll_server.h" #include "quic/platform/api/quic_flag_utils.h" #include "quic/platform/api/quic_flags.h" namespace quic { -QuicEpollClock::QuicEpollClock(epoll_server::SimpleEpollServer* epoll_server) +QuicEpollClock::QuicEpollClock(QuicEpollServerImpl* epoll_server) : epoll_server_(epoll_server), largest_time_(QuicTime::Zero()) {} QuicEpollClock::~QuicEpollClock() {} QuicTime QuicEpollClock::ApproximateNow() const { diff --git a/platform/net/quic/platform/impl/quic_epoll_clock.h b/platform/net/quic/platform/impl/quic_epoll_clock.h index c3d18a15..1159fb44 100644 --- a/platform/net/quic/platform/impl/quic_epoll_clock.h +++ b/platform/net/quic/platform/impl/quic_epoll_clock.h @@ -6,15 +6,15 @@ // #include "base/compiler_specific.h" #include "quic/core/quic_clock.h" #include "quic/core/quic_time.h" -namespace epoll_server { -class SimpleEpollServer; -} // namespace epoll_server +#include "net/quic/platform/impl/quic_epoll_impl.h" + + namespace quic { // Clock to efficiently retrieve an approximately accurate time from an // net::EpollServer. class QuicEpollClock : public QuicClock { public: - explicit QuicEpollClock(epoll_server::SimpleEpollServer* epoll_server); + explicit QuicEpollClock(QuicEpollServerImpl* epoll_server); QuicEpollClock(const QuicEpollClock&) = delete; QuicEpollClock& operator=(const QuicEpollClock&) = delete; ~QuicEpollClock() override; @@ -31,7 +31,7 @@ class QuicEpollClock : public QuicClock { QuicTime ConvertWallTimeToQuicTime( const QuicWallTime& walltime) const override; protected: - epoll_server::SimpleEpollServer* epoll_server_; + QuicEpollServerImpl* epoll_server_; // Largest time returned from Now() so far. mutable QuicTime largest_time_; }; diff --git a/platform/net/quic/platform/impl/quic_epoll_impl.h b/platform/net/quic/platform/impl/quic_epoll_impl.h index afe2d214..10a5fa25 100644 --- a/platform/net/quic/platform/impl/quic_epoll_impl.h +++ b/platform/net/quic/platform/impl/quic_epoll_impl.h @@ -10,14 +10,14 @@ #ifndef NET_QUIC_PLATFORM_IMPL_QUIC_EPOLL_IMPL_H_ #define NET_QUIC_PLATFORM_IMPL_QUIC_EPOLL_IMPL_H_ -#include "epoll_server/simple_epoll_server.h" +#include "net/quic/platform/impl/simple_libuv_epoll_server.h" namespace quic { -using QuicEpollServerImpl = epoll_server::SimpleEpollServer; -using QuicEpollEventImpl = epoll_server::EpollEvent; -using QuicEpollAlarmBaseImpl = epoll_server::EpollAlarm; -using QuicEpollCallbackInterfaceImpl = epoll_server::EpollCallbackInterface; +using QuicEpollServerImpl = epoll_server::SimpleLibuvEpollServer; +using QuicEpollEventImpl = epoll_server::LibuvEpollEvent; +using QuicEpollAlarmBaseImpl = epoll_server::LibuvEpollAlarm; +using QuicEpollCallbackInterfaceImpl = epoll_server::LibuvEpollCallbackInterface; } // namespace quic diff --git a/platform/net/quic/platform/impl/quic_iovec_impl.h b/platform/net/quic/platform/impl/quic_iovec_impl.h index 511d20cc..a0c16ecb 100644 --- a/platform/net/quic/platform/impl/quic_iovec_impl.h +++ b/platform/net/quic/platform/impl/quic_iovec_impl.h @@ -14,7 +14,7 @@ struct iovec { void* iov_base; /* Pointer to data. */ size_t iov_len; /* Length of data. */ }; -#elif defined(linux) +#elif defined(linux) || defined(__APPLE__) #include #endif // defined(OS_WIN) diff --git a/platform/net/quic/platform/impl/simple_libuv_epoll_server.cc b/platform/net/quic/platform/impl/simple_libuv_epoll_server.cc new file mode 100644 index 00000000..0cc36e35 --- /dev/null +++ b/platform/net/quic/platform/impl/simple_libuv_epoll_server.cc @@ -0,0 +1,829 @@ + +// libuv based on initial portions of benbenz +// https://github.com/benbenz/webtransport/commit/2f198d13709308f59f419db6940ae7df5f8d0e0a + +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "net/quic/platform/impl/simple_libuv_epoll_server.h" + +#include // for errno +#include // for abort +#include // for strerror_r +#include // For read, pipe, close and write. + +#include +#include + +#include "epoll_server/platform/api/epoll_bug.h" +#include "epoll_server/platform/api/epoll_time.h" + +// Design notes: An efficient implementation of ready list has the following +// desirable properties: +// +// A. O(1) insertion into/removal from the list in any location. +// B. Once the callback is found by hash lookup using the fd, the lookup of +// corresponding entry in the list is O(1). +// C. Safe insertion into/removal from the list during list iteration. (The +// ready list's purpose is to enable completely event driven I/O model. +// Thus, all the interesting bits happen in the callback. It is critical +// to not place any restriction on the API during list iteration. +// +// The current implementation achieves these goals with the following design: +// +// - The ready list is constructed as a doubly linked list to enable O(1) +// insertion/removal (see man 3 queue). +// - The forward and backward links are directly embedded inside the +// CBAndEventMask struct. This enables O(1) lookup in the list for a given +// callback. (Techincally, we could've used std::list of hash_set::iterator, +// and keep a list::iterator in CBAndEventMask to achieve the same effect. +// However, iterators have two problems: no way to portably invalidate them, +// and no way to tell whether an iterator is singular or not. The only way to +// overcome these issues is to keep bools in both places, but that throws off +// memory alignment (up to 7 wasted bytes for each bool). The extra level of +// indirection will also likely be less cache friendly. Direct manipulation +// of link pointers makes it easier to retrieve the CBAndEventMask from the +// list, easier to check whether an CBAndEventMask is in the list, uses less +// memory (save 32 bytes/fd), and does not affect cache usage (we need to +// read in the struct to use the callback anyway).) +// - Embed the fd directly into CBAndEventMask and switch to using hash_set. +// This removes the need to store hash_map::iterator in the list just so that +// we can get both the fd and the callback. +// - The ready list is "one shot": each entry is removed before OnEvent is +// called. This removes the mutation-while-iterating problem. +// - Use two lists to keep track of callbacks. The ready_list_ is the one used +// for registration. Before iteration, the ready_list_ is swapped into the +// tmp_list_. Once iteration is done, tmp_list_ will be empty, and +// ready_list_ will have all the new ready fds. + +// The size we use for buffers passed to strerror_r +static const int kErrorBufferSize = 256; + +namespace epoll_server { + +template +class AutoReset { + public: + AutoReset(T* scoped_variable, T new_value) + : scoped_variable_(scoped_variable), + original_value_(std::move(*scoped_variable)) { + *scoped_variable_ = std::move(new_value); + } + AutoReset(const AutoReset&) = delete; + AutoReset& operator=(const AutoReset&) = delete; + + ~AutoReset() { *scoped_variable_ = std::move(original_value_); } + + private: + T* scoped_variable_; + T original_value_; +}; + +// Clears the pipe and returns. Used for waking the epoll server up. +class ReadPipeCallback : public LibuvEpollCallbackInterface { + public: + void OnEvent(int fd, LibuvEpollEvent* event) override { + DCHECK(event->in_events == UV_READABLE); + int data; + int data_read = 1; + // Read until the pipe is empty. + while (data_read > 0) { + data_read = read(fd, &data, sizeof(data)); + } + } + void OnShutdown(SimpleLibuvEpollServer* /*eps*/, int /*fd*/) override {} + void OnRegistration(SimpleLibuvEpollServer*, int, int) override {} + void OnModification(int, int) override {} // COV_NF_LINE + void OnUnregistration(int, bool) override {} // COV_NF_LINE + std::string Name() const override { return "ReadPipeCallback"; } +}; + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +SimpleLibuvEpollServer::SimpleLibuvEpollServer() + : timeout_in_us_(0), + recorded_now_in_us_(0), + ready_list_size_(0), + wake_cb_(new ReadPipeCallback), + read_fd_(-1), + write_fd_(-1), + in_wait_for_events_and_execute_callbacks_(false), + in_shutdown_(false), + last_delay_in_usec_(0) { + + uv_loop_init(&loop); + uv_timer_init(&loop, &looptimer); + + LIST_INIT(&ready_list_); + LIST_INIT(&tmp_list_); + + int pipe_fds[2]; + if (pipe(pipe_fds) < 0) { + // Unfortunately, it is impossible to test any such initialization in + // a constructor (as virtual methods do not yet work). + // This -could- be solved by moving initialization to an outside + // call... + int saved_errno = errno; + char buf[kErrorBufferSize]; + EPOLL_LOG(FATAL) << "Error " << saved_errno << " in pipe(): " + << strerror_r(saved_errno, buf, sizeof(buf)); + } + read_fd_ = pipe_fds[0]; + write_fd_ = pipe_fds[1]; + RegisterFD(read_fd_, wake_cb_.get(), UV_READABLE); +} + +void SimpleLibuvEpollServer::CleanupFDToCBMap() { + auto cb_iter = cb_map_.begin(); + while (cb_iter != cb_map_.end()) { + int fd = cb_iter->fd; + CB* cb = cb_iter->cb; + + cb_iter->in_use = true; + if (cb) { + cb->OnShutdown(this, fd); + } + + cb_map_.erase(cb_iter); + cb_iter = cb_map_.begin(); + } +} + +void SimpleLibuvEpollServer::CleanupTimeToAlarmCBMap() { + TimeToAlarmCBMap::iterator erase_it; + + // Call OnShutdown() on alarms. Note that the structure of the loop + // is similar to the structure of loop in the function HandleAlarms() + for (auto i = alarm_map_.begin(); i != alarm_map_.end();) { + // Note that OnShutdown() can call UnregisterAlarm() on + // other iterators. OnShutdown() should not call UnregisterAlarm() + // on self because by definition the iterator is not valid any more. + i->second->OnShutdown(this); + erase_it = i; + ++i; + alarm_map_.erase(erase_it); + } +} + +SimpleLibuvEpollServer::~SimpleLibuvEpollServer() { + DCHECK_EQ(in_shutdown_, false); + in_shutdown_ = true; +#ifdef EPOLL_SERVER_EVENT_TRACING + EPOLL_LOG(INFO) << "\n" << event_recorder_; +#endif + EPOLL_VLOG(2) << "Shutting down epoll server "; + CleanupFDToCBMap(); + + LIST_INIT(&ready_list_); + LIST_INIT(&tmp_list_); + + CleanupTimeToAlarmCBMap(); + + close(read_fd_); + close(write_fd_); + uv_timer_stop(&looptimer); + uv_close((uv_handle_t*) &looptimer, nullptr); + uv_loop_close(&loop); +} + +// Whether a CBAandEventMask is on the ready list is determined by a non-NULL +// le_prev pointer (le_next being NULL indicates end of list). +inline void SimpleLibuvEpollServer::AddToReadyList(CBAndEventMask* cb_and_mask) { + if (cb_and_mask->entry.le_prev == NULL) { + LIST_INSERT_HEAD(&ready_list_, cb_and_mask, entry); + ++ready_list_size_; + } +} + +inline void SimpleLibuvEpollServer::RemoveFromReadyList( + const CBAndEventMask& cb_and_mask) { + if (cb_and_mask.entry.le_prev != NULL) { + LIST_REMOVE(&cb_and_mask, entry); + // Clean up all the ready list states. Don't bother with the other fields + // as they are initialized when the CBAandEventMask is added to the ready + // list. This saves a few cycles in the inner loop. + cb_and_mask.entry.le_prev = NULL; + --ready_list_size_; + if (ready_list_size_ == 0) { + DCHECK(ready_list_.lh_first == NULL); + DCHECK(tmp_list_.lh_first == NULL); + } + } +} + +void SimpleLibuvEpollServer::RegisterFD(int fd, CB* cb, int event_mask) { + CHECK(cb); + EPOLL_VLOG(3) << "RegisterFD fd=" << fd << " event_mask=" << event_mask; + auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (cb_map_.end() != fd_i) { + // do we just abort, or do we just unregister the other callback? + // for now, lets just unregister the other callback. + + // unregister any callback that may already be registered for this FD. + CB* other_cb = fd_i->cb; + if (other_cb) { + // Must remove from the ready list before erasing. + RemoveFromReadyList(*fd_i); + other_cb->OnUnregistration(fd, true); + ModFD(fd, &fd_i->handle, event_mask); + } else { + // already unregistered, so just recycle the node. + AddFD(fd, &fd_i->handle, event_mask); + } + fd_i->cb = cb; + fd_i->event_mask = event_mask; + fd_i->events_to_fake = 0; + } else { + auto pair = cb_map_.insert(CBAndEventMask(cb, event_mask, fd)); + auto it = pair.first; + AddFD(fd, &it->handle, event_mask); + } + + // set the FD to be non-blocking. + SetNonblocking(fd); + + cb->OnRegistration(this, fd, event_mask); +} + +void SimpleLibuvEpollServer::SetNonblocking(int fd) { + int flags = fcntl(fd, F_GETFL, 0); + if (flags == -1) { + int saved_errno = errno; + char buf[kErrorBufferSize]; + EPOLL_LOG(FATAL) << "Error " << saved_errno << " doing fcntl(" << fd + << ", F_GETFL, 0): " + << strerror_r(saved_errno, buf, sizeof(buf)); + } + if (!(flags & O_NONBLOCK)) { + int saved_flags = flags; + flags = fcntl(fd, F_SETFL, flags | O_NONBLOCK); + if (flags == -1) { + // bad. + int saved_errno = errno; + char buf[kErrorBufferSize]; + EPOLL_LOG(FATAL) << "Error " << saved_errno << " doing fcntl(" << fd + << ", F_SETFL, " << saved_flags + << "): " << strerror_r(saved_errno, buf, sizeof(buf)); + } + } +} + +int SimpleLibuvEpollServer::libuv_wait_impl(int timeout_in_ms) { + + uv_timer_start(&looptimer, timercallback, timeout_in_ms, 0); + int ret = uv_run(&loop, UV_RUN_ONCE); + uv_timer_stop(&looptimer); + return ret; +} + +void SimpleLibuvEpollServer::RegisterFDForWrite(int fd, CB* cb) { + RegisterFD(fd, cb, UV_WRITABLE); +} + +void SimpleLibuvEpollServer::RegisterFDForReadWrite(int fd, CB* cb) { + RegisterFD(fd, cb, UV_READABLE | UV_WRITABLE); +} + +void SimpleLibuvEpollServer::RegisterFDForRead(int fd, CB* cb) { + RegisterFD(fd, cb, UV_READABLE); +} + +void SimpleLibuvEpollServer::UnregisterFD(int fd) { + auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (cb_map_.end() == fd_i || fd_i->cb == NULL) { + // Doesn't exist in server, or has gone through UnregisterFD once and still + // inside the callchain of OnEvent. + return; + } +#ifdef EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordUnregistration(fd); +#endif + CB* cb = fd_i->cb; + // Since the links are embedded within the struct, we must remove it from the + // list before erasing it from the hash_set. + RemoveFromReadyList(*fd_i); + DelFD(fd, &fd_i->handle); + cb->OnUnregistration(fd, false); + // fd_i->cb is NULL if that fd is unregistered inside the callchain of + // OnEvent. Since the SimpleEpollServer needs a valid CBAndEventMask after + // OnEvent returns in order to add it to the ready list, we cannot have + // UnregisterFD erase the entry if it is in use. Thus, a NULL fd_i->cb is used + // as a condition that tells the SimpleEpollServer that this entry is unused + // at a later point. + if (!fd_i->in_use) { + cb_map_.erase(fd_i); + } else { + // Remove all trace of the registration, and just keep the node alive long + // enough so the code that calls OnEvent doesn't have to worry about + // figuring out whether the CBAndEventMask is valid or not. + fd_i->cb = NULL; + fd_i->event_mask = 0; + fd_i->events_to_fake = 0; + } +} + +void SimpleLibuvEpollServer::ModifyCallback(int fd, int event_mask) { + ModifyFD(fd, ~0, event_mask); +} + +void SimpleLibuvEpollServer::StopRead(int fd) { ModifyFD(fd, UV_READABLE, 0); } + +void SimpleLibuvEpollServer::StartRead(int fd) { ModifyFD(fd, 0, UV_READABLE); } + +void SimpleLibuvEpollServer::StopWrite(int fd) { ModifyFD(fd, UV_WRITABLE, 0); } + +void SimpleLibuvEpollServer::StartWrite(int fd) { ModifyFD(fd, 0, UV_WRITABLE); } + +void SimpleLibuvEpollServer::HandleEvent(int fd, int event_mask) { +#ifdef EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordEpollEvent(fd, event_mask); +#endif + auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (fd_i == cb_map_.end() || fd_i->cb == NULL) { + // Ignore the event. + // This could occur if epoll() returns a set of events, and + // while processing event A (earlier) we removed the callback + // for event B (and are now processing event B). + return; + } + fd_i->events_asserted = event_mask; + CBAndEventMask* cb_and_mask = const_cast(&*fd_i); + AddToReadyList(cb_and_mask); +} + +void SimpleLibuvEpollServer::WaitForEventsAndExecuteCallbacks() { + if (in_wait_for_events_and_execute_callbacks_) { + EPOLL_LOG(DFATAL) << "Attempting to call WaitForEventsAndExecuteCallbacks" + " when an ancestor to the current function is already" + " WaitForEventsAndExecuteCallbacks!"; + // The line below is actually tested, but in coverage mode, + // we never see it. + return; // COV_NF_LINE + } + AutoReset recursion_guard(&in_wait_for_events_and_execute_callbacks_, + true); + if (alarm_map_.empty()) { + // no alarms, this is business as usual. + WaitForEventsAndCallHandleEvents(timeout_in_us_); + recorded_now_in_us_ = 0; + return; + } + + // store the 'now'. If we recomputed 'now' every iteration + // down below, then we might never exit that loop-- any + // long-running alarms might install other long-running + // alarms, etc. By storing it here now, we ensure that + // a more reasonable amount of work is done here. + int64_t now_in_us = NowInUsec(); + + // Get the first timeout from the alarm_map where it is + // stored in absolute time. + int64_t next_alarm_time_in_us = alarm_map_.begin()->first; + EPOLL_VLOG(4) << "next_alarm_time = " << next_alarm_time_in_us + << " now = " << now_in_us + << " timeout_in_us = " << timeout_in_us_; + + int64_t wait_time_in_us; + int64_t alarm_timeout_in_us = next_alarm_time_in_us - now_in_us; + + // If the next alarm is sooner than the default timeout, or if there is no + // timeout (timeout_in_us_ == -1), wake up when the alarm should fire. + // Otherwise use the default timeout. + if (alarm_timeout_in_us < timeout_in_us_ || timeout_in_us_ < 0) { + wait_time_in_us = std::max(alarm_timeout_in_us, static_cast(0)); + } else { + wait_time_in_us = timeout_in_us_; + } + + EPOLL_VLOG(4) << "wait_time_in_us = " << wait_time_in_us; + + // wait for events. + + WaitForEventsAndCallHandleEvents(wait_time_in_us); + CallAndReregisterAlarmEvents(); + recorded_now_in_us_ = 0; +} + +void SimpleLibuvEpollServer::SetFDReady(int fd, int events_to_fake) { + auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (cb_map_.end() != fd_i && fd_i->cb != NULL) { + // This const_cast is necessary for LIST_HEAD_INSERT to work. Declaring + // entry mutable is insufficient because LIST_HEAD_INSERT assigns the + // forward pointer of the list head to the current cb_and_mask, and the + // compiler complains that it can't assign a const T* to a T*. + CBAndEventMask* cb_and_mask = const_cast(&*fd_i); + // Note that there is no clearly correct behavior here when + // cb_and_mask->events_to_fake != 0 and this function is called. + // Of the two operations: + // cb_and_mask->events_to_fake = events_to_fake + // cb_and_mask->events_to_fake |= events_to_fake + // the first was picked because it discourages users from calling + // SetFDReady repeatedly to build up the correct event set as it is more + // efficient to call SetFDReady once with the correct, final mask. + cb_and_mask->events_to_fake = events_to_fake; + AddToReadyList(cb_and_mask); + } +} + +void SimpleLibuvEpollServer::SetFDNotReady(int fd) { + auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (cb_map_.end() != fd_i) { + RemoveFromReadyList(*fd_i); + } +} + +bool SimpleLibuvEpollServer::IsFDReady(int fd) const { + auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + return (cb_map_.end() != fd_i && fd_i->cb != NULL && + fd_i->entry.le_prev != NULL); +} + +void SimpleLibuvEpollServer::VerifyReadyList() const { + int count = 0; + CBAndEventMask* cur = ready_list_.lh_first; + for (; cur; cur = cur->entry.le_next) { + ++count; + } + for (cur = tmp_list_.lh_first; cur; cur = cur->entry.le_next) { + ++count; + } + CHECK_EQ(ready_list_size_, count) << "Ready list size does not match count"; +} + +void SimpleLibuvEpollServer::RegisterAlarm(int64_t timeout_time_in_us, AlarmCB* ac) { + EPOLL_VLOG(4) << "RegisteringAlarm " << ac << " at : " << timeout_time_in_us; + CHECK(ac); + if (all_alarms_.find(ac) != all_alarms_.end()) { + EPOLL_BUG(epoll_bug_1_1) << "Alarm already exists"; + } + + auto alarm_iter = alarm_map_.insert(std::make_pair(timeout_time_in_us, ac)); + + all_alarms_.insert(ac); + // Pass the iterator to the EpollAlarmCallbackInterface. + ac->OnRegistration(alarm_iter, this); +} + +// Unregister a specific alarm callback: iterator_token must be a +// valid iterator. The caller must ensure the validity of the iterator. +void SimpleLibuvEpollServer::UnregisterAlarm(const AlarmRegToken& iterator_token) { + AlarmCB* cb = iterator_token->second; + EPOLL_VLOG(4) << "UnregisteringAlarm " << cb; + alarm_map_.erase(iterator_token); + all_alarms_.erase(cb); + cb->OnUnregistration(); +} + +SimpleLibuvEpollServer::AlarmRegToken SimpleLibuvEpollServer::ReregisterAlarm( + SimpleLibuvEpollServer::AlarmRegToken iterator_token, + int64_t timeout_time_in_us) { + AlarmCB* cb = iterator_token->second; + alarm_map_.erase(iterator_token); + return alarm_map_.emplace(timeout_time_in_us, cb); +} + +int SimpleLibuvEpollServer::NumFDsRegistered() const { + DCHECK_GE(cb_map_.size(), 1u); + // Omit the internal FD (read_fd_) + return cb_map_.size() - 1; +} + +void SimpleLibuvEpollServer::Wake() { + char data = 'd'; // 'd' is for data. It's good enough for me. + int rv = write(write_fd_, &data, 1); + DCHECK_EQ(rv, 1); +} + +int64_t SimpleLibuvEpollServer::NowInUsec() const { return WallTimeNowInUsec(); } + +int64_t SimpleLibuvEpollServer::ApproximateNowInUsec() const { + if (recorded_now_in_us_ != 0) { + return recorded_now_in_us_; + } + return this->NowInUsec(); +} + +std::string SimpleLibuvEpollServer::EventMaskToString(int event_mask) { + std::string s; + if (event_mask & UV_READABLE) s += "UV_READABLE "; + if (event_mask & UV_DISCONNECT) s += "UV_DISCONNECT "; + if (event_mask & UV_WRITABLE) s += "UV_WRITABLE "; + if (event_mask & UV_PRIORITIZED) s += "UV_PRIORITIZED "; + return s; +} + +void SimpleLibuvEpollServer::LogStateOnCrash() { + EPOLL_LOG(ERROR) + << "-------------------Epoll Server-------------------------"; + EPOLL_LOG(ERROR) << "Epoll server " << this ; + EPOLL_LOG(ERROR) << "timeout_in_us_: " << timeout_in_us_; + + // Log sessions with alarms. + EPOLL_LOG(ERROR) << alarm_map_.size() << " alarms registered."; + for (auto it = alarm_map_.begin(); it != alarm_map_.end(); ++it) { + const bool skipped = + alarms_reregistered_and_should_be_skipped_.find(it->second) != + alarms_reregistered_and_should_be_skipped_.end(); + EPOLL_LOG(ERROR) << "Alarm " << it->second << " registered at time " + << it->first << " and should be skipped = " << skipped; + } + + EPOLL_LOG(ERROR) << cb_map_.size() << " fd callbacks registered."; + for (auto it = cb_map_.begin(); it != cb_map_.end(); ++it) { + EPOLL_LOG(ERROR) << "fd: " << it->fd << " with mask " << it->event_mask + << " registered with cb: " << it->cb; + } + EPOLL_LOG(ERROR) + << "-------------------/Epoll Server------------------------"; +} + +void SimpleLibuvEpollServer::eventcallback( uv_poll_t *handle, int status, int events ) { + SimpleLibuvEpollServer* server = (SimpleLibuvEpollServer*) handle->data ; + int fd ; + uv_fileno((uv_handle_t*)handle,&fd) ; + server->HandleEvent( fd , events ) ; +} + +void SimpleLibuvEpollServer::timercallback(uv_timer_t *handle) +{ + // NOOP +} + +void SimpleLibuvEpollServer::closecallback( uv_handle_t* handle ) { +} + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +void SimpleLibuvEpollServer::DelFD(int fd, uv_poll_t *handle) const { +#ifdef EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordFDMaskEvent(fd, 0, "DelFD"); +#endif + int error = uv_poll_stop(handle); + if (error) { + int saved_errno = error; + EPOLL_LOG(FATAL) << "Epoll set removal error for fd " << fd << ": " + << uv_strerror(saved_errno); + } + uv_close((uv_handle_t*)&handle, closecallback); +} + +//////////////////////////////////////// + +void SimpleLibuvEpollServer::AddFD(int fd, uv_poll_t *ee, int event_mask) const { + memset(ee, 0, sizeof(ee)); + ee->data = (void*) this; +#ifdef EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordFDMaskEvent(fd, ee.events, "AddFD"); +#endif + int error = uv_poll_init(const_cast(&loop), ee, fd); + if (error) { + int saved_errno = error; + EPOLL_LOG(FATAL) << "Epoll uv_poll_init error for fd " << fd << ": " + << uv_strerror(saved_errno); + return ; + } + error = uv_poll_start(ee, event_mask | UV_DISCONNECT, eventcallback); + if (error) { + int saved_errno = error; + EPOLL_LOG(FATAL) << "Epoll uv_poll_start error for fd " << fd << ": " + << uv_strerror(saved_errno); + return ; + } + +} + +//////////////////////////////////////// + +void SimpleLibuvEpollServer::ModFD(int fd, uv_poll_t* handle, int event_mask) const { +#ifdef EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordFDMaskEvent(fd, ee.events, "ModFD"); +#endif + EPOLL_VLOG(3) << "modifying fd= " << fd << " " + << EventMaskToString(event_mask); + // uv_poll_stop(handle); // not neccessary can be called multiple times + int error = uv_poll_start(handle, event_mask | UV_DISCONNECT,eventcallback); + + if (error) { + int saved_errno = error; + char buf[kErrorBufferSize]; + EPOLL_LOG(FATAL) << "Epoll set modification error for fd " << fd << ": " + << uv_strerror(saved_errno); + } +} + +//////////////////////////////////////// + +void SimpleLibuvEpollServer::ModifyFD(int fd, int remove_event, int add_event) { + auto fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); + if (cb_map_.end() == fd_i) { + EPOLL_VLOG(2) << "Didn't find the fd " << fd << "in internal structures"; + return; + } + + if (fd_i->cb != NULL) { + int& event_mask = fd_i->event_mask; + EPOLL_VLOG(3) << "fd= " << fd + << " event_mask before: " << EventMaskToString(event_mask); + event_mask &= ~remove_event; + event_mask |= add_event; + + EPOLL_VLOG(3) << " event_mask after: " << EventMaskToString(event_mask); + + ModFD(fd, &fd_i->handle, event_mask); + + fd_i->cb->OnModification(fd, event_mask); + } +} + +void SimpleLibuvEpollServer::WaitForEventsAndCallHandleEvents(int64_t timeout_in_us) { + if (timeout_in_us == 0 || ready_list_.lh_first != NULL) { + // If ready list is not empty, then don't sleep at all. + timeout_in_us = 0; + } else if (timeout_in_us < 0) { + EPOLL_LOG(INFO) << "Negative epoll timeout: " << timeout_in_us + << "us; epoll will wait forever for events."; + // If timeout_in_us is < 0 we are supposed to Wait forever. This means we + // should set timeout_in_us to -1000 so we will + // Wait(-1000/1000) == Wait(-1) == Wait forever. + timeout_in_us = -1000; + } else { + // If timeout is specified, and the ready list is empty. + if (timeout_in_us < 1000) { + timeout_in_us = 1000; + } + } + const int timeout_in_ms = timeout_in_us / 1000; + int64_t expected_wakeup_us = NowInUsec() + timeout_in_us; + + int nfds = libuv_wait_impl(timeout_in_ms); + EPOLL_VLOG(3) << "nfds=" << nfds; + +#ifdef EPOLL_SERVER_EVENT_TRACING + event_recorder_.RecordEpollWaitEvent(timeout_in_ms, nfds); +#endif + + // If you're wondering why the NowInUsec() is recorded here, the answer is + // simple: If we did it before the epoll_wait_impl, then the max error for + // the ApproximateNowInUs() call would be as large as the maximum length of + // epoll_wait, which can be arbitrarily long. Since this would make + // ApproximateNowInUs() worthless, we instead record the time -after- we've + // done epoll_wait, which guarantees that the maximum error is the amount of + // time it takes to process all the events generated by epoll_wait. + recorded_now_in_us_ = NowInUsec(); + + if (timeout_in_us > 0) { + int64_t delta = NowInUsec() - expected_wakeup_us; + last_delay_in_usec_ = delta > 0 ? delta : 0; + } else { + // timeout_in_us < 0 means we waited forever until an event; + // timeout_in_us == 0 means there was no kernel delay to track. + last_delay_in_usec_ = 0; + } + + if (nfds < 0) { + // Catch interrupted syscall and just ignore it and move on. + if (errno != EINTR && errno != 0) { + int saved_errno = errno; + char buf[kErrorBufferSize]; + EPOLL_LOG(FATAL) << "Error " << saved_errno << " in epoll_wait: " + << strerror_r(saved_errno, buf, sizeof(buf)); + } + } + + // Now run through the ready list. + if (ready_list_.lh_first) { + CallReadyListCallbacks(); + } +} + +void SimpleLibuvEpollServer::CallReadyListCallbacks() { + // Check pre-conditions. + DCHECK(tmp_list_.lh_first == NULL); + // Swap out the ready_list_ into the tmp_list_ before traversing the list to + // enable SetFDReady() to just push new items into the ready_list_. + std::swap(ready_list_.lh_first, tmp_list_.lh_first); + if (tmp_list_.lh_first) { + tmp_list_.lh_first->entry.le_prev = &tmp_list_.lh_first; + LibuvEpollEvent event(0); + while (tmp_list_.lh_first != NULL) { + DCHECK_GT(ready_list_size_, 0); + CBAndEventMask* cb_and_mask = tmp_list_.lh_first; + RemoveFromReadyList(*cb_and_mask); + + event.out_ready_mask = 0; + event.in_events = + cb_and_mask->events_asserted | cb_and_mask->events_to_fake; + // TODO(fenix): get rid of the two separate fields in cb_and_mask. + cb_and_mask->events_asserted = 0; + cb_and_mask->events_to_fake = 0; + { + // OnEvent() may call UnRegister, so we set in_use, here. Any + // UnRegister call will now simply set the cb to NULL instead of + // invalidating the cb_and_mask object (by deleting the object in the + // map to which cb_and_mask refers) + AutoReset in_use_guard(&(cb_and_mask->in_use), true); + cb_and_mask->cb->OnEvent(cb_and_mask->fd, &event); + } + + // Since OnEvent may have called UnregisterFD, we must check here that + // the callback is still valid. If it isn't, then UnregisterFD *was* + // called, and we should now get rid of the object. + if (cb_and_mask->cb == NULL) { + cb_map_.erase(*cb_and_mask); + } else if (event.out_ready_mask != 0) { + cb_and_mask->events_to_fake = event.out_ready_mask; + AddToReadyList(cb_and_mask); + } + } + } + DCHECK(tmp_list_.lh_first == NULL); +} + +void SimpleLibuvEpollServer::CallAndReregisterAlarmEvents() { + int64_t now_in_us = recorded_now_in_us_; + DCHECK_NE(0, recorded_now_in_us_); + + TimeToAlarmCBMap::iterator erase_it; + + // execute alarms. + for (auto i = alarm_map_.begin(); i != alarm_map_.end();) { + if (i->first > now_in_us) { + break; + } + AlarmCB* cb = i->second; + // Execute the OnAlarm() only if we did not register + // it in this loop itself. + const bool added_in_this_round = + alarms_reregistered_and_should_be_skipped_.find(cb) != + alarms_reregistered_and_should_be_skipped_.end(); + if (added_in_this_round) { + ++i; + continue; + } + all_alarms_.erase(cb); + const int64_t new_timeout_time_in_us = cb->OnAlarm(); + + erase_it = i; + ++i; + alarm_map_.erase(erase_it); + + if (new_timeout_time_in_us > 0) { + // We add to hash_set only if the new timeout is <= now_in_us. + // if timeout is > now_in_us then we have no fear that this alarm + // can be reexecuted in this loop, and hence we do not need to + // worry about a recursive loop. + EPOLL_DVLOG(3) << "Reregistering alarm " + << " " << cb << " " << new_timeout_time_in_us << " " + << now_in_us; + if (new_timeout_time_in_us <= now_in_us) { + alarms_reregistered_and_should_be_skipped_.insert(cb); + } + RegisterAlarm(new_timeout_time_in_us, cb); + } + } + alarms_reregistered_and_should_be_skipped_.clear(); +} + +LibuvEpollAlarm::LibuvEpollAlarm() : eps_(NULL), registered_(false) {} + +LibuvEpollAlarm::~LibuvEpollAlarm() { UnregisterIfRegistered(); } + +int64_t LibuvEpollAlarm::OnAlarm() { + registered_ = false; + return 0; +} + +void LibuvEpollAlarm::OnRegistration(const SimpleLibuvEpollServer::AlarmRegToken& token, + SimpleLibuvEpollServer* eps) { + DCHECK_EQ(false, registered_); + + token_ = token; + eps_ = eps; + registered_ = true; +} + +void LibuvEpollAlarm::OnUnregistration() { registered_ = false; } + +void LibuvEpollAlarm::OnShutdown(SimpleLibuvEpollServer* /*eps*/) { + registered_ = false; + eps_ = NULL; +} + +// If the alarm was registered, unregister it. +void LibuvEpollAlarm::UnregisterIfRegistered() { + if (!registered_) { + return; + } + + eps_->UnregisterAlarm(token_); +} + +void LibuvEpollAlarm::ReregisterAlarm(int64_t timeout_time_in_us) { + DCHECK(registered_); + token_ = eps_->ReregisterAlarm(token_, timeout_time_in_us); +} + +} // namespace epoll_server diff --git a/platform/net/quic/platform/impl/simple_libuv_epoll_server.h b/platform/net/quic/platform/impl/simple_libuv_epoll_server.h new file mode 100644 index 00000000..aa8278a9 --- /dev/null +++ b/platform/net/quic/platform/impl/simple_libuv_epoll_server.h @@ -0,0 +1,1058 @@ +// libuv based on initial portions of benbenz +// https://github.com/benbenz/webtransport/commit/2f198d13709308f59f419db6940ae7df5f8d0e0a + +// Copyright 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef QUICHE_LIBUV_SERVER_H_ +#define QUICHE_LIBUV_SERVER_H_ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +// #define EPOLL_SERVER_EVENT_TRACING 1 +// +// Defining EPOLL_SERVER_EVENT_TRACING +// causes code to exist which didn't before. +// This code tracks each event generated by the epollserver, +// as well as providing a per-fd-registered summary of +// events. Note that enabling this code vastly slows +// down operations, and uses substantially more +// memory. For these reasons, it should only be enabled by developers doing +// development at their workstations. +// +// A structure called 'EventRecorder' will exist when +// the macro is defined. See the EventRecorder class interface +// within the SimpleEpollServer class for more details. +#ifdef EPOLL_SERVER_EVENT_TRACING +#include +#endif + +#include + +#include "epoll_server/platform/api/epoll_export.h" +#include "epoll_server/platform/api/epoll_logging.h" + +namespace epoll_server { + +class SimpleLibuvEpollServer; +class LibuvEpollAlarmCallbackInterface; +class ReadPipeCallback; + +struct LibuvEpollEvent { + LibuvEpollEvent(int events) : in_events(events), out_ready_mask(0) {} + + int in_events; // incoming events + int out_ready_mask; // the new event mask for ready list (0 means don't + // get on the ready list). This field is always + // initialized to 0 when the event is passed to + // OnEvent. +}; + +// Callbacks which go into SimpleEpollServers are expected to derive from this +// class. +class LibuvEpollCallbackInterface { + public: + // Summary: + // Called when the callback is registered into a SimpleEpollServer. + // Args: + // eps - the poll server into which this callback was registered + // fd - the file descriptor which was registered + // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc) + // which was registered (and will initially be used + // in the epoll() calls) + virtual void OnRegistration(SimpleLibuvEpollServer* eps, int fd, + int event_mask) = 0; + + // Summary: + // Called when the event_mask is modified (for a file-descriptor) + // Args: + // fd - the file descriptor which was registered + // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc) + // which was is now curren (and will be used + // in subsequent epoll() calls) + virtual void OnModification(int fd, int event_mask) = 0; + + // Summary: + // Called whenever an event occurs on the file-descriptor. + // This is where the bulk of processing is expected to occur. + // Args: + // fd - the file descriptor which was registered + // event - a struct that contains the event mask (composed of EPOLLIN, + // EPOLLOUT, etc), a flag that indicates whether this is a true + // epoll_wait event vs one from the ready list, and an output + // parameter for OnEvent to inform the SimpleEpollServer whether to + // put this fd on the ready list. + virtual void OnEvent(int fd, LibuvEpollEvent* event) = 0; + + // Summary: + // Called when the file-descriptor is unregistered from the poll-server. + // Args: + // fd - the file descriptor which was registered, and of this call, is now + // unregistered. + // replaced - If true, this callback is being replaced by another, otherwise + // it is simply being removed. + virtual void OnUnregistration(int fd, bool replaced) = 0; + + // Summary: + // Called when the epoll server is shutting down. This is different from + // OnUnregistration because the subclass may want to clean up memory. + // This is called in leiu of OnUnregistration. + // Args: + // fd - the file descriptor which was registered. + virtual void OnShutdown(SimpleLibuvEpollServer* eps, int fd) = 0; + + // Summary: + // Returns a name describing the class for use in debug/error reporting. + virtual std::string Name() const = 0; + + virtual ~LibuvEpollCallbackInterface() {} + + protected: + LibuvEpollCallbackInterface() {} +}; + +//////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// + +class EPOLL_EXPORT_PRIVATE SimpleLibuvEpollServer { + public: + typedef LibuvEpollAlarmCallbackInterface AlarmCB; + typedef LibuvEpollCallbackInterface CB; + + typedef std::multimap TimeToAlarmCBMap; + typedef TimeToAlarmCBMap::iterator AlarmRegToken; + + // Summary: + // Constructor: + // By default, we don't wait any amount of time for events, and + // we suggest to the epoll-system that we're going to use on-the-order + // of 1024 FDs. + SimpleLibuvEpollServer(); + + SimpleLibuvEpollServer(const SimpleLibuvEpollServer&) = delete; + SimpleLibuvEpollServer operator=(const SimpleLibuvEpollServer&) = delete; + + //////////////////////////////////////// + + // Destructor + virtual ~SimpleLibuvEpollServer(); + + //////////////////////////////////////// + + // Summary + // Register a callback to be called whenever an event contained + // in the set of events included in event_mask occurs on the + // file-descriptor 'fd' + // + // Note that only one callback is allowed to be registered for + // any specific file-decriptor. + // + // If a callback is registered for a file-descriptor which has already + // been registered, then the previous callback is unregistered with + // the 'replaced' flag set to true. I.e. the previous callback's + // OnUnregistration() function is called like so: + // OnUnregistration(fd, true); + // + // The epoll server does NOT take on ownership of the callback: the callback + // creator is responsible for managing that memory. + // + // Args: + // fd - a valid file-descriptor + // cb - an instance of a subclass of EpollCallbackInterface + // event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating + // the events for which the callback would like to be + // called. + virtual void RegisterFD(int fd, CB* cb, int event_mask); + + //////////////////////////////////////// + + // Summary: + // A shortcut for RegisterFD which sets things up such that the + // callback is called when 'fd' is available for writing. + // Args: + // fd - a valid file-descriptor + // cb - an instance of a subclass of EpollCallbackInterface + virtual void RegisterFDForWrite(int fd, CB* cb); + + //////////////////////////////////////// + + // Summary: + // A shortcut for RegisterFD which sets things up such that the + // callback is called when 'fd' is available for reading or writing. + // Args: + // fd - a valid file-descriptor + // cb - an instance of a subclass of EpollCallbackInterface + virtual void RegisterFDForReadWrite(int fd, CB* cb); + + //////////////////////////////////////// + + // Summary: + // A shortcut for RegisterFD which sets things up such that the + // callback is called when 'fd' is available for reading. + // Args: + // fd - a valid file-descriptor + // cb - an instance of a subclass of EpollCallbackInterface + virtual void RegisterFDForRead(int fd, CB* cb); + + //////////////////////////////////////// + + // Summary: + // Removes the FD and the associated callback from the pollserver. + // If the callback is registered with other FDs, they will continue + // to be processed using the callback without modification. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the file-descriptor which should no-longer be monitored. + virtual void UnregisterFD(int fd); + + //////////////////////////////////////// + + // Summary: + // Modifies the event mask for the file-descriptor, replacing + // the old event_mask with the new one specified here. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the fd whose event mask should be modified. + // event_mask - the new event mask. + virtual void ModifyCallback(int fd, int event_mask); + + //////////////////////////////////////// + + // Summary: + // Modifies the event mask for the file-descriptor such that we + // no longer request events when 'fd' is readable. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the fd whose event mask should be modified. + virtual void StopRead(int fd); + + //////////////////////////////////////// + + // Summary: + // Modifies the event mask for the file-descriptor such that we + // request events when 'fd' is readable. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the fd whose event mask should be modified. + virtual void StartRead(int fd); + + //////////////////////////////////////// + + // Summary: + // Modifies the event mask for the file-descriptor such that we + // no longer request events when 'fd' is writable. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the fd whose event mask should be modified. + virtual void StopWrite(int fd); + + //////////////////////////////////////// + + // Summary: + // Modifies the event mask for the file-descriptor such that we + // request events when 'fd' is writable. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the fd whose event mask should be modified. + virtual void StartWrite(int fd); + + //////////////////////////////////////// + + // Summary: + // Looks up the callback associated with the file-descriptor 'fd'. + // If a callback is associated with this file-descriptor, then + // it's OnEvent() method is called with the file-descriptor 'fd', + // and event_mask 'event_mask' + // + // If no callback is registered for this file-descriptor, nothing + // will happen as a result of this call. + // + // This function is used internally by the SimpleEpollServer, but is + // available publicly so that events might be 'faked'. Calling + // this function with an fd and event_mask is equivalent (as far + // as the callback is concerned) to having a real event generated + // by epoll (except, of course, that read(), etc won't necessarily + // be able to read anything) + // Args: + // fd - the file-descriptor on which an event has occurred. + // event_mask - a bitmask representing the events which have occurred + // on/for this fd. This bitmask is composed of + // POLLIN, POLLOUT, etc. + // + void HandleEvent(int fd, int event_mask); + + // Summary: + // Call this when you want the pollserver to + // wait for events and execute the callbacks associated with + // the file-descriptors on which those events have occurred. + // Depending on the value of timeout_in_us_, this may or may + // not return immediately. Please reference the set_timeout() + // function for the specific behaviour. + virtual void WaitForEventsAndExecuteCallbacks(); + + // Summary: + // When an fd is registered to use edge trigger notification, the ready + // list can be used to simulate level trigger semantics. Edge trigger + // registration doesn't send an initial event, and only rising edge (going + // from blocked to unblocked) events are sent. A callback can put itself on + // the ready list by calling SetFDReady() after calling RegisterFD(). The + // OnEvent method of all callbacks associated with the fds on the ready + // list will be called immediately after processing the events returned by + // epoll_wait(). The fd is removed from the ready list before the + // callback's OnEvent() method is invoked. To stay on the ready list, the + // OnEvent() (or some function in that call chain) must call SetFDReady + // again. When a fd is unregistered using UnregisterFD(), the fd is + // automatically removed from the ready list. + // + // When the callback for a edge triggered fd hits the falling edge (about + // to block, either because of it got an EAGAIN, or had a short read/write + // operation), it should remove itself from the ready list using + // SetFDNotReady() (since OnEvent cannot distinguish between invocation + // from the ready list vs from a normal epoll event). All four ready list + // methods are safe to be called within the context of the callbacks. + // + // Since the ready list invokes EpollCallbackInterface::OnEvent, only fds + // that are registered with the SimpleEpollServer will be put on the ready + // list. SetFDReady() and SetFDNotReady() will do nothing if the + // SimpleEpollServer doesn't know about the fd passed in. + // + // Since the ready list cannot reliably determine proper set of events + // which should be sent to the callback, SetFDReady() requests the caller + // to provide the ready list with the event mask, which will be used later + // when OnEvent() is invoked by the ready list. Hence, the event_mask + // passedto SetFDReady() does not affect the actual epoll registration of + // the fd with the kernel. If a fd is already put on the ready list, and + // SetFDReady() is called again for that fd with a different event_mask, + // the event_mask will be updated. + virtual void SetFDReady(int fd, int events_to_fake); + + virtual void SetFDNotReady(int fd); + + // Summary: + // IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as + // debugging tools and for writing unit tests. + // ISFDReady() returns whether a fd is in the ready list. + // ReadyListSize() returns the number of fds on the ready list. + // VerifyReadyList() checks the consistency of internal data structure. It + // will CHECK if it finds an error. + virtual bool IsFDReady(int fd) const; + + size_t ReadyListSize() const { return ready_list_size_; } + + void VerifyReadyList() const; + + //////////////////////////////////////// + + // Summary: + // Registers an alarm 'ac' to go off at time 'timeout_time_in_us'. + // If the callback returns a positive number from its OnAlarm() function, + // then the callback will be re-registered at that time, else the alarm + // owner is responsible for freeing up memory. + // + // Important: A give AlarmCB* can not be registered again if it is already + // registered. If a user wants to register a callback again it should first + // unregister the previous callback before calling RegisterAlarm again. + // Args: + // timeout_time_in_us - the absolute time at which the alarm should go off + // ac - the alarm which will be called. + virtual void RegisterAlarm(int64_t timeout_time_in_us, AlarmCB* ac); + + // Summary: + // Registers an alarm 'ac' to go off at time: (ApproximateNowInUs() + + // delta_in_us). While this is somewhat less accurate (see the description + // for ApproximateNowInUs() to see how 'approximate'), the error is never + // worse than the amount of time it takes to process all events in one + // WaitForEvents. As with 'RegisterAlarm()', if the callback returns a + // positive number from its OnAlarm() function, then the callback will be + // re-registered at that time, else the alarm owner is responsible for + // freeing up memory. + // Note that this function is purely a convienence. The + // same thing may be accomplished by using RegisterAlarm with + // ApproximateNowInUs() directly. + // + // Important: A give AlarmCB* can not be registered again if it is already + // registered. If a user wants to register a callback again it should first + // unregister the previous callback before calling RegisterAlarm again. + // Args: + // delta_in_us - the delta in microseconds from the ApproximateTimeInUs() at + // which point the alarm should go off. + // ac - the alarm which will be called. + void RegisterAlarmApproximateDelta(int64_t delta_in_us, AlarmCB* ac) { + RegisterAlarm(ApproximateNowInUsec() + delta_in_us, ac); + } + + //////////////////////////////////////// + + // Summary: + // Unregister the alarm referred to by iterator_token; Callers should + // be warned that a token may have become already invalid when OnAlarm() + // is called, was unregistered, or OnShutdown was called on that alarm. + // Args: + // iterator_token - iterator to the alarm callback to unregister. + virtual void UnregisterAlarm( + const SimpleLibuvEpollServer::AlarmRegToken& iterator_token); + + virtual SimpleLibuvEpollServer::AlarmRegToken ReregisterAlarm( + SimpleLibuvEpollServer::AlarmRegToken iterator_token, + int64_t timeout_time_in_us); + + //////////////////////////////////////// + + // Summary: + // returns the number of file-descriptors registered in this + // SimpleEpollServer. + // Returns: + // number of FDs registered (discounting the internal pipe used for Wake) + virtual int NumFDsRegistered() const; + + // Summary: + // Force the epoll server to wake up (by writing to an internal pipe). + virtual void Wake(); + + // Summary: + // Wrapper around WallTimer's NowInUsec. We do this so that we can test + // SimpleEpollServer without using the system clock (and can avoid the + // flakiness that would ensue) + // Returns: + // the current time as number of microseconds since the Unix epoch. + virtual int64_t NowInUsec() const; + + // Summary: + // Since calling NowInUsec() many thousands of times per + // WaitForEventsAndExecuteCallbacks function call is, to say the least, + // inefficient, we allow users to use an approximate time instead. The + // time returned from this function is as accurate as NowInUsec() when + // WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's + // callstack. + // However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then + // this function returns the time at which the + // WaitForEventsAndExecuteCallbacks function started to process events or + // alarms. + // + // Essentially, this function makes available a fast and mostly accurate + // mechanism for getting the time for any function handling an event or + // alarm. When functions which are not handling callbacks or alarms call + // this function, they get the slow and "absolutely" accurate time. + // + // Users should be encouraged to use this function. + // Returns: + // the "approximate" current time as number of microseconds since the Unix + // epoch. + virtual int64_t ApproximateNowInUsec() const; + + static std::string EventMaskToString(int event_mask); + + // Summary: + // Logs the state of the epoll server with EPOLL_LOG(ERROR). + void LogStateOnCrash(); + + // Summary: + // Set the timeout to the value specified. + // If the timeout is set to a negative number, + // WaitForEventsAndExecuteCallbacks() will only return when an event has + // occurred + // If the timeout is set to zero, + // WaitForEventsAndExecuteCallbacks() will return immediately + // If the timeout is set to a positive number, + // WaitForEventsAndExecuteCallbacks() will return when an event has + // occurred, or when timeout_in_us microseconds has elapsed, whichever + // is first. + // Args: + // timeout_in_us - value specified depending on behaviour desired. + // See above. + void set_timeout_in_us(int64_t timeout_in_us) { + timeout_in_us_ = timeout_in_us; + } + + //////////////////////////////////////// + + // Summary: + // Accessor for the current value of timeout_in_us. + int timeout_in_us_for_test() const { return timeout_in_us_; } + + // Summary: + // Returns true when the SimpleEpollServer() is being destroyed. + bool in_shutdown() const { return in_shutdown_; } + bool ShutdownCalled() const { return in_shutdown(); } + + // Compatibility stub. + void Shutdown() {} + + // Summary: + // A function for implementing the ready list. It invokes OnEvent for each + // of the fd in the ready list, and takes care of adding them back to the + // ready list if the callback requests it (by checking that out_ready_mask + // is non-zero). + void CallReadyListCallbacks(); + + int64_t LastDelayInUsec() const { return last_delay_in_usec_; } + + protected: + virtual void SetNonblocking(int fd); + + // This exists here so that we can override this function in unittests + // in order to make effective mock SimpleEpollServer objects. + virtual int libuv_wait_impl(int timeout_in_ms); + + + static void eventcallback( uv_poll_t *handle, int status, int events); + static void timercallback(uv_timer_t *handle); + static void closecallback( uv_handle_t* handle); + + // this struct is used internally, and is never used by anything external + // to this class. Some of its members are declared mutable to get around the + // restriction imposed by hash_set. Since hash_set knows nothing about the + // objects it stores, it has to assume that every bit of the object is used + // in the hash function and equal_to comparison. Thus hash_set::iterator is a + // const iterator. In this case, the only thing that must stay constant is + // fd. Everything else are just along for the ride and changing them doesn't + // compromise the hash_set integrity. + struct CBAndEventMask { + CBAndEventMask() + : cb(NULL), + fd(-1), + event_mask(0), + events_asserted(0), + events_to_fake(0), + in_use(false) { + entry.le_next = NULL; + entry.le_prev = NULL; + } + + CBAndEventMask(LibuvEpollCallbackInterface* cb, int event_mask, int fd) + : cb(cb), + fd(fd), + event_mask(event_mask), + events_asserted(0), + events_to_fake(0), + in_use(false) { + entry.le_next = NULL; + entry.le_prev = NULL; + } + + // Required operator for hash_set. Normally operator== should be a free + // standing function. However, since CBAndEventMask is a protected type and + // it will never be a base class, it makes no difference. + bool operator==(const CBAndEventMask& cb_and_mask) const { + return fd == cb_and_mask.fd; + } + // A callback. If the fd is unregistered inside the callchain of OnEvent, + // the cb will be set to NULL. + mutable LibuvEpollCallbackInterface* cb; + + mutable LIST_ENTRY(CBAndEventMask) entry; + // file descriptor registered with the epoll server. + int fd; + // handle structure + mutable uv_poll_t handle; + // the current event_mask registered for this callback. + mutable int event_mask; + // the event_mask that was returned by epoll + mutable int events_asserted; + // the event_mask for the ready list to use to call OnEvent. + mutable int events_to_fake; + // toggle around calls to OnEvent to tell UnregisterFD to not erase the + // iterator because HandleEvent is using it. + mutable bool in_use; + }; + + // Custom hash function to be used by hash_set. + struct CBAndEventMaskHash { + size_t operator()(const CBAndEventMask& cb_and_eventmask) const { + return static_cast(cb_and_eventmask.fd); + } + }; + + using FDToCBMap = std::unordered_set; + + // the following four functions are OS-specific, and are likely + // to be changed in a subclass if the poll/select method is changed + // from epoll. + + // Summary: + // Deletes a file-descriptor from the set of FDs that should be + // monitored with epoll. + // Note that this only deals with modifying data relating -directly- + // with the epoll call-- it does not modify any data within the + // epoll_server. + // Args: + // fd - the file descriptor to-be-removed from the monitoring set + virtual void DelFD(int fd, uv_poll_t *handle) const; + + //////////////////////////////////////// + + // Summary: + // Adds a file-descriptor to the set of FDs that should be + // monitored with epoll. + // Note that this only deals with modifying data relating -directly- + // with the epoll call. + // Args: + // fd - the file descriptor to-be-added to the monitoring set + // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc + // OR'd together) which will be associated with this + // FD initially. + virtual void AddFD(int fd, uv_poll_t *handle, int event_mask) const; + + //////////////////////////////////////// + + // Summary: + // Modifies a file-descriptor in the set of FDs that should be + // monitored with epoll. + // Note that this only deals with modifying data relating -directly- + // with the epoll call. + // Args: + // fd - the file descriptor to-be-added to the monitoring set + // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc + // OR'd together) which will be associated with this + // FD after this call. + virtual void ModFD(int fd, uv_poll_t *handle, int event_mask) const; + + //////////////////////////////////////// + + // Summary: + // Modified the event mask associated with an FD in the set of + // data needed by epoll. + // Events are removed before they are added, thus, if ~0 is put + // in 'remove_event', whatever is put in 'add_event' will be + // the new event mask. + // If the file-descriptor specified is not registered in the + // epoll_server, then nothing happens as a result of this call. + // Args: + // fd - the file descriptor whose event mask is to be modified + // remove_event - the events which are to be removed from the current + // event_mask + // add_event - the events which are to be added to the current event_mask + // + // + virtual void ModifyFD(int fd, int remove_event, int add_event); + + //////////////////////////////////////// + + // Summary: + // Waits for events, and calls HandleEvents() for each + // fd, event pair discovered to possibly have an event. + // Note that a callback (B) may get a spurious event if + // another callback (A) has closed a file-descriptor N, and + // the callback (B) has a newly opened file-descriptor, which + // also happens to be N. + virtual void WaitForEventsAndCallHandleEvents(int64_t timeout_in_us); + + // Summary: + // An internal function for implementing the ready list. It adds a fd's + // CBAndEventMask to the ready list. If the fd is already on the ready + // list, it is a no-op. + void AddToReadyList(CBAndEventMask* cb_and_mask); + + // Summary: + // An internal function for implementing the ready list. It remove a fd's + // CBAndEventMask from the ready list. If the fd is not on the ready list, + // it is a no-op. + void RemoveFromReadyList(const CBAndEventMask& cb_and_mask); + + // Summary: + // Calls any pending alarms that should go off and reregisters them if they + // were recurring. + virtual void CallAndReregisterAlarmEvents(); + + + // The mapping of file-descriptor to CBAndEventMasks + FDToCBMap cb_map_; + + // Custom hash function to be used by hash_set. + struct AlarmCBHash { + size_t operator()(AlarmCB* const& p) const { + return reinterpret_cast(p); + } + }; + + // TODO(sushantj): Having this hash_set is avoidable. We currently have it + // only so that we can enforce stringent checks that a caller can not register + // the same alarm twice. One option is to have an implementation in which + // this hash_set is used only in the debug mode. + using AlarmCBMap = std::unordered_set; + AlarmCBMap all_alarms_; + + TimeToAlarmCBMap alarm_map_; + + // The amount of time in microseconds that we'll wait before returning + // from the WaitForEventsAndExecuteCallbacks() function. + // If this is positive, wait that many microseconds. + // If this is negative, wait forever, or for the first event that occurs + // If this is zero, never wait for an event. + int64_t timeout_in_us_; + + // This is nonzero only after the invocation of epoll_wait_impl within + // WaitForEventsAndCallHandleEvents and before the function + // WaitForEventsAndExecuteCallbacks returns. At all other times, this is + // zero. This enables us to have relatively accurate time returned from the + // ApproximateNowInUs() function. See that function for more details. + int64_t recorded_now_in_us_; + + // This is used to implement CallAndReregisterAlarmEvents. This stores + // all alarms that were reregistered because OnAlarm() returned a + // value > 0 and the time at which they should be executed is less that + // the current time. By storing such alarms in this map we ensure + // that while calling CallAndReregisterAlarmEvents we do not call + // OnAlarm on any alarm in this set. This ensures that we do not + // go in an infinite loop. + AlarmCBMap alarms_reregistered_and_should_be_skipped_; + + LIST_HEAD(ReadyList, CBAndEventMask) ready_list_; + LIST_HEAD(TmpList, CBAndEventMask) tmp_list_; + int ready_list_size_; + + uv_loop_t loop; // event loop + uv_timer_t looptimer; // event loop timer + +#ifdef EPOLL_SERVER_EVENT_TRACING +#error "EPOLL_SERVER_EVENT_TRACING is not implemented for libuv" + struct EventRecorder { + public: + EventRecorder() : num_records_(0), record_threshold_(10000) {} + + ~EventRecorder() { Clear(); } + + // When a number of events equals the record threshold, + // the collected data summary for all FDs will be written + // to EPOLL_LOG(INFO). Note that this does not include the + // individual events (if you'reinterested in those, you'll + // have to get at them programmatically). + // After any such flushing to EPOLL_LOG(INFO) all events will + // be cleared. + // Note that the definition of an 'event' is a bit 'hazy', + // as it includes the 'Unregistration' event, and perhaps + // others. + void set_record_threshold(int64_t new_threshold) { + record_threshold_ = new_threshold; + } + + void Clear() { + for (int i = 0; i < debug_events_.size(); ++i) { + delete debug_events_[i]; + } + debug_events_.clear(); + unregistered_fds_.clear(); + event_counts_.clear(); + } + + void MaybeRecordAndClear() { + ++num_records_; + if ((num_records_ > record_threshold_) && (record_threshold_ > 0)) { + EPOLL_LOG(INFO) << "\n" << *this; + num_records_ = 0; + Clear(); + } + } + + void RecordFDMaskEvent(int fd, int mask, const char* function) { + FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function); + debug_events_.push_back(fdmo); + MaybeRecordAndClear(); + } + + void RecordEpollWaitEvent(int timeout_in_ms, int num_events_generated) { + EpollWaitOutput* ewo = + new EpollWaitOutput(timeout_in_ms, num_events_generated); + debug_events_.push_back(ewo); + MaybeRecordAndClear(); + } + + void RecordEpollEvent(int fd, int event_mask) { + Events& events_for_fd = event_counts_[fd]; + events_for_fd.AssignFromMask(event_mask); + MaybeRecordAndClear(); + } + + friend ostream& operator<<(ostream& os, const EventRecorder& er) { + for (int i = 0; i < er.unregistered_fds_.size(); ++i) { + os << "fd: " << er.unregistered_fds_[i] << "\n"; + os << er.unregistered_fds_[i]; + } + for (EventCountsMap::const_iterator i = er.event_counts_.begin(); + i != er.event_counts_.end(); ++i) { + os << "fd: " << i->first << "\n"; + os << i->second; + } + for (int i = 0; i < er.debug_events_.size(); ++i) { + os << *(er.debug_events_[i]) << "\n"; + } + return os; + } + + void RecordUnregistration(int fd) { + EventCountsMap::iterator i = event_counts_.find(fd); + if (i != event_counts_.end()) { + unregistered_fds_.push_back(i->second); + event_counts_.erase(i); + } + MaybeRecordAndClear(); + } + + protected: + class DebugOutput { + public: + friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) { + debug_output.OutputToStream(os); + return os; + } + virtual void OutputToStream(ostream* os) const = 0; + virtual ~DebugOutput() {} + }; + + class FDMaskOutput : public DebugOutput { + public: + FDMaskOutput(int fd, int mask, const char* function) + : fd_(fd), mask_(mask), function_(function) {} + virtual void OutputToStream(ostream* os) const { + (*os) << "func: " << function_ << "\tfd: " << fd_; + if (mask_ != 0) { + (*os) << "\tmask: " << EventMaskToString(mask_); + } + } + int fd_; + int mask_; + const char* function_; + }; + + class EpollWaitOutput : public DebugOutput { + public: + EpollWaitOutput(int timeout_in_ms, int num_events_generated) + : timeout_in_ms_(timeout_in_ms), + num_events_generated_(num_events_generated) {} + virtual void OutputToStream(ostream* os) const { + (*os) << "timeout_in_ms: " << timeout_in_ms_ + << "\tnum_events_generated: " << num_events_generated_; + } + + protected: + int timeout_in_ms_; + int num_events_generated_; + }; + + struct Events { + Events() + : epoll_in(0), + epoll_pri(0), + epoll_out(0), + epoll_rdnorm(0), + epoll_rdband(0), + epoll_wrnorm(0), + epoll_wrband(0), + epoll_msg(0), + epoll_err(0), + epoll_hup(0), + epoll_oneshot(0), + epoll_et(0) {} + + void AssignFromMask(int event_mask) { + if (event_mask & EPOLLIN) ++epoll_in; + if (event_mask & EPOLLPRI) ++epoll_pri; + if (event_mask & EPOLLOUT) ++epoll_out; + if (event_mask & EPOLLRDNORM) ++epoll_rdnorm; + if (event_mask & EPOLLRDBAND) ++epoll_rdband; + if (event_mask & EPOLLWRNORM) ++epoll_wrnorm; + if (event_mask & EPOLLWRBAND) ++epoll_wrband; + if (event_mask & EPOLLMSG) ++epoll_msg; + if (event_mask & EPOLLERR) ++epoll_err; + if (event_mask & EPOLLHUP) ++epoll_hup; + if (event_mask & EPOLLONESHOT) ++epoll_oneshot; + if (event_mask & EPOLLET) ++epoll_et; + } + + friend ostream& operator<<(ostream& os, const Events& ev) { + if (ev.epoll_in) { + os << "\t EPOLLIN: " << ev.epoll_in << "\n"; + } + if (ev.epoll_pri) { + os << "\t EPOLLPRI: " << ev.epoll_pri << "\n"; + } + if (ev.epoll_out) { + os << "\t EPOLLOUT: " << ev.epoll_out << "\n"; + } + if (ev.epoll_rdnorm) { + os << "\t EPOLLRDNORM: " << ev.epoll_rdnorm << "\n"; + } + if (ev.epoll_rdband) { + os << "\t EPOLLRDBAND: " << ev.epoll_rdband << "\n"; + } + if (ev.epoll_wrnorm) { + os << "\t EPOLLWRNORM: " << ev.epoll_wrnorm << "\n"; + } + if (ev.epoll_wrband) { + os << "\t EPOLLWRBAND: " << ev.epoll_wrband << "\n"; + } + if (ev.epoll_msg) { + os << "\t EPOLLMSG: " << ev.epoll_msg << "\n"; + } + if (ev.epoll_err) { + os << "\t EPOLLERR: " << ev.epoll_err << "\n"; + } + if (ev.epoll_hup) { + os << "\t EPOLLHUP: " << ev.epoll_hup << "\n"; + } + if (ev.epoll_oneshot) { + os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n"; + } + if (ev.epoll_et) { + os << "\t EPOLLET: " << ev.epoll_et << "\n"; + } + return os; + } + + unsigned int epoll_in; + unsigned int epoll_pri; + unsigned int epoll_out; + unsigned int epoll_rdnorm; + unsigned int epoll_rdband; + unsigned int epoll_wrnorm; + unsigned int epoll_wrband; + unsigned int epoll_msg; + unsigned int epoll_err; + unsigned int epoll_hup; + unsigned int epoll_oneshot; + unsigned int epoll_et; + }; + + std::vector debug_events_; + std::vector unregistered_fds_; + using EventCountsMap = std::unordered_map; + EventCountsMap event_counts_; + int64_t num_records_; + int64_t record_threshold_; + }; + + void ClearEventRecords() { event_recorder_.Clear(); } + void WriteEventRecords(ostream* os) const { (*os) << event_recorder_; } + + mutable EventRecorder event_recorder_; + +#endif + + private: + // Helper functions used in the destructor. + void CleanupFDToCBMap(); + void CleanupTimeToAlarmCBMap(); + + // The callback registered to the fds below. As the purpose of their + // registration is to wake the epoll server it just clears the pipe and + // returns. + std::unique_ptr wake_cb_; + + // A pipe owned by the epoll server. The server will be registered to listen + // on read_fd_ and can be woken by Wake() which writes to write_fd_. + int read_fd_; + int write_fd_; + + // This boolean is checked to see if it is false at the top of the + // WaitForEventsAndExecuteCallbacks function. If not, then it either returns + // without doing work, and logs to ERROR, or aborts the program (in + // DEBUG mode). If so, then it sets the bool to true, does work, and + // sets it back to false when done. This catches unwanted recursion. + bool in_wait_for_events_and_execute_callbacks_; + + // Returns true when the SimpleEpollServer() is being destroyed. + bool in_shutdown_; + int64_t last_delay_in_usec_; +}; + +class LibuvEpollAlarmCallbackInterface { + public: + // Summary: + // Called when an alarm times out. Invalidates an AlarmRegToken. + // WARNING: If a token was saved to refer to an alarm callback, OnAlarm must + // delete it, as the reference is no longer valid. + // Returns: + // the unix time (in microseconds) at which this alarm should be signaled + // again, or 0 if the alarm should be removed. + virtual int64_t OnAlarm() = 0; + + // Summary: + // Called when the an alarm is registered. Invalidates an AlarmRegToken. + // Args: + // token: the iterator to the alarm registered in the alarm map. + // WARNING: this token becomes invalid when the alarm fires, is + // unregistered, or OnShutdown is called on that alarm. + // eps: the epoll server the alarm is registered with. + virtual void OnRegistration(const SimpleLibuvEpollServer::AlarmRegToken& token, + SimpleLibuvEpollServer* eps) = 0; + + // Summary: + // Called when the an alarm is unregistered. + // WARNING: It is not valid to unregister a callback and then use the token + // that was saved to refer to the callback. + virtual void OnUnregistration() = 0; + + // Summary: + // Called when the epoll server is shutting down. + // Invalidates the AlarmRegToken that was given when this alarm was + // registered. + virtual void OnShutdown(SimpleLibuvEpollServer* eps) = 0; + + virtual ~LibuvEpollAlarmCallbackInterface() {} + + protected: + LibuvEpollAlarmCallbackInterface() {} +}; + +// A simple alarm which unregisters itself on destruction. +// +// PLEASE NOTE: +// Any classes overriding these functions must either call the implementation +// of the parent class, or is must otherwise make sure that the 'registered_' +// boolean and the token, 'token_', are updated appropriately. +class EPOLL_EXPORT_PRIVATE LibuvEpollAlarm : public LibuvEpollAlarmCallbackInterface { + public: + LibuvEpollAlarm(); + + ~LibuvEpollAlarm() override; + + // Marks the alarm as unregistered and returns 0. The return value may be + // safely ignored by subclasses. + int64_t OnAlarm() override; + + // Marks the alarm as registered, and stores the token. + void OnRegistration(const SimpleLibuvEpollServer::AlarmRegToken& token, + SimpleLibuvEpollServer* eps) override; + + // Marks the alarm as unregistered. + void OnUnregistration() override; + + // Marks the alarm as unregistered. + void OnShutdown(SimpleLibuvEpollServer* eps) override; + + // If the alarm was registered, unregister it. + void UnregisterIfRegistered(); + + // Reregisters the alarm at specified time. + void ReregisterAlarm(int64_t timeout_time_in_us); + + bool registered() const { return registered_; } + + const SimpleLibuvEpollServer* eps() const { return eps_; } + + private: + SimpleLibuvEpollServer::AlarmRegToken token_; + SimpleLibuvEpollServer* eps_; + bool registered_; +}; + +} // namespace epoll_server + +#endif // QUICHE_EPOLL_SERVER_H_ diff --git a/platform/quiche_platform_impl/quic_mutex_impl.h b/platform/quiche_platform_impl/quic_mutex_impl.h index fb16ba6d..d4cf73fc 100644 --- a/platform/quiche_platform_impl/quic_mutex_impl.h +++ b/platform/quiche_platform_impl/quic_mutex_impl.h @@ -1,7 +1,7 @@ // from envoy so LICENSE.envoy applies // use default impl #ifndef QUIC_MUTEX_IMPL -#define QUIC_MITEX_IMPL +#define QUIC_MUTEX_IMPL #include "third_party/quiche/common/platform/default/quiche_platform_impl/quic_mutex_impl.h" diff --git a/src/http3server.cc b/src/http3server.cc index 1bfc6d8e..b7d2c35f 100644 --- a/src/http3server.cc +++ b/src/http3server.cc @@ -107,7 +107,9 @@ namespace quic socket_api.EnableReceiveTimestamp(fd_); sockaddr_storage addr = address.generic_address(); - int rc = bind(fd_, reinterpret_cast(&addr), sizeof(addr)); + // @BENBENZ: fix on mac OSX (was needed or a EINVAL is returned) (from api::Bind in quic_udp_socket_posix.cc) + int addr_len = address.host().IsIPv4() ? sizeof(sockaddr_in) : sizeof(sockaddr_in6); + int rc = bind(fd_, reinterpret_cast(&addr), addr_len); if (rc < 0) { QUIC_LOG(ERROR) << "Bind failed: " << strerror(errno) << "\n"; @@ -126,7 +128,7 @@ namespace quic port_ = address.port(); } - const int kEpollFlags = EPOLLIN | EPOLLOUT | EPOLLET; + const int kEpollFlags = UV_READABLE | UV_WRITABLE | UV_DISCONNECT; epoll_server_.set_timeout_in_us(-1); // negative values would mean wait forever epoll_server_.RegisterFD(fd_, this, kEpollFlags); @@ -652,9 +654,9 @@ namespace quic QUICHE_DCHECK_EQ(fd, fd_); event->out_ready_mask = 0; - if (event->in_events & EPOLLIN) + if (event->in_events & UV_READABLE) { - QUIC_DVLOG(1) << "EPOLLIN"; + QUIC_DVLOG(1) << "UV_READABLE"; dispatcher_->ProcessBufferedChlos(kNumSessionsToCreatePerSocketEvent); @@ -668,16 +670,16 @@ namespace quic if (dispatcher_->HasChlosBuffered()) { - // Register EPOLLIN event to consume buffered CHLO(s). - event->out_ready_mask |= EPOLLIN; + // Register UV_READABLE event to consume buffered CHLO(s). + event->out_ready_mask |= UV_READABLE; } } - if (event->in_events & EPOLLOUT) + if (event->in_events & UV_WRITABLE) { dispatcher_->OnCanWrite(); if (dispatcher_->HasPendingWrites()) { - event->out_ready_mask |= EPOLLOUT; + event->out_ready_mask |= UV_WRITABLE; } } }