diff --git a/include/nighthawk/client/process.h b/include/nighthawk/client/process.h index 943517726..e0b7fa36d 100644 --- a/include/nighthawk/client/process.h +++ b/include/nighthawk/client/process.h @@ -2,6 +2,8 @@ #include "nighthawk/client/output_collector.h" +#include "absl/status/status.h" + namespace Nighthawk { namespace Client { @@ -15,9 +17,9 @@ class Process { /** * @param collector used to transform output into the desired format. - * @return bool true iff execution was successfull. + * @return OK if execution succeeded or was cancelled, otherwise error details. */ - virtual bool run(OutputCollector& collector) PURE; + virtual absl::Status run(OutputCollector& collector) PURE; /** * Shuts down the worker. Mandatory call before destructing. diff --git a/source/client/client.cc b/source/client/client.cc index 174e9250b..c599c3c25 100644 --- a/source/client/client.cc +++ b/source/client/client.cc @@ -77,7 +77,7 @@ bool Main::run() { } OutputFormatterFactoryImpl output_formatter_factory; OutputCollectorImpl output_collector(time_system, *options_); - bool result; + absl::Status result; { auto signal_handler = std::make_unique([&process]() { process->requestExecutionCancellation(); }); @@ -92,12 +92,12 @@ bool Main::run() { std::cout << *formatted_proto; } process->shutdown(); - if (!result) { - ENVOY_LOG(error, "An error ocurred."); - } else { - ENVOY_LOG(info, "Done."); + if (!result.ok()) { + ENVOY_LOG(error, "An error occurred: {}.", result.message()); + return false; } - return result; + ENVOY_LOG(info, "Done."); + return true; } } // namespace Client diff --git a/source/client/options_impl.cc b/source/client/options_impl.cc index 5019df334..248c433c6 100644 --- a/source/client/options_impl.cc +++ b/source/client/options_impl.cc @@ -667,11 +667,12 @@ OptionsImpl::OptionsImpl(const nighthawk::client::CommandLineOptions& options) { void OptionsImpl::setNonTrivialDefaults() { concurrency_ = "1"; - // By default, we don't tolerate error status codes and connection failures, and will report - // upon observing those. + // By default, we don't tolerate error status codes, connection failures, or stream resets which + // could indicate a protocol mismatch, and will report upon observing those. failure_predicates_["benchmark.http_4xx"] = 0; failure_predicates_["benchmark.http_5xx"] = 0; failure_predicates_["benchmark.pool_connection_failure"] = 0; + failure_predicates_["benchmark.stream_resets"] = 0; // Also, fail fast when a remote request source is specified that we can't connect to or otherwise // fails. failure_predicates_["requestsource.upstream_rq_5xx"] = 0; diff --git a/source/client/process_impl.cc b/source/client/process_impl.cc index 69318a662..46e7456e1 100644 --- a/source/client/process_impl.cc +++ b/source/client/process_impl.cc @@ -28,6 +28,7 @@ #include "external/envoy/source/common/thread_local/thread_local_impl.h" #include "external/envoy/source/server/server.h" +#include "absl/status/status.h" #include "absl/strings/str_replace.h" #include "absl/types/optional.h" @@ -473,18 +474,19 @@ void ProcessImpl::setupStatsSinks(const envoy::config::bootstrap::v3::Bootstrap& } } -bool ProcessImpl::runInternal(OutputCollector& collector, const std::vector& uris, - const UriPtr& request_source_uri, const UriPtr& tracing_uri, - const absl::optional& scheduled_start) { +absl::Status ProcessImpl::runInternal(OutputCollector& collector, const std::vector& uris, + const UriPtr& request_source_uri, const UriPtr& tracing_uri, + const absl::optional& scheduled_start) { const Envoy::SystemTime now = time_system_.systemTime(); if (scheduled_start.value_or(now) < now) { ENVOY_LOG(error, "Scheduled execution date already transpired."); - return false; + return absl::InternalError("Scheduled execution date already transpired."); } { auto guard = std::make_unique(workers_lock_); if (cancelled_) { - return true; + ENVOY_LOG(info, "Execution was cancelled before it started."); + return absl::OkStatus(); } int number_of_workers = determineConcurrency(); shutdown_ = false; @@ -598,17 +600,32 @@ bool ProcessImpl::runInternal(OutputCollector& collector, const std::vector uris; UriPtr request_source_uri; UriPtr tracing_uri; try { - // TODO(oschaaf): See if we can rid of resolving here. - // We now only do it to validate. + // Resolving URIs validates the syntax and also detects DNS errors. if (options_.uri().has_value()) { uris.push_back(std::make_unique(options_.uri().value())); } else { @@ -632,8 +649,11 @@ bool ProcessImpl::run(OutputCollector& collector) { tracing_uri->resolve(*dispatcher_, Utility::translateFamilyOptionString(options_.addressFamily())); } - } catch (const UriException&) { - return false; + } catch (const UriException& ex) { + return absl::InvalidArgumentError( + absl::StrCat("URI exception (for example, malformed URI syntax, bad " + "MultiTarget path, unresolvable host DNS): ", + ex.what())); } try { @@ -641,7 +661,7 @@ bool ProcessImpl::run(OutputCollector& collector) { options_.scheduled_start()); } catch (Envoy::EnvoyException& ex) { ENVOY_LOG(error, "Fatal exception: {}", ex.what()); - throw; + return absl::InternalError(absl::StrCat("Fatal exception: ", ex.what())); } } diff --git a/source/client/process_impl.h b/source/client/process_impl.h index 81684815b..4ede26577 100644 --- a/source/client/process_impl.h +++ b/source/client/process_impl.h @@ -75,9 +75,9 @@ class ProcessImpl : public Process, public Envoy::Logger::Loggable>& stats_sinks); - bool runInternal(OutputCollector& collector, const std::vector& uris, - const UriPtr& request_source_uri, const UriPtr& tracing_uri, - const absl::optional& schedule); + absl::Status runInternal(OutputCollector& collector, const std::vector& uris, + const UriPtr& request_source_uri, const UriPtr& tracing_uri, + const absl::optional& schedule); /** * Compute the offset at which execution should start. We adhere to the scheduled start passed in diff --git a/source/client/remote_process_impl.cc b/source/client/remote_process_impl.cc index d69922eb4..4f9815b39 100644 --- a/source/client/remote_process_impl.cc +++ b/source/client/remote_process_impl.cc @@ -14,6 +14,8 @@ #include "client/options_impl.h" +#include "absl/status/status.h" + namespace Nighthawk { namespace Client { @@ -22,7 +24,7 @@ RemoteProcessImpl::RemoteProcessImpl(const Options& options, : options_(options), service_client_(std::make_unique()), stub_(stub) {} -bool RemoteProcessImpl::run(OutputCollector& collector) { +absl::Status RemoteProcessImpl::run(OutputCollector& collector) { Nighthawk::Client::CommandLineOptionsPtr options = options_.toCommandLineOptions(); // We don't forward the option that requests remote execution. Today, // nighthawk_service will ignore the option, but if someone ever changes that this @@ -33,10 +35,11 @@ bool RemoteProcessImpl::run(OutputCollector& collector) { service_client_->PerformNighthawkBenchmark(&stub_, *options); if (result.ok()) { collector.setOutput(result.value().output()); - return true; + return absl::OkStatus(); } ENVOY_LOG(error, "Remote execution failure: {}", result.status().message()); - return false; + return absl::Status(static_cast(result.status().code()), + absl::StrCat("Remote execution failure: ", result.status().message())); } bool RemoteProcessImpl::requestExecutionCancellation() { diff --git a/source/client/remote_process_impl.h b/source/client/remote_process_impl.h index e689e1c0d..b8e6eb630 100644 --- a/source/client/remote_process_impl.h +++ b/source/client/remote_process_impl.h @@ -26,10 +26,9 @@ class RemoteProcessImpl : public Process, public Envoy::Logger::Loggableset_code(grpc::StatusCode::INTERNAL); - // TODO(https://github.com/envoyproxy/nighthawk/issues/181): wire through error descriptions, so - // we can do better here. - response.mutable_error_detail()->set_message("Unknown failure"); + const absl::Status run_status = process.run(output_collector); + if (!run_status.ok()) { + response.mutable_error_detail()->set_code(static_cast(run_status.code())); + response.mutable_error_detail()->set_message(std::string(run_status.message())); } *(response.mutable_output()) = output_collector.toProto(); process.shutdown(); diff --git a/test/integration/BUILD b/test/integration/BUILD index 1290e6090..38577e322 100644 --- a/test/integration/BUILD +++ b/test/integration/BUILD @@ -20,6 +20,7 @@ py_library( "//:nighthawk_output_transform", "//:nighthawk_service", "//:nighthawk_test_server", + "//test/request_source:testdata", "@envoy//test/config/integration/certs", ], deps = [ diff --git a/test/options_test.cc b/test/options_test.cc index c0b847fd1..73a06ef92 100644 --- a/test/options_test.cc +++ b/test/options_test.cc @@ -297,6 +297,7 @@ TEST_P(RequestSourcePluginTestFixture, CreatesOptionsImplWithRequestSourceConfig // comparison below. EXPECT_EQ(1, command->mutable_failure_predicates()->erase("benchmark.http_4xx")); EXPECT_EQ(1, command->mutable_failure_predicates()->erase("benchmark.http_5xx")); + EXPECT_EQ(1, command->mutable_failure_predicates()->erase("benchmark.stream_resets")); EXPECT_EQ(1, command->mutable_failure_predicates()->erase("requestsource.upstream_rq_5xx")); // TODO(#433) @@ -438,6 +439,7 @@ TEST_F(OptionsImplTest, TlsContext) { // comparison below. EXPECT_EQ(1, cmd->mutable_failure_predicates()->erase("benchmark.http_4xx")); EXPECT_EQ(1, cmd->mutable_failure_predicates()->erase("benchmark.http_5xx")); + EXPECT_EQ(1, cmd->mutable_failure_predicates()->erase("benchmark.stream_resets")); EXPECT_EQ(1, cmd->mutable_failure_predicates()->erase("requestsource.upstream_rq_5xx")); // TODO(#433) OptionsImpl options_from_proto(*cmd); @@ -500,6 +502,7 @@ TEST_F(OptionsImplTest, MultiTarget) { // textual comparison below. EXPECT_EQ(1, cmd->mutable_failure_predicates()->erase("benchmark.http_4xx")); EXPECT_EQ(1, cmd->mutable_failure_predicates()->erase("benchmark.http_5xx")); + EXPECT_EQ(1, cmd->mutable_failure_predicates()->erase("benchmark.stream_resets")); EXPECT_EQ(1, cmd->mutable_failure_predicates()->erase("requestsource.upstream_rq_5xx")); // TODO(#433) OptionsImpl options_from_proto(*cmd); diff --git a/test/process_test.cc b/test/process_test.cc index 3b49916aa..90ce4ffaf 100644 --- a/test/process_test.cc +++ b/test/process_test.cc @@ -65,14 +65,12 @@ class FakeStatsSinkFactory : public NighthawkStatsSinkFactory { // expect failure. class ProcessTest : public TestWithParam { public: - enum class RunExpectation { EXPECT_SUCCESS, EXPECT_FAILURE }; - ProcessTest() : loopback_address_(Envoy::Network::Test::getLoopbackAddressUrlString(GetParam())), options_(TestUtility::createOptionsImpl( fmt::format("foo --duration 1 -v error --rps 10 https://{}/", loopback_address_))){}; - void runProcess(RunExpectation expectation, bool do_cancel = false, + void runProcess(absl::StatusCode expected_status_code, bool do_cancel = false, bool terminate_right_away = false) { ProcessPtr process = std::make_unique(*options_, time_system_); OutputCollectorImpl collector(time_system_, *options_); @@ -91,9 +89,8 @@ class ProcessTest : public TestWithParam { cancel_thread.join(); } } - const auto result = - process->run(collector) ? RunExpectation::EXPECT_SUCCESS : RunExpectation::EXPECT_FAILURE; - EXPECT_EQ(result, expectation); + absl::Status run_status = process->run(collector); + EXPECT_EQ(run_status.code(), expected_status_code); if (do_cancel) { if (cancel_thread.joinable()) { cancel_thread.join(); @@ -126,17 +123,17 @@ INSTANTIATE_TEST_SUITE_P(IpVersions, ProcessTest, Envoy::TestUtility::ipTestParamsToString); TEST_P(ProcessTest, TwoProcessInSequence) { - runProcess(RunExpectation::EXPECT_FAILURE); + runProcess(absl::StatusCode::kInternal); options_ = TestUtility::createOptionsImpl( fmt::format("foo --h2 --duration 1 --rps 10 https://{}/", loopback_address_)); - runProcess(RunExpectation::EXPECT_FAILURE); + runProcess(absl::StatusCode::kInternal); } // TODO(oschaaf): move to python int. tests once it adds to coverage. TEST_P(ProcessTest, BadTracerSpec) { options_ = TestUtility::createOptionsImpl( fmt::format("foo --trace foo://localhost:79/api/v1/spans https://{}/", loopback_address_)); - runProcess(RunExpectation::EXPECT_FAILURE); + runProcess(absl::StatusCode::kInvalidArgument); } TEST_P(ProcessTest, CancelDuringLoadTest) { @@ -146,14 +143,14 @@ TEST_P(ProcessTest, CancelDuringLoadTest) { options_ = TestUtility::createOptionsImpl( fmt::format("foo --duration 300 --failure-predicate foo:0 --concurrency 2 https://{}/", loopback_address_)); - runProcess(RunExpectation::EXPECT_SUCCESS, true); + runProcess(absl::StatusCode::kOk, /*do_cancel=*/true); } TEST_P(ProcessTest, CancelExecutionBeforeBeginLoadTest) { options_ = TestUtility::createOptionsImpl( fmt::format("foo --duration 300 --failure-predicate foo:0 --concurrency 2 https://{}/", loopback_address_)); - runProcess(RunExpectation::EXPECT_SUCCESS, true, true); + runProcess(absl::StatusCode::kOk, /*do_cancel=*/true, /*terminate_right_away=*/true); } TEST_P(ProcessTest, RunProcessWithStatsSinkConfigured) { @@ -164,7 +161,7 @@ TEST_P(ProcessTest, RunProcessWithStatsSinkConfigured) { "--stats-sinks {} https://{}/", kSinkName, loopback_address_)); numFlushes = 0; - runProcess(RunExpectation::EXPECT_FAILURE); + runProcess(absl::StatusCode::kInternal); EXPECT_GT(numFlushes, 0); } @@ -176,7 +173,7 @@ TEST_P(ProcessTest, NoFlushWhenCancelExecutionBeforeLoadTestBegin) { "2 --stats-flush-interval 1 --stats-sinks {} https://{}/", kSinkName, loopback_address_)); numFlushes = 0; - runProcess(RunExpectation::EXPECT_SUCCESS, true, true); + runProcess(absl::StatusCode::kOk, /*do_cancel=*/true, /*terminate_right_away=*/true); EXPECT_EQ(numFlushes, 0); } @@ -196,9 +193,9 @@ class ProcessTestWithSimTime : public Envoy::Event::TestUsingSimulatedTime, auto run_thread = std::thread([this, &verify_callback] { ProcessPtr process = std::make_unique(*options_, simTime()); OutputCollectorImpl collector(simTime(), *options_); - const bool result = process->run(collector); + const absl::Status result = process->run(collector); process->shutdown(); - verify_callback(result, collector.toProto()); + verify_callback(result.ok(), collector.toProto()); }); // We introduce real-world sleeps to give the executing ProcessImpl diff --git a/test/service_test.cc b/test/service_test.cc index 3350d2dbb..fd9268315 100644 --- a/test/service_test.cc +++ b/test/service_test.cc @@ -90,7 +90,7 @@ class ServiceTest : public TestWithParam { options->mutable_requests_per_second()->set_value(3); } - void runWithFailingValidationExpectations(bool expect_output, + void runWithFailingValidationExpectations(bool expect_output, int expected_error_code, absl::string_view match_error = "") { auto r = stub_->ExecutionStream(&context_); r->Write(request_, {}); @@ -100,7 +100,7 @@ class ServiceTest : public TestWithParam { ASSERT_FALSE(match_error.empty()); EXPECT_TRUE(response_.has_error_detail()); EXPECT_EQ(response_.has_output(), expect_output); - EXPECT_EQ(grpc::StatusCode::INTERNAL, response_.error_detail().code()); + EXPECT_EQ(response_.error_detail().code(), expected_error_code); EXPECT_THAT(response_.error_detail().message(), HasSubstr(std::string(match_error))); EXPECT_TRUE(status.ok()); } @@ -149,7 +149,7 @@ TEST_P(ServiceTestWithParameterizedConstructor, stream->WritesDone(); EXPECT_TRUE(stream->Read(&response_)); ASSERT_TRUE(response_.has_error_detail()); - EXPECT_THAT(response_.error_detail().message(), HasSubstr(std::string("Unknown failure"))); + EXPECT_THAT(response_.error_detail().message(), HasSubstr(std::string("failure predicate"))); EXPECT_TRUE(response_.has_output()); EXPECT_GE(response_.output().results(0).counters().size(), 8); grpc::Status status = stream->Finish(); @@ -167,7 +167,7 @@ TEST_P(ServiceTest, Basic) { r->WritesDone(); EXPECT_TRUE(r->Read(&response_)); ASSERT_TRUE(response_.has_error_detail()); - EXPECT_THAT(response_.error_detail().message(), HasSubstr(std::string("Unknown failure"))); + EXPECT_THAT(response_.error_detail().message(), HasSubstr(std::string("failure predicate"))); EXPECT_TRUE(response_.has_output()); EXPECT_GE(response_.output().results(0).counters().size(), 8); auto status = r->Finish(); @@ -183,7 +183,7 @@ TEST_P(ServiceTest, NoConcurrentStart) { EXPECT_TRUE(r->WritesDone()); EXPECT_TRUE(r->Read(&response_)); ASSERT_TRUE(response_.has_error_detail()); - EXPECT_THAT(response_.error_detail().message(), HasSubstr(std::string("Unknown failure"))); + EXPECT_THAT(response_.error_detail().message(), HasSubstr(std::string("failure predicate"))); EXPECT_TRUE(response_.has_output()); EXPECT_FALSE(r->Read(&response_)); auto status = r->Finish(); @@ -206,7 +206,8 @@ TEST_P(ServiceTest, InvalidRps) { auto options = request_.mutable_start_request()->mutable_options(); options->mutable_requests_per_second()->set_value(0); // We do not expect output, because the options proto is not valid, and can't be echoed back. - runWithFailingValidationExpectations(false, "value must be inside range"); + runWithFailingValidationExpectations(false, ::grpc::StatusCode::INTERNAL, + "value must be inside range"); } // We didn't implement updates yet, ensure we indicate so. @@ -239,7 +240,7 @@ TEST_P(ServiceTest, Unresolvable) { auto options = request_.mutable_start_request()->mutable_options(); options->mutable_uri()->set_value("http://unresolvable-host/"); // We expect output, because the options proto is valid. - runWithFailingValidationExpectations(true, "Unknown failure"); + runWithFailingValidationExpectations(true, ::grpc::StatusCode::INVALID_ARGUMENT, "URI exception"); } } // namespace