From 8f04fd8d27dcd259bd77026d3e7389b5c202a70e Mon Sep 17 00:00:00 2001 From: Filip Niksic Date: Fri, 28 Jun 2024 09:20:17 -0700 Subject: [PATCH] Add an IO function for reading inputs from blob files. Reorganized the runtime to use the new function and removed unused code. PiperOrigin-RevId: 647704904 --- common/blob_file.cc | 19 ++- common/remote_file.h | 3 + common/remote_file_oss.cc | 4 + e2e_tests/functional_test.cc | 6 +- fuzztest/BUILD | 18 ++- fuzztest/CMakeLists.txt | 19 ++- fuzztest/fuzztest_macros.cc | 3 +- fuzztest/init_fuzztest.cc | 1 + fuzztest/internal/centipede_adaptor.cc | 23 ++-- fuzztest/internal/googletest_adaptor.cc | 1 + fuzztest/internal/io.cc | 90 +++++++++++++- fuzztest/internal/io.h | 21 ++++ fuzztest/internal/io_test.cc | 82 ++++++++++++- fuzztest/internal/runtime.cc | 153 ++++++++++-------------- fuzztest/internal/runtime.h | 25 ++-- 15 files changed, 342 insertions(+), 126 deletions(-) diff --git a/common/blob_file.cc b/common/blob_file.cc index 6ba3377c..9fafe8e5 100644 --- a/common/blob_file.cc +++ b/common/blob_file.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -186,6 +187,10 @@ class DefaultBlobFileReader : public BlobFileReader { riegeli_reader_.Reset(riegeli::kClosed); #endif // CENTIPEDE_DISABLE_RIEGELI + // Because we fall back to a legacy reader, we can't distinguish between + // an empty blob file and a file that is not a blob file. Once this behavior + // changes (e.g., we get rid of the legacy reader), consider b/349115475 and + // track down places that would benefit from this distinction. legacy_reader_ = std::make_unique(); if (absl::Status s = legacy_reader_->Open(path); !s.ok()) { legacy_reader_ = nullptr; @@ -285,9 +290,13 @@ class RiegeliWriter : public BlobFileWriter { } absl::Status Write(ByteSpan blob) override { + std::string_view blob_view = AsStringView(blob); const auto now = absl::Now(); if (!PreWriteFlush(blob.size())) return writer_.status(); - if (!writer_.WriteRecord(AsStringView(blob))) return writer_.status(); + if (!writer_.WriteRecord( + absl::string_view{blob_view.data(), blob_view.size()})) { + return writer_.status(); + } if (!PostWriteFlush(blob.size())) return writer_.status(); write_duration_ += absl::Now() - now; if (written_blobs_ + buffered_blobs_ % 10000 == 0) @@ -436,11 +445,13 @@ ByteArray PackBytesForAppendFile(ByteSpan blob) { size_t size = blob.size(); uint8_t size_bytes[sizeof(size)]; std::memcpy(size_bytes, &size, sizeof(size)); - res.insert(res.end(), &kPackBegMagic[0], &kPackBegMagic[kMagicLen]); + res.insert(res.end(), std::begin(kPackBegMagic), + std::begin(kPackBegMagic) + kMagicLen); res.insert(res.end(), hash.begin(), hash.end()); - res.insert(res.end(), &size_bytes[0], &size_bytes[sizeof(size_bytes)]); + res.insert(res.end(), std::begin(size_bytes), std::end(size_bytes)); res.insert(res.end(), blob.begin(), blob.end()); - res.insert(res.end(), &kPackEndMagic[0], &kPackEndMagic[kMagicLen]); + res.insert(res.end(), std::begin(kPackEndMagic), + std::begin(kPackEndMagic) + kMagicLen); return res; } diff --git a/common/remote_file.h b/common/remote_file.h index 008b2b59..92bd367a 100644 --- a/common/remote_file.h +++ b/common/remote_file.h @@ -96,6 +96,9 @@ void RemoteFileGetContents(const std::filesystem::path &path, // Returns true if `path` exists. bool RemotePathExists(std::string_view path); +// Returns true if `path` is a directory. +bool RemotePathIsDirectory(std::string_view path); + // Returns the size of the file at `path` in bytes. The file must exist. int64_t RemoteFileGetSize(std::string_view path); diff --git a/common/remote_file_oss.cc b/common/remote_file_oss.cc index 40eaec08..b8a1e108 100644 --- a/common/remote_file_oss.cc +++ b/common/remote_file_oss.cc @@ -162,6 +162,10 @@ bool RemotePathExists(std::string_view path) { return std::filesystem::exists(path); } +bool RemotePathIsDirectory(std::string_view path) { + return std::filesystem::is_directory(path); +} + int64_t RemoteFileGetSize(std::string_view path) { FILE *f = std::fopen(path.data(), "r"); CHECK(f != nullptr) << VV(path); diff --git a/e2e_tests/functional_test.cc b/e2e_tests/functional_test.cc index 39d317c8..5266b3de 100644 --- a/e2e_tests/functional_test.cc +++ b/e2e_tests/functional_test.cc @@ -801,8 +801,10 @@ void ExpectCorpusInputMessageInLogs(absl::string_view logs, int num_inputs) { HasSubstr(absl::StrFormat("%d inputs to rerun", num_inputs))) << logs; #else - EXPECT_THAT(logs, HasSubstr(absl::StrFormat( - "Parsed %d inputs and ignored 0 inputs", num_inputs))) + EXPECT_THAT(logs, + HasSubstr(absl::StrFormat( + "In total, loaded %d inputs and ignored 0 invalid inputs", + num_inputs))) << logs; #endif } diff --git a/fuzztest/BUILD b/fuzztest/BUILD index 945b8e96..7af1c4e6 100644 --- a/fuzztest/BUILD +++ b/fuzztest/BUILD @@ -76,6 +76,7 @@ cc_library( "fuzztest_macros.h", ], deps = [ + ":io", ":registration", ":registry", "@com_google_absl//absl/log:check", @@ -117,6 +118,7 @@ cc_library( ":configuration", ":flag_name", ":googletest_adaptor", + ":io", ":logging", ":registry", ":runtime", @@ -452,6 +454,7 @@ cc_library( ":configuration", ":corpus_database", ":flag_name", + ":io", ":registry", ":runtime", "@com_google_absl//absl/strings", @@ -477,9 +480,15 @@ cc_library( hdrs = ["internal/io.h"], deps = [ ":logging", + "@com_google_absl//absl/functional:function_ref", "@com_google_absl//absl/hash", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/strings:string_view", + "@com_google_absl//absl/types:span", + "@com_google_fuzztest//common:blob_file", + "@com_google_fuzztest//common:defs", + "@com_google_fuzztest//common:remote_file", ] + select({ "//conditions:default": [], }), @@ -491,7 +500,11 @@ cc_test( deps = [ ":fuzztest_core", ":io", + "@com_google_absl//absl/log:check", + "@com_google_absl//absl/status", "@com_google_absl//absl/strings", + "@com_google_fuzztest//common:blob_file", + "@com_google_fuzztest//common:defs", "@com_google_googletest//:gtest_main", ], ) @@ -582,19 +595,20 @@ cc_library( ":flag_name", ":io", ":logging", - ":meta", ":printer", ":registration", ":seed_seq", ":serialization", - ":type_support", + ":status", "@com_google_absl//absl/functional:any_invocable", + "@com_google_absl//absl/functional:bind_front", "@com_google_absl//absl/functional:function_ref", "@com_google_absl//absl/log:check", "@com_google_absl//absl/random", "@com_google_absl//absl/random:bit_gen_ref", "@com_google_absl//absl/random:distributions", "@com_google_absl//absl/status", + "@com_google_absl//absl/status:statusor", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", "@com_google_absl//absl/time", diff --git a/fuzztest/CMakeLists.txt b/fuzztest/CMakeLists.txt index e335894f..d77f34ef 100644 --- a/fuzztest/CMakeLists.txt +++ b/fuzztest/CMakeLists.txt @@ -57,6 +57,7 @@ fuzztest_cc_library( SRCS "fuzztest_macros.cc" DEPS + fuzztest::io fuzztest::registration fuzztest::registry ) @@ -83,6 +84,7 @@ fuzztest_cc_library( DEPS fuzztest::configuration fuzztest::googletest_adaptor + fuzztest::io fuzztest::logging fuzztest::registry fuzztest::runtime @@ -375,6 +377,7 @@ fuzztest_cc_library( DEPS fuzztest::configuration fuzztest::corpus_database + fuzztest::io fuzztest::registry fuzztest::runtime absl::strings @@ -401,9 +404,16 @@ fuzztest_cc_library( SRCS "internal/io.cc" DEPS + fuzztest::blob_file + fuzztest::defs fuzztest::logging + fuzztest::remote_file + absl::function_ref absl::hash + absl::status absl::str_format + absl::string_view + absl::span ) fuzztest_cc_test( @@ -412,8 +422,12 @@ fuzztest_cc_test( SRCS "internal/io_test.cc" DEPS + fuzztest::blob_file + fuzztest::defs fuzztest::fuzztest_core fuzztest::io + absl::check + absl::status absl::strings GTest::gmock_main ) @@ -511,19 +525,20 @@ fuzztest_cc_library( fuzztest::fixture_driver fuzztest::io fuzztest::logging - fuzztest::meta fuzztest::printer fuzztest::registration fuzztest::seed_seq fuzztest::serialization - fuzztest::type_support + fuzztest::status absl::any_invocable + absl::bind_front absl::function_ref absl::check absl::random_random absl::random_bit_gen_ref absl::random_distributions absl::status + absl::statusor absl::strings absl::str_format absl::time diff --git a/fuzztest/fuzztest_macros.cc b/fuzztest/fuzztest_macros.cc index ca11706c..71151573 100644 --- a/fuzztest/fuzztest_macros.cc +++ b/fuzztest/fuzztest_macros.cc @@ -2,7 +2,7 @@ #include #include -#include +#include // NOLINT #include #include #include @@ -17,6 +17,7 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_split.h" #include "absl/strings/string_view.h" +#include "./fuzztest/internal/io.h" namespace fuzztest { diff --git a/fuzztest/init_fuzztest.cc b/fuzztest/init_fuzztest.cc index 26850faa..d7e910de 100644 --- a/fuzztest/init_fuzztest.cc +++ b/fuzztest/init_fuzztest.cc @@ -32,6 +32,7 @@ #include "./fuzztest/internal/configuration.h" #include "./fuzztest/internal/flag_name.h" #include "./fuzztest/internal/googletest_adaptor.h" +#include "./fuzztest/internal/io.h" #include "./fuzztest/internal/logging.h" #include "./fuzztest/internal/registry.h" #include "./fuzztest/internal/runtime.h" diff --git a/fuzztest/internal/centipede_adaptor.cc b/fuzztest/internal/centipede_adaptor.cc index 9e270d59..53723d65 100644 --- a/fuzztest/internal/centipede_adaptor.cc +++ b/fuzztest/internal/centipede_adaptor.cc @@ -170,8 +170,9 @@ class CentipedeAdaptorRunnerCallbacks : public centipede::RunnerCallbacks { } bool Execute(centipede::ByteSpan input) override { - if (auto parsed_input = - fuzzer_impl_.TryParse({(char*)input.data(), input.size()})) { + auto parsed_input = + fuzzer_impl_.TryParse({(char*)input.data(), input.size()}); + if (parsed_input.ok()) { fuzzer_impl_.RunOneInput({*std::move(parsed_input)}); return true; } @@ -183,13 +184,12 @@ class CentipedeAdaptorRunnerCallbacks : public centipede::RunnerCallbacks { std::vector seeds = fuzzer_impl_.fixture_driver_->GetSeeds(); CorpusDatabase corpus_database(configuration_); - for (const std::string& corpus_file : - corpus_database.GetCoverageInputsIfAny( - fuzzer_impl_.test_.full_name())) { - auto corpus_value = fuzzer_impl_.GetCorpusValueFromFile(corpus_file); - if (!corpus_value) continue; - seeds.push_back(*corpus_value); - } + fuzzer_impl_.ForEachInput( + corpus_database.GetCoverageInputsIfAny(fuzzer_impl_.test_.full_name()), + [&](absl::string_view /*file_path*/, std::optional /*blob_idx*/, + FuzzTestFuzzerImpl::Input input) { + seeds.push_back(std::move(input.args)); + }); constexpr int kInitialValuesInSeeds = 32; for (int i = 0; i < kInitialValuesInSeeds; ++i) { seeds.push_back(fuzzer_impl_.params_domain_.Init(prng_)); @@ -226,9 +226,10 @@ class CentipedeAdaptorRunnerCallbacks : public centipede::RunnerCallbacks { inputs[absl::Uniform(prng_, 0, inputs.size())].data; auto parsed_origin = fuzzer_impl_.TryParse({(const char*)origin.data(), origin.size()}); - if (!parsed_origin) + if (!parsed_origin.ok()) { parsed_origin = fuzzer_impl_.params_domain_.Init(prng_); - auto mutant = FuzzTestFuzzerImpl::Input{*parsed_origin}; + } + auto mutant = FuzzTestFuzzerImpl::Input{*std::move(parsed_origin)}; fuzzer_impl_.MutateValue(mutant, prng_); mutant_data = fuzzer_impl_.params_domain_.SerializeCorpus(mutant.args).ToString(); diff --git a/fuzztest/internal/googletest_adaptor.cc b/fuzztest/internal/googletest_adaptor.cc index 5b63e927..d70fc258 100644 --- a/fuzztest/internal/googletest_adaptor.cc +++ b/fuzztest/internal/googletest_adaptor.cc @@ -13,6 +13,7 @@ #include "./fuzztest/internal/configuration.h" #include "./fuzztest/internal/corpus_database.h" #include "./fuzztest/internal/flag_name.h" +#include "./fuzztest/internal/io.h" #include "./fuzztest/internal/registry.h" #include "./fuzztest/internal/runtime.h" diff --git a/fuzztest/internal/io.cc b/fuzztest/internal/io.cc index 864c2be6..ae4afe73 100644 --- a/fuzztest/internal/io.cc +++ b/fuzztest/internal/io.cc @@ -15,10 +15,10 @@ #include "./fuzztest/internal/io.h" #include -#include #include -#include +#include // NOLINT #include +#include #include #include #include @@ -27,9 +27,15 @@ #include #include +#include "absl/functional/function_ref.h" #include "absl/hash/hash.h" +#include "absl/status/status.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" +#include "absl/types/span.h" +#include "./common/blob_file.h" +#include "./common/defs.h" +#include "./common/remote_file.h" #include "./fuzztest/internal/logging.h" #if defined(__APPLE__) @@ -73,6 +79,14 @@ std::vector ListDirectoryRecursively(absl::string_view path) { FUZZTEST_INTERNAL_CHECK(false, "Filesystem API not supported in iOS/MacOS"); } +void ForEachSerializedInput(absl::Span file_paths, + absl::FunctionRef blob_idx, std::string input)> + consume) { + FUZZTEST_INTERNAL_CHECK(false, "Filesystem API not supported in iOS/MacOS"); +} + #else bool WriteFile(absl::string_view path, absl::string_view contents) { @@ -206,4 +220,76 @@ std::vector> ReadFilesFromDirectory( return out; } +#if !defined(FUZZTEST_STUB_FILESYSTEM) + +// TODO(b/348702296): Consider merging with `centipede::ReadShard()`. +void ForEachSerializedInput(absl::Span file_paths, + absl::FunctionRef blob_idx, std::string input)> + consume) { + int total_loaded_inputs = 0; + int total_invalid_inputs = 0; + for (const std::string& file_path : file_paths) { + FUZZTEST_INTERNAL_CHECK_PRECONDITION(centipede::RemotePathExists(file_path), + "File path ", file_path, + " does not exist."); + FUZZTEST_INTERNAL_CHECK_PRECONDITION( + !centipede::RemotePathIsDirectory(file_path), "File path ", file_path, + " is a directory."); + int loaded_inputs_from_file = 0; + int invalid_inputs_from_file = 0; + // The reader cannot be reused for multiple files because of the way it + // handles its internal state. So we instantiate a new reader for each file. + std::unique_ptr reader = + centipede::DefaultBlobFileReaderFactory(); + if (reader->Open(file_path).ok()) { + centipede::ByteSpan blob; + for (int blob_idx = 0; reader->Read(blob).ok(); ++blob_idx) { + absl::Status result = consume( + file_path, blob_idx, std::string(centipede::AsStringView(blob))); + if (result.ok()) { + ++loaded_inputs_from_file; + } else { + ++invalid_inputs_from_file; + absl::FPrintF(GetStderr(), + "[!] Invalid input at index %d in file %s: %s\n", + blob_idx, file_path, result.message()); + } + } + } + if (loaded_inputs_from_file + invalid_inputs_from_file > 0) { + // The file was a blob file and we read some inputs from it. + absl::FPrintF( + GetStderr(), + "[*] Loaded %d inputs and ignored %d invalid inputs from %s.\n", + loaded_inputs_from_file, invalid_inputs_from_file, file_path); + total_loaded_inputs += loaded_inputs_from_file; + total_invalid_inputs += invalid_inputs_from_file; + continue; + } + // The file was not a blob file (or, unlikely, it was an empty blob file); + // read its contents directly. + // TODO(b/349115475): Currently, we cannot distinguish between an empty blob + // file and a file that is not a blob file. Once we can, we should not fall + // back to reading the file directly if it is an empty blob file. + std::string contents; + centipede::RemoteFileGetContents(file_path, contents); + absl::Status result = consume(file_path, std::nullopt, std::move(contents)); + if (result.ok()) { + ++total_loaded_inputs; + } else { + ++total_invalid_inputs; + absl::FPrintF(GetStderr(), "[!] Invalid input file %s: %s\n", file_path, + result.message()); + } + } + absl::FPrintF( + GetStderr(), + "[*] In total, loaded %d inputs and ignored %d invalid inputs.\n", + total_loaded_inputs, total_invalid_inputs); +} + +#endif // !defined(FUZZTEST_STUB_FILESYSTEM) + } // namespace fuzztest::internal diff --git a/fuzztest/internal/io.h b/fuzztest/internal/io.h index 34812243..5c0b13e7 100644 --- a/fuzztest/internal/io.h +++ b/fuzztest/internal/io.h @@ -17,9 +17,13 @@ #include #include +#include #include +#include "absl/functional/function_ref.h" +#include "absl/status/status.h" #include "absl/strings/string_view.h" +#include "absl/types/span.h" namespace fuzztest::internal { @@ -67,6 +71,23 @@ absl::string_view Basename(absl::string_view filename); std::vector> ReadFilesFromDirectory( absl::string_view dir); +// Iterates over all serialized inputs in `file_paths` (not directory paths) and +// calls `consume` on each one. Supports both blob files containing multiple +// blobs, in which case `consume` is called for each blob, and individual files, +// in which case `consume` is called on the file's contents. Ignores invalid +// files. CHECK-fails if `file_paths` contains paths that don't exist or if it +// contains a directory. +// +// `consume` is a function that takes a file path, an optional blob index in the +// file (for blob files), and an input in the given file at the given blob index +// (if applicable). It returns an `absl::Status` indicating whether the input +// was successfully consumed. +void ForEachSerializedInput(absl::Span file_paths, + absl::FunctionRef blob_idx, std::string input)> + consume); + } // namespace fuzztest::internal #endif // FUZZTEST_FUZZTEST_INTERNAL_IO_H_ diff --git a/fuzztest/internal/io_test.cc b/fuzztest/internal/io_test.cc index 5b9818b8..34c4ddbb 100644 --- a/fuzztest/internal/io_test.cc +++ b/fuzztest/internal/io_test.cc @@ -21,13 +21,21 @@ #include #include #include -#include +#include // NOLINT +#include #include #include +#include +#include +#include #include "gmock/gmock.h" #include "gtest/gtest.h" +#include "absl/log/check.h" +#include "absl/status/status.h" #include "absl/strings/str_cat.h" +#include "./common/blob_file.h" +#include "./common/defs.h" #include "./fuzztest/fuzztest_core.h" namespace fuzztest::internal { @@ -193,5 +201,77 @@ TEST(IOTest, ListDirectoryReturnsEmptyVectorWhenDirectoryDoesNotExist) { EXPECT_THAT(ListDirectory("/doesnt_exist/"), IsEmpty()); } +TEST(ForEachSerializedInputTest, ReadsInputsFromSerializedFilesAndBlobFiles) { + const std::string tmp_dir = TmpDir("test_dir"); + const std::string serialized_file = + std::filesystem::path(tmp_dir) / "serialized_file"; + const std::string blob_file = std::filesystem::path(tmp_dir) / "blob_file"; + TestWrite(serialized_file, "Input1"); + std::unique_ptr writer = + centipede::DefaultBlobFileWriterFactory(); + CHECK(writer->Open(blob_file, "w").ok()); + CHECK(writer->Write(centipede::AsByteSpan(absl::string_view("Input2"))).ok()); + CHECK(writer->Write(centipede::AsByteSpan(absl::string_view("Input3"))).ok()); + CHECK(writer->Close().ok()); + + using InputInFile = std::tuple, std::string>; + std::vector inputs; + ForEachSerializedInput({serialized_file, blob_file}, + [&](absl::string_view file_path, + std::optional blob_idx, std::string input) { + inputs.emplace_back(std::string(file_path), blob_idx, + std::move(input)); + return absl::OkStatus(); + }); + EXPECT_THAT(inputs, UnorderedElementsAre( + InputInFile{serialized_file, std::nullopt, "Input1"}, + InputInFile{blob_file, 0, "Input2"}, + InputInFile{blob_file, 1, "Input3"})); + std::filesystem::remove_all(tmp_dir); +} + +TEST(ForEachSerializedInputTest, IgnoresUnconsumedInputs) { + const std::string tmp_dir = TmpDir("test_dir"); + const std::string file = std::filesystem::path(tmp_dir) / "file"; + std::unique_ptr writer = + centipede::DefaultBlobFileWriterFactory(); + CHECK(writer->Open(file, "w").ok()); + CHECK(writer->Write(centipede::AsByteSpan(absl::string_view("Ignore"))).ok()); + CHECK(writer->Write(centipede::AsByteSpan(absl::string_view("Accept"))).ok()); + CHECK(writer->Close().ok()); + + using InputInFile = std::tuple, std::string>; + std::vector inputs; + ForEachSerializedInput( + {file}, [&](absl::string_view file_path, std::optional blob_idx, + std::string input) { + if (input == "Ignore") return absl::InvalidArgumentError("Ignore"); + inputs.emplace_back(std::string(file_path), blob_idx, std::move(input)); + return absl::OkStatus(); + }); + EXPECT_THAT(inputs, UnorderedElementsAre(InputInFile{file, 1, "Accept"})); + std::filesystem::remove_all(tmp_dir); +} + +TEST(ForEachSerializedInputTest, DiesOnDirectoriesInFilePaths) { + const std::string tmp_dir = TmpDir("test_dir"); + const std::string dir = std::filesystem::path(tmp_dir) / "dir"; + std::filesystem::create_directory(dir); + + EXPECT_DEATH(ForEachSerializedInput( + {dir}, [&](absl::string_view, std::optional, + std::string) { return absl::OkStatus(); }), + "is a directory"); + std::filesystem::remove_all(tmp_dir); +} + +TEST(ForEachSerializedInputTest, DiesOnNonExistingFilePaths) { + EXPECT_DEATH( + ForEachSerializedInput({"/doesnt_exist/file"}, + [&](absl::string_view, std::optional, + std::string) { return absl::OkStatus(); }), + "does not exist"); +} + } // namespace } // namespace fuzztest::internal diff --git a/fuzztest/internal/runtime.cc b/fuzztest/internal/runtime.cc index 639d29d2..0d4f6109 100644 --- a/fuzztest/internal/runtime.cc +++ b/fuzztest/internal/runtime.cc @@ -14,7 +14,7 @@ #include "./fuzztest/internal/runtime.h" -#include "absl/strings/str_split.h" +#include "./fuzztest/internal/status.h" #if !defined(_WIN32) && !defined(__Fuchsia__) #define FUZZTEST_HAS_RUSAGE @@ -31,12 +31,12 @@ #include #include #include -#include #include #include // NOLINT #include #include +#include "absl/functional/bind_front.h" #include "absl/functional/function_ref.h" #include "absl/log/check.h" #include "absl/random/bit_gen_ref.h" @@ -48,6 +48,7 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/str_replace.h" +#include "absl/strings/str_split.h" #include "absl/strings/string_view.h" #include "absl/time/clock.h" #include "absl/time/time.h" @@ -55,7 +56,6 @@ #include "./fuzztest/internal/configuration.h" #include "./fuzztest/internal/corpus_database.h" #include "./fuzztest/internal/coverage.h" -#include "./fuzztest/internal/domains/domain_base.h" #include "./fuzztest/internal/fixture_driver.h" #include "./fuzztest/internal/flag_name.h" #include "./fuzztest/internal/io.h" @@ -462,51 +462,33 @@ FuzzTestFuzzerImpl::~FuzzTestFuzzerImpl() { Runtime::instance().DisableReporter(); } -std::optional FuzzTestFuzzerImpl::TryParse( +absl::StatusOr FuzzTestFuzzerImpl::TryParse( absl::string_view data) { auto ir_value = IRObject::FromString(data); if (!ir_value) { - absl::FPrintF(GetStderr(), "[!] Unexpected file format.\n"); - return std::nullopt; + return absl::InvalidArgumentError("Unexpected file format"); } auto corpus_value = params_domain_.ParseCorpus(*ir_value); if (!corpus_value) { - absl::FPrintF(GetStderr(), "[!] Unexpected intermediate representation.\n"); - return std::nullopt; + return absl::InvalidArgumentError("Unexpected intermediate representation"); } - absl::Status is_valid = params_domain_.ValidateCorpusValue(*corpus_value); if (!is_valid.ok()) { - absl::FPrintF(GetStderr(), "[!] Invalid corpus value: %s\n", - is_valid.ToString()); - return std::nullopt; + return Prefix(is_valid, "Invalid corpus value"); } - return corpus_value; + return *corpus_value; } -std::optional -FuzzTestFuzzerImpl::GetCorpusValueFromFile(const std::string& path) { - const auto content = ReadFile(path); - if (!content) { - absl::FPrintF(GetStderr(), - "[!] Failed to read file or directory (might be empty): %s\n", - path); - return std::nullopt; - } - auto corpus_value = TryParse(*content); - if (!corpus_value) { - absl::FPrintF(GetStderr(), - "[!] Skipping invalid input file %s.\n===\n%s\n===\n", path, - *content); +void FuzzTestFuzzerImpl::ReplayInput(absl::string_view file_path, + std::optional blob_idx, + const Input& input) { + if (blob_idx.has_value()) { + absl::FPrintF(GetStderr(), "[.] Replaying input at index %d in %s\n", + *blob_idx, file_path); + } else { + absl::FPrintF(GetStderr(), "[.] Replaying %s\n", file_path); } - return corpus_value; -} - -void FuzzTestFuzzerImpl::ReplayInput(const std::string& path) { - auto corpus_value = GetCorpusValueFromFile(path); - if (!corpus_value) return; - absl::FPrintF(GetStderr(), "[.] Replaying %s\n", path); - RunOneInput({*corpus_value}); + RunOneInput(input); } bool FuzzTestFuzzerImpl::ReplayInputsIfAvailable( @@ -515,15 +497,14 @@ bool FuzzTestFuzzerImpl::ReplayInputsIfAvailable( // reproducing the crash, fuzzing mode should be used. runtime_.SetRunMode(RunMode::kFuzz); + auto replay_input = absl::bind_front(&FuzzTestFuzzerImpl::ReplayInput, this); if (const auto file_paths = GetFilesToReplay()) { - for (const std::string& path : *file_paths) { - ReplayInput(path); - } + ForEachInput(*file_paths, replay_input); return true; } if (configuration.crashing_input_to_reproduce.has_value()) { configuration.preprocess_crash_reproducing(); - ReplayInput(*configuration.crashing_input_to_reproduce); + ForEachInput({*configuration.crashing_input_to_reproduce}, replay_input); return true; } @@ -592,19 +573,16 @@ std::optional FuzzTestFuzzerImpl::ReadReproducerToMinimize() { absl::FPrintF(GetStderr(), "[*] Minimizing reproducer: %s\n", file); - auto data = ReadFile(std::string(file)); - - if (!data) { - FUZZTEST_INTERNAL_CHECK(false, "Failed to read minimizer file!"); - } - - auto res = TryParse(*data); - if (!res) { - absl::FPrintF(GetStderr(), "[!] Invalid input file %s.\n===\n%s\n===\n", - file, *data); - FUZZTEST_INTERNAL_CHECK(false, "Failed to read minimizer file!"); - } - return res; + std::optional reproducer; + ForEachInput({std::string(file)}, + [&](absl::string_view, std::optional, Input input) { + FUZZTEST_INTERNAL_CHECK(!reproducer.has_value(), + "Multiple inputs found in ", file); + reproducer = std::move(input.args); + }); + FUZZTEST_INTERNAL_CHECK(reproducer.has_value(), + "Failed to read minimizer file!"); + return *reproducer; } void FuzzTestFuzzerImpl::MutateValue(Input& input, absl::BitGenRef prng) { @@ -702,26 +680,18 @@ void FuzzTestFuzzerImpl::TrySampleAndUpdateInMemoryCorpus(Input sample, UpdateCorpusDistribution(); } -void FuzzTestFuzzerImpl::ForEachInputFile( +void FuzzTestFuzzerImpl::ForEachInput( absl::Span files, - absl::FunctionRef consume) { - int parsed_input_counter = 0; - int invalid_input_counter = 0; - for (const auto& path : files) { - std::optional data = ReadFile(path); - if (!data) continue; - if (auto corpus_value = TryParse(*data)) { - ++parsed_input_counter; - consume(Input{*std::move(corpus_value)}); - } else { - ++invalid_input_counter; - absl::FPrintF(GetStderr(), "[!] Invalid input file %s.\n", path); - } - } - absl::FPrintF(GetStderr(), - "[*] Parsed %d inputs and ignored %d inputs from the test " - "suite input dir.\n", - parsed_input_counter, invalid_input_counter); + absl::FunctionRef, Input)> + consume) { + ForEachSerializedInput( + files, [this, consume](absl::string_view file_path, + std::optional blob_idx, std::string data) { + absl::StatusOr corpus_value = TryParse(data); + if (!corpus_value.ok()) return corpus_value.status(); + consume(file_path, blob_idx, Input{*std::move(corpus_value)}); + return absl::OkStatus(); + }); } bool FuzzTestFuzzerImpl::MinimizeCorpusIfInMinimizationMode( @@ -732,7 +702,8 @@ bool FuzzTestFuzzerImpl::MinimizeCorpusIfInMinimizationMode( std::vector files = ListDirectory(std::string(inputdir)); // Shuffle to potentially improve previously minimized corpus. std::shuffle(files.begin(), files.end(), prng); - ForEachInputFile(files, [this](Input&& input) { + ForEachInput(files, [this](absl::string_view /*file_path*/, + std::optional /*blob_idx*/, Input input) { TrySample(input, /*write_to_file=*/true); }); return true; @@ -744,8 +715,10 @@ FuzzTestFuzzerImpl::TryReadCorpusFromFiles() { auto inputdir = absl::NullSafeStringView(getenv("FUZZTEST_TESTSUITE_IN_DIR")); if (inputdir.empty()) return inputs; std::vector files = ListDirectory(std::string(inputdir)); - ForEachInputFile( - files, [&inputs](Input&& input) { inputs.push_back(std::move(input)); }); + ForEachInput(files, [&inputs](absl::string_view /*file_path*/, + std::optional /*blob_idx*/, Input input) { + inputs.push_back(std::move(input)); + }); return inputs; } @@ -788,15 +761,15 @@ void FuzzTestFuzzerImpl::PopulateFromSeeds( // used in minimization or coverage replay. /*write_to_file=*/true); } - for (const auto& corpus_file : corpus_files) { - auto seed = GetCorpusValueFromFile(corpus_file); - if (!seed) continue; - TrySampleAndUpdateInMemoryCorpus( - Input{*seed}, - // Dump the seed to the corpus so that it is present when the corpus is - // used in minimization or coverage replay. - /*write_to_file=*/true); - } + ForEachInput(corpus_files, + [this](absl::string_view /*file_path*/, + std::optional /*blob_idx*/, Input input) { + TrySampleAndUpdateInMemoryCorpus( + std::move(input), + // Dump the seed to the corpus so that it is present when + // the corpus is used in minimization or coverage replay. + /*write_to_file=*/true); + }); } size_t GetStackLimitFromEnvOrConfiguration(const Configuration& configuration) { @@ -847,14 +820,12 @@ void FuzzTestFuzzerImpl::RunInUnitTestMode(const Configuration& configuration) { } CorpusDatabase corpus_database(configuration); - for (const std::string& file : - corpus_database.GetRegressionInputs(test_.full_name())) { - ReplayInput(file); - } - for (const std::string& file : - corpus_database.GetCoverageInputsIfAny(test_.full_name())) { - ReplayInput(file); - } + auto replay_input = + absl::bind_front(&FuzzTestFuzzerImpl::ReplayInput, this); + ForEachInput(corpus_database.GetRegressionInputs(test_.full_name()), + replay_input); + ForEachInput(corpus_database.GetCoverageInputsIfAny(test_.full_name()), + replay_input); runtime_.SetRunMode(RunMode::kUnitTest); diff --git a/fuzztest/internal/runtime.h b/fuzztest/internal/runtime.h index f529d0c9..b04484e0 100644 --- a/fuzztest/internal/runtime.h +++ b/fuzztest/internal/runtime.h @@ -32,6 +32,7 @@ #include "absl/functional/function_ref.h" #include "absl/random/bit_gen_ref.h" #include "absl/random/discrete_distribution.h" +#include "absl/status/statusor.h" #include "absl/strings/str_cat.h" #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" @@ -41,13 +42,9 @@ #include "./fuzztest/internal/coverage.h" #include "./fuzztest/internal/domains/domain.h" #include "./fuzztest/internal/fixture_driver.h" -#include "./fuzztest/internal/io.h" #include "./fuzztest/internal/logging.h" -#include "./fuzztest/internal/meta.h" #include "./fuzztest/internal/registration.h" #include "./fuzztest/internal/seed_seq.h" -#include "./fuzztest/internal/serialization.h" -#include "./fuzztest/internal/type_support.h" namespace fuzztest { @@ -272,7 +269,7 @@ class FuzzTestFuzzerImpl : public FuzzTestFuzzer { std::optional ReadReproducerToMinimize(); - std::optional TryParse(absl::string_view data); + absl::StatusOr TryParse(absl::string_view data); void MutateValue(Input& input, absl::BitGenRef prng); @@ -292,8 +289,15 @@ class FuzzTestFuzzerImpl : public FuzzTestFuzzer { void TrySampleAndUpdateInMemoryCorpus(Input sample, bool write_to_file = true); - void ForEachInputFile(absl::Span files, - absl::FunctionRef consume); + // Iterates over inputs in `files` and calls `consume` on each input. + // `consume` is a function that takes a file path, an optional blob index in + // the file (for blob files with multiple blobs), and an input in the given + // file at the given blob index (if applicable). + void ForEachInput( + absl::Span files, + absl::FunctionRef blob_idx, Input input)> + consume); // Returns true if we're in minimization mode. bool MinimizeCorpusIfInMinimizationMode(absl::BitGenRef prng); @@ -308,9 +312,10 @@ class FuzzTestFuzzerImpl : public FuzzTestFuzzer { bool ShouldStop(); - std::optional GetCorpusValueFromFile( - const std::string& path); - void ReplayInput(const std::string& path); + // Prints a message indicating that we're replaying an input from `file_path` + // at `blob_idx` (if applicable) and then runs `input`. + void ReplayInput(absl::string_view file_path, std::optional blob_idx, + const Input& input); const FuzzTest& test_; std::unique_ptr fixture_driver_;