initial commit of fuzzers from https://github.com/pauldreik/fuzzfmt

pauldreik · Apr 27, 2019 · 6cbd91a · 6cbd91a
1 parent eaddfb1
commit 6cbd91a
Show file tree

Hide file tree

Showing 6 changed files with 649 additions and 0 deletions.
diff --git a/fuzzing/CMakeLists.txt b/fuzzing/CMakeLists.txt
@@ -0,0 +1,42 @@
+# for fuzzing libfmt http://fmtlib.net/
+#
+# by Paul Dreik 20190420
+# https://www.pauldreik.se/
+
+
+cmake_minimum_required(VERSION 3.10)
+
+project(fmt_fuzzers LANGUAGES CXX)
+
+add_definitions(-DFUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION=1) # macro checked by the fuzz mode guards shown in README.md
+
+add_subdirectory(fmt) # the fmt sources are expected in ./fmt (README.md describes this as a submodule)
+
+
+# setting this links in a main(), which is useful for reproducing crashes
+# and for running under kcov, gdb, afl or valgrind.
+# (note that libFuzzer can also reproduce, just pass it the files)
+option(reproduce_mode "enables the reproduce mode, instead of libFuzzer" On)
+
+# find all fuzzers: every fuzz*.cpp next to this file is collected in SOURCES.
+file(GLOB SOURCES "fuzz*.cpp")
+
+if(reproduce_mode)
+  set(prefix reproducer_) # executable name prefix used by implement_fuzzer
+  add_definitions(-DIMPLEMENT_MAIN=1) # enables the main() guarded by IMPLEMENT_MAIN in the fuzzer sources
+else()
+  # -fsanitize=fuzzer is passed as is, so this assumes clang is used
+  string(APPEND CMAKE_CXX_FLAGS " -fsanitize=fuzzer")
+  set(prefix fuzzer_) # executable name prefix used by implement_fuzzer
+endif()
+
+# builds one executable from a single fuzzer source file.
+# the target is named ${prefix} followed by the file name of sourcefile
+# without directory and extension, and it is linked against fmt.
+macro(implement_fuzzer sourcefile)
+  get_filename_component(basename ${sourcefile} NAME_WE)
+  add_executable(${prefix}${basename} ${sourcefile})
+  target_link_libraries(${prefix}${basename}  PRIVATE fmt)
+  # the fuzzers use "if constexpr", so C++17 is a hard requirement.
+  # CXX_STANDARD_REQUIRED makes cmake fail at configure time instead of
+  # silently falling back to an older standard the compiler does support.
+  set_target_properties(${prefix}${basename} PROPERTIES
+    CXX_STANDARD 17
+    CXX_STANDARD_REQUIRED ON)
+endmacro()
+
+foreach(X IN ITEMS ${SOURCES}) # one implement_fuzzer call per globbed source file
+    implement_fuzzer(${X})
+endforeach()
diff --git a/fuzzing/README.md b/fuzzing/README.md
@@ -0,0 +1,67 @@
+# FMT Fuzzer
+These fuzzers target libfmt, which is proposed for standardization, so it is extra
+important that bugs are smoked out.
+
+It has found bugs:
+- [fmt github #1124](https://github.com/fmtlib/fmt/issues/1124)
+- [fmt github #1127](https://github.com/fmtlib/fmt/issues/1127)
+
+Unfortunately, one has to limit the maximum memory allocation; otherwise
+fuzzing is soon interrupted by an attempt to allocate many GB of memory. That is why the submodule
+does not point to upstream fmt, but instead to a [branch in a fork of fmt](https://github.com/pauldreik/fmt/tree/fuzz), which introduces guard blocks like:
+```cpp
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+if(spec.precision>100000) {
+ throw std::runtime_error("fuzz mode - avoiding large precision");
+}
+#endif
+```
+This macro is the de facto standard for making fuzzing practically possible; see [the libFuzzer documentation](https://llvm.org/docs/LibFuzzer.html#fuzzer-friendly-build-mode).
+
+
+With afl, the fuzzers reach about 3000 iterations per second on a single core.
+With libFuzzer, they reach about 200000.
+
+# AFL
+Building with afl and undefined behaviour sanitizer:
+```sh
+mkdir build-afl-ubsan
+cd build-afl-ubsan
+CXX=afl-g++ CXXFLAGS="-fsanitize=undefined" cmake .. -Dreproduce_mode=on
+make
+```
+
+corpus minimization:
+```sh
+afl-cmin  -i lots/of/files/ -o corpus/ -- ./reproducer_fuzz_two_args @@
+```
+
+fuzzing:
+```sh
+export UBSAN_OPTIONS=abort_on_error=1
+afl-fuzz -i corpus -o out -- ./reproducer_fuzz_two_args @@
+```
+
+# libFuzzer
+
+## with sanitizers
+
+```sh
+mkdir build-libfuzzer-sanitizers
+cd build-libfuzzer-sanitizers/
+CXX=clang++ CXXFLAGS="-fsanitize=address,undefined -O3" cmake .. -Dreproduce_mode=off
+make
+mkdir -p out corpus
+./fuzzer_fuzz_two_args out corpus
+```
+
+## plain (good for speed, corpus exploration)
+
+```sh
+mkdir build-libfuzzer-plain
+cd build-libfuzzer-plain/
+CXX=clang++ CXXFLAGS="-O3" cmake .. -Dreproduce_mode=off
+make
+mkdir -p out corpus
+./fuzzer_fuzz_two_args out corpus
+```
diff --git a/fuzzing/fuzz_named_arg.cpp b/fuzzing/fuzz_named_arg.cpp
@@ -0,0 +1,142 @@
+#include <cstdint>
+#include <fmt/core.h>
+#include <fmt/chrono.h>
+#include <iostream>
+#include <stdexcept>
+#include <type_traits>
+#include <vector>
+
+// Formats one named argument of type Item1 with a fuzzer supplied format
+// string.
+//
+// The input is consumed as three consecutive pieces:
+//   1. sizeof(Item1) bytes - the value of the argument. For bool only the
+//      first byte is inspected, so the value is always exactly true or false.
+//   2. argsize bytes       - the name of the argument.
+//   3. all remaining bytes - the format string.
+// Nothing is formatted when the input is too short for this layout or when
+// argsize is not positive. No exception is caught here.
+template<typename Item1>
+void
+doit(const uint8_t* Data, std::size_t Size, int argsize)
+{
+
+  const auto N1 = sizeof(Item1);
+  if (Size <= N1) {
+    return;
+  }
+  Item1 item1{};
+  if constexpr (std::is_same<Item1, bool>::value) {
+    item1 = !!Data[0];
+  } else {
+    std::memcpy(&item1, Data, N1);
+  }
+  Data += N1;
+  Size -= N1;
+
+  // how many chars should be used for the argument name?
+  // non-positive values are rejected first, so that the conversion to an
+  // unsigned type below cannot wrap around and the comparison with Size
+  // does not mix signed and unsigned operands.
+  if (argsize <= 0) {
+    return;
+  }
+  const auto namelen = static_cast<std::size_t>(argsize);
+  if (namelen >= Size) {
+    return;
+  }
+  // one extra, value initialized element null terminates the name.
+  std::vector<char> argname(namelen + 1);
+  std::memcpy(argname.data(), Data, namelen);
+  Data += namelen;
+  Size -= namelen;
+
+  // allocates as tight as possible, making it easier to catch buffer overruns.
+  // also, make it null terminated.
+  std::vector<char> buf(Size + 1);
+  std::memcpy(buf.data(), Data, Size);
+  std::string message =
+    fmt::format(buf.data(), fmt::arg(argname.data(), item1));
+}
+
+// Formats a broken down time with a fuzzer supplied format string.
+//
+// The first sizeof(std::time_t) bytes of the input are used as the time
+// value and all remaining bytes as the format string. Nothing is formatted
+// when the input is too short or when std::localtime() returns null.
+// No exception is caught here.
+void
+doit_time(const uint8_t* Data, std::size_t Size)
+{
+  using Item = std::time_t;
+  const auto N = sizeof(Item);
+  if (Size <= N) {
+    return;
+  }
+  Item item{};
+  std::memcpy(&item, Data, N);
+  Data += N;
+  Size -= N;
+  // allocates as tight as possible, making it easier to catch buffer overruns.
+  // the vector is sized once to Size + 1 (growing it afterwards with resize()
+  // may reallocate and leave spare capacity behind). the extra, value
+  // initialized element null terminates the format string.
+  std::vector<char> buf(Size + 1);
+  std::memcpy(buf.data(), Data, Size);
+  auto* b = std::localtime(&item);
+  if (b) {
+    std::string message = fmt::format(buf.data(), *b);
+  }
+}
+
+// Turns a runtime index into a compile time type: the callback is invoked
+// with a zero value of the type selected by index, so a generic callback can
+// pick the matching instantiation via decltype of its parameter.
+//   0 bool, 1 char, 2 short, 3 int, 4 long, 5 float, 6 double, 7 long double
+// Any other index does nothing.
+template<typename Callback>
+void
+invoke(int index, Callback callback)
+{
+  if (index == 0) {
+    callback(false);
+  } else if (index == 1) {
+    callback('\0');
+  } else if (index == 2) {
+    callback(static_cast<short>(0));
+  } else if (index == 3) {
+    callback(0);
+  } else if (index == 4) {
+    callback(0L);
+  } else if (index == 5) {
+    callback(0.0f);
+  } else if (index == 6) {
+    callback(0.0);
+  } else if (index == 7) {
+    callback(0.0L);
+  }
+}
+
+// Fuzzer entry point for the named argument fuzzer.
+//
+// The first input byte is a selector: its low nibble picks the argument type
+// (see invoke) and its high nibble is the length of the argument name. The
+// rest of the input is handed to doit. Always returns 0.
+extern "C" int
+LLVMFuzzerTestOneInput(const uint8_t* Data, std::size_t Size)
+{
+  // inputs of three bytes or less are ignored
+  if (Size < 4) {
+    return 0;
+  }
+
+  const uint8_t selector = Data[0];
+  const int type_index = selector & 0x0F;
+  const int name_length = (selector & 0xF0) >> 4;
+
+  // everything after the selector byte
+  const uint8_t* const payload = Data + 1;
+  const std::size_t payload_size = Size - 1;
+
+  try {
+    invoke(type_index, [payload, payload_size, name_length](auto param1) {
+      doit<decltype(param1)>(payload, payload_size, name_length);
+    });
+  } catch (std::exception&) {
+    // anything derived from std::exception is deliberately ignored
+  }
+  return 0;
+}
+
+#ifdef IMPLEMENT_MAIN
+#include <cassert>
+#include <cstdio>
+#include <fstream>
+#include <sstream>
+#include <vector>
+// Stand-alone driver, only compiled when IMPLEMENT_MAIN is defined.
+//
+// Every command line argument is treated as the path of an input file: the
+// file is read completely and passed to LLVMFuzzerTestOneInput().
+// Returns 0 when all files were processed and 1 as soon as a file could not
+// be opened or read. The checks are explicit instead of assert() based, so
+// they are still active when NDEBUG is defined.
+int
+main(int argc, char* argv[])
+{
+  for (int i = 1; i < argc; ++i) {
+    // binary mode: the input is arbitrary bytes and must not be subject to
+    // any text mode translation.
+    std::ifstream in(argv[i], std::ios_base::binary);
+    if (!in) {
+      std::fprintf(stderr, "could not open %s\n", argv[i]);
+      return 1;
+    }
+    in.seekg(0, std::ios_base::end);
+    // tellg() reports failure as -1, which must never be used as a size.
+    const std::streamoff length = in.tellg();
+    if (length < 0) {
+      std::fprintf(stderr, "could not determine the size of %s\n", argv[i]);
+      return 1;
+    }
+    in.seekg(0, std::ios_base::beg);
+    std::vector<char> buf(static_cast<std::size_t>(length));
+    in.read(buf.data(), static_cast<std::streamsize>(buf.size()));
+    if (in.gcount() != static_cast<std::streamsize>(buf.size())) {
+      std::fprintf(stderr, "could not read all of %s\n", argv[i]);
+      return 1;
+    }
+    LLVMFuzzerTestOneInput(reinterpret_cast<const uint8_t*>(buf.data()),
+                           buf.size());
+  }
+  return 0;
+}
+#endif
diff --git a/fuzzing/fuzz_one_arg.cpp b/fuzzing/fuzz_one_arg.cpp
@@ -0,0 +1,122 @@
+#include <cstdint>
+#include <fmt/core.h>
+#include <stdexcept>
+#include <type_traits>
+#include <vector>
+
+#include <fmt/chrono.h>
+
+// Formats one argument of type Item with a fuzzer supplied format string.
+//
+// The first sizeof(Item) bytes of the input become the value of the argument
+// (for bool only the first byte is inspected, so the value is always exactly
+// true or false) and all remaining bytes are used as the format string.
+// Nothing is formatted when the input is too short for this layout.
+// No exception is caught here.
+template<typename Item>
+void
+doit(const uint8_t* Data, std::size_t Size)
+{
+  const auto N = sizeof(Item);
+  if (Size <= N) {
+    return;
+  }
+  Item item{};
+  if constexpr (std::is_same<Item, bool>::value) {
+    item = !!Data[0];
+  } else {
+    std::memcpy(&item, Data, N);
+  }
+  Data += N;
+  Size -= N;
+  // allocates as tight as possible, making it easier to catch buffer overruns.
+  // the vector is sized once to Size + 1 (growing it afterwards with resize()
+  // may reallocate and leave spare capacity behind). the extra, value
+  // initialized element null terminates the format string.
+  std::vector<char> buf(Size + 1);
+  std::memcpy(buf.data(), Data, Size);
+  std::string message = fmt::format(buf.data(), item);
+}
+
+// Formats a broken down time with a fuzzer supplied format string.
+//
+// The leading sizeof(std::time_t) bytes of the input are the time value, the
+// remaining bytes are the format string. Returns without formatting when the
+// input is too short or when std::localtime() returns null.
+void
+doit_time(const uint8_t* Data, std::size_t Size)
+{
+  constexpr std::size_t time_bytes = sizeof(std::time_t);
+  if (Size <= time_bytes) {
+    return;
+  }
+
+  std::time_t timestamp{};
+  std::memcpy(&timestamp, Data, time_bytes);
+
+  const uint8_t* const format_begin = Data + time_bytes;
+  const std::size_t format_length = Size - time_bytes;
+
+  // exactly one element more than the payload: a tight allocation makes
+  // buffer overruns easier to catch and the extra, value initialized
+  // element null terminates the format string.
+  std::vector<char> format_string(format_length + 1);
+  std::memcpy(format_string.data(), format_begin, format_length);
+
+  auto* broken_down = std::localtime(&timestamp);
+  if (broken_down == nullptr) {
+    return;
+  }
+  std::string message = fmt::format(format_string.data(), *broken_down);
+}
+
+// Fuzzer entry point for the single argument fuzzer.
+//
+// The first input byte selects what is formatted; the rest of the input is
+// handed to the selected function:
+//   0 bool, 1 char, 2 short, 3 int, 4 long, 5 float, 6 double,
+//   7 long double, 8 time (doit_time)
+// Any other selector value does nothing. Always returns 0.
+extern "C" int
+LLVMFuzzerTestOneInput(const uint8_t* Data, std::size_t Size)
+{
+  // inputs of three bytes or less are ignored
+  if (Size < 4) {
+    return 0;
+  }
+
+  using Handler = void (*)(const uint8_t*, std::size_t);
+  // indexed by the selector byte
+  constexpr Handler handlers[] = {
+    &doit<bool>,   &doit<char>,  &doit<short>,  &doit<int>,         &doit<long>,
+    &doit<float>,  &doit<double>, &doit<long double>, &doit_time,
+  };
+  constexpr std::size_t handler_count = sizeof(handlers) / sizeof(handlers[0]);
+
+  const std::size_t selector = Data[0];
+  if (selector >= handler_count) {
+    return 0;
+  }
+
+  try {
+    handlers[selector](Data + 1, Size - 1);
+  } catch (std::exception&) {
+    // anything derived from std::exception is deliberately ignored
+  }
+  return 0;
+}
+
+#ifdef IMPLEMENT_MAIN
+#include <cassert>
+#include <cstdio>
+#include <fstream>
+#include <sstream>
+#include <vector>
+// Stand-alone driver, only compiled when IMPLEMENT_MAIN is defined.
+//
+// Every command line argument is treated as the path of an input file: the
+// file is read completely and passed to LLVMFuzzerTestOneInput().
+// Returns 0 when all files were processed and 1 as soon as a file could not
+// be opened or read. The checks are explicit instead of assert() based, so
+// they are still active when NDEBUG is defined.
+int
+main(int argc, char* argv[])
+{
+  for (int i = 1; i < argc; ++i) {
+    // binary mode: the input is arbitrary bytes and must not be subject to
+    // any text mode translation.
+    std::ifstream in(argv[i], std::ios_base::binary);
+    if (!in) {
+      std::fprintf(stderr, "could not open %s\n", argv[i]);
+      return 1;
+    }
+    in.seekg(0, std::ios_base::end);
+    // tellg() reports failure as -1, which must never be used as a size.
+    const std::streamoff length = in.tellg();
+    if (length < 0) {
+      std::fprintf(stderr, "could not determine the size of %s\n", argv[i]);
+      return 1;
+    }
+    in.seekg(0, std::ios_base::beg);
+    std::vector<char> buf(static_cast<std::size_t>(length));
+    in.read(buf.data(), static_cast<std::streamsize>(buf.size()));
+    if (in.gcount() != static_cast<std::streamsize>(buf.size())) {
+      std::fprintf(stderr, "could not read all of %s\n", argv[i]);
+      return 1;
+    }
+    LLVMFuzzerTestOneInput(reinterpret_cast<const uint8_t*>(buf.data()),
+                           buf.size());
+  }
+  return 0;
+}
+#endif