diff --git a/CMake/FindLZ4.cmake b/CMake/FindLZ4.cmake new file mode 100644 index 00000000000..671c7526d29 --- /dev/null +++ b/CMake/FindLZ4.cmake @@ -0,0 +1,22 @@ +# Finds liblz4. +# +# This module defines: +# LZ4_FOUND +# LZ4_INCLUDE_DIR +# LZ4_LIBRARY +# + +find_path(LZ4_INCLUDE_DIR NAMES lz4.h) +find_library(LZ4_LIBRARY NAMES lz4) + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS( + LZ4 DEFAULT_MSG + LZ4_LIBRARY LZ4_INCLUDE_DIR +) + +if (LZ4_FOUND) + message(STATUS "Found LZ4: ${LZ4_LIBRARY}") +endif() + +mark_as_advanced(LZ4_INCLUDE_DIR LZ4_LIBRARY) diff --git a/CMake/FindSnappy.cmake b/CMake/FindSnappy.cmake new file mode 100644 index 00000000000..44beb17fc73 --- /dev/null +++ b/CMake/FindSnappy.cmake @@ -0,0 +1,17 @@ +# Find the Snappy libraries +# +# This module defines: +# SNAPPY_FOUND +# SNAPPY_INCLUDE_DIR +# SNAPPY_LIBRARY + +find_path(SNAPPY_INCLUDE_DIR NAMES snappy.h) +find_library(SNAPPY_LIBRARY NAMES snappy) + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS( + SNAPPY DEFAULT_MSG + SNAPPY_LIBRARY SNAPPY_INCLUDE_DIR +) + +mark_as_advanced(SNAPPY_INCLUDE_DIR SNAPPY_LIBRARY) diff --git a/CMake/FindZstd.cmake b/CMake/FindZstd.cmake new file mode 100644 index 00000000000..ba5e3e1c00f --- /dev/null +++ b/CMake/FindZstd.cmake @@ -0,0 +1,22 @@ +# +# - Try to find Facebook zstd library +# This will define +# ZSTD_FOUND +# ZSTD_INCLUDE_DIR +# ZSTD_LIBRARY +# + +find_path(ZSTD_INCLUDE_DIR NAMES zstd.h) +find_library(ZSTD_LIBRARY NAMES zstd) + +include(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS( + ZSTD DEFAULT_MSG + ZSTD_LIBRARY ZSTD_INCLUDE_DIR +) + +if (ZSTD_FOUND) + message(STATUS "Found Zstd: ${ZSTD_LIBRARY}") +endif() + +mark_as_advanced(ZSTD_INCLUDE_DIR ZSTD_LIBRARY) diff --git a/CMake/FollyCompilerUnix.cmake b/CMake/FollyCompilerUnix.cmake index 7bf94741217..14cee30ecc3 100644 --- a/CMake/FollyCompilerUnix.cmake +++ b/CMake/FollyCompilerUnix.cmake @@ -11,7 +11,7 @@ function(apply_folly_compile_options_to_target THETARGET) "FOLLY_XLOG_STRIP_PREFIXES=\"${FOLLY_DIR_PREFIXES}\"" ) target_compile_options(${THETARGET} - PUBLIC + PRIVATE -g -std=gnu++14 -finput-charset=UTF-8 @@ -23,6 +23,7 @@ function(apply_folly_compile_options_to_target THETARGET) -Wno-error=deprecated-declarations -Wno-sign-compare -Wno-unused + -Wno-inconsistent-missing-override -Wunused-label -Wunused-result -Wnon-virtual-dtor diff --git a/CMake/FollyConfigChecks.cmake b/CMake/FollyConfigChecks.cmake index af9d06ff649..7a2e12e87ac 100644 --- a/CMake/FollyConfigChecks.cmake +++ b/CMake/FollyConfigChecks.cmake @@ -7,7 +7,6 @@ include(CheckTypeSize) include(CheckCXXCompilerFlag) CHECK_INCLUDE_FILE_CXX(malloc.h FOLLY_HAVE_MALLOC_H) -CHECK_INCLUDE_FILE_CXX(bits/functexcept.h FOLLY_HAVE_BITS_FUNCTEXCEPT_H) CHECK_INCLUDE_FILE_CXX(bits/c++config.h FOLLY_HAVE_BITS_CXXCONFIG_H) CHECK_INCLUDE_FILE_CXX(features.h FOLLY_HAVE_FEATURES_H) CHECK_INCLUDE_FILE_CXX(linux/membarrier.h FOLLY_HAVE_LINUX_MEMBARRIER_H) @@ -32,13 +31,13 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL "Windows") list(APPEND FOLLY_CXX_FLAGS -Wshadow-compatible-local) endif() - CHECK_CXX_COMPILER_FLAG(-Wno-noexcept-type COMPILER_HAS_W_NOEXCEPT_TYPE) + CHECK_CXX_COMPILER_FLAG(-Wnoexcept-type COMPILER_HAS_W_NOEXCEPT_TYPE) if (COMPILER_HAS_W_NOEXCEPT_TYPE) list(APPEND FOLLY_CXX_FLAGS -Wno-noexcept-type) endif() CHECK_CXX_COMPILER_FLAG( - -Wno-nullability-completeness + -Wnullability-completeness COMPILER_HAS_W_NULLABILITY_COMPLETENESS) if (COMPILER_HAS_W_NULLABILITY_COMPLETENESS) list(APPEND 
FOLLY_CXX_FLAGS -Wno-nullability-completeness) @@ -124,11 +123,16 @@ check_type_size(__int128 INT128_SIZE LANGUAGE CXX) if (NOT INT128_SIZE STREQUAL "") set(FOLLY_HAVE_INT128_T ON) check_cxx_source_compiles(" + #include #include + #include static_assert( ::std::is_same<::std::make_signed::type, __int128>::value, \"signed form of 'unsigned __uint128' must be '__int128'.\"); + static_assert( + sizeof(::std::hash<__int128>{}(0)) > 0, \ + \"std::hash<__int128> is disabled.\"); int main() { return 0; }" HAVE_INT128_TRAITS ) diff --git a/CMake/folly-config.cmake.in b/CMake/folly-config.cmake.in new file mode 100644 index 00000000000..35016a56584 --- /dev/null +++ b/CMake/folly-config.cmake.in @@ -0,0 +1,19 @@ +# CMake configuration file for folly +# It defines the following variables +# FOLLY_INCLUDE_DIRS +# FOLLY_LIBRARIES + +# Compute paths +get_filename_component(FOLLY_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +get_filename_component( + FOLLY_INCLUDE_DIRS + "${FOLLY_CMAKE_DIR}/../../include" + ABSOLUTE) + +# Include the folly-targets.cmake file, which is generated from our CMake rules +if(NOT TARGET Folly::folly) + include("${FOLLY_CMAKE_DIR}/folly-targets.cmake") +endif() + +# Set FOLLY_LIBRARIES from our Folly::folly target +set(FOLLY_LIBRARIES Folly::folly) diff --git a/CMake/folly-config.h.cmake b/CMake/folly-config.h.cmake index 51debe7a7cf..840af127f1f 100755 --- a/CMake/folly-config.h.cmake +++ b/CMake/folly-config.h.cmake @@ -36,7 +36,6 @@ #cmakedefine FOLLY_HAVE_LIBGLOG 1 #cmakedefine FOLLY_HAVE_MALLOC_H 1 -#cmakedefine FOLLY_HAVE_BITS_FUNCTEXCEPT_H 1 #cmakedefine FOLLY_HAVE_BITS_CXXCONFIG_H 1 #cmakedefine FOLLY_HAVE_FEATURES_H 1 #cmakedefine FOLLY_HAVE_LINUX_MEMBARRIER_H 1 diff --git a/CMake/folly-deps.cmake b/CMake/folly-deps.cmake index 0e687b983fc..591c0e44233 100755 --- a/CMake/folly-deps.cmake +++ b/CMake/folly-deps.cmake @@ -26,7 +26,13 @@ find_package(gflags CONFIG QUIET) if (gflags_FOUND) message(STATUS "Found gflags from package config") set(FOLLY_HAVE_LIBGFLAGS ON) - set(FOLLY_SHINY_DEPENDENCIES ${FOLLY_SHINY_DEPENDENCIES} gflags) + if (TARGET gflags-shared) + set(FOLLY_SHINY_DEPENDENCIES ${FOLLY_SHINY_DEPENDENCIES} gflags-shared) + elseif (TARGET gflags) + set(FOLLY_SHINY_DEPENDENCIES ${FOLLY_SHINY_DEPENDENCIES} gflags) + else() + message(FATAL_ERROR "Unable to determine the target name for the GFlags package.") + endif() list(APPEND CMAKE_REQUIRED_LIBRARIES ${GFLAGS_LIBRARIES}) list(APPEND CMAKE_REQUIRED_INCLUDES ${GFLAGS_INCLUDE_DIR}) else() @@ -98,19 +104,25 @@ if (LIBLZMA_FOUND) list(APPEND FOLLY_LINK_LIBRARIES ${LIBLZMA_LIBRARIES}) endif() -# TODO: We should ideally build FindXXX modules for the following libraries, -# rather than the simple checks we currently have here. 
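[editor note] For reference, the HAVE_INT128_TRAITS probe added in FollyConfigChecks.cmake above compiles roughly the following standalone translation unit; the exact headers the check pulls in are assumed here.

    // Sketch of the HAVE_INT128_TRAITS probe; header choice assumed.
    #include <functional>   // std::hash
    #include <type_traits>  // std::is_same, std::make_signed

    static_assert(
        ::std::is_same<::std::make_signed<unsigned __int128>::type,
                       __int128>::value,
        "signed form of 'unsigned __int128' must be '__int128'.");
    // Some standard libraries ship std::hash<__int128> only in GNU modes;
    // instantiating it makes the probe fail where it is disabled.
    static_assert(
        sizeof(::std::hash<__int128>{}(0)) > 0,
        "std::hash<__int128> is disabled.");

    int main() { return 0; }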
-CHECK_INCLUDE_FILE_CXX(zstd.h FOLLY_HAVE_LIBZSTD) -if (FOLLY_HAVE_LIBZSTD) - list(APPEND FOLLY_LINK_LIBRARIES zstd) +find_package(LZ4 MODULE) +set(FOLLY_HAVE_LIBLZ4 ${LZ4_FOUND}) +if (LZ4_FOUND) + list(APPEND FOLLY_INCLUDE_DIRECTORIES ${LZ4_INCLUDE_DIR}) + list(APPEND FOLLY_LINK_LIBRARIES ${LZ4_LIBRARY}) endif() -CHECK_INCLUDE_FILE_CXX(snappy.h FOLLY_HAVE_LIBSNAPPY) -if (FOLLY_HAVE_LIBSNAPPY) - list(APPEND FOLLY_LINK_LIBRARIES snappy) + +find_package(Zstd MODULE) +set(FOLLY_HAVE_LIBZSTD ${ZSTD_FOUND}) +if(ZSTD_FOUND) + list(APPEND FOLLY_INCLUDE_DIRECTORIES ${ZSTD_INCLUDE_DIR}) + list(APPEND FOLLY_LINK_LIBRARIES ${ZSTD_LIBRARY}) endif() -CHECK_INCLUDE_FILE_CXX(lz4.h FOLLY_HAVE_LIBLZ4) -if (FOLLY_HAVE_LIBLZ4) - list(APPEND FOLLY_LINK_LIBRARIES lz4) + +find_package(Snappy MODULE) +set(FOLLY_HAVE_LIBSNAPPY ${SNAPPY_FOUND}) +if (SNAPPY_FOUND) + list(APPEND FOLLY_INCLUDE_DIRECTORIES ${SNAPPY_INCLUDE_DIR}) + list(APPEND FOLLY_LINK_LIBRARIES ${SNAPPY_LIBRARY}) endif() find_package(LibDwarf) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5dfd48eb546..22e02955fe9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -267,12 +267,20 @@ add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/folly_dep.cpp COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/folly_dep.cpp DEPENDS folly_base folly_fingerprint ) -add_library(folly ${CMAKE_CURRENT_BINARY_DIR}/folly_dep.cpp $) +add_library(folly + ${CMAKE_CURRENT_BINARY_DIR}/folly_dep.cpp + $ +) apply_folly_compile_options_to_target(folly) source_group("" FILES ${CMAKE_CURRENT_BINARY_DIR}/folly_dep.cpp) target_link_libraries(folly PUBLIC ${FOLLY_LINK_LIBRARIES}) -target_include_directories(folly PUBLIC $) +target_include_directories(folly + INTERFACE + ${FOLLY_INCLUDE_DIRECTORIES} + $ + $ +) install(TARGETS folly EXPORT folly @@ -285,23 +293,33 @@ auto_install_files(folly ${FOLLY_DIR} ${FOLLY_DIR}/detail/SlowFingerprint.h ${FOLLY_DIR}/detail/FingerprintPolynomial.h ) -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/folly/folly-config.h DESTINATION include/folly) +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/folly/folly-config.h + DESTINATION include/folly + COMPONENT dev +) + +# Generate the folly-config.cmake file for installation so that +# downstream projects that use on folly can easily depend on it in their CMake +# files using "find_package(folly CONFIG)" +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/CMake/folly-config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/folly-config.cmake + @ONLY +) +install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/folly-config.cmake + DESTINATION share/folly + COMPONENT dev +) install( EXPORT folly DESTINATION share/folly NAMESPACE Folly:: FILE folly-targets.cmake + COMPONENT dev ) -# We need a wrapper config file to do the find_package calls to ensure -# that all our dependencies are available to link against. -file( - COPY ${CMAKE_CURRENT_SOURCE_DIR}/CMake/folly-deps.cmake - DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/ -) -file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/folly-deps.cmake "\ninclude(folly-targets.cmake)\n") -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/folly-deps.cmake DESTINATION share/folly RENAME folly-config.cmake) - option(BUILD_TESTS "If enabled, compile the tests." OFF) option(BUILD_BROKEN_TESTS "If enabled, compile tests that are known to be broken." OFF) option(BUILD_HANGING_TESTS "If enabled, compile tests that are known to hang." 
OFF) @@ -332,6 +350,8 @@ if (BUILD_TESTS) ${FOLLY_DIR}/test/SingletonTestStructs.cpp ${FOLLY_DIR}/test/SocketAddressTestHelper.cpp ${FOLLY_DIR}/test/SocketAddressTestHelper.h + ${FOLLY_DIR}/experimental/logging/test/ConfigHelpers.cpp + ${FOLLY_DIR}/experimental/logging/test/ConfigHelpers.h ${FOLLY_DIR}/experimental/logging/test/TestLogHandler.cpp ${FOLLY_DIR}/experimental/logging/test/TestLogHandler.h ${FOLLY_DIR}/futures/test/TestExecutor.cpp @@ -387,6 +407,8 @@ if (BUILD_TESTS) # EnumerateTest.cpp since it uses macros to define tests. #TEST enumerate_test SOURCES EnumerateTest.cpp TEST evicting_cache_map_test SOURCES EvictingCacheMapTest.cpp + TEST f14_map_test SOURCES F14MapTest.cpp + TEST f14_set_test SOURCES F14SetTest.cpp TEST foreach_test SOURCES ForeachTest.cpp TEST merge_test SOURCES MergeTest.cpp TEST sparse_byte_set_test SOURCES SparseByteSetTest.cpp diff --git a/folly/CPortability.h b/folly/CPortability.h index 988022753b4..61898fa6fef 100644 --- a/folly/CPortability.h +++ b/folly/CPortability.h @@ -19,6 +19,8 @@ /* These definitions are in a separate file so that they * may be included from C- as well as C++-based projects. */ +#include + /** * Portable version check. */ @@ -154,3 +156,10 @@ #else #define FOLLY_ATTR_VISIBILITY_HIDDEN #endif + +// An attribute for marking symbols as weak, if supported +#if FOLLY_HAVE_WEAK_SYMBOLS +#define FOLLY_ATTR_WEAK __attribute__((__weak__)) +#else +#define FOLLY_ATTR_WEAK +#endif diff --git a/folly/ConcurrentSkipList-inl.h b/folly/ConcurrentSkipList-inl.h index 8476393e868..183ce737654 100644 --- a/folly/ConcurrentSkipList-inl.h +++ b/folly/ConcurrentSkipList-inl.h @@ -33,8 +33,8 @@ #include #include -#include #include +#include namespace folly { namespace detail { @@ -62,22 +62,24 @@ class SkipListNode : private boost::noncopyable { size_t size = sizeof(SkipListNode) + height * sizeof(std::atomic); - auto* node = static_cast(alloc.allocate(size)); + auto storage = std::allocator_traits::allocate(alloc, size); // do placement new - new (node) SkipListNode(uint8_t(height), std::forward(data), isHead); - return node; + return new (storage) + SkipListNode(uint8_t(height), std::forward(data), isHead); } template static void destroy(NodeAlloc& alloc, SkipListNode* node) { + size_t size = sizeof(SkipListNode) + + node->height_ * sizeof(std::atomic); node->~SkipListNode(); - alloc.deallocate(node); + std::allocator_traits::deallocate(alloc, node, size); } template - struct DestroyIsNoOp : std::integral_constant::value && - boost::has_trivial_destructor::value> { }; + struct DestroyIsNoOp : StrictConjunction< + AllocatorHasTrivialDeallocate, + boost::has_trivial_destructor> {}; // copy the head node to a new head node assuming lock acquired SkipListNode* copyHead(SkipListNode* node) { diff --git a/folly/ConcurrentSkipList.h b/folly/ConcurrentSkipList.h index 10e398541b3..2d508e49bca 100644 --- a/folly/ConcurrentSkipList.h +++ b/folly/ConcurrentSkipList.h @@ -131,16 +131,16 @@ Sample usage: #include #include #include -#include +#include namespace folly { template < typename T, typename Comp = std::less, - // All nodes are allocated using provided SimpleAllocator, + // All nodes are allocated using provided SysAllocator, // it should be thread-safe. 
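[editor note] The SkipListNode create/destroy rewrite above is the standard std::allocator_traits dance for a variable-sized node: raw storage from allocate(), the object from placement new, and a size-matched deallocate() on the way out. A minimal standalone sketch of the same pattern, with a simplified node type and a byte-oriented allocator assumed:

    #include <cstddef>
    #include <memory>
    #include <new>

    // ByteAlloc is assumed to allocate char, so counts are sizes in bytes,
    // matching how SkipListNode sizes its trailing atomic array.
    template <typename T, typename ByteAlloc>
    T* createNode(ByteAlloc& alloc, std::size_t bytes) {
      void* storage = std::allocator_traits<ByteAlloc>::allocate(alloc, bytes);
      return new (storage) T();  // placement new constructs into raw storage
    }

    template <typename T, typename ByteAlloc>
    void destroyNode(ByteAlloc& alloc, T* node, std::size_t bytes) {
      // deallocate() must receive the same size passed to allocate(), which
      // is why destroy() above recomputes the node footprint first.
      node->~T();
      std::allocator_traits<ByteAlloc>::deallocate(
          alloc, reinterpret_cast<char*>(node), bytes);
    }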
- typename NodeAlloc = SysAlloc, + typename NodeAlloc = SysAllocator, int MAX_HEIGHT = 24> class ConcurrentSkipList { // MAX_HEIGHT needs to be at least 2 to suppress compiler diff --git a/folly/ConstexprMath.h b/folly/ConstexprMath.h index f8defbd043b..ca9532954b0 100644 --- a/folly/ConstexprMath.h +++ b/folly/ConstexprMath.h @@ -16,6 +16,8 @@ #pragma once +#include +#include #include namespace folly { @@ -146,4 +148,55 @@ constexpr T constexpr_pow(T base, std::size_t exp) { (exp % 2 ? base : T(1)); } +template +constexpr T constexpr_add_overflow_clamped(T a, T b) { + using L = std::numeric_limits; + using M = std::intmax_t; + static_assert( + !std::is_integral::value || sizeof(T) <= sizeof(M), + "Integral type too large!"); + // clang-format off + return + // don't do anything special for non-integral types. + !std::is_integral::value ? a + b : + // for narrow integral types, just convert to intmax_t. + sizeof(T) < sizeof(M) + ? T(constexpr_clamp(M(a) + M(b), M(L::min()), M(L::max()))) : + // when a >= 0, cannot add more than `MAX - a` onto a. + !(a < 0) ? a + constexpr_min(b, T(L::max() - a)) : + // a < 0 && b >= 0, `a + b` will always be in valid range of type T. + !(b < 0) ? a + b : + // a < 0 && b < 0, keep the result >= MIN. + a + constexpr_max(b, T(L::min() - a)); + // clang-format on +} + +template +constexpr T constexpr_sub_overflow_clamped(T a, T b) { + using L = std::numeric_limits; + using M = std::intmax_t; + static_assert( + !std::is_integral::value || sizeof(T) <= sizeof(M), + "Integral type too large!"); + // clang-format off + return + // don't do anything special for non-integral types. + !std::is_integral::value ? a - b : + // for unsigned type, keep result >= 0. + std::is_unsigned::value ? (a < b ? 0 : a - b) : + // for narrow signed integral types, just convert to intmax_t. + sizeof(T) < sizeof(M) + ? T(constexpr_clamp(M(a) - M(b), M(L::min()), M(L::max()))) : + // (a >= 0 && b >= 0) || (a < 0 && b < 0), `a - b` will always be valid. + (a < 0) == (b < 0) ? a - b : + // MIN < b, so `-b` should be in valid range (-MAX <= -b <= MAX), + // convert subtraction to addition. + L::min() < b ? constexpr_add_overflow_clamped(a, T(-b)) : + // -b = -MIN = (MAX + 1) and a <= -1, result is in valid range. + a < 0 ? a - b : + // -b = -MIN = (MAX + 1) and a >= 0, result > MAX. + L::max(); + // clang-format on +} + } // namespace folly diff --git a/folly/DynamicConverter.h b/folly/DynamicConverter.h index c486d671280..3212df47047 100644 --- a/folly/DynamicConverter.h +++ b/folly/DynamicConverter.h @@ -368,6 +368,21 @@ struct DynamicConstructor, void> { } }; +// vector +template <> +struct DynamicConstructor, void> { + static dynamic construct(const std::vector& x) { + dynamic d = dynamic::array; + // Intentionally specifying the type as bool here. + // std::vector's iterators return a proxy which is a prvalue + // and hence cannot bind to an lvalue reference such as auto& + for (bool item : x) { + d.push_back(toDynamic(item)); + } + return d; + } +}; + /////////////////////////////////////////////////////////////////////////////// // implementation diff --git a/folly/FBString.h b/folly/FBString.h index aa312b1dc9d..35e6d8540b9 100644 --- a/folly/FBString.h +++ b/folly/FBString.h @@ -56,8 +56,8 @@ #include #include +#include #include -#include // When used in folly, assertions are not disabled. 
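[editor note] The clamped-arithmetic helpers added in ConstexprMath.h are easiest to read through their edge cases. A few checks that follow directly from the definitions, as a sketch rather than the library's test suite:

    #include <cstdint>
    #include <folly/ConstexprMath.h>

    // Additions that would overflow saturate at the numeric limits...
    static_assert(
        folly::constexpr_add_overflow_clamped(int8_t(100), int8_t(100)) == 127, "");
    // ...and likewise in the negative direction.
    static_assert(
        folly::constexpr_add_overflow_clamped(int8_t(-100), int8_t(-100)) == -128, "");
    // For unsigned types, subtraction clamps at zero instead of wrapping.
    static_assert(
        folly::constexpr_sub_overflow_clamped(uint8_t(1), uint8_t(2)) == 0, "");
    // In-range operands pass through untouched.
    static_assert(
        folly::constexpr_add_overflow_clamped(int8_t(3), int8_t(4)) == 7, "");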
#define FBSTRING_ASSERT(expr) assert(expr) @@ -83,7 +83,7 @@ FOLLY_GCC_DISABLE_WARNING("-Wshadow") FOLLY_GCC_DISABLE_WARNING("-Warray-bounds") // FBString cannot use throw when replacing std::string, though it may still -// use std::__throw_* +// use folly::throw_exception // nolint #define throw FOLLY_FBSTRING_MAY_NOT_USE_THROW @@ -1052,12 +1052,10 @@ template < class Storage = fbstring_core> #endif class basic_fbstring { - static void enforce( - bool condition, - void (*throw_exc)(const char*), - const char* msg) { + template + FOLLY_ALWAYS_INLINE static void enforce(bool condition, Args&&... args) { if (!condition) { - throw_exc(msg); + throw_exception(static_cast(args)...); } } @@ -1100,16 +1098,8 @@ class basic_fbstring { typedef E* iterator; typedef const E* const_iterator; - typedef std::reverse_iterator reverse_iterator; - typedef std::reverse_iterator const_reverse_iterator; + typedef std::reverse_iterator reverse_iterator; + typedef std::reverse_iterator const_reverse_iterator; static constexpr size_type npos = size_type(-1); typedef std::true_type IsRelocatable; @@ -1338,7 +1328,7 @@ class basic_fbstring { size_type capacity() const { return store_.capacity(); } void reserve(size_type res_arg = 0) { - enforce(res_arg <= max_size(), std::__throw_length_error, ""); + enforce(res_arg <= max_size(), ""); store_.reserve(res_arg); } @@ -1364,12 +1354,12 @@ class basic_fbstring { } const_reference at(size_type n) const { - enforce(n <= size(), std::__throw_out_of_range, ""); + enforce(n < size(), ""); return (*this)[n]; } reference at(size_type n) { - enforce(n < size(), std::__throw_out_of_range, ""); + enforce(n < size(), ""); return (*this)[n]; } @@ -1454,13 +1444,13 @@ class basic_fbstring { basic_fbstring& insert(size_type pos1, const basic_fbstring& str, size_type pos2, size_type n) { - enforce(pos2 <= str.length(), std::__throw_out_of_range, ""); + enforce(pos2 <= str.length(), ""); procrustes(n, str.length() - pos2); return insert(pos1, str.data() + pos2, n); } basic_fbstring& insert(size_type pos, const value_type* s, size_type n) { - enforce(pos <= length(), std::__throw_out_of_range, ""); + enforce(pos <= length(), ""); insert(begin() + pos, s, s + n); return *this; } @@ -1470,7 +1460,7 @@ class basic_fbstring { } basic_fbstring& insert(size_type pos, size_type n, value_type c) { - enforce(pos <= length(), std::__throw_out_of_range, ""); + enforce(pos <= length(), ""); insert(begin() + pos, n, c); return *this; } @@ -1536,7 +1526,7 @@ class basic_fbstring { basic_fbstring& erase(size_type pos = 0, size_type n = npos) { Invariant checker(*this); - enforce(pos <= length(), std::__throw_out_of_range, ""); + enforce(pos <= length(), ""); procrustes(n, length() - pos); std::copy(begin() + pos + n, end(), begin() + pos); resize(length() - n); @@ -1545,7 +1535,7 @@ class basic_fbstring { iterator erase(iterator position) { const size_type pos(position - begin()); - enforce(pos <= size(), std::__throw_out_of_range, ""); + enforce(pos <= size(), ""); erase(pos, 1); return begin() + pos; } @@ -1568,7 +1558,7 @@ class basic_fbstring { basic_fbstring& replace(size_type pos1, size_type n1, const basic_fbstring& str, size_type pos2, size_type n2) { - enforce(pos2 <= str.length(), std::__throw_out_of_range, ""); + enforce(pos2 <= str.length(), ""); return replace(pos1, n1, str.data() + pos2, std::min(n2, str.size() - pos2)); } @@ -1590,7 +1580,7 @@ class basic_fbstring { StrOrLength s_or_n2, NumOrChar n_or_c) { Invariant checker(*this); - enforce(pos <= size(), std::__throw_out_of_range, 
""); + enforce(pos <= size(), ""); procrustes(n1, length() - pos); const iterator b = begin() + pos; return replace(b, b + n1, s_or_n2, n_or_c); @@ -1674,7 +1664,7 @@ class basic_fbstring { } size_type copy(value_type* s, size_type n, size_type pos = 0) const { - enforce(pos <= size(), std::__throw_out_of_range, ""); + enforce(pos <= size(), ""); procrustes(n, size() - pos); if (n != 0) { @@ -1792,12 +1782,12 @@ class basic_fbstring { } basic_fbstring substr(size_type pos = 0, size_type n = npos) const& { - enforce(pos <= size(), std::__throw_out_of_range, ""); + enforce(pos <= size(), ""); return basic_fbstring(data() + pos, std::min(n, size() - pos)); } basic_fbstring substr(size_type pos = 0, size_type n = npos) && { - enforce(pos <= size(), std::__throw_out_of_range, ""); + enforce(pos <= size(), ""); erase(0, pos); if (n < size()) { resize(n); @@ -1822,7 +1812,7 @@ class basic_fbstring { int compare(size_type pos1, size_type n1, const value_type* s, size_type n2) const { - enforce(pos1 <= size(), std::__throw_out_of_range, ""); + enforce(pos1 <= size(), ""); procrustes(n1, size() - pos1); // The line below fixed by Jean-Francois Bastien, 04-23-2007. Thanks! const int r = traits_type::compare(pos1 + data(), s, std::min(n1, n2)); @@ -1832,7 +1822,7 @@ class basic_fbstring { int compare(size_type pos1, size_type n1, const basic_fbstring& str, size_type pos2, size_type n2) const { - enforce(pos2 <= str.size(), std::__throw_out_of_range, ""); + enforce(pos2 <= str.size(), ""); return compare(pos1, n1, str.data() + pos2, std::min(n2, str.size() - pos2)); } @@ -1855,7 +1845,7 @@ template FOLLY_MALLOC_NOINLINE inline typename basic_fbstring::size_type basic_fbstring::traitsLength(const value_type* s) { return s ? traits_type::length(s) - : (std::__throw_logic_error( + : (throw_exception( "basic_fbstring: null pointer initializer not valid"), 0); } @@ -1893,7 +1883,7 @@ template inline typename std::enable_if< std::is_same< typename std::decay::type, - typename basic_fbstring::value_type>::value, + typename folly::basic_fbstring::value_type>::value, basic_fbstring&>::type basic_fbstring::operator=(TP c) { Invariant checker(*this); @@ -1941,7 +1931,7 @@ template inline basic_fbstring& basic_fbstring::append( const basic_fbstring& str, const size_type pos, size_type n) { const size_type sz = str.size(); - enforce(pos <= sz, std::__throw_out_of_range, ""); + enforce(pos <= sz, ""); procrustes(n, sz - pos); return append(str.data() + pos, n); } @@ -1992,7 +1982,7 @@ template inline basic_fbstring& basic_fbstring::assign( const basic_fbstring& str, const size_type pos, size_type n) { const size_type sz = str.size(); - enforce(pos <= sz, std::__throw_out_of_range, ""); + enforce(pos <= sz, ""); procrustes(n, sz - pos); return assign(str.data() + pos, n); } diff --git a/folly/FBVector.h b/folly/FBVector.h index 6d1a9de99cd..de9d722634a 100644 --- a/folly/FBVector.h +++ b/folly/FBVector.h @@ -38,8 +38,8 @@ #include #include #include +#include #include -#include //============================================================================= // forward declaration @@ -1073,7 +1073,8 @@ class fbvector { } const_reference at(size_type n) const { if (UNLIKELY(n >= size())) { - std::__throw_out_of_range("fbvector: index is greater than size."); + throw_exception( + "fbvector: index is greater than size."); } return (*this)[n]; } diff --git a/folly/FixedString.h b/folly/FixedString.h index a94abbfd9c5..d4fa907ae11 100644 --- a/folly/FixedString.h +++ b/folly/FixedString.h @@ -32,7 +32,7 @@ #include #include 
#include -#include +#include #include namespace folly { @@ -63,8 +63,9 @@ using FixedStringBase = FixedStringBase_<>; // it's testing for fails. In this way, precondition violations are reported // at compile-time instead of at runtime. [[noreturn]] inline void assertOutOfBounds() { - assert(false && "Array index out of bounds in BasicFixedString"); - std::__throw_out_of_range("Array index out of bounds in BasicFixedString"); + assert(!"Array index out of bounds in BasicFixedString"); + throw_exception( + "Array index out of bounds in BasicFixedString"); } constexpr std::size_t checkOverflow(std::size_t i, std::size_t max) { @@ -79,9 +80,7 @@ constexpr std::size_t checkOverflowOrNpos(std::size_t i, std::size_t max) { // Intentionally NOT constexpr. See note above for assertOutOfBounds [[noreturn]] inline void assertNotNullTerminated() noexcept { - assert( - false && - "Non-null terminated string used to initialize a BasicFixedString"); + assert(!"Non-null terminated string used to initialize a BasicFixedString"); std::terminate(); // Fail hard, fail fast. } @@ -1104,20 +1103,20 @@ class BasicFixedString : private detail::fixedstring::FixedStringBase { * \throw std::out_of_range when i > size() */ FOLLY_CPP14_CONSTEXPR Char& at(std::size_t i) noexcept(false) { - return i <= size_ - ? data_[i] - : (std::__throw_out_of_range("Out of range in BasicFixedString::at"), - data_[size_]); + return i <= size_ ? data_[i] + : (throw_exception( + "Out of range in BasicFixedString::at"), + data_[size_]); } /** * \overload */ constexpr const Char& at(std::size_t i) const noexcept(false) { - return i <= size_ - ? data_[i] - : (std::__throw_out_of_range("Out of range in BasicFixedString::at"), - data_[size_]); + return i <= size_ ? data_[i] + : (throw_exception( + "Out of range in BasicFixedString::at"), + data_[size_]); } /** diff --git a/folly/Lazy.h b/folly/Lazy.h index f1d81cad514..e821dfb12ef 100644 --- a/folly/Lazy.h +++ b/folly/Lazy.h @@ -90,8 +90,15 @@ template struct Lazy { typedef typename std::result_of::type result_type; + static_assert( + !std::is_const::value, + "Func should not be a const-qualified type"); + static_assert( + !std::is_reference::value, + "Func should not be a reference type"); + explicit Lazy(Func&& f) : func_(std::move(f)) {} - explicit Lazy(Func& f) : func_(f) {} + explicit Lazy(const Func& f) : func_(f) {} Lazy(Lazy&& o) : value_(std::move(o.value_)) @@ -103,19 +110,26 @@ struct Lazy { Lazy& operator=(Lazy&&) = delete; const result_type& operator()() const { - return const_cast(*this)(); + ensure_initialized(); + + return *value_; } result_type& operator()() { + ensure_initialized(); + + return *value_; + } + + private: + void ensure_initialized() const { if (!value_) { value_ = func_(); } - return *value_; } - private: - Optional value_; - Func func_; + mutable Optional value_; + mutable Func func_; }; } // namespace detail @@ -123,11 +137,8 @@ struct Lazy { ////////////////////////////////////////////////////////////////////// template -detail::Lazy::type> -lazy(Func&& fun) { - return detail::Lazy::type>( - std::forward(fun) - ); +auto lazy(Func&& fun) { + return detail::Lazy>(std::forward(fun)); } ////////////////////////////////////////////////////////////////////// diff --git a/folly/Makefile.am b/folly/Makefile.am index e17d62c386b..c16ea24ddef 100644 --- a/folly/Makefile.am +++ b/folly/Makefile.am @@ -59,9 +59,14 @@ nobase_follyinclude_HEADERS = \ container/Access.h \ container/Array.h \ container/detail/BitIteratorDetail.h \ + container/detail/F14Memory.h \ + 
container/detail/F14Policy.h \ + container/detail/F14Table.h \ container/Iterator.h \ container/Enumerate.h \ container/EvictingCacheMap.h \ + container/F14Map.h \ + container/F14Set.h \ container/Foreach.h \ container/Foreach-inl.h \ container/SparseByteSet.h \ @@ -101,6 +106,7 @@ nobase_follyinclude_HEADERS = \ executors/IOThreadPoolExecutor.h \ executors/NotificationQueueExecutor.h \ executors/ScheduledExecutor.h \ + executors/SequencedExecutor.h \ executors/SerialExecutor.h \ executors/ThreadPoolExecutor.h \ executors/ThreadedExecutor.h \ @@ -328,6 +334,7 @@ nobase_follyinclude_HEADERS = \ lang/Assume.h \ lang/Bits.h \ lang/ColdClass.h \ + lang/Exception.h \ lang/Launder.h \ lang/RValueReferenceWrapper.h \ lang/SafeAssert.h \ @@ -363,7 +370,6 @@ nobase_follyinclude_HEADERS = \ Portability.h \ portability/Asm.h \ portability/Atomic.h \ - portability/BitsFunctexcept.h \ portability/Builtins.h \ portability/Config.h \ portability/Constexpr.h \ @@ -438,6 +444,7 @@ nobase_follyinclude_HEADERS = \ synchronization/Baton.h \ synchronization/CallOnce.h \ synchronization/LifoSem.h \ + synchronization/MicroSpinLock.h \ synchronization/ParkingLot.h \ synchronization/PicoSpinLock.h \ synchronization/RWSpinLock.h \ @@ -460,7 +467,8 @@ nobase_follyinclude_HEADERS = \ Synchronized.h \ SynchronizedPtr.h \ test/FBStringTestBenchmarks.cpp.h \ - test/FBVectorTestBenchmarks.cpp.h \ + test/FBVectorBenchmarks.cpp.h \ + test/FBVectorTests.cpp.h \ test/function_benchmark/benchmark_impl.h \ test/function_benchmark/test_functions.h \ test/SynchronizedTestLib.h \ @@ -496,7 +504,6 @@ libfollybase_la_SOURCES = \ Format.cpp \ FormatArg.cpp \ memory/MallctlHelper.cpp \ - portability/BitsFunctexcept.cpp \ String.cpp \ Unicode.cpp @@ -506,6 +513,7 @@ libfolly_la_SOURCES = \ compression/Counters.cpp \ compression/Zlib.cpp \ concurrency/CacheLocality.cpp \ + container/detail/F14Table.cpp \ detail/AtFork.cpp \ detail/Futex.cpp \ detail/IPAddress.cpp \ diff --git a/folly/Memory.h b/folly/Memory.h index 0a0f75789d0..af935f3d4c5 100644 --- a/folly/Memory.h +++ b/folly/Memory.h @@ -27,8 +27,12 @@ #include #include +#include +#include #include #include +#include +#include #include #include @@ -182,330 +186,308 @@ std::weak_ptr to_weak_ptr(const std::shared_ptr& ptr) { return std::weak_ptr(ptr); } -struct SysBufferDeleter { - void operator()(void* p) const { - ::free(p); - } +namespace detail { +template +struct lift_void_to_char { + using type = T; +}; +template <> +struct lift_void_to_char { + using type = char; }; - -using SysBufferUniquePtr = std::unique_ptr; -inline SysBufferUniquePtr allocate_sys_buffer(size_t size) { - return SysBufferUniquePtr(::malloc(size)); } /** - * A SimpleAllocator must provide two methods: + * SysAllocator * - * void* allocate(size_t size); - * void deallocate(void* ptr); - * - * which, respectively, allocate a block of size bytes (aligned to the - * maximum alignment required on your system), throwing std::bad_alloc - * if the allocation can't be satisfied, and free a previously - * allocated block. - * - * SysAlloc resembles the standard allocator. + * Resembles std::allocator, the default Allocator, but wraps std::malloc and + * std::free. 
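[editor note] Stepping back to the Lazy.h rewrite a few hunks up: with the deduced return type the call site is unchanged, and const callers now work through the mutable members plus ensure_initialized(). A usage sketch; expensiveLookup() is an assumed stand-in for real work:

    #include <string>
    #include <folly/Lazy.h>

    std::string expensiveLookup();  // assumed stand-in

    void example() {
      // The callable runs at most once, on first dereference.
      auto value = folly::lazy([] { return expensiveLookup(); });
      const auto& v1 = value();  // computes and caches
      const auto& v2 = value();  // returns the cached result
      (void)v1; (void)v2;
    }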
*/ -class SysAlloc { +template +class SysAllocator { + private: + using Self = SysAllocator; + public: - void* allocate(size_t size) { - void* p = ::malloc(size); + using value_type = T; + + T* allocate(size_t count) { + using lifted = typename detail::lift_void_to_char::type; + auto const p = std::malloc(sizeof(lifted) * count); if (!p) { - throw std::bad_alloc(); + throw_exception(); } - return p; + return static_cast(p); } - void deallocate(void* p) { - ::free(p); + void deallocate(T* p, size_t /* count */) { + std::free(p); } -}; -/** - * StlAllocator wraps a SimpleAllocator into a STL-compliant - * allocator, maintaining an instance pointer to the simple allocator - * object. The underlying SimpleAllocator object must outlive all - * instances of StlAllocator using it. - * - * But note that if you pass StlAllocator to a - * standard container it will be larger due to the contained state - * pointer. - * - * @author: Tudor Bosman - */ + friend bool operator==(Self const&, Self const&) noexcept { + return true; + } + friend bool operator!=(Self const&, Self const&) noexcept { + return false; + } +}; -// This would be so much simpler with std::allocator_traits, but gcc 4.6.2 -// doesn't support it. -template class StlAllocator; +class DefaultAlign { + private: + using Self = DefaultAlign; + std::size_t align_; -template class StlAllocator { public: - typedef void value_type; - typedef void* pointer; - typedef const void* const_pointer; - - StlAllocator() : alloc_(nullptr) { } - explicit StlAllocator(Alloc* a) : alloc_(a) { } + explicit DefaultAlign(std::size_t align) noexcept : align_(align) { + assert(!(align_ < sizeof(void*)) && bool("bad align: too small")); + assert(!(align_ & (align_ - 1)) && bool("bad align: not power-of-two")); + } + std::size_t operator()() const noexcept { + return align_; + } - Alloc* alloc() const { - return alloc_; + friend bool operator==(Self const& a, Self const& b) noexcept { + return a.align_ == b.align_; + } + friend bool operator!=(Self const& a, Self const& b) noexcept { + return a.align_ != b.align_; } +}; - template struct rebind { - typedef StlAllocator other; - }; +template +class FixedAlign { + private: + static_assert(!(Align < sizeof(void*)), "bad align: too small"); + static_assert(!(Align & (Align - 1)), "bad align: not power-of-two"); + using Self = FixedAlign; - bool operator!=(const StlAllocator& other) const { - return alloc_ != other.alloc_; + public: + constexpr std::size_t operator()() const noexcept { + return Align; } - bool operator==(const StlAllocator& other) const { - return alloc_ == other.alloc_; + friend bool operator==(Self const&, Self const&) noexcept { + return true; } + friend bool operator!=(Self const&, Self const&) noexcept { + return false; + } +}; +/** + * AlignedSysAllocator + * + * Resembles std::allocator, the default Allocator, but wraps aligned_malloc and + * aligned_free. + * + * Accepts a policy parameter for providing the alignment, which must: + * * be invocable as std::size_t() noexcept, returning the alignment + * * be noexcept-copy-constructible + * * have noexcept operator== + * * have noexcept operator!= + * * not be final + * + * DefaultAlign and FixedAlign, provided above, are valid policies. 
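[editor note] SysAllocator above is a drop-in for std::allocator whose storage comes from std::malloc/std::free rather than operator new/delete. A usage sketch:

    #include <vector>
    #include <folly/Memory.h>

    void example() {
      // Allocations for this vector go through std::malloc/std::free.
      std::vector<int, folly::SysAllocator<int>> v;
      v.push_back(42);
    }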
+ */ +template +class AlignedSysAllocator : private Align { private: - Alloc* alloc_; -}; + using Self = AlignedSysAllocator; + + constexpr Align const& align() const { + return *this; + } -template -class StlAllocator { public: - typedef T value_type; - typedef T* pointer; - typedef const T* const_pointer; - typedef T& reference; - typedef const T& const_reference; + static_assert(std::is_nothrow_copy_constructible::value, ""); + static_assert(is_nothrow_invocable_r::value, ""); - typedef ptrdiff_t difference_type; - typedef size_t size_type; + using value_type = T; - StlAllocator() : alloc_(nullptr) { } - explicit StlAllocator(Alloc* a) : alloc_(a) { } + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; - template StlAllocator(const StlAllocator& other) - : alloc_(other.alloc()) { } + using Align::Align; - T* allocate(size_t n, const void* /* hint */ = nullptr) { - return static_cast(alloc_->allocate(n * sizeof(T))); - } + // TODO: remove this ctor, which is required only by gcc49 + template < + typename S = Align, + _t::value, int>> = 0> + constexpr AlignedSysAllocator() noexcept(noexcept(Align())) : Align() {} - void deallocate(T* p, size_t /* n */) { alloc_->deallocate(p); } + template + constexpr explicit AlignedSysAllocator( + AlignedSysAllocator const& other) noexcept + : Align(other.align()) {} - size_t max_size() const { - return std::numeric_limits::max(); + T* allocate(size_t count) { + using lifted = typename detail::lift_void_to_char::type; + auto const p = aligned_malloc(sizeof(lifted) * count, align()()); + if (!p) { + if (FOLLY_UNLIKELY(errno != ENOMEM)) { + std::terminate(); + } + throw_exception(); + } + return static_cast(p); } - - T* address(T& x) const { - return std::addressof(x); + void deallocate(T* p, size_t /* count */) { + aligned_free(p); } - const T* address(const T& x) const { - return std::addressof(x); + friend bool operator==(Self const& a, Self const& b) noexcept { + return a.align() == b.align(); } - - template - void construct(T* p, Args&&... args) { - new (p) T(std::forward(args)...); + friend bool operator!=(Self const& a, Self const& b) noexcept { + return a.align() != b.align(); } +}; - void destroy(T* p) { - p->~T(); - } +/** + * CxxAllocatorAdaptor + * + * A type conforming to C++ concept Allocator, delegating operations to an + * unowned Inner which has this required interface: + * + * void* allocate(std::size_t) + * void deallocate(void*, std::size_t) + * + * Note that Inner is *not* a C++ Allocator. 
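[editor note] A usage sketch for the aligned variant, combining it with the FixedAlign policy defined above; the 64-byte figure is assumed as a typical cache-line size:

    #include <vector>
    #include <folly/Memory.h>

    void example() {
      // Every allocation from this allocator is 64-byte aligned, so the
      // vector's buffer starts on a (typical) cache-line boundary.
      using Alloc = folly::AlignedSysAllocator<float, folly::FixedAlign<64>>;
      std::vector<float, Alloc> v(1024);
    }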
+ */ +template +class CxxAllocatorAdaptor { + private: + using Self = CxxAllocatorAdaptor; - Alloc* alloc() const { - return alloc_; - } + template + friend class CxxAllocatorAdaptor; - template struct rebind { - typedef StlAllocator other; - }; + std::reference_wrapper ref_; - bool operator!=(const StlAllocator& other) const { - return alloc_ != other.alloc_; - } + public: + using value_type = T; + + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + + explicit CxxAllocatorAdaptor(Inner& ref) : ref_(ref) {} + + template + explicit CxxAllocatorAdaptor(CxxAllocatorAdaptor const& other) + : ref_(other.ref_) {} - bool operator==(const StlAllocator& other) const { - return alloc_ == other.alloc_; + T* allocate(std::size_t n) { + using lifted = typename detail::lift_void_to_char::type; + return static_cast(ref_.get().allocate(sizeof(lifted) * n)); + } + void deallocate(T* p, std::size_t n) { + using lifted = typename detail::lift_void_to_char::type; + ref_.get().deallocate(p, sizeof(lifted) * n); } - private: - Alloc* alloc_; + friend bool operator==(Self const& a, Self const& b) noexcept { + return std::addressof(a.ref_.get()) == std::addressof(b.ref_.get()); + } + friend bool operator!=(Self const& a, Self const& b) noexcept { + return std::addressof(a.ref_.get()) != std::addressof(b.ref_.get()); + } }; -/** - * Helper function to obtain rebound allocators - * - * @author: Marcelo Juchem - */ -template -typename Allocator::template rebind::other rebind_allocator( - Allocator const& allocator -) { - return typename Allocator::template rebind::other(allocator); -} - /* - * Helper classes/functions for creating a unique_ptr using a custom - * allocator. + * allocator_delete + * + * A deleter which automatically works with a given allocator. * - * @author: Marcelo Juchem + * Derives from the allocator to take advantage of the empty base + * optimization when possible. */ - -// Derives from the allocator to take advantage of the empty base -// optimization when possible. 
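[editor note] CxxAllocatorAdaptor bridges the old SimpleAllocator-style interface into a real C++ Allocator. A sketch with a toy Inner; the MallocInner type is invented here purely for illustration:

    #include <cstdlib>
    #include <vector>
    #include <folly/Memory.h>

    // Toy Inner: matches the required interface, not a C++ Allocator itself.
    struct MallocInner {
      void* allocate(std::size_t n) { return std::malloc(n); }
      void deallocate(void* p, std::size_t) { std::free(p); }
    };

    void example() {
      MallocInner inner;  // unowned: must outlive every container using it
      folly::CxxAllocatorAdaptor<int, MallocInner> alloc(inner);
      std::vector<int, folly::CxxAllocatorAdaptor<int, MallocInner>> v(alloc);
      v.push_back(1);
    }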
-template -class allocator_delete - : private std::remove_reference::type -{ - typedef typename std::remove_reference::type allocator_type; +template +class allocator_delete : private std::remove_reference::type { + private: + using allocator_type = typename std::remove_reference::type; + using allocator_traits = std::allocator_traits; + using value_type = typename allocator_traits::value_type; + using pointer = typename allocator_traits::pointer; public: - typedef typename Allocator::pointer pointer; - allocator_delete() = default; + allocator_delete(allocator_delete const&) = default; + allocator_delete(allocator_delete&&) = default; + allocator_delete& operator=(allocator_delete const&) = default; + allocator_delete& operator=(allocator_delete&&) = default; explicit allocator_delete(const allocator_type& allocator) - : allocator_type(allocator) - {} + : allocator_type(allocator) {} explicit allocator_delete(allocator_type&& allocator) - : allocator_type(std::move(allocator)) - {} + : allocator_type(std::move(allocator)) {} template allocator_delete(const allocator_delete& other) - : allocator_type(other.get_allocator()) - {} + : allocator_type(other.get_allocator()) {} - allocator_type& get_allocator() const { - return *const_cast(this); + allocator_type const& get_allocator() const { + return *this; } void operator()(pointer p) const { - if (!p) { - return; - } - const_cast(this)->destroy(p); - const_cast(this)->deallocate(p, 1); + auto alloc = get_allocator(); + allocator_traits::destroy(alloc, p); + allocator_traits::deallocate(alloc, p, 1); } }; -namespace detail { - -FOLLY_CREATE_MEMBER_INVOKE_TRAITS(destroy_invoke_traits, destroy); - -} // namespace detail - -template -using is_simple_allocator = - Negation>; - -template -struct as_stl_allocator { - typedef typename std::conditional< - is_simple_allocator< - typename std::remove_reference::type, - typename std::remove_reference::type - >::value, - folly::StlAllocator< - typename std::remove_reference::type, - typename std::remove_reference::type - >, - typename std::remove_reference::type - >::type type; -}; - -template -typename std::enable_if< - is_simple_allocator< - typename std::remove_reference::type, - typename std::remove_reference::type - >::value, - folly::StlAllocator< - typename std::remove_reference::type, - typename std::remove_reference::type - > ->::type make_stl_allocator(Allocator&& allocator) { - return folly::StlAllocator< - typename std::remove_reference::type, - typename std::remove_reference::type - >(&allocator); -} - -template -typename std::enable_if< - !is_simple_allocator< - typename std::remove_reference::type, - typename std::remove_reference::type - >::value, - typename std::remove_reference::type ->::type make_stl_allocator(Allocator&& allocator) { - return std::move(allocator); -} - /** - * AllocatorUniquePtr: a unique_ptr that supports both STL-style - * allocators and SimpleAllocator - * - * @author: Marcelo Juchem - */ - -template -struct AllocatorUniquePtr { - typedef std::unique_ptr::type, - typename std::remove_reference::type - >::value, - folly::StlAllocator::type, T>, - typename std::remove_reference::type - >::type - > - > type; -}; - -/** - * Functions to allocate a unique_ptr / shared_ptr, supporting both - * STL-style allocators and SimpleAllocator, analog to std::allocate_shared - * - * @author: Marcelo Juchem + * allocate_unique, like std::allocate_shared but for std::unique_ptr */ - -template -typename AllocatorUniquePtr::type allocate_unique( - Allocator&& allocator, Args&&... 
args -) { - auto stlAllocator = folly::make_stl_allocator( - std::forward(allocator) - ); - auto p = stlAllocator.allocate(1); - +template +std::unique_ptr> allocate_unique( + Alloc const& alloc, + Args&&... args) { + using traits = std::allocator_traits; + auto copy = alloc; + auto const p = traits::allocate(copy, 1); try { - stlAllocator.construct(p, std::forward(args)...); - - return {p, - folly::allocator_delete(std::move(stlAllocator)) - }; + traits::construct(copy, p, static_cast(args)...); + return {p, allocator_delete(std::move(copy))}; } catch (...) { - stlAllocator.deallocate(p, 1); + traits::deallocate(copy, p, 1); throw; } } -template -std::shared_ptr allocate_shared(Allocator&& allocator, Args&&... args) { - return std::allocate_shared( - folly::make_stl_allocator(std::forward(allocator)), - std::forward(args)... - ); +struct SysBufferDeleter { + void operator()(void* ptr) { + std::free(ptr); + } +}; +using SysBufferUniquePtr = std::unique_ptr; + +inline SysBufferUniquePtr allocate_sys_buffer(std::size_t size) { + auto p = std::malloc(size); + if (!p) { + throw_exception(); + } + return {p, {}}; } /** - * IsArenaAllocator::value describes whether SimpleAllocator has - * no-op deallocate(). + * AllocatorHasTrivialDeallocate + * + * Unambiguously inherits std::integral_constant for some bool V. + * + * Describes whether a C++ Aallocator has trivial, i.e. no-op, deallocate(). + * + * Also may be used to describe types which may be used with + * CxxAllocatorAdaptor. */ -template struct IsArenaAllocator : std::false_type { }; +template +struct AllocatorHasTrivialDeallocate : std::false_type {}; + +template +struct AllocatorHasTrivialDeallocate> + : AllocatorHasTrivialDeallocate {}; /* * folly::enable_shared_from_this diff --git a/folly/MicroSpinLock.h b/folly/MicroSpinLock.h index 17d181f79dc..5654705d181 100644 --- a/folly/MicroSpinLock.h +++ b/folly/MicroSpinLock.h @@ -1,5 +1,5 @@ /* - * Copyright 2015-present Facebook, Inc. + * Copyright 2011-present Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,145 +14,4 @@ * limitations under the License. */ -/* - * N.B. You most likely do _not_ want to use MicroSpinLock or any - * other kind of spinlock. Consider MicroLock instead. - * - * In short, spinlocks in preemptive multi-tasking operating systems - * have serious problems and fast mutexes like std::mutex are almost - * certainly the better choice, because letting the OS scheduler put a - * thread to sleep is better for system responsiveness and throughput - * than wasting a timeslice repeatedly querying a lock held by a - * thread that's blocked, and you can't prevent userspace - * programs blocking. - * - * Spinlocks in an operating system kernel make much more sense than - * they do in userspace. - */ - -#pragma once - -/* - * @author Keith Adams - * @author Jordan DeLong - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace folly { - -/* - * A really, *really* small spinlock for fine-grained locking of lots - * of teeny-tiny data. - * - * Zero initializing these is guaranteed to be as good as calling - * init(), since the free state is guaranteed to be all-bits zero. - * - * This class should be kept a POD, so we can used it in other packed - * structs (gcc does not allow __attribute__((__packed__)) on structs that - * contain non-POD data). 
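[editor note] The new allocate_unique above is the unique_ptr analogue of std::allocate_shared: it constructs through std::allocator_traits and bundles an allocator_delete so destruction goes back through the same allocator. A usage sketch:

    #include <string>
    #include <folly/Memory.h>

    void example() {
      folly::SysAllocator<std::string> alloc;
      // p's deleter is an allocator_delete over the allocator, so the string
      // is destroyed and deallocated via the allocator, never via delete.
      auto p = folly::allocate_unique<std::string>(alloc, "hello");
      p->append(" world");
    }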
This means avoid adding a constructor, or - * making some members private, etc. - */ -struct MicroSpinLock { - enum { FREE = 0, LOCKED = 1 }; - // lock_ can't be std::atomic<> to preserve POD-ness. - uint8_t lock_; - - // Initialize this MSL. It is unnecessary to call this if you - // zero-initialize the MicroSpinLock. - void init() { - payload()->store(FREE); - } - - bool try_lock() { - return cas(FREE, LOCKED); - } - - void lock() { - detail::Sleeper sleeper; - do { - while (payload()->load() != FREE) { - sleeper.wait(); - } - } while (!try_lock()); - assert(payload()->load() == LOCKED); - } - - void unlock() { - assert(payload()->load() == LOCKED); - payload()->store(FREE, std::memory_order_release); - } - - private: - std::atomic* payload() { - return reinterpret_cast*>(&this->lock_); - } - - bool cas(uint8_t compare, uint8_t newVal) { - return std::atomic_compare_exchange_strong_explicit(payload(), &compare, newVal, - std::memory_order_acquire, - std::memory_order_relaxed); - } -}; -static_assert( - std::is_pod::value, - "MicroSpinLock must be kept a POD type."); - -////////////////////////////////////////////////////////////////////// - -/** - * Array of spinlocks where each one is padded to prevent false sharing. - * Useful for shard-based locking implementations in environments where - * contention is unlikely. - */ - -// TODO: generate it from configure (`getconf LEVEL1_DCACHE_LINESIZE`) -#define FOLLY_CACHE_LINE_SIZE 64 - -template -struct alignas(max_align_v) SpinLockArray { - T& operator[](size_t i) { - return data_[i].lock; - } - - const T& operator[](size_t i) const { - return data_[i].lock; - } - - constexpr size_t size() const { return N; } - - private: - struct PaddedSpinLock { - PaddedSpinLock() : lock() {} - T lock; - char padding[FOLLY_CACHE_LINE_SIZE - sizeof(T)]; - }; - static_assert(sizeof(PaddedSpinLock) == FOLLY_CACHE_LINE_SIZE, - "Invalid size of PaddedSpinLock"); - - // Check if T can theoretically cross a cache line. 
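[editor note] The implementation removed here survives unchanged behind the shim include at the bottom of this file; per the Makefile.am hunk above it now lives at folly/synchronization/MicroSpinLock.h, and existing includes keep working. A usage sketch:

    #include <mutex>
    #include <folly/MicroSpinLock.h>  // now forwards to folly/synchronization/

    struct Counter {
      folly::MicroSpinLock lock_;  // POD; zero-initializing the containing
      int value_;                  // object is equivalent to calling init()
    };

    void bump(Counter& c) {
      std::lock_guard<folly::MicroSpinLock> g(c.lock_);  // a.k.a. folly::MSLGuard
      ++c.value_;
    }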
- static_assert( - max_align_v > 0 && FOLLY_CACHE_LINE_SIZE % max_align_v == 0 && - sizeof(T) <= max_align_v, - "T can cross cache line boundaries"); - - char padding_[FOLLY_CACHE_LINE_SIZE]; - std::array data_; -}; - -////////////////////////////////////////////////////////////////////// - -typedef std::lock_guard MSLGuard; - -////////////////////////////////////////////////////////////////////// - -} // namespace folly +#include // @shim diff --git a/folly/Range.h b/folly/Range.h index c4e8b27de1f..159f4009ba8 100644 --- a/folly/Range.h +++ b/folly/Range.h @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include @@ -224,7 +224,7 @@ class Range : private boost::totally_ordered> { template ::const_type = 0> Range(const std::string& str, std::string::size_type startFrom) { if (UNLIKELY(startFrom > str.size())) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } b_ = str.data() + startFrom; e_ = str.data() + str.size(); @@ -236,7 +236,7 @@ class Range : private boost::totally_ordered> { std::string::size_type startFrom, std::string::size_type size) { if (UNLIKELY(startFrom > str.size())) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } b_ = str.data() + startFrom; if (str.size() - startFrom < size) { @@ -274,7 +274,7 @@ class Range : private boost::totally_ordered> { auto const cdata = container.data(); auto const csize = container.size(); if (UNLIKELY(startFrom > csize)) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } b_ = cdata + startFrom; e_ = cdata + csize; @@ -296,7 +296,7 @@ class Range : private boost::totally_ordered> { auto const cdata = container.data(); auto const csize = container.size(); if (UNLIKELY(startFrom > csize)) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } b_ = cdata + startFrom; if (csize - startFrom < size) { @@ -521,14 +521,14 @@ class Range : private boost::totally_ordered> { value_type& at(size_t i) { if (i >= size()) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } return b_[i]; } const value_type& at(size_t i) const { if (i >= size()) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } return b_[i]; } @@ -564,21 +564,21 @@ class Range : private boost::totally_ordered> { void advance(size_type n) { if (UNLIKELY(n > size())) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } b_ += n; } void subtract(size_type n) { if (UNLIKELY(n > size())) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } e_ -= n; } Range subpiece(size_type first, size_type length = npos) const { if (UNLIKELY(first > size())) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } return Range(b_ + first, std::min(length, size() - first)); @@ -775,7 +775,7 @@ class Range : private boost::totally_ordered> { } else if (e == e_) { e_ = b; } else { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } } @@ -841,7 +841,7 @@ class Range : private boost::totally_ordered> { */ size_t replaceAll(const_range_type source, const_range_type dest) { if (source.size() != dest.size()) { - throw std::invalid_argument( + throw_exception( "replacement must have the same size as source"); } @@ -1360,6 +1360,12 @@ struct hasher< } }; +template +struct 
IsAvalanchingHasher; + +template +struct IsAvalanchingHasher, E>, K> : std::true_type {}; + /** * _sp is a user-defined literal suffix to make an appropriate Range * specialization from a literal string. diff --git a/folly/SharedMutex.h b/folly/SharedMutex.h index 6b87128c675..a209d991120 100644 --- a/folly/SharedMutex.h +++ b/folly/SharedMutex.h @@ -274,6 +274,10 @@ class SharedMutexImpl { } #ifndef NDEBUG + // These asserts check that everybody has released the lock before it + // is destroyed. If you arrive here while debugging that is likely + // the problem. (You could also have general heap corruption.) + // if a futexWait fails to go to sleep because the value has been // changed, we don't necessarily clean up the wait bits, so it is // possible they will be set here in a correct system diff --git a/folly/SingletonThreadLocal.h b/folly/SingletonThreadLocal.h index 7a01ee269d4..0eb3d384e32 100644 --- a/folly/SingletonThreadLocal.h +++ b/folly/SingletonThreadLocal.h @@ -97,8 +97,9 @@ class SingletonThreadLocal { }; FOLLY_EXPORT FOLLY_ALWAYS_INLINE static Wrapper& getWrapperInline() { - static LeakySingleton, Tag> singleton; - return *singleton.get(); + /* library-local */ static auto entry = + detail::createGlobal, Tag>(); + return **entry; } FOLLY_NOINLINE static Wrapper& getWrapperOutline() { diff --git a/folly/String-inl.h b/folly/String-inl.h index b91ea9dd934..8c973682162 100644 --- a/folly/String-inl.h +++ b/folly/String-inl.h @@ -96,6 +96,7 @@ void cUnescape(StringPiece str, String& out, bool strict) { continue; } out.append(&*last, p - last); + ++p; if (p == str.end()) { // backslash at end of string if (strict) { throw std::invalid_argument("incomplete escape sequence"); @@ -104,7 +105,6 @@ void cUnescape(StringPiece str, String& out, bool strict) { last = p; continue; } - ++p; char e = detail::cUnescapeTable[static_cast(*p)]; if (e == 'O') { // octal unsigned char val = 0; diff --git a/folly/Subprocess.cpp b/folly/Subprocess.cpp index 7058f8b2852..671553edb47 100644 --- a/folly/Subprocess.cpp +++ b/folly/Subprocess.cpp @@ -310,6 +310,14 @@ void Subprocess::spawn( pipesGuard.dismiss(); } +// With -Wclobbered, gcc complains about vfork potentially cloberring the +// childDir variable, even though we only use it on the child side of the +// vfork. + +FOLLY_PUSH_WARNING +#if !defined(__clang__) +FOLLY_GCC_DISABLE_WARNING("-Wclobbered") +#endif void Subprocess::spawnInternal( std::unique_ptr argv, const char* executable, @@ -446,6 +454,7 @@ void Subprocess::spawnInternal( pid_ = pid; returnCode_ = ProcessReturnCode::makeRunning(); } +FOLLY_POP_WARNING int Subprocess::prepareChild(const Options& options, const sigset_t* sigmask, diff --git a/folly/Utility.h b/folly/Utility.h index ab3caeb97c2..909f72d19c9 100644 --- a/folly/Utility.h +++ b/folly/Utility.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include #include @@ -364,4 +365,20 @@ using MoveOnly = moveonly_::MoveOnly; template using Bool = std::integral_constant; +template +constexpr auto to_signed(T const& t) -> typename std::make_signed::type { + using S = typename std::make_signed::type; + // note: static_cast(t) would be more straightforward, but it would also be + // implementation-defined behavior and that is typically to be avoided; the + // following code optimized into the same thing, though + return std::numeric_limits::max() < t ? 
-static_cast(~t) + S{-1} + : static_cast(t); +} + +template +constexpr auto to_unsigned(T const& t) -> typename std::make_unsigned::type { + using U = typename std::make_unsigned::type; + return static_cast(t); +} + } // namespace folly diff --git a/folly/chrono/Conv.h b/folly/chrono/Conv.h index 1dba116d609..ced6bc5f32d 100644 --- a/folly/chrono/Conv.h +++ b/folly/chrono/Conv.h @@ -283,8 +283,7 @@ struct CheckOverflowToDuration { constexpr auto maxCount = std::numeric_limits::max(); constexpr auto maxSeconds = maxCount / Tgt::period::den; - auto unsignedSeconds = - static_cast::type>(seconds); + auto unsignedSeconds = to_unsigned(seconds); if (LIKELY(unsignedSeconds < maxSeconds)) { return ConversionCode::SUCCESS; } @@ -297,8 +296,7 @@ struct CheckOverflowToDuration { if (subseconds <= 0) { return ConversionCode::SUCCESS; } - if (static_cast::type>( - subseconds) <= maxSubseconds) { + if (to_unsigned(subseconds) <= maxSubseconds) { return ConversionCode::SUCCESS; } } @@ -307,8 +305,7 @@ struct CheckOverflowToDuration { return ConversionCode::NEGATIVE_OVERFLOW; } else { constexpr auto minCount = - static_cast::type>( - std::numeric_limits::lowest()); + to_signed(std::numeric_limits::lowest()); constexpr auto minSeconds = (minCount / Tgt::period::den); if (LIKELY(seconds >= minSeconds)) { return ConversionCode::SUCCESS; diff --git a/folly/compression/Counters.cpp b/folly/compression/Counters.cpp index b44959f2204..de425cdb770 100644 --- a/folly/compression/Counters.cpp +++ b/folly/compression/Counters.cpp @@ -15,16 +15,10 @@ */ #include -#include +#include namespace folly { -#if FOLLY_HAVE_WEAK_SYMBOLS -#define FOLLY_WEAK_SYMBOL __attribute__((__weak__)) -#else -#define FOLLY_WEAK_SYMBOL -#endif - -folly::Function FOLLY_WEAK_SYMBOL makeCompressionCounterHandler( +FOLLY_ATTR_WEAK folly::Function makeCompressionCounterHandler( folly::io::CodecType, folly::StringPiece, folly::Optional, diff --git a/folly/concurrency/CacheLocality.cpp b/folly/concurrency/CacheLocality.cpp index 38dd957904f..4eaf3dcd78b 100644 --- a/folly/concurrency/CacheLocality.cpp +++ b/folly/concurrency/CacheLocality.cpp @@ -34,13 +34,13 @@ namespace folly { /// Returns the best real CacheLocality information available static CacheLocality getSystemLocalityInfo() { -#ifdef __linux__ - try { - return CacheLocality::readFromSysfs(); - } catch (...) { - // keep trying + if (kIsLinux) { + try { + return CacheLocality::readFromSysfs(); + } catch (...) { + // keep trying + } } -#endif long numCpus = sysconf(_SC_NPROCESSORS_CONF); if (numCpus <= 0) { @@ -249,7 +249,7 @@ void* SimpleAllocator::allocateHard() { // Allocate a new slab. mem_ = static_cast(folly::aligned_malloc(allocSize_, allocSize_)); if (!mem_) { - std::__throw_bad_alloc(); + throw_exception(); } end_ = mem_ + allocSize_; blocks_.push_back(mem_); diff --git a/folly/concurrency/CacheLocality.h b/folly/concurrency/CacheLocality.h index 0f13b3745df..09bc7e26f13 100644 --- a/folly/concurrency/CacheLocality.h +++ b/folly/concurrency/CacheLocality.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include namespace folly { @@ -405,7 +405,7 @@ class SimpleAllocator { * Note that allocation and deallocation takes a per-sizeclass lock. 
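[editor note] The to_signed/to_unsigned helpers added in Utility.h (and used by chrono/Conv.h above) replace implementation-defined signedness casts with well-defined modular conversions. A sketch of the intended values, derived from the definitions rather than from the library's tests:

    #include <folly/Utility.h>

    // In-range values convert as usual.
    static_assert(folly::to_signed(42u) == 42, "");
    // Out-of-range values map to their two's-complement interpretation,
    // computed without an implementation-defined narrowing cast.
    static_assert(folly::to_signed(~0u) == -1, "");
    // to_unsigned is the ordinary, already well-defined direction.
    static_assert(folly::to_unsigned(-1) == ~0u, "");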
*/ template -class CoreAllocator { +class CoreRawAllocator { public: class Allocator { static constexpr size_t AllocSize{4096}; @@ -437,13 +437,13 @@ class CoreAllocator { void* mem = aligned_malloc(size, hardware_destructive_interference_size); if (!mem) { - std::__throw_bad_alloc(); + throw_exception(); } return mem; } return allocators_[cl].allocate(); } - void deallocate(void* mem) { + void deallocate(void* mem, size_t = 0) { if (!mem) { return; } @@ -469,19 +469,14 @@ class CoreAllocator { Allocator allocators_[Stripes]; }; -template -typename CoreAllocator::Allocator* getCoreAllocator(size_t stripe) { +template +CxxAllocatorAdaptor::Allocator> +getCoreAllocator(size_t stripe) { // We cannot make sure that the allocator will be destroyed after // all the objects allocated with it, so we leak it. - static Indestructible> allocator; - return allocator->get(stripe); -} - -template -StlAllocator::Allocator, T> getCoreAllocatorStl( - size_t stripe) { - auto alloc = getCoreAllocator(stripe); - return StlAllocator::Allocator, T>(alloc); + static Indestructible> allocator; + return CxxAllocatorAdaptor::Allocator>( + *allocator->get(stripe)); } } // namespace folly diff --git a/folly/concurrency/ConcurrentHashMap.h b/folly/concurrency/ConcurrentHashMap.h index 8b5b0b94459..7050558ec1d 100644 --- a/folly/concurrency/ConcurrentHashMap.h +++ b/folly/concurrency/ConcurrentHashMap.h @@ -444,13 +444,17 @@ class ConcurrentHashMap { void next() { while (it_ == parent_->ensureSegment(segment_)->cend() && segment_ < parent_->NumShards) { - segment_++; - auto seg = parent_->segments_[segment_].load(std::memory_order_acquire); - if (segment_ < parent_->NumShards) { - if (!seg) { - continue; + SegmentT* seg{nullptr}; + while (!seg) { + segment_++; + seg = parent_->segments_[segment_].load(std::memory_order_acquire); + if (segment_ < parent_->NumShards) { + if (!seg) { + continue; + } + it_ = seg->cbegin(); } - it_ = seg->cbegin(); + break; } } } diff --git a/folly/concurrency/CoreCachedSharedPtr.h b/folly/concurrency/CoreCachedSharedPtr.h index 9e5bc19799d..3109d720012 100644 --- a/folly/concurrency/CoreCachedSharedPtr.h +++ b/folly/concurrency/CoreCachedSharedPtr.h @@ -44,11 +44,11 @@ class CoreCachedSharedPtr { } void reset(const std::shared_ptr& p = nullptr) { - // Allocate each Holder in a different CoreAllocator stripe to + // Allocate each Holder in a different CoreRawAllocator stripe to // prevent false sharing. Their control blocks will be adjacent // thanks to allocate_shared(). for (auto slot : folly::enumerate(slots_)) { - auto alloc = getCoreAllocatorStl(slot.index); + auto alloc = getCoreAllocator(slot.index); auto holder = std::allocate_shared(alloc, p); *slot = std::shared_ptr(holder, p.get()); } @@ -114,11 +114,11 @@ class AtomicCoreCachedSharedPtr { void reset(const std::shared_ptr& p = nullptr) { auto newslots = folly::make_unique(); - // Allocate each Holder in a different CoreAllocator stripe to + // Allocate each Holder in a different CoreRawAllocator stripe to // prevent false sharing. Their control blocks will be adjacent // thanks to allocate_shared(). 
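Both reset() hunks here feed the allocator returned by the new getCoreAllocator into std::allocate_shared. A minimal standalone sketch of that pattern, assuming only the getCoreAllocator signature introduced in CacheLocality.h above (Counter and makeStripeLocal are illustrative names, not part of the patch):

```cpp
#include <cstddef>
#include <memory>

#include <folly/concurrency/CacheLocality.h>

struct Counter {
  long value = 0;
};

// The CxxAllocatorAdaptor returned by getCoreAllocator models the
// standard Allocator requirements, so it plugs straight into
// std::allocate_shared: the control block and the object both land in
// the stripe's arena, adjacent to other objects from the same stripe.
std::shared_ptr<Counter> makeStripeLocal(std::size_t stripe) {
  auto alloc = folly::getCoreAllocator<Counter, 32>(stripe);
  return std::allocate_shared<Counter>(alloc);
}
```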
for (auto slot : folly::enumerate(newslots->slots_)) { - auto alloc = getCoreAllocatorStl(slot.index); + auto alloc = getCoreAllocator(slot.index); auto holder = std::allocate_shared(alloc, p); *slot = std::shared_ptr(holder, p.get()); } diff --git a/folly/concurrency/DynamicBoundedQueue.h b/folly/concurrency/DynamicBoundedQueue.h index 18ff4928935..9aed4641682 100644 --- a/folly/concurrency/DynamicBoundedQueue.h +++ b/folly/concurrency/DynamicBoundedQueue.h @@ -596,7 +596,7 @@ class DynamicBoundedQueue { if ((debit + weight <= capacity) && tryAddDebit(weight)) { return true; } - if (Clock::now() >= deadline) { + if (deadline < Clock::time_point::max() && Clock::now() >= deadline) { return false; } if (MayBlock) { diff --git a/folly/concurrency/UnboundedQueue.h b/folly/concurrency/UnboundedQueue.h index 9ea8a06a91d..1a75516a88d 100644 --- a/folly/concurrency/UnboundedQueue.h +++ b/folly/concurrency/UnboundedQueue.h @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -266,7 +267,16 @@ class UnboundedQueue { /** try_dequeue */ FOLLY_ALWAYS_INLINE bool try_dequeue(T& item) noexcept { - return tryDequeueUntil(item, std::chrono::steady_clock::time_point::min()); + auto o = try_dequeue(); + if (LIKELY(o.has_value())) { + item = std::move(*o); + return true; + } + return false; + } + + FOLLY_ALWAYS_INLINE folly::Optional try_dequeue() noexcept { + return tryDequeueUntil(std::chrono::steady_clock::time_point::min()); } /** try_dequeue_until */ @@ -274,7 +284,20 @@ class UnboundedQueue { FOLLY_ALWAYS_INLINE bool try_dequeue_until( T& item, const std::chrono::time_point& deadline) noexcept { - return tryDequeueUntil(item, deadline); + folly::Optional o = try_dequeue_until(deadline); + + if (LIKELY(o.has_value())) { + item = std::move(*o); + return true; + } + + return false; + } + + template + FOLLY_ALWAYS_INLINE folly::Optional try_dequeue_until( + const std::chrono::time_point& deadline) noexcept { + return tryDequeueUntil(deadline); } /** try_dequeue_for */ @@ -282,10 +305,24 @@ class UnboundedQueue { FOLLY_ALWAYS_INLINE bool try_dequeue_for( T& item, const std::chrono::duration& duration) noexcept { - if (LIKELY(try_dequeue(item))) { + folly::Optional o = try_dequeue_for(duration); + + if (LIKELY(o.has_value())) { + item = std::move(*o); return true; } - return tryDequeueUntil(item, std::chrono::steady_clock::now() + duration); + + return false; + } + + template + FOLLY_ALWAYS_INLINE folly::Optional try_dequeue_for( + const std::chrono::duration& duration) noexcept { + folly::Optional o = try_dequeue(); + if (LIKELY(o.has_value())) { + return o; + } + return tryDequeueUntil(std::chrono::steady_clock::now() + duration); } /** size */ @@ -369,26 +406,24 @@ class UnboundedQueue { /** tryDequeueUntil */ template - FOLLY_ALWAYS_INLINE bool tryDequeueUntil( - T& item, + FOLLY_ALWAYS_INLINE folly::Optional tryDequeueUntil( const std::chrono::time_point& deadline) noexcept { if (SingleConsumer) { Segment* s = head(); - return tryDequeueUntilSC(s, item, deadline); + return tryDequeueUntilSC(s, deadline); } else { // Using hazptr_holder instead of hazptr_local because it is - // possible to call ~T() and it may happen to use hazard pointers. + // possible to call ~T() and it may happen to use hazard pointers. 
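As a usage sketch of the Optional-returning try_dequeue family added above (assuming the USPSCQueue alias that UnboundedQueue.h provides):

```cpp
#include <folly/concurrency/UnboundedQueue.h>

// Drain a single-producer/single-consumer queue. try_dequeue() returns
// folly::Optional<T> (empty when the queue is empty), so the consumer
// needs neither a default-constructible T nor an out-parameter.
long drainAll(folly::USPSCQueue<long, /* MayBlock = */ false>& queue) {
  long sum = 0;
  while (auto item = queue.try_dequeue()) {
    sum += *item;
  }
  return sum;
}
```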
folly::hazptr::hazptr_holder hptr; Segment* s = hptr.get_protected(c_.head); - return tryDequeueUntilMC(s, item, deadline); + return tryDequeueUntilMC(s, deadline); } } /** tryDequeueUntilSC */ template - FOLLY_ALWAYS_INLINE bool tryDequeueUntilSC( + FOLLY_ALWAYS_INLINE folly::Optional tryDequeueUntilSC( Segment* s, - T& item, const std::chrono::time_point& deadline) noexcept { Ticket t = consumerTicket(); DCHECK_GE(t, s->minTicket()); @@ -396,45 +431,44 @@ class UnboundedQueue { size_t idx = index(t); Entry& e = s->entry(idx); if (UNLIKELY(!tryDequeueWaitElem(e, t, deadline))) { - return false; + return folly::Optional(); } setConsumerTicket(t + 1); - e.takeItem(item); + auto ret = e.takeItem(); if (responsibleForAdvance(t)) { advanceHead(s); } - return true; + return ret; } /** tryDequeueUntilMC */ template - FOLLY_ALWAYS_INLINE bool tryDequeueUntilMC( + FOLLY_ALWAYS_INLINE folly::Optional tryDequeueUntilMC( Segment* s, - T& item, const std::chrono::time_point& deadline) noexcept { while (true) { Ticket t = consumerTicket(); if (UNLIKELY(t >= (s->minTicket() + SegmentSize))) { s = tryGetNextSegmentUntil(s, deadline); if (s == nullptr) { - return false; // timed out + return folly::Optional(); // timed out } continue; } size_t idx = index(t); Entry& e = s->entry(idx); if (UNLIKELY(!tryDequeueWaitElem(e, t, deadline))) { - return false; + return folly::Optional(); } if (!c_.ticket.compare_exchange_weak( t, t + 1, std::memory_order_acq_rel, std::memory_order_acquire)) { continue; } - e.takeItem(item); + auto ret = e.takeItem(); if (responsibleForAdvance(t)) { advanceHead(s); } - return true; + return ret; } } @@ -444,15 +478,10 @@ class UnboundedQueue { Entry& e, Ticket t, const std::chrono::time_point& deadline) noexcept { - while (true) { - if (LIKELY(e.tryWaitUntil(deadline))) { - return true; - } - if (t >= producerTicket()) { - return false; - } - asm_volatile_pause(); + if (LIKELY(e.tryWaitUntil(deadline))) { + return true; } + return t < producerTicket(); } /** findSegment */ @@ -648,6 +677,11 @@ class UnboundedQueue { getItem(item); } + FOLLY_ALWAYS_INLINE folly::Optional takeItem() noexcept { + flag_.wait(); + return getItem(); + } + template FOLLY_ALWAYS_INLINE bool tryWaitUntil( const std::chrono::time_point& deadline) noexcept { @@ -663,6 +697,13 @@ class UnboundedQueue { destroyItem(); } + FOLLY_ALWAYS_INLINE folly::Optional getItem() noexcept { + folly::Optional ret = std::move(*(itemPtr())); + destroyItem(); + + return ret; + } + FOLLY_ALWAYS_INLINE T* itemPtr() noexcept { return static_cast(static_cast(&item_)); } diff --git a/folly/concurrency/detail/ConcurrentHashMap-detail.h b/folly/concurrency/detail/ConcurrentHashMap-detail.h index 23a9355feed..b75d1735320 100644 --- a/folly/concurrency/detail/ConcurrentHashMap-detail.h +++ b/folly/concurrency/detail/ConcurrentHashMap-detail.h @@ -36,6 +36,19 @@ class HazptrDeleter { } }; +template +class HazptrBucketDeleter { + size_t count_; + + public: + HazptrBucketDeleter(size_t count) : count_(count) {} + HazptrBucketDeleter() = default; + template + void operator()(Bucket* bucket) { + bucket->destroy(count_); + } +}; + template < typename KeyType, typename ValueType, @@ -171,7 +184,7 @@ class NodeT : public folly::hazptr::hazptr_obj_base< * * insert / erase could be lock / wait free. Would need to be * careful that assign and rehash don't conflict (possibly with * reader/writer lock, or microlock per node or per bucket, etc). 
- * Java 8 goes halfway, and and does lock per bucket, except for the + * Java 8 goes halfway, and does lock per bucket, except for the * first item, that is inserted with a CAS (which is somewhat * specific to java having a lock per object) * @@ -221,23 +234,22 @@ class alignas(64) ConcurrentHashMapSegment { float load_factor, size_t max_size) : load_factor_(load_factor), max_size_(max_size) { - auto buckets = (Buckets*)Allocator().allocate(sizeof(Buckets)); initial_buckets = folly::nextPowTwo(initial_buckets); DCHECK( max_size_ == 0 || (isPowTwo(max_size_) && (folly::popcount(max_size_ - 1) + ShardBits <= 32))); - new (buckets) Buckets(initial_buckets); + auto buckets = Buckets::create(initial_buckets); buckets_.store(buckets, std::memory_order_release); load_factor_nodes_ = initial_buckets * load_factor_; + bucket_count_.store(initial_buckets, std::memory_order_relaxed); } ~ConcurrentHashMapSegment() { auto buckets = buckets_.load(std::memory_order_relaxed); // We can delete and not retire() here, since users must have // their own synchronization around destruction. - buckets->~Buckets(); - Allocator().deallocate((uint8_t*)buckets, sizeof(Buckets)); + buckets->destroy(bucket_count_.load(std::memory_order_relaxed)); } size_t size() { @@ -364,6 +376,7 @@ class alignas(64) ConcurrentHashMapSegment { auto h = HashFn()(k); std::unique_lock g(m_); + size_t bcount = bucket_count_.load(std::memory_order_relaxed); auto buckets = buckets_.load(std::memory_order_relaxed); // Check for rehash needed for DOES_NOT_EXIST if (size_ >= load_factor_nodes_ && type == InsertType::DOES_NOT_EXIST) { @@ -371,11 +384,12 @@ class alignas(64) ConcurrentHashMapSegment { // Would exceed max size. throw std::bad_alloc(); } - rehash(buckets->bucket_count_ << 1); + rehash(bcount << 1); buckets = buckets_.load(std::memory_order_relaxed); + bcount = bucket_count_.load(std::memory_order_relaxed); } - auto idx = getIdx(buckets, h); + auto idx = getIdx(bcount, h); auto head = &buckets->buckets_[idx]; auto node = head->load(std::memory_order_relaxed); auto headnode = node; @@ -386,7 +400,7 @@ class alignas(64) ConcurrentHashMapSegment { while (node) { // Is the key found? if (KeyEqual()(k, node->getItem().first)) { - it.setNode(node, buckets, idx); + it.setNode(node, buckets, bcount, idx); haznode.reset(node); if (type == InsertType::MATCH) { if (!match(node->getItem().second)) { @@ -427,12 +441,13 @@ class alignas(64) ConcurrentHashMapSegment { // Would exceed max size. throw std::bad_alloc(); } - rehash(buckets->bucket_count_ << 1); + rehash(bcount << 1); // Reload correct bucket. buckets = buckets_.load(std::memory_order_relaxed); + bcount <<= 1; hazbuckets.reset(buckets); - idx = getIdx(buckets, h); + idx = getIdx(bcount, h); head = &buckets->buckets_[idx]; headnode = head->load(std::memory_order_relaxed); } @@ -448,26 +463,26 @@ class alignas(64) ConcurrentHashMapSegment { } cur->next_.store(headnode, std::memory_order_relaxed); head->store(cur, std::memory_order_release); - it.setNode(cur, buckets, idx); + it.setNode(cur, buckets, bcount, idx); return true; } // Must hold lock. 
void rehash(size_t bucket_count) { auto buckets = buckets_.load(std::memory_order_relaxed); - auto newbuckets = (Buckets*)Allocator().allocate(sizeof(Buckets)); - new (newbuckets) Buckets(bucket_count); + auto newbuckets = Buckets::create(bucket_count); load_factor_nodes_ = bucket_count * load_factor_; - for (size_t i = 0; i < buckets->bucket_count_; i++) { + auto oldcount = bucket_count_.load(std::memory_order_relaxed); + for (size_t i = 0; i < oldcount; i++) { auto bucket = &buckets->buckets_[i]; auto node = bucket->load(std::memory_order_relaxed); if (!node) { continue; } auto h = HashFn()(node->getItem().first); - auto idx = getIdx(newbuckets, h); + auto idx = getIdx(bucket_count, h); // Reuse as long a chain as possible from the end. Since the // nodes don't have previous pointers, the longest last chain // will be the same for both the previous hashmap and the new one, @@ -478,7 +493,7 @@ class alignas(64) ConcurrentHashMapSegment { auto last = node->next_.load(std::memory_order_relaxed); for (; last != nullptr; last = last->next_.load(std::memory_order_relaxed)) { - auto k = getIdx(newbuckets, HashFn()(last->getItem().first)); + auto k = getIdx(bucket_count, HashFn()(last->getItem().first)); if (k != lastidx) { lastidx = k; lastrun = last; @@ -494,7 +509,7 @@ class alignas(64) ConcurrentHashMapSegment { node = node->next_.load(std::memory_order_relaxed)) { auto newnode = (Node*)Allocator().allocate(sizeof(Node)); new (newnode) Node(node); - auto k = getIdx(newbuckets, HashFn()(node->getItem().first)); + auto k = getIdx(bucket_count, HashFn()(node->getItem().first)); auto prevhead = &newbuckets->buckets_[k]; newnode->next_.store(prevhead->load(std::memory_order_relaxed)); prevhead->store(newnode, std::memory_order_relaxed); @@ -502,10 +517,13 @@ class alignas(64) ConcurrentHashMapSegment { } auto oldbuckets = buckets_.load(std::memory_order_relaxed); + seqlock_.fetch_add(1, std::memory_order_release); + bucket_count_.store(bucket_count, std::memory_order_release); buckets_.store(newbuckets, std::memory_order_release); + seqlock_.fetch_add(1, std::memory_order_release); oldbuckets->retire( folly::hazptr::default_hazptr_domain(), - concurrenthashmap::HazptrDeleter()); + concurrenthashmap::HazptrBucketDeleter(oldcount)); } bool find(Iterator& res, const KeyType& k) { @@ -513,15 +531,18 @@ class alignas(64) ConcurrentHashMapSegment { folly::hazptr::hazptr_local<1> hlocal; auto haznext = &hlocal[0]; auto h = HashFn()(k); - auto buckets = res.hazptrs_[0].get_protected(buckets_); - auto idx = getIdx(buckets, h); + size_t bcount; + Buckets* buckets; + getBucketsAndCount(bcount, buckets, res.hazptrs_[0]); + + auto idx = getIdx(bcount, h); auto prev = &buckets->buckets_[idx]; auto node = hazcurr->get_protected(*prev); while (node) { if (KeyEqual()(k, node->getItem().first)) { // We may be using hlocal, make sure we are using hazptrs_ res.hazptrs_[1].reset(node); - res.setNode(node, buckets, idx); + res.setNode(node, buckets, bcount, idx); return true; } node = haznext[0].get_protected(node->next_); @@ -541,8 +562,9 @@ class alignas(64) ConcurrentHashMapSegment { { std::lock_guard g(m_); + size_t bcount = bucket_count_.load(std::memory_order_relaxed); auto buckets = buckets_.load(std::memory_order_relaxed); - auto idx = getIdx(buckets, h); + auto idx = getIdx(bcount, h); auto head = &buckets->buckets_[idx]; node = head->load(std::memory_order_relaxed); Node* prev = nullptr; @@ -562,7 +584,10 @@ class alignas(64) ConcurrentHashMapSegment { if (iter) { iter->hazptrs_[0].reset(buckets); 
iter->setNode( - node->next_.load(std::memory_order_acquire), buckets, idx); + node->next_.load(std::memory_order_acquire), + buckets, + bcount, + idx); iter->next(); } size_--; @@ -589,33 +614,38 @@ class alignas(64) ConcurrentHashMapSegment { // throw if hash or key_eq functions throw. void erase(Iterator& res, Iterator& pos) { erase_internal(pos->first, &res); + // Invalidate the iterator. + pos = cend(); } void clear() { - auto buckets = buckets_.load(std::memory_order_relaxed); - auto newbuckets = (Buckets*)Allocator().allocate(sizeof(Buckets)); - new (newbuckets) Buckets(buckets->bucket_count_); + size_t bcount = bucket_count_.load(std::memory_order_relaxed); + Buckets* buckets; + auto newbuckets = Buckets::create(bcount); { std::lock_guard g(m_); + buckets = buckets_.load(std::memory_order_relaxed); buckets_.store(newbuckets, std::memory_order_release); size_ = 0; } buckets->retire( folly::hazptr::default_hazptr_domain(), - concurrenthashmap::HazptrDeleter()); + concurrenthashmap::HazptrBucketDeleter(bcount)); } void max_load_factor(float factor) { std::lock_guard g(m_); load_factor_ = factor; - auto buckets = buckets_.load(std::memory_order_relaxed); - load_factor_nodes_ = buckets->bucket_count_ * load_factor_; + load_factor_nodes_ = + bucket_count_.load(std::memory_order_relaxed) * load_factor_; } Iterator cbegin() { Iterator res; - auto buckets = res.hazptrs_[0].get_protected(buckets_); - res.setNode(nullptr, buckets, 0); + size_t bcount; + Buckets* buckets; + getBucketsAndCount(bcount, buckets, res.hazptrs_[0]); + res.setNode(nullptr, buckets, bcount, 0); res.next(); return res; } @@ -628,29 +658,37 @@ class alignas(64) ConcurrentHashMapSegment { // allocating buckets_ at the same time. class Buckets : public folly::hazptr::hazptr_obj_base< Buckets, - concurrenthashmap::HazptrDeleter> { + concurrenthashmap::HazptrBucketDeleter> { + Buckets() {} + ~Buckets() {} + public: - explicit Buckets(size_t count) : bucket_count_(count) { - buckets_ = - (Atom*)Allocator().allocate(sizeof(Atom) * count); - new (buckets_) Atom[ count ]; + static Buckets* create(size_t count) { + auto buf = + Allocator().allocate(sizeof(Buckets) + sizeof(Atom) * count); + auto buckets = new (buf) Buckets(); for (size_t i = 0; i < count; i++) { - buckets_[i].store(nullptr, std::memory_order_relaxed); + auto bucket = new (&buckets->buckets_[i]) Atom(); + bucket->store(nullptr, std::memory_order_relaxed); } + return buckets; } - ~Buckets() { - for (size_t i = 0; i < bucket_count_; i++) { + + void destroy(size_t count) { + for (size_t i = 0; i < count; i++) { auto elem = buckets_[i].load(std::memory_order_relaxed); if (elem) { elem->release(); } + typedef Atom Element; + buckets_[i].~Element(); } + this->~Buckets(); Allocator().deallocate( - (uint8_t*)buckets_, sizeof(Atom) * bucket_count_); + (uint8_t*)this, sizeof(Atom) * count + sizeof(*this)); } - size_t bucket_count_; - Atom* buckets_{nullptr}; + Atom buckets_[0]; }; public: @@ -660,10 +698,12 @@ class alignas(64) ConcurrentHashMapSegment { FOLLY_ALWAYS_INLINE explicit Iterator(std::nullptr_t) : hazptrs_(nullptr) {} FOLLY_ALWAYS_INLINE ~Iterator() {} - void setNode(Node* node, Buckets* buckets, uint64_t idx) { + void + setNode(Node* node, Buckets* buckets, size_t bucket_count, uint64_t idx) { node_ = node; buckets_ = buckets; idx_ = idx; + bucket_count_ = bucket_count; } const value_type& operator*() const { @@ -688,11 +728,10 @@ class alignas(64) ConcurrentHashMapSegment { void next() { while (!node_) { - if (idx_ >= buckets_->bucket_count_) { + if (idx_ >= 
bucket_count_) { break; } DCHECK(buckets_); - DCHECK(buckets_->buckets_); node_ = hazptrs_[1].get_protected(buckets_->buckets_[idx_]); if (node_) { break; @@ -721,6 +760,7 @@ class alignas(64) ConcurrentHashMapSegment { idx_ = o.idx_; buckets_ = o.buckets_; hazptrs_[0].reset(buckets_); + bucket_count_ = o.bucket_count_; return *this; } @@ -730,6 +770,7 @@ class alignas(64) ConcurrentHashMapSegment { idx_ = o.idx_; buckets_ = o.buckets_; hazptrs_[0].reset(buckets_); + bucket_count_ = o.bucket_count_; } /* implicit */ Iterator(Iterator&& o) noexcept @@ -737,6 +778,7 @@ class alignas(64) ConcurrentHashMapSegment { node_ = o.node_; buckets_ = o.buckets_; idx_ = o.idx_; + bucket_count_ = o.bucket_count_; } // These are accessed directly from the functions above @@ -745,22 +787,42 @@ class alignas(64) ConcurrentHashMapSegment { private: Node* node_{nullptr}; Buckets* buckets_{nullptr}; - uint64_t idx_; + size_t bucket_count_{0}; + uint64_t idx_{0}; }; private: // Shards have already used low ShardBits of the hash. // Shift it over to use fresh bits. - uint64_t getIdx(Buckets* buckets, size_t hash) { - return (hash >> ShardBits) & (buckets->bucket_count_ - 1); + uint64_t getIdx(size_t bucket_count, size_t hash) { + return (hash >> ShardBits) & (bucket_count - 1); + } + void getBucketsAndCount( + size_t& bcount, + Buckets*& buckets, + folly::hazptr::hazptr_holder& hazptr) { + while (true) { + auto seqlock = seqlock_.load(std::memory_order_acquire); + bcount = bucket_count_.load(std::memory_order_acquire); + buckets = hazptr.get_protected(buckets_); + auto seqlock2 = seqlock_.load(std::memory_order_acquire); + if (!(seqlock & 1) && (seqlock == seqlock2)) { + break; + } + } + DCHECK(buckets); } + Mutex m_; float load_factor_; size_t load_factor_nodes_; size_t size_{0}; size_t const max_size_; - Atom buckets_{nullptr}; - Mutex m_; + + // Fields needed for read-only access, on separate cacheline. 
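+ // The pair (bucket_count_, buckets_) is published with a seqlock: + // rehash() bumps seqlock_ to an odd value before storing the new + // count and pointer and to the next even value afterwards, so readers + // in getBucketsAndCount() retry until they observe the same even value + // on both sides of their loads and are guaranteed a consistent pair.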
+ alignas(64) Atom buckets_{nullptr}; + std::atomic seqlock_{0}; + Atom bucket_count_; }; } // namespace detail } // namespace folly diff --git a/folly/concurrency/test/CacheLocalityTest.cpp b/folly/concurrency/test/CacheLocalityTest.cpp index 8122826174c..c34f2a1596c 100644 --- a/folly/concurrency/test/CacheLocalityTest.cpp +++ b/folly/concurrency/test/CacheLocalityTest.cpp @@ -21,7 +21,6 @@ #include #include #include -#include #include using namespace folly; @@ -412,8 +411,8 @@ TEST(AccessSpreader, Wrapping) { } } -TEST(CoreAllocator, Basic) { - CoreAllocator<32> alloc; +TEST(CoreRawAllocator, Basic) { + CoreRawAllocator<32> alloc; auto a = alloc.get(0); auto res = a->allocate(8); memset(res, 0, 8); diff --git a/folly/concurrency/test/ConcurrentHashMapTest.cpp b/folly/concurrency/test/ConcurrentHashMapTest.cpp index 8682474e422..cd60bb0a573 100644 --- a/folly/concurrency/test/ConcurrentHashMapTest.cpp +++ b/folly/concurrency/test/ConcurrentHashMapTest.cpp @@ -89,22 +89,18 @@ TEST(ConcurrentHashMap, MoveTest) { struct foo { static int moved; static int copied; - foo(foo&& o) noexcept { - (void*)&o; + foo(foo&&) noexcept { moved++; } - foo& operator=(foo&& o) { - (void*)&o; + foo& operator=(foo&&) { moved++; return *this; } - foo& operator=(const foo& o) { - (void*)&o; + foo& operator=(const foo&) { copied++; return *this; } - foo(const foo& o) { - (void*)&o; + foo(const foo&) { copied++; } foo() {} @@ -594,25 +590,107 @@ TEST(ConcurrentHashMap, RefcountTest) { } struct Wrapper { - Wrapper() = default; + explicit Wrapper(bool& del_) : del(del_) {} ~Wrapper() { del = true; } - static bool del; + bool& del; }; -bool Wrapper::del = false; - TEST(ConcurrentHashMap, Deletion) { - EXPECT_FALSE(Wrapper::del); + bool del{false}; { ConcurrentHashMap> map; - map.insert(0, std::make_shared()); + map.insert(0, std::make_shared(del)); + } + + EXPECT_TRUE(del); +} + +TEST(ConcurrentHashMap, DeletionWithErase) { + bool del{false}; + + { + ConcurrentHashMap> map; + + map.insert(0, std::make_shared(del)); map.erase(0); } - EXPECT_TRUE(Wrapper::del); + EXPECT_TRUE(del); +} + +TEST(ConcurrentHashMap, DeletionWithIterator) { + bool del{false}; + + { + ConcurrentHashMap> map; + + map.insert(0, std::make_shared(del)); + auto it = map.find(0); + map.erase(it); + } + + EXPECT_TRUE(del); +} + +TEST(ConcurrentHashMap, DeletionWithForLoop) { + bool del{false}; + + { + ConcurrentHashMap> map; + + map.insert(0, std::make_shared(del)); + for (auto it = map.cbegin(); it != map.cend(); ++it) { + EXPECT_EQ(it->first, 0); + } + } + + EXPECT_TRUE(del); +} + +TEST(ConcurrentHashMap, DeletionMultiple) { + bool del1{false}, del2{false}; + + { + ConcurrentHashMap> map; + + map.insert(0, std::make_shared(del1)); + map.insert(1, std::make_shared(del2)); + } + + EXPECT_TRUE(del1); + EXPECT_TRUE(del2); +} + +TEST(ConcurrentHashMap, DeletionAssigned) { + bool del1{false}, del2{false}; + + { + ConcurrentHashMap> map; + + map.insert(0, std::make_shared(del1)); + map.insert_or_assign(0, std::make_shared(del2)); + } + + EXPECT_TRUE(del1); + EXPECT_TRUE(del2); +} + +TEST(ConcurrentHashMap, DeletionMultipleMaps) { + bool del1{false}, del2{false}; + + { + ConcurrentHashMap> map1; + ConcurrentHashMap> map2; + + map1.insert(0, std::make_shared(del1)); + map2.insert(0, std::make_shared(del2)); + } + + EXPECT_TRUE(del1); + EXPECT_TRUE(del2); } diff --git a/folly/container/EvictingCacheMap.h b/folly/container/EvictingCacheMap.h index d937077cac3..974e43ab70e 100644 --- a/folly/container/EvictingCacheMap.h +++ 
b/folly/container/EvictingCacheMap.h @@ -24,7 +24,7 @@ #include #include -#include +#include namespace folly { @@ -90,13 +90,23 @@ namespace folly { * unless evictions of LRU items are triggered by calling prune() by clients * (using their own eviction criteria). */ -template > +template < + class TKey, + class TValue, + class THash = std::hash, + class TKeyEqual = std::equal_to> class EvictingCacheMap { private: // typedefs for brevity struct Node; + struct KeyHasher; + struct KeyValueEqual; typedef boost::intrusive::link_mode link_mode; - typedef boost::intrusive::unordered_set NodeMap; + typedef boost::intrusive::unordered_set< + Node, + boost::intrusive::hash, + boost::intrusive::equal> + NodeMap; typedef boost::intrusive::list NodeList; typedef std::pair TPair; @@ -144,13 +154,19 @@ class EvictingCacheMap { * @param clearSize the number of elements to clear at a time when the * eviction size is reached. */ - explicit EvictingCacheMap(std::size_t maxSize, std::size_t clearSize = 1) + explicit EvictingCacheMap( + std::size_t maxSize, + std::size_t clearSize = 1, + const THash& keyHash = THash(), + const TKeyEqual& keyEqual = TKeyEqual()) : nIndexBuckets_(std::max(maxSize / 2, std::size_t(kMinNumIndexBuckets))), indexBuckets_(new typename NodeMap::bucket_type[nIndexBuckets_]), indexTraits_(indexBuckets_.get(), nIndexBuckets_), - index_(indexTraits_), + keyHash_(keyHash), + keyEqual_(keyEqual), + index_(indexTraits_, keyHash_, keyEqual_), maxSize_(maxSize), - clearSize_(clearSize) { } + clearSize_(clearSize) {} EvictingCacheMap(const EvictingCacheMap&) = delete; EvictingCacheMap& operator=(const EvictingCacheMap&) = delete; @@ -214,7 +230,7 @@ class EvictingCacheMap { TValue& get(const TKey& key) { auto it = find(key); if (it == end()) { - std::__throw_out_of_range("Key does not exist"); + throw_exception("Key does not exist"); } return it->second; } @@ -246,7 +262,7 @@ class EvictingCacheMap { const TValue& getWithoutPromotion(const TKey& key) const { auto it = findWithoutPromotion(key); if (it == end()) { - std::__throw_out_of_range("Key does not exist"); + throw_exception("Key does not exist"); } return it->second; } @@ -412,37 +428,36 @@ class EvictingCacheMap { } private: - struct Node - : public boost::intrusive::unordered_set_base_hook, - public boost::intrusive::list_base_hook { + struct Node : public boost::intrusive::unordered_set_base_hook, + public boost::intrusive::list_base_hook { Node(const TKey& key, TValue&& value) - : pr(std::make_pair(key, std::move(value))) { - } + : pr(std::make_pair(key, std::move(value))) {} TPair pr; - friend bool operator==(const Node& lhs, const Node& rhs) { - return lhs.pr.first == rhs.pr.first; - } - friend std::size_t hash_value(const Node& node) { - return THash()(node.pr.first); - } }; struct KeyHasher { - std::size_t operator()(const Node& node) { - return THash()(node.pr.first); + KeyHasher(const THash& keyHash) : hash(keyHash) {} + std::size_t operator()(const Node& node) const { + return hash(node.pr.first); } - std::size_t operator()(const TKey& key) { - return THash()(key); + std::size_t operator()(const TKey& key) const { + return hash(key); } + THash hash; }; struct KeyValueEqual { - bool operator()(const TKey& lhs, const Node& rhs) { - return lhs == rhs.pr.first; + KeyValueEqual(const TKeyEqual& keyEqual) : equal(keyEqual) {} + bool operator()(const TKey& lhs, const Node& rhs) const { + return equal(lhs, rhs.pr.first); + } + bool operator()(const Node& lhs, const TKey& rhs) const { + return equal(lhs.pr.first, rhs); } - bool 
operator()(const Node& lhs, const TKey& rhs) { - return lhs.pr.first == rhs; + bool operator()(const Node& lhs, const Node& rhs) const { + return equal(lhs.pr.first, rhs.pr.first); } + TKeyEqual equal; }; /** @@ -453,11 +468,11 @@ class EvictingCacheMap { * (a std::pair of const TKey, TValue) or index_.end() if it does not exist */ typename NodeMap::iterator findInIndex(const TKey& key) { - return index_.find(key, KeyHasher(), KeyValueEqual()); + return index_.find(key, KeyHasher(keyHash_), KeyValueEqual(keyEqual_)); } typename NodeMap::const_iterator findInIndex(const TKey& key) const { - return index_.find(key, KeyHasher(), KeyValueEqual()); + return index_.find(key, KeyHasher(keyHash_), KeyValueEqual(keyEqual_)); } /** @@ -493,6 +508,8 @@ class EvictingCacheMap { std::size_t nIndexBuckets_; std::unique_ptr indexBuckets_; typename NodeMap::bucket_traits indexTraits_; + THash keyHash_; + TKeyEqual keyEqual_; NodeMap index_; NodeList lru_; std::size_t maxSize_; diff --git a/folly/container/F14.md b/folly/container/F14.md new file mode 100644 index 00000000000..e7116ec58b1 --- /dev/null +++ b/folly/container/F14.md @@ -0,0 +1,235 @@ +# F14 Hash Table + +F14 is a 14-way probing hash table that resolves collisions by double +hashing. Up to 14 keys are stored in a chunk at a single hash table +position. SSE2 vector instructions are used to filter within a chunk; +intra-chunk search takes only a handful of instructions. **F14** refers +to the fact that the algorithm **F**ilters up to **14** keys at a time. +This strategy allows the hash table to be operated at a high maximum +load factor (12/14) while still keeping probe chains very short. + +F14 provides compelling replacements for most of the hash tables we use in +production at Facebook. Switching to it can improve memory efficiency +and performance at the same time. The hash table implementations +widely deployed in C++ at Facebook exist along a spectrum of space/time +tradeoffs. The fastest is the least memory efficient, and the most +memory efficient (google::sparse_hash_map) is much slower than the rest. +F14 moves the curve, simultaneously improving memory efficiency and +performance when compared to most of the existing algorithms. + +## F14 VARIANTS + +The core hash table implementation has a pluggable storage strategy, +with three policies provided: + +F14NodeMap stores values indirectly, calling malloc on each insert like +std::unordered_map. This implementation is the most memory efficient +for medium and large keys. It provides the same iterator and reference +stability guarantees as the standard map while being faster and more +memory efficient, so you can substitute F14NodeMap for std::unordered_map +safely in production code. F14's filtering substantially reduces +indirection (and cache misses) when compared to std::unordered_map. + +F14ValueMap stores values inline, like google::dense_hash_map. +Inline storage is the most memory efficient for small values, but for +medium and large values it wastes space. Because it can tolerate a much +higher load factor, F14ValueMap is almost twice as memory efficient as +dense_hash_map while also faster for most workloads. + +F14VectorMap keeps values packed in a contiguous array. The main hash +array stores 32-bit indexes into the value vector. 
Compared to the +existing internal implementations that use a similar strategy, F14 is +slower for simple keys and small or medium-sized tables (because of the +cost of bit mixing), faster for complex keys and large tables, and saves +about 16 bytes per entry on average. + +We also provide: + +F14FastMap is an alias to F14ValueMap or F14VectorMap depending on +entry size. When the key and mapped_type are less than 24 bytes it +typedefs to F14ValueMap. For medium and large entries it typedefs to +F14VectorMap. This strategy provides the best performance, while also +providing better memory efficiency than dense_hash_map or the other hash +tables in use at Facebook that don't individually allocate nodes. + +## WHICH F14 VARIANT IS RIGHT FOR ME? + +F14FastMap is a good default choice. If you care more about memory +efficiency than performance, F14NodeMap is better for medium and +large entries. F14NodeMap is the only F14 variant that doesn't move +its elements, so in the rare case that you need reference stability you +should use it. + +## TRANSPARENT (HETEROGENEOUS) HASH AND EQUALITY + +In some cases it makes sense to define hash and key equality across +types. For example, StringPiece's hash and equality are capable of +accepting std::string (because std::string is implicitly convertible +to StringPiece). If you mark the hash functor and key equality functor +as _transparent_, then F14 will allow you to search the table directly +using any of the accepted key types without converting the key. + +For example, using H = +folly::transparent> and E += folly::transparent>, an +F14FastSet will allow you to find or count using +a StringPiece key (as well as std::string key). Note that this is +possible even though there is no implicit conversion from StringPiece +to std::string. + +## WHY CHUNKS? + +Assuming that you have a magic wand that lets you search all of the keys +in a chunk in a single step (our wand is called _mm_cmpeq_epi8), then +using chunks fundamentally improves the load factor/collision tradeoff. +The cost is proportional only to the number of chunks visited to find +the key. + +It's kind of like the birthday paradox in reverse. In a room with 23 +people there is a 50/50 chance that two of them have the same birthday +(overflowing a chunk with capacity 1), but the chance that 8 of them +were born in the same week (overflowing a chunk with capacity 7) is +very small. Even though the chance of any two people being born in +the same week is higher (1/52 instead of 1/365), the larger number of +coincidences required means that the final probability is much lower +(less than 1 in a million). It would require 160 people to reach a 50/50 +chance that 8 of them were born in the same week. + +## WHY PROBING? + +Chaining to a new chunk on collision is not very memory efficient, +because the new chunk is almost certain to be under-filled. We tried +chaining to individual entries, but that bloated the lookup code and +can't match the performance of a probing strategy. + +At our max load factor of 12/14, the expected probe length when searching +for an existing key (find hit) is 1.04, and fewer than 1% of keys are +not found in one of the first 3 chunks. When searching for a key that is +not in the map (find miss) the expected probe length at max load factor +is 1.275 and the P99 probe length is 4. 
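For concreteness, here is a short sketch of the heterogeneous lookup described in the TRANSPARENT (HETEROGENEOUS) HASH AND EQUALITY section, with the template arguments written out in full. It assumes the set variants live in a companion folly/container/F14Set.h header, and that folly::transparent and folly::hasher come from folly/Hash.h:

```cpp
#include <functional>
#include <string>

#include <folly/Hash.h>
#include <folly/Range.h>
#include <folly/container/F14Set.h>

using H = folly::transparent<folly::hasher<folly::StringPiece>>;
using E = folly::transparent<std::equal_to<folly::StringPiece>>;

// Heterogeneous lookup: the StringPiece is hashed and compared directly
// against the stored std::string keys; no temporary std::string is built.
bool contains(
    const folly::F14FastSet<std::string, H, E>& set,
    folly::StringPiece key) {
  return set.count(key) != 0;
}
```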
+ +## CHUNK OVERFLOW COUNTS: REFERENCE-COUNTED TOMBSTONES + +Hash tables with a complex probing strategy (quadratic or double-hashing) +typically use a tombstone on erase, because it is very difficult to +find the keys that might have been displaced by a full bucket (i.e., +chunk in F14). If the probing strategy allows only a small number of +potential destinations for a displaced key (linear probing, Robin Hood +hashing, or Cuckoo hashing), it is also an option to find a displaced key, +relocate it, and then recursively repair the new hole. + +Tombstones must be eventually reclaimed to deal with workloads that +continuously insert and erase. google::dense_hash_map eventually triggers +a rehash in this case, for example. Unfortunately, to avoid quadratic +behavior this rehash may have to halve the max load factor of the table, +resulting in a huge decrease in memory efficiency. + +Although most probing algorithms just keep probing until they find an +empty slot, probe lengths can be substantially reduced if you track +whether a bucket has actually rejected a key. This "overflow bit" +is set when an attempt is made to place a key into the bucket but the +bucket was full. (An especially unlucky key might have to try several +buckets, setting the overflow bit in each.) Amble and Knuth describe an +overflow bit in the "Further development" section of "Ordered hash tables" +(https://academic.oup.com/comjnl/article/17/2/135/525363). + +The overflow bit subsumes the role of a tombstone, since a tombstone's +only effect is to cause a probe search to continue. Unlike a tombstone, +however, the overflow bit is a property of the keys that were displaced +rather than the key that was erased. It's only a small step to turn +this into a counter that records the number of displaced keys, and that +can be decremented on erase. Overflow counts give us both an earlier +exit from probing and the effect of a reference-counted tombstone. +They automatically clean themselves up in a steady-state insert and +erase workload, giving us the upsides of double-hashing without the +normal downsides of tombstones. + +## HOW DOES VECTOR FILTERING WORK? + +F14 computes a secondary hash value for each key, which we call the key's +tag. Tags are 1 byte: 7 bits of entropy with the top bit set. The 14 +tags are joined with 2 additional bytes of metadata to form a 16-byte +aligned __m128i at the beginning of the chunk. When we're looking for a +key we can compare the needle's tag to all 14 tags in a chunk in parallel. +The result of the comparison is a bitmask that identifies only slots in +a chunk that might have a non-empty matching key. Failing searches are +unlikely to perform any key comparisons, successful searches are likely +to perform exactly 1 comparison, and all of the resulting branches are +pretty predictable. + +The vector search uses SSE2 intrinsics. SSE2 is a non-optional part +of the x86_64 platform, so every 64-bit x86 platform supports them. +AARCH64's vector instructions will allow a similar strategy, although +the lack of a movemask operation complicates things a bit. + +## WHAT ABOUT MEMORY OVERHEAD FOR SMALL TABLES? + +The F14 algorithm works well for large tables, because the tags can +fit in cache even when the keys and values can't. Tiny hash tables are +by far the most numerous, however, so it's important that we minimize +the footprint when the table is empty or has only 1 or 2 elements.
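Returning to the vector-filtering section for a moment: the tag comparison it describes boils down to three SSE2 intrinsics. A standalone sketch (not the actual F14 code), assuming the 14 tags occupy the low 14 bytes of the 16-byte chunk header:

```cpp
#include <emmintrin.h> // SSE2

#include <cstdint>

// Compare the needle's tag against all 14 stored tags at once and return
// a bitmask of candidate slots. Tags always have their top bit set, so an
// empty slot (tag 0) can never produce a match.
unsigned candidateSlots(const __m128i* chunkMeta, std::uint8_t needleTag) {
  __m128i tags = _mm_load_si128(chunkMeta); // 16-byte aligned chunk header
  __m128i needle = _mm_set1_epi8(static_cast<char>(needleTag));
  __m128i eq = _mm_cmpeq_epi8(tags, needle); // 0xFF in each matching byte
  auto mask = static_cast<unsigned>(_mm_movemask_epi8(eq));
  return mask & 0x3fffu; // keep the 14 tag lanes, drop the 2 metadata bytes
}
```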
+Conveniently, tags cause keys to be densely packed into the bottom of +a chunk and filter all memory accesses to the portions of a chunk that +are not used. That means that we can also support capacities that are +a fraction of 1 chunk with no change to any of the search and insertion +algorithms. The only change required is in the check to see if a rehash +is required. F14's first three capacities all use one chunk and one +16-byte metadata vector, but allocate space for 2, 6, and then 12 keys. + +## IS F14NODEMAP FULLY STANDARDS-COMPLIANT? + +No. F14 does provide full support for stateful allocators, fancy +pointers, and as many parts of the C++ standard for unordered associative +containers as it can, but it is not fully standards-compliant. + +We don't know of a way to efficiently implement the full bucket API +in a table that uses double-hashed probing, in particular size_type +bucket(key_type const&). This function must compute the bucket index +for any key, even before it is inserted into the table. That means +that a local_iterator range can't partition the key space by the chunk +that terminated probing during insert; the only partition choice with +reasonable locality would be the first-choice chunk. The probe sequence +for a key in double-hashing depends on the key, not the first-choice +chunk, however, so it is infeasible to search for all of the displaced +keys given only their first-choice location. We're unwilling to use an +inferior probing strategy or dedicate space to the required metadata just +to support the full bucket API. Implementing the rest of the bucket API, +such as local_iterator begin(size_type), would not be difficult. + +F14 does not allow max_load_factor to be adjusted. Probing tables +can't support load factors greater than 1, so the standards-required +ability to temporarily disable rehashing by setting a very +high max load factor just isn't possible. We have also measured that +there is no performance advantage to forcing a low load factor, so it's +better just to omit the field and save space in every F14 instance. +This is part of the way we get empty maps down to 32 bytes. The void +max_load_factor(float) method is still present, but does nothing. We use +the default max_load_factor of 1.0f all of the time, adjusting the value +returned from size_type bucket_count() so that the externally-visible +load factor reaches 1 just as the actual internal load factor reaches +our threshold of 12/14. + +The standard requires that a hash table be iterable in O(size()) time +regardless of its load factor (rather than O(bucket_count())). That means +if you insert 1 million keys then erase all but 10, iteration should +be O(10). For std::unordered_map the cost of supporting this scenario +is an extra level of indirection in every read and every write, which is +part of why we can improve substantially on its performance. Low load +factor iteration occurs in practice when erasing keys during iteration +(for example by repeatedly calling map.erase(map.begin())), so we provide +the weaker guarantee that iteration is O(size()) after erasing any prefix +of the iteration order. F14VectorMap doesn't have this problem. + +The standard requires that clear() be O(size()), which has the practical +effect of prohibiting a change to bucket_count. F14 deallocates +all memory during clear() if it has space for more than 100 keys, to +avoid leaving a large table that will be expensive to iterate (see the +previous paragraph).
google::dense_hash_map works around this tradeoff +by providing both clear() and clear_no_resize(); we could do something +similar. + +F14NodeMap does not currently support the C++17 node API, but it could +be trivially added. + +* Nathan Bronson -- +* Xiao Shi -- diff --git a/folly/container/F14Map.h b/folly/container/F14Map.h new file mode 100644 index 00000000000..5278ae97bbd --- /dev/null +++ b/folly/container/F14Map.h @@ -0,0 +1,990 @@ +/* + * Copyright 2017-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +/** + * F14NodeMap, F14ValueMap, and F14VectorMap + * + * F14FastMap is a conditional typedef to F14ValueMap or F14VectorMap + * + * See F14.md + * + * @author Nathan Bronson + * @author Xiao Shi + */ + +#include + +#include +#include +#include +#include + +#include +#include + +#if !FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE + +#include + +namespace folly { + +template +using F14NodeMap = std::unordered_map; +template +using F14ValueMap = std::unordered_map; +template +using F14VectorMap = std::unordered_map; +template +using F14FastMap = std::unordered_map; + +} // namespace folly + +#else // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE + +namespace folly { +namespace f14 { +namespace detail { + +template +class F14BasicMap { + template < + typename K, + typename T, + typename H = typename Policy::Hasher, + typename E = typename Policy::KeyEqual> + using IfIsTransparent = folly::_t>; + + public: + //// PUBLIC - Member types + + using key_type = typename Policy::Key; + using mapped_type = typename Policy::Mapped; + using value_type = typename Policy::Value; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = typename Policy::Hasher; + using key_equal = typename Policy::KeyEqual; + using allocator_type = typename Policy::Alloc; + using reference = value_type&; + using const_reference = value_type const&; + using pointer = typename std::allocator_traits::pointer; + using const_pointer = + typename std::allocator_traits::const_pointer; + using iterator = typename Policy::Iter; + using const_iterator = typename Policy::ConstIter; + + private: + using ItemIter = typename Policy::ItemIter; + + public: + //// PUBLIC - Member functions + + F14BasicMap() noexcept(F14Table::kDefaultConstructIsNoexcept) + : F14BasicMap(0) {} + + explicit F14BasicMap( + std::size_t initialCapacity, + hasher const& hash = hasher{}, + key_equal const& eq = key_equal{}, + allocator_type const& alloc = allocator_type{}) + : table_{initialCapacity, hash, eq, alloc} {} + + explicit F14BasicMap(std::size_t initialCapacity, allocator_type const& alloc) + : F14BasicMap(initialCapacity, hasher{}, key_equal{}, alloc) {} + + explicit F14BasicMap( + std::size_t initialCapacity, + hasher const& hash, + allocator_type const& alloc) + : F14BasicMap(initialCapacity, hash, key_equal{}, alloc) {} + + explicit F14BasicMap(allocator_type const& alloc) : F14BasicMap(0, alloc) {} + + template + F14BasicMap( + InputIt first, + InputIt last, + 
std::size_t initialCapacity = 0, + hasher const& hash = hasher{}, + key_equal const& eq = key_equal{}, + allocator_type const& alloc = allocator_type{}) + : table_{initialCapacity, hash, eq, alloc} { + initialInsert(first, last, initialCapacity); + } + + template + F14BasicMap( + InputIt first, + InputIt last, + std::size_t initialCapacity, + allocator_type const& alloc) + : table_{initialCapacity, hasher{}, key_equal{}, alloc} { + initialInsert(first, last, initialCapacity); + } + + template + F14BasicMap( + InputIt first, + InputIt last, + std::size_t initialCapacity, + hasher const& hash, + allocator_type const& alloc) + : table_{initialCapacity, hash, key_equal{}, alloc} { + initialInsert(first, last, initialCapacity); + } + + F14BasicMap(F14BasicMap const& rhs) = default; + + F14BasicMap(F14BasicMap const& rhs, allocator_type const& alloc) + : table_{rhs.table_, alloc} {} + + F14BasicMap(F14BasicMap&& rhs) = default; + + F14BasicMap(F14BasicMap&& rhs, allocator_type const& alloc) noexcept( + F14Table::kAllocIsAlwaysEqual) + : table_{std::move(rhs.table_), alloc} {} + + F14BasicMap( + std::initializer_list init, + std::size_t initialCapacity = 0, + hasher const& hash = hasher{}, + key_equal const& eq = key_equal{}, + allocator_type const& alloc = allocator_type{}) + : table_{initialCapacity, hash, eq, alloc} { + initialInsert(init.begin(), init.end(), initialCapacity); + } + + F14BasicMap( + std::initializer_list init, + std::size_t initialCapacity, + allocator_type const& alloc) + : table_{initialCapacity, hasher{}, key_equal{}, alloc} { + initialInsert(init.begin(), init.end(), initialCapacity); + } + + F14BasicMap( + std::initializer_list init, + std::size_t initialCapacity, + hasher const& hash, + allocator_type const& alloc) + : table_{initialCapacity, hash, key_equal{}, alloc} { + initialInsert(init.begin(), init.end(), initialCapacity); + } + + F14BasicMap& operator=(F14BasicMap const&) = default; + + F14BasicMap& operator=(F14BasicMap&&) = default; + + allocator_type get_allocator() const noexcept { + return table_.alloc(); + } + + //// PUBLIC - Iterators + + iterator begin() noexcept { + return table_.makeIter(table_.begin()); + } + const_iterator begin() const noexcept { + return cbegin(); + } + const_iterator cbegin() const noexcept { + return table_.makeConstIter(table_.begin()); + } + + iterator end() noexcept { + return table_.makeIter(table_.end()); + } + const_iterator end() const noexcept { + return cend(); + } + const_iterator cend() const noexcept { + return table_.makeConstIter(table_.end()); + } + + //// PUBLIC - Capacity + + bool empty() const noexcept { + return table_.empty(); + } + + std::size_t size() const noexcept { + return table_.size(); + } + + std::size_t max_size() const noexcept { + return table_.max_size(); + } + + F14TableStats computeStats() const noexcept { + return table_.computeStats(); + } + + //// PUBLIC - Modifiers + + void clear() noexcept { + table_.clear(); + } + + std::pair insert(value_type const& value) { + return emplace(value); + } + + template + std::enable_if_t< + std::is_constructible::value, + std::pair> + insert(P&& value) { + return emplace(std::forward
(value)); + } + + std::pair insert(value_type&& value) { + return emplace(std::move(value)); + } + + // std::unordered_map's hinted insertion API is misleading. No + // implementation I've seen actually uses the hint. Code restructuring + // by the caller to use the hinted API is at best unnecessary, and at + // worst a pessimization. It is used, however, so we provide it. + + iterator insert(const_iterator /*hint*/, value_type const& value) { + return insert(value).first; + } + + template + std::enable_if_t::value, iterator> + insert(const_iterator /*hint*/, P&& value) { + return insert(std::forward
(value)).first; + } + + iterator insert(const_iterator /*hint*/, value_type&& value) { + return insert(std::move(value)).first; + } + + template + iterator emplace_hint(const_iterator /*hint*/, Args&&... args) { + return emplace(std::forward(args)...).first; + } + + private: + template + FOLLY_ALWAYS_INLINE void + bulkInsert(InputIt first, InputIt last, bool autoReserve) { + if (autoReserve) { + table_.reserveForInsert(std::distance(first, last)); + } + while (first != last) { + insert(*first); + ++first; + } + } + + template + void initialInsert(InputIt first, InputIt last, std::size_t initialCapacity) { + FOLLY_SAFE_DCHECK(empty() && bucket_count() >= initialCapacity, ""); + + // It's possible that there are a lot of duplicates in first..last and + // so we will oversize ourself. The common case, however, is that + // we can avoid a lot of rehashing if we pre-expand. The behavior + // is easy to disable at a particular call site by asking for an + // initialCapacity of 1. + bool autoReserve = + std::is_same< + typename std::iterator_traits::iterator_category, + std::random_access_iterator_tag>::value && + initialCapacity == 0; + bulkInsert(first, last, autoReserve); + } + + public: + template + void insert(InputIt first, InputIt last) { + // Bulk reserve is a heuristic choice, so it can backfire. We restrict + // ourself to situations that mimic bulk construction without an + // explicit initialCapacity. + bool autoReserve = + std::is_same< + typename std::iterator_traits::iterator_category, + std::random_access_iterator_tag>::value && + bucket_count() == 0; + bulkInsert(first, last, autoReserve); + } + + void insert(std::initializer_list ilist) { + insert(ilist.begin(), ilist.end()); + } + + template + std::pair insert_or_assign(key_type const& key, M&& obj) { + auto rv = try_emplace(key, std::forward(obj)); + if (!rv.second) { + rv.first->second = std::forward(obj); + } + return rv; + } + + template + std::pair insert_or_assign(key_type&& key, M&& obj) { + auto rv = try_emplace(std::move(key), std::forward(obj)); + if (!rv.second) { + rv.first->second = std::forward(obj); + } + return rv; + } + + template + iterator + insert_or_assign(const_iterator /*hint*/, key_type const& key, M&& obj) { + return insert_or_assign(key, std::move(obj)).first; + } + + template + iterator insert_or_assign(const_iterator /*hint*/, key_type&& key, M&& obj) { + return insert_or_assign(std::move(key), std::move(obj)).first; + } + + private: + std::pair emplaceItem() { + // rare but valid + return table_.tryEmplaceValue(key_type{}); + } + + template + std::pair emplaceItem(key_type&& x, U2&& y) { + // best case + return table_.tryEmplaceValue(x, std::move(x), std::forward(y)); + } + + template + std::pair emplaceItem(key_type const& x, U2&& y) { + // okay case, no construction unless we will actually insert + return table_.tryEmplaceValue(x, x, std::forward(y)); + } + + template + std::enable_if_t< + !std::is_same>::value, + std::pair> + emplaceItem(U1&& x, U2&& y) { + static_assert( + !std::is_same>::value, + "method signature bug"); + + // We can either construct key_type on the stack and move it if we end + // up inserting, or use a policy-specific mechanism to construct the + // item (possibly indirect) and then destroy it if we don't end up + // using it. The cost of being wrong is much higher for the latter + // so we choose the former (unlike std::unordered_map::emplace). 
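A quick usage note on the semantics implemented above: try_emplace never overwrites an existing mapping, while insert_or_assign (built on try_emplace) always does. A small sketch:

```cpp
#include <string>

#include <folly/container/F14Map.h>

void demo() {
  folly::F14FastMap<std::string, int> m;
  m.try_emplace("k", 1);      // inserts ("k", 1)
  m.try_emplace("k", 2);      // key already present: value stays 1
  m.insert_or_assign("k", 3); // key already present: value becomes 3
}
```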
+ key_type k(std::forward(x)); + return table_.tryEmplaceValue(k, std::move(k), std::forward(y)); + } + + template + std::pair emplaceItem(std::pair const& p) { + return emplaceItem(p.first, p.second); + } + + template + std::pair emplaceItem(std::pair&& p) { + return emplaceItem(std::move(p.first), std::move(p.second)); + } + + template + std::enable_if_t< + std::is_same, key_type>::value, + std::pair> + emplaceItem( + std::piecewise_construct_t, + std::tuple&& first_args, + std::tuple&& second_args) { + // We take care to forward by reference even if the caller didn't + // use forward_as_tuple properly + return table_.tryEmplaceValue( + std::get<0>(first_args), + std::piecewise_construct, + std::tuple>{std::move(first_args)}, + std::tuple...>{ + std::move(second_args)}); + } + + template + std::enable_if_t< + std::tuple_size>::value != 1 || + !std::is_same< + folly::remove_cvref_t< + std::tuple_element_t<0, std::tuple>>, + key_type>::value, + std::pair> + emplaceItem( + std::piecewise_construct_t, + std::tuple&& first_args, + std::tuple&& second_args) { + auto k = folly::make_from_tuple( + std::tuple...>{ + std::move(first_args)}); + return table_.tryEmplaceValue( + k, + std::piecewise_construct, + std::forward_as_tuple(std::move(k)), + std::tuple...>{ + std::move(second_args)}); + } + + public: + template + std::pair emplace(Args&&... args) { + auto rv = emplaceItem(std::forward(args)...); + return std::make_pair(table_.makeIter(rv.first), rv.second); + } + + template + std::pair try_emplace(key_type const& key, Args&&... args) { + auto rv = table_.tryEmplaceValue( + key, + std::piecewise_construct, + std::forward_as_tuple(key), + std::forward_as_tuple(std::forward(args)...)); + return std::make_pair(table_.makeIter(rv.first), rv.second); + } + + template + std::pair try_emplace(key_type&& key, Args&&... args) { + auto rv = table_.tryEmplaceValue( + key, + std::piecewise_construct, + std::forward_as_tuple(std::move(key)), + std::forward_as_tuple(std::forward(args)...)); + return std::make_pair(table_.makeIter(rv.first), rv.second); + } + + template + iterator + try_emplace(const_iterator /*hint*/, key_type const& key, Args&&... args) { + auto rv = table_.tryEmplaceValue( + key, + std::piecewise_construct, + std::forward_as_tuple(key), + std::forward_as_tuple(std::forward(args)...)); + return table_.makeIter(rv.first); + } + + template + iterator + try_emplace(const_iterator /*hint*/, key_type&& key, Args&&... args) { + auto rv = table_.tryEmplaceValue( + key, + std::piecewise_construct, + std::forward_as_tuple(std::move(key)), + std::forward_as_tuple(std::forward(args)...)); + return table_.makeIter(rv.first); + } + + FOLLY_ALWAYS_INLINE iterator erase(const_iterator pos) { + // If we are inlined then gcc and clang can optimize away all of the + // work of itemPos.advance() if our return value is discarded. 
+ auto itemPos = table_.unwrapIter(pos); + table_.erase(itemPos); + itemPos.advance(); + return table_.makeIter(itemPos); + } + + // This form avoids ambiguity when key_type has a templated constructor + // that accepts const_iterator + iterator erase(iterator pos) { + table_.erase(table_.unwrapIter(pos)); + return ++pos; + } + + iterator erase(const_iterator first, const_iterator last) { + auto itemFirst = table_.unwrapIter(first); + auto itemLast = table_.unwrapIter(last); + while (itemFirst != itemLast) { + table_.erase(itemFirst); + itemFirst.advance(); + } + return table_.makeIter(itemFirst); + } + + size_type erase(key_type const& key) { + return table_.erase(key); + } + + //// PUBLIC - Lookup + + FOLLY_ALWAYS_INLINE mapped_type& at(key_type const& key) { + return at(*this, key); + } + + FOLLY_ALWAYS_INLINE mapped_type const& at(key_type const& key) const { + return at(*this, key); + } + + mapped_type& operator[](key_type const& key) { + return try_emplace(key).first->second; + } + + mapped_type& operator[](key_type&& key) { + return try_emplace(std::move(key)).first->second; + } + + FOLLY_ALWAYS_INLINE std::size_t count(key_type const& key) const { + return table_.find(key).atEnd() ? 0 : 1; + } + + template + FOLLY_ALWAYS_INLINE IfIsTransparent count( + K const& key) const { + return table_.find(key).atEnd() ? 0 : 1; + } + + F14HashToken prehash(key_type const& key) const { + return table_.prehash(key); + } + + template + IfIsTransparent prehash(K const& key) const { + return table_.prehash(key); + } + + FOLLY_ALWAYS_INLINE iterator find(key_type const& key) { + return table_.makeIter(table_.find(key)); + } + + FOLLY_ALWAYS_INLINE const_iterator find(key_type const& key) const { + return table_.makeConstIter(table_.find(key)); + } + + FOLLY_ALWAYS_INLINE iterator + find(F14HashToken const& token, key_type const& key) { + return table_.makeIter(table_.find(token, key)); + } + + FOLLY_ALWAYS_INLINE const_iterator + find(F14HashToken const& token, key_type const& key) const { + return table_.makeConstIter(table_.find(token, key)); + } + + template + FOLLY_ALWAYS_INLINE IfIsTransparent find(K const& key) { + return table_.makeIter(table_.find(key)); + } + + template + FOLLY_ALWAYS_INLINE IfIsTransparent find( + K const& key) const { + return table_.makeConstIter(table_.find(key)); + } + + template + FOLLY_ALWAYS_INLINE IfIsTransparent find( + F14HashToken const& token, + K const& key) { + return table_.makeIter(table_.find(token, key)); + } + + template + FOLLY_ALWAYS_INLINE IfIsTransparent find( + F14HashToken const& token, + K const& key) const { + return table_.makeConstIter(table_.find(token, key)); + } + + std::pair equal_range(key_type const& key) { + return equal_range(*this, key); + } + + std::pair equal_range( + key_type const& key) const { + return equal_range(*this, key); + } + + template + IfIsTransparent> equal_range(K const& key) { + return equal_range(*this, key); + } + + template + IfIsTransparent> equal_range( + K const& key) const { + return equal_range(*this, key); + } + + //// PUBLIC - Bucket interface + + std::size_t bucket_count() const noexcept { + return table_.bucket_count(); + } + + std::size_t max_bucket_count() const noexcept { + return table_.max_bucket_count(); + } + + //// PUBLIC - Hash policy + + float load_factor() const noexcept { + return table_.load_factor(); + } + + float max_load_factor() const noexcept { + return table_.max_load_factor(); + } + + void max_load_factor(float v) { + table_.max_load_factor(v); + } + + void rehash(std::size_t 
bucketCapacity) { + // The standard's rehash() requires understanding the max load factor, + // which is easy to get wrong. Since we don't actually allow adjustment + // of max_load_factor there is no difference. + reserve(bucketCapacity); + } + + void reserve(std::size_t capacity) { + table_.reserve(capacity); + } + + //// PUBLIC - Observers + + hasher hash_function() const { + return table_.hasher(); + } + + key_equal key_eq() const { + return table_.keyEqual(); + } + + private: + template + FOLLY_ALWAYS_INLINE static auto& at(Self& self, K const& key) { + auto iter = self.find(key); + if (iter == self.end()) { + throw_exception("at() did not find key"); + } + return iter->second; + } + + template + static auto equal_range(Self& self, K const& key) { + auto first = self.find(key); + auto last = first; + if (last != self.end()) { + ++last; + } + return std::make_pair(first, last); + } + + protected: + F14Table table_; +}; + +template +bool mapsEqual(M const& lhs, M const& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + for (auto& kv : lhs) { + auto iter = rhs.find(kv.first); + if (iter == rhs.end()) { + return false; + } + if (std::is_same< + typename M::key_equal, + std::equal_to>::value) { + // find already checked key, just check value + if (!(kv.second == iter->second)) { + return false; + } + } else { + // spec says we compare key with == as well as with key_eq() + if (!(kv == *iter)) { + return false; + } + } + } + return true; +} +} // namespace detail +} // namespace f14 + +template < + typename Key, + typename Mapped, + typename Hasher = f14::DefaultHasher, + typename KeyEqual = f14::DefaultKeyEqual, + typename Alloc = f14::DefaultAlloc>> +class F14ValueMap + : public f14::detail::F14BasicMap> { + using Policy = f14::detail::MapPolicyWithDefaults< + f14::detail::ValueContainerPolicy, + Key, + Mapped, + Hasher, + KeyEqual, + Alloc>; + using Super = f14::detail::F14BasicMap; + + public: + F14ValueMap() noexcept( + f14::detail::F14Table::kDefaultConstructIsNoexcept) + : Super{} {} + + using Super::Super; + + void swap(F14ValueMap& rhs) noexcept( + f14::detail::F14Table::kSwapIsNoexcept) { + this->table_.swap(rhs.table_); + } +}; + +template +void swap( + F14ValueMap& lhs, + F14ValueMap& rhs) noexcept(noexcept(lhs.swap(rhs))) { + lhs.swap(rhs); +} + +template +bool operator==( + F14ValueMap const& lhs, + F14ValueMap const& rhs) { + return mapsEqual(lhs, rhs); +} + +template +bool operator!=( + F14ValueMap const& lhs, + F14ValueMap const& rhs) { + return !(lhs == rhs); +} + +template < + typename Key, + typename Mapped, + typename Hasher = f14::DefaultHasher, + typename KeyEqual = f14::DefaultKeyEqual, + typename Alloc = f14::DefaultAlloc>> +class F14NodeMap + : public f14::detail::F14BasicMap> { + using Policy = f14::detail::MapPolicyWithDefaults< + f14::detail::NodeContainerPolicy, + Key, + Mapped, + Hasher, + KeyEqual, + Alloc>; + using Super = f14::detail::F14BasicMap; + + public: + F14NodeMap() noexcept( + f14::detail::F14Table::kDefaultConstructIsNoexcept) + : Super{} {} + + using Super::Super; + + void swap(F14NodeMap& rhs) noexcept( + f14::detail::F14Table::kSwapIsNoexcept) { + this->table_.swap(rhs.table_); + } + + // TODO extract and node_handle insert +}; + +template +void swap( + F14NodeMap& lhs, + F14NodeMap& rhs) noexcept(noexcept(lhs.swap(rhs))) { + lhs.swap(rhs); +} + +template +bool operator==( + F14NodeMap const& lhs, + F14NodeMap const& rhs) { + return mapsEqual(lhs, rhs); +} + +template +bool operator!=( + F14NodeMap const& lhs, + F14NodeMap const& 
rhs) { + return !(lhs == rhs); +} + +template < + typename Key, + typename Mapped, + typename Hasher = f14::DefaultHasher, + typename KeyEqual = f14::DefaultKeyEqual, + typename Alloc = f14::DefaultAlloc>> +class F14VectorMap + : public f14::detail::F14BasicMap> { + using Policy = f14::detail::MapPolicyWithDefaults< + f14::detail::VectorContainerPolicy, + Key, + Mapped, + Hasher, + KeyEqual, + Alloc>; + using Super = f14::detail::F14BasicMap; + + public: + using typename Super::const_iterator; + using typename Super::iterator; + using typename Super::key_type; + + F14VectorMap() noexcept( + f14::detail::F14Table::kDefaultConstructIsNoexcept) + : Super{} {} + + // inherit constructors + using Super::Super; + + void swap(F14VectorMap& rhs) noexcept( + f14::detail::F14Table::kSwapIsNoexcept) { + this->table_.swap(rhs.table_); + } + + iterator begin() { + return this->table_.linearBegin(this->size()); + } + const_iterator begin() const { + return cbegin(); + } + const_iterator cbegin() const { + return this->table_.linearBegin(this->size()); + } + + iterator end() { + return this->table_.linearEnd(); + } + const_iterator end() const { + return cend(); + } + const_iterator cend() const { + return this->table_.linearEnd(); + } + + private: + void eraseUnderlying(typename Policy::ItemIter underlying) { + Alloc& a = this->table_.alloc(); + auto values = this->table_.values_; + + // destroy the value and remove the ptr from the base table + auto index = underlying.item(); + std::allocator_traits::destroy(a, std::addressof(values[index])); + this->table_.erase(underlying); + + // move the last element in values_ down and fix up the inbound index + auto tailIndex = this->size(); + if (tailIndex != index) { + auto tail = this->table_.find(f14::detail::VectorContainerIndexSearch{ + static_cast(tailIndex)}); + tail.item() = index; + auto p = std::addressof(values[index]); + folly::assume(p != nullptr); + this->table_.transfer(a, std::addressof(values[tailIndex]), p, 1); + } + } + + public: + FOLLY_ALWAYS_INLINE iterator erase(const_iterator pos) { + auto index = this->table_.iterToIndex(pos); + auto underlying = + this->table_.find(f14::detail::VectorContainerIndexSearch{index}); + eraseUnderlying(underlying); + return index == 0 ? 
end() : this->table_.indexToIter(index - 1); + } + + // This form avoids ambiguity when key_type has a templated constructor + // that accepts const_iterator + FOLLY_ALWAYS_INLINE iterator erase(iterator pos) { + const_iterator cpos{pos}; + return erase(cpos); + } + + iterator erase(const_iterator first, const_iterator last) { + while (first != last) { + first = erase(first); + } + return first; + } + + std::size_t erase(key_type const& key) { + auto underlying = this->table_.find(key); + if (underlying.atEnd()) { + return 0; + } else { + eraseUnderlying(underlying); + return 1; + } + } +}; + +template +void swap( + F14VectorMap& lhs, + F14VectorMap& rhs) noexcept(noexcept(lhs.swap(rhs))) { + lhs.swap(rhs); +} + +template +bool operator==( + F14VectorMap const& lhs, + F14VectorMap const& rhs) { + return mapsEqual(lhs, rhs); +} + +template +bool operator!=( + F14VectorMap const& lhs, + F14VectorMap const& rhs) { + return !(lhs == rhs); +} + +template < + typename Key, + typename Mapped, + typename Hasher = f14::DefaultHasher, + typename KeyEqual = f14::DefaultKeyEqual, + typename Alloc = f14::DefaultAlloc>> +using F14FastMap = std::conditional_t< + sizeof(std::pair) < 24, + F14ValueMap, + F14VectorMap>; + +} // namespace folly + +#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE diff --git a/folly/container/F14Set.h b/folly/container/F14Set.h new file mode 100644 index 00000000000..68bfa5c65d0 --- /dev/null +++ b/folly/container/F14Set.h @@ -0,0 +1,756 @@ +/* + * Copyright 2017-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +/** + * F14NodeSet, F14ValueSet, and F14VectorSet + * + * F14FastSet is a conditional typedef to F14ValueSet or F14VectorSet + * + * See F14.md + * + * @author Nathan Bronson + * @author Xiao Shi + */ + +#include + +#include +#include + +#if !FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE + +#include + +namespace folly { + +template +using F14NodeSet = std::unordered_set; +template +using F14ValueSet = std::unordered_set; +template +using F14VectorSet = std::unordered_set; +template +using F14FastSet = std::unordered_set; + +} // namespace folly + +#else // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE + +namespace folly { +namespace f14 { +namespace detail { + +template +class F14BasicSet { + template < + typename K, + typename T, + typename H = typename Policy::Hasher, + typename E = typename Policy::KeyEqual> + using IfIsTransparent = folly::_t>; + + public: + //// PUBLIC - Member types + + using key_type = typename Policy::Value; + using value_type = key_type; + using size_type = std::size_t; + using difference_type = std::ptrdiff_t; + using hasher = typename Policy::Hasher; + using key_equal = typename Policy::KeyEqual; + using allocator_type = typename Policy::Alloc; + using reference = value_type&; + using const_reference = value_type const&; + using pointer = typename std::allocator_traits::pointer; + using const_pointer = + typename std::allocator_traits::const_pointer; + using iterator = typename Policy::Iter; + using const_iterator = iterator; + + //// PUBLIC - Member functions + + F14BasicSet() noexcept(F14Table::kDefaultConstructIsNoexcept) + : F14BasicSet(0) {} + + explicit F14BasicSet( + std::size_t initialCapacity, + hasher const& hash = hasher{}, + key_equal const& eq = key_equal{}, + allocator_type const& alloc = allocator_type{}) + : table_{initialCapacity, hash, eq, alloc} {} + + explicit F14BasicSet(std::size_t initialCapacity, allocator_type const& alloc) + : F14BasicSet(initialCapacity, hasher{}, key_equal{}, alloc) {} + + explicit F14BasicSet( + std::size_t initialCapacity, + hasher const& hash, + allocator_type const& alloc) + : F14BasicSet(initialCapacity, hash, key_equal{}, alloc) {} + + explicit F14BasicSet(allocator_type const& alloc) : F14BasicSet(0, alloc) {} + + template + F14BasicSet( + InputIt first, + InputIt last, + std::size_t initialCapacity = 0, + hasher const& hash = hasher{}, + key_equal const& eq = key_equal{}, + allocator_type const& alloc = allocator_type{}) + : table_{initialCapacity, hash, eq, alloc} { + initialInsert(first, last, initialCapacity); + } + + template + F14BasicSet( + InputIt first, + InputIt last, + std::size_t initialCapacity, + allocator_type const& alloc) + : table_{initialCapacity, hasher{}, key_equal{}, alloc} { + initialInsert(first, last, initialCapacity); + } + + template + F14BasicSet( + InputIt first, + InputIt last, + std::size_t initialCapacity, + hasher const& hash, + allocator_type const& alloc) + : table_{initialCapacity, hash, key_equal{}, alloc} { + initialInsert(first, last, initialCapacity); + } + + F14BasicSet(F14BasicSet const& rhs) = default; + + F14BasicSet(F14BasicSet const& rhs, allocator_type const& alloc) + : table_(rhs.table_, alloc) {} + + F14BasicSet(F14BasicSet&& rhs) = default; + + F14BasicSet(F14BasicSet&& rhs, allocator_type const& alloc) noexcept( + F14Table::kAllocIsAlwaysEqual) + : table_{std::move(rhs.table_), alloc} {} + + F14BasicSet( + std::initializer_list init, + std::size_t initialCapacity = 0, + hasher const& hash = hasher{}, + key_equal const& eq = key_equal{}, + allocator_type 
const& alloc = allocator_type{}) + : table_{initialCapacity, hash, eq, alloc} { + initialInsert(init.begin(), init.end(), initialCapacity); + } + + F14BasicSet( + std::initializer_list init, + std::size_t initialCapacity, + allocator_type const& alloc) + : table_{initialCapacity, hasher{}, key_equal{}, alloc} { + initialInsert(init.begin(), init.end(), initialCapacity); + } + + F14BasicSet( + std::initializer_list init, + std::size_t initialCapacity, + hasher const& hash, + allocator_type const& alloc) + : table_{initialCapacity, hash, key_equal{}, alloc} { + initialInsert(init.begin(), init.end(), initialCapacity); + } + + F14BasicSet& operator=(F14BasicSet const&) = default; + + F14BasicSet& operator=(F14BasicSet&&) = default; + + allocator_type get_allocator() const noexcept { + return table_.alloc(); + } + + //// PUBLIC - Iterators + + iterator begin() noexcept { + return cbegin(); + } + const_iterator begin() const noexcept { + return cbegin(); + } + const_iterator cbegin() const noexcept { + return table_.makeIter(table_.begin()); + } + + iterator end() noexcept { + return cend(); + } + const_iterator end() const noexcept { + return cend(); + } + const_iterator cend() const noexcept { + return table_.makeIter(table_.end()); + } + + //// PUBLIC - Capacity + + bool empty() const noexcept { + return table_.empty(); + } + + std::size_t size() const noexcept { + return table_.size(); + } + + std::size_t max_size() const noexcept { + return table_.max_size(); + } + + F14TableStats computeStats() const { + return table_.computeStats(); + } + + //// PUBLIC - Modifiers + + void clear() noexcept { + table_.clear(); + } + + std::pair insert(value_type const& value) { + auto rv = table_.tryEmplaceValue(value, value); + return std::make_pair(table_.makeIter(rv.first), rv.second); + } + + std::pair insert(value_type&& value) { + // tryEmplaceValue guarantees not to touch the first arg after touching + // any others, so although this looks fishy it is okay + value_type const& searchKey = value; + auto rv = table_.tryEmplaceValue(searchKey, std::move(value)); + return std::make_pair(table_.makeIter(rv.first), rv.second); + } + + // std::unordered_set's hinted insertion API is misleading. No + // implementation I've seen actually uses the hint. Code restructuring + // by the caller to use the hinted API is at best unnecessary, and at + // worst a pessimization. It is used, however, so we provide it. + + iterator insert(const_iterator /*hint*/, value_type const& value) { + return insert(value).first; + } + + iterator insert(const_iterator /*hint*/, value_type&& value) { + return insert(std::move(value)).first; + } + + private: + template + FOLLY_ALWAYS_INLINE void + bulkInsert(InputIt first, InputIt last, bool autoReserve) { + if (autoReserve) { + table_.reserveForInsert(std::distance(first, last)); + } + while (first != last) { + insert(*first); + ++first; + } + } + + template + void initialInsert(InputIt first, InputIt last, std::size_t initialCapacity) { + FOLLY_SAFE_DCHECK(empty() && bucket_count() >= initialCapacity, ""); + + // It's possible that there are a lot of duplicates in first..last and + // so we will oversize ourself. The common case, however, is that + // we can avoid a lot of rehashing if we pre-expand. The behavior + // is easy to disable at a particular call site by asking for an + // initialCapacity of 1. 
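+    // For illustration (hypothetical values, not part of this diff):
+    //
+    //   std::vector<int> v = ...;                   // many duplicates
+    //   F14ValueSet<int> a{v.begin(), v.end()};     // pre-reserves v.size()
+    //   F14ValueSet<int> b{v.begin(), v.end(), 1};  // grows as needed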
+ bool autoReserve = + std::is_same< + typename std::iterator_traits::iterator_category, + std::random_access_iterator_tag>::value && + initialCapacity == 0; + bulkInsert(first, last, autoReserve); + } + + public: + template + void insert(InputIt first, InputIt last) { + // Bulk reserve is a heuristic choice, so it can backfire. We restrict + // ourself to situations that mimic bulk construction without an + // explicit initialCapacity. + bool autoReserve = + std::is_same< + typename std::iterator_traits::iterator_category, + std::random_access_iterator_tag>::value && + bucket_count() == 0; + bulkInsert(first, last, autoReserve); + } + + void insert(std::initializer_list ilist) { + insert(ilist.begin(), ilist.end()); + } + + // node API doesn't make sense for value set, which stores values inline + + // emplace won't actually be more efficient than insert until we + // add heterogeneous lookup, but it is still useful now from a code + // compactness standpoint. + template + std::pair emplace(Args&&... args) { + key_type key(std::forward(args)...); + return insert(std::move(key)); + } + + template + iterator emplace_hint(const_iterator /*hint*/, Args&&... args) { + return emplace(std::forward(args)...).first; + } + + FOLLY_ALWAYS_INLINE iterator erase(const_iterator pos) { + // If we are inlined then gcc and clang can optimize away all of the + // work of ++pos if the caller discards it. + table_.erase(table_.unwrapIter(pos)); + return ++pos; + } + + iterator erase(const_iterator first, const_iterator last) { + while (first != last) { + table_.erase(table_.unwrapIter(first)); + ++first; + } + return first; + } + + size_type erase(key_type const& key) { + return table_.erase(key); + } + + //// PUBLIC - Lookup + + FOLLY_ALWAYS_INLINE std::size_t count(key_type const& key) const { + return table_.find(key).atEnd() ? 0 : 1; + } + + template + FOLLY_ALWAYS_INLINE IfIsTransparent count(K const& key) const { + return table_.find(key).atEnd() ? 
0 : 1; + } + + F14HashToken prehash(key_type const& key) const { + return table_.prehash(key); + } + + template + IfIsTransparent prehash(K const& key) const { + return table_.prehash(key); + } + + FOLLY_ALWAYS_INLINE iterator find(key_type const& key) { + return const_cast(this)->find(key); + } + + FOLLY_ALWAYS_INLINE const_iterator find(key_type const& key) const { + return table_.makeIter(table_.find(key)); + } + + FOLLY_ALWAYS_INLINE iterator + find(F14HashToken const& token, key_type const& key) { + return const_cast(this)->find(token, key); + } + + FOLLY_ALWAYS_INLINE const_iterator + find(F14HashToken const& token, key_type const& key) const { + return table_.makeIter(table_.find(token, key)); + } + + template + FOLLY_ALWAYS_INLINE IfIsTransparent find(K const& key) { + return const_cast(this)->find(key); + } + + template + FOLLY_ALWAYS_INLINE IfIsTransparent find( + K const& key) const { + return table_.makeIter(table_.find(key)); + } + + template + FOLLY_ALWAYS_INLINE IfIsTransparent find( + F14HashToken const& token, + K const& key) { + return const_cast(this)->find(token, key); + } + + template + FOLLY_ALWAYS_INLINE IfIsTransparent find( + F14HashToken const& token, + K const& key) const { + return table_.makeIter(table_.find(token, key)); + } + + std::pair equal_range(key_type const& key) { + return equal_range(*this, key); + } + + std::pair equal_range( + key_type const& key) const { + return equal_range(*this, key); + } + + template + IfIsTransparent> equal_range(K const& key) { + return equal_range(*this, key); + } + + template + IfIsTransparent> equal_range( + K const& key) const { + return equal_range(*this, key); + } + + //// PUBLIC - Bucket interface + + std::size_t bucket_count() const noexcept { + return table_.bucket_count(); + } + + std::size_t max_bucket_count() const noexcept { + return table_.max_bucket_count(); + } + + //// PUBLIC - Hash policy + + float load_factor() const noexcept { + return table_.load_factor(); + } + + float max_load_factor() const noexcept { + return table_.max_load_factor(); + } + + void max_load_factor(float v) { + table_.max_load_factor(v); + } + + void rehash(std::size_t bucketCapacity) { + // The standard's rehash() requires understanding the max load factor, + // which is easy to get wrong. Since we don't actually allow adjustment + // of max_load_factor there is no difference. 
+ reserve(bucketCapacity); + } + + void reserve(std::size_t capacity) { + table_.reserve(capacity); + } + + //// PUBLIC - Observers + + hasher hash_function() const { + return table_.hasher(); + } + + key_equal key_eq() const { + return table_.keyEqual(); + } + + private: + template + static auto equal_range(Self& self, K const& key) { + auto first = self.find(key); + auto last = first; + if (last != self.end()) { + ++last; + } + return std::make_pair(first, last); + } + + protected: + F14Table table_; +}; + +template +bool setsEqual(S const& lhs, S const& rhs) { + if (lhs.size() != rhs.size()) { + return false; + } + for (auto& k : lhs) { + auto iter = rhs.find(k); + if (iter == rhs.end()) { + return false; + } + if (!std::is_same< + typename S::key_equal, + std::equal_to>::value) { + // spec says we compare key with == as well as with key_eq() + if (!(k == *iter)) { + return false; + } + } + } + return true; +} +} // namespace detail +} // namespace f14 + +template < + typename Key, + typename Hasher = f14::DefaultHasher, + typename KeyEqual = f14::DefaultKeyEqual, + typename Alloc = f14::DefaultAlloc> +class F14ValueSet + : public f14::detail::F14BasicSet> { + using Policy = f14::detail::SetPolicyWithDefaults< + f14::detail::ValueContainerPolicy, + Key, + Hasher, + KeyEqual, + Alloc>; + using Super = f14::detail::F14BasicSet; + + public: + F14ValueSet() noexcept( + f14::detail::F14Table::kDefaultConstructIsNoexcept) + : Super{} {} + + using Super::Super; + + void swap(F14ValueSet& rhs) noexcept( + f14::detail::F14Table::kSwapIsNoexcept) { + this->table_.swap(rhs.table_); + } +}; + +template +void swap(F14ValueSet& lhs, F14ValueSet& rhs) noexcept( + noexcept(lhs.swap(rhs))) { + lhs.swap(rhs); +} + +template +bool operator==( + F14ValueSet const& lhs, + F14ValueSet const& rhs) { + return setsEqual(lhs, rhs); +} + +template +bool operator!=( + F14ValueSet const& lhs, + F14ValueSet const& rhs) { + return !(lhs == rhs); +} + +template < + typename Key, + typename Hasher = f14::DefaultHasher, + typename KeyEqual = f14::DefaultKeyEqual, + typename Alloc = f14::DefaultAlloc> +class F14NodeSet + : public f14::detail::F14BasicSet> { + using Policy = f14::detail::SetPolicyWithDefaults< + f14::detail::NodeContainerPolicy, + Key, + Hasher, + KeyEqual, + Alloc>; + using Super = f14::detail::F14BasicSet; + + public: + F14NodeSet() noexcept( + f14::detail::F14Table::kDefaultConstructIsNoexcept) + : Super{} {} + + using Super::Super; + + void swap(F14NodeSet& rhs) noexcept( + f14::detail::F14Table::kSwapIsNoexcept) { + this->table_.swap(rhs.table_); + } +}; + +template +void swap(F14NodeSet& lhs, F14NodeSet& rhs) noexcept( + noexcept(lhs.swap(rhs))) { + lhs.swap(rhs); +} + +template +bool operator==( + F14NodeSet const& lhs, + F14NodeSet const& rhs) { + return setsEqual(lhs, rhs); +} + +template +bool operator!=( + F14NodeSet const& lhs, + F14NodeSet const& rhs) { + return !(lhs == rhs); +} + +template < + typename Key, + typename Hasher = f14::DefaultHasher, + typename KeyEqual = f14::DefaultKeyEqual, + typename Alloc = f14::DefaultAlloc> +class F14VectorSet + : public f14::detail::F14BasicSet> { + using Policy = f14::detail::SetPolicyWithDefaults< + f14::detail::VectorContainerPolicy, + Key, + Hasher, + KeyEqual, + Alloc>; + using Super = f14::detail::F14BasicSet; + + public: + using typename Super::const_iterator; + using typename Super::iterator; + using typename Super::key_type; + + F14VectorSet() noexcept( + f14::detail::F14Table::kDefaultConstructIsNoexcept) + : Super{} {} + + // inherit 
constructors + using Super::Super; + + void swap(F14VectorSet& rhs) noexcept( + f14::detail::F14Table::kSwapIsNoexcept) { + this->table_.swap(rhs.table_); + } + + iterator begin() { + return cbegin(); + } + const_iterator begin() const { + return cbegin(); + } + const_iterator cbegin() const { + return this->table_.linearBegin(this->size()); + } + + iterator end() { + return cend(); + } + const_iterator end() const { + return cend(); + } + const_iterator cend() const { + return this->table_.linearEnd(); + } + + private: + void eraseUnderlying(typename Policy::ItemIter underlying) { + Alloc& a = this->table_.alloc(); + auto values = this->table_.values_; + + // destroy the value and remove the ptr from the base table + auto index = underlying.item(); + std::allocator_traits::destroy(a, std::addressof(values[index])); + this->table_.erase(underlying); + + // move the last element in values_ down and fix up the inbound index + auto tailIndex = this->size(); + if (tailIndex != index) { + auto tail = this->table_.find(f14::detail::VectorContainerIndexSearch{ + static_cast(tailIndex)}); + tail.item() = index; + auto p = std::addressof(values[index]); + folly::assume(p != nullptr); + std::allocator_traits::construct( + a, p, std::move(values[tailIndex])); + std::allocator_traits::destroy( + a, std::addressof(values[tailIndex])); + } + } + + public: + FOLLY_ALWAYS_INLINE iterator erase(const_iterator pos) { + auto underlying = this->table_.find( + f14::detail::VectorContainerIndexSearch{this->table_.iterToIndex(pos)}); + eraseUnderlying(underlying); + return ++pos; + } + + iterator erase(const_iterator first, const_iterator last) { + while (first != last) { + first = erase(first); + } + return first; + } + + std::size_t erase(key_type const& key) { + auto underlying = this->table_.find(key); + if (underlying.atEnd()) { + return 0; + } else { + eraseUnderlying(underlying); + return 1; + } + } +}; + +template +void swap( + F14VectorSet& lhs, + F14VectorSet& rhs) noexcept(noexcept(lhs.swap(rhs))) { + lhs.swap(rhs); +} + +template +bool operator==( + F14VectorSet const& lhs, + F14VectorSet const& rhs) { + return setsEqual(lhs, rhs); +} + +template +bool operator!=( + F14VectorSet const& lhs, + F14VectorSet const& rhs) { + return !(lhs == rhs); +} + +template < + typename Key, + typename Hasher = f14::DefaultHasher, + typename KeyEqual = f14::DefaultKeyEqual, + typename Alloc = f14::DefaultAlloc> +using F14FastSet = std::conditional_t< + sizeof(Key) < 24, + F14ValueSet, + F14VectorSet>; + +} // namespace folly + +#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE diff --git a/folly/container/detail/F14Memory.h b/folly/container/detail/F14Memory.h new file mode 100644 index 00000000000..b34eba3b328 --- /dev/null +++ b/folly/container/detail/F14Memory.h @@ -0,0 +1,178 @@ +/* + * Copyright 2017-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace folly { +namespace f14 { +namespace detail { + +template +using NonConstPtr = typename std::pointer_traits::template rebind< + std::remove_const_t::element_type>>; + +//////// TaggedPtr + +template +class TaggedPtr { + public: + TaggedPtr() = default; + TaggedPtr(TaggedPtr const&) = default; + TaggedPtr(TaggedPtr&&) = default; + TaggedPtr& operator=(TaggedPtr const&) = default; + TaggedPtr& operator=(TaggedPtr&&) = default; + + TaggedPtr(Ptr p, uint8_t e) noexcept : ptr_{p}, extra_{e} {} + + /* implicit */ TaggedPtr(std::nullptr_t) noexcept {} + + TaggedPtr& operator=(std::nullptr_t) noexcept { + ptr_ = nullptr; + extra_ = 0; + return *this; + } + + typename std::pointer_traits::element_type& operator*() const noexcept { + return *ptr_; + } + + typename std::pointer_traits::element_type* operator->() const noexcept { + return std::addressof(*ptr_); + } + + Ptr ptr() const { + return ptr_; + } + + void setPtr(Ptr p) { + ptr_ = p; + } + + uint8_t extra() const { + return extra_; + } + + void setExtra(uint8_t e) { + extra_ = e; + } + + bool operator==(TaggedPtr const& rhs) const noexcept { + return ptr_ == rhs.ptr_ && extra_ == rhs.extra_; + } + bool operator!=(TaggedPtr const& rhs) const noexcept { + return !(*this == rhs); + } + + bool operator<(TaggedPtr const& rhs) const noexcept { + return ptr_ != rhs.ptr_ ? ptr_ < rhs.ptr_ : extra_ < rhs.extra_; + } + + bool operator==(std::nullptr_t) const noexcept { + return ptr_ == nullptr; + } + bool operator!=(std::nullptr_t) const noexcept { + return !(*this == nullptr); + } + + private: + Ptr ptr_{}; + uint8_t extra_{}; +}; + +#if FOLLY_X64 || FOLLY_AARCH64 + +template +class TaggedPtr { + public: + TaggedPtr() = default; + TaggedPtr(TaggedPtr const&) = default; + TaggedPtr(TaggedPtr&&) = default; + TaggedPtr& operator=(TaggedPtr const&) = default; + TaggedPtr& operator=(TaggedPtr&&) = default; + + TaggedPtr(T* p, uint8_t e) noexcept + : raw_{(reinterpret_cast(p) << 8) | e} { + FOLLY_SAFE_DCHECK(ptr() == p, ""); + } + + /* implicit */ TaggedPtr(std::nullptr_t) noexcept : raw_{0} {} + + TaggedPtr& operator=(std::nullptr_t) noexcept { + raw_ = 0; + return *this; + } + + T& operator*() const noexcept { + return *ptr(); + } + + T* operator->() const noexcept { + return std::addressof(*ptr()); + } + + T* ptr() const { + return reinterpret_cast(raw_ >> 8); + } + + void setPtr(T* p) { + *this = TaggedPtr{p, extra()}; + FOLLY_SAFE_DCHECK(ptr() == p, ""); + } + + uint8_t extra() const { + return static_cast(raw_); + } + + void setExtra(uint8_t e) { + *this = TaggedPtr{ptr(), e}; + } + + bool operator==(TaggedPtr const& rhs) const { + return raw_ == rhs.raw_; + } + bool operator!=(TaggedPtr const& rhs) const { + return !(*this == rhs); + } + + bool operator<(TaggedPtr const& rhs) const noexcept { + return raw_ < rhs.raw_; + } + + bool operator==(std::nullptr_t) const noexcept { + return raw_ == 0; + } + bool operator!=(std::nullptr_t) const noexcept { + return !(*this == nullptr); + } + + private: + // TODO: verify no high-bit extension needed on aarch64 + uintptr_t raw_; +}; + +#endif // FOLLY_X64 || FOLLY_AARCH64 + +} // namespace detail +} // namespace f14 +} // namespace folly diff --git a/folly/container/detail/F14Policy.h b/folly/container/detail/F14Policy.h new file mode 100644 index 00000000000..22022ae6f0d --- /dev/null +++ b/folly/container/detail/F14Policy.h @@ -0,0 +1,1189 @@ +/* + * Copyright 2017-present Facebook, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE + +namespace folly { +namespace f14 { +namespace detail { + +template +using MapValueType = std::pair; + +template +using SetOrMapValueType = std::conditional_t< + std::is_same::value, + KeyType, + MapValueType>; + +// Policy provides the functionality of hasher, key_equal, and +// allocator_type. In addition, it can add indirection to the values +// contained in the base table by defining a non-trivial value() method. +// +// To facilitate stateful implementations it is guaranteed that there +// will be a 1:1 relationship between BaseTable and Policy instance: +// policies will only be copied when their owning table is copied, and +// they will only be moved when their owning table is moved. +// +// Key equality will have the user-supplied search key as its first +// argument and the table contents as its second. Heterogeneous lookup +// should be handled on the first argument. +// +// Item is the data stored inline in the hash table's chunks. The policy +// controls how this is mapped to the corresponding Value. +// +// The policies defined in this file work for either set or map types. +// Most of the functionality is identical. A few methods detect the +// collection type by checking to see if MappedType is void, and then use +// SFINAE to select the appropriate implementation. 
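+// In rough outline a policy provides a surface like the following
+// (a sketch assembled from the member functions used further down,
+// not a separate interface introduced by this diff):
+//
+//   struct SomePolicy {
+//     using Value = ...;  // what callers of the table observe
+//     using Item = ...;   // what the chunks physically store
+//
+//     std::size_t computeKeyHash(K const& key) const;
+//     std::size_t computeItemHash(Item const& item) const;
+//     bool keyMatchesItem(K const& key, Item const& item) const;
+//     void constructValueAtItem(std::size_t size, Item* addr, Args&&...);
+//     void moveItemDuringRehash(Item* addr, Item& src);
+//     void destroyItem(Item& item);
+//   };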
+template < + typename KeyType, + typename MappedTypeOrVoid, + typename HasherOrVoid, + typename KeyEqualOrVoid, + typename AllocOrVoid, + typename ItemType> +struct BasePolicy + : std::tuple< + Defaulted>, + Defaulted>, + Defaulted< + AllocOrVoid, + DefaultAlloc>>> { + using Key = KeyType; + using Mapped = MappedTypeOrVoid; + using Value = SetOrMapValueType; + using Item = ItemType; + using Hasher = Defaulted>; + using KeyEqual = Defaulted>; + using Alloc = Defaulted>; + using AllocTraits = std::allocator_traits; + + using InternalSizeType = std::size_t; + + using Super = std::tuple; + + static constexpr bool isAvalanchingHasher() { + return IsAvalanchingHasher::value; + } + + using Chunk = SSE2Chunk; + using ChunkPtr = typename std::pointer_traits< + typename AllocTraits::pointer>::template rebind; + using ItemIter = F14ItemIter; + + static constexpr bool kIsMap = !std::is_same::value; + static_assert( + kIsMap == !std::is_void::value, + "Assumption for the kIsMap check violated."); + + static_assert( + std::is_same::value, + "wrong allocator value_type"); + + BasePolicy(Hasher const& hasher, KeyEqual const& keyEqual, Alloc const& alloc) + : Super{hasher, keyEqual, alloc} {} + + BasePolicy(BasePolicy const& rhs) + : Super{rhs.hasher(), + rhs.keyEqual(), + AllocTraits::select_on_container_copy_construction(rhs.alloc())} { + } + + BasePolicy(BasePolicy const& rhs, Alloc const& alloc) + : Super{rhs.hasher(), rhs.keyEqual(), alloc} {} + + BasePolicy(BasePolicy&& rhs) noexcept + : Super{std::move(rhs.hasher()), + std::move(rhs.keyEqual()), + std::move(rhs.alloc())} {} + + BasePolicy(BasePolicy&& rhs, Alloc const& alloc) noexcept + : Super{std::move(rhs.hasher()), std::move(rhs.keyEqual()), alloc} {} + + BasePolicy& operator=(BasePolicy const& rhs) { + hasher() = rhs.hasher(); + keyEqual() = rhs.keyEqual(); + if (AllocTraits::propagate_on_container_copy_assignment::value) { + alloc() = rhs.alloc(); + } + return *this; + } + + BasePolicy& operator=(BasePolicy&& rhs) noexcept { + hasher() = std::move(rhs.hasher()); + keyEqual() = std::move(rhs.keyEqual()); + if (AllocTraits::propagate_on_container_move_assignment::value) { + alloc() = std::move(rhs.alloc()); + } + return *this; + } + + void swapBasePolicy(BasePolicy& rhs) { + using std::swap; + swap(hasher(), rhs.hasher()); + swap(keyEqual(), rhs.keyEqual()); + if (AllocTraits::propagate_on_container_swap::value) { + swap(alloc(), rhs.alloc()); + } + } + + Hasher& hasher() { + return std::get<0>(*this); + } + Hasher const& hasher() const { + return std::get<0>(*this); + } + KeyEqual& keyEqual() { + return std::get<1>(*this); + } + KeyEqual const& keyEqual() const { + return std::get<1>(*this); + } + Alloc& alloc() { + return std::get<2>(*this); + } + Alloc const& alloc() const { + return std::get<2>(*this); + } + + template + std::size_t computeKeyHash(K const& key) const { + static_assert( + isAvalanchingHasher() == IsAvalanchingHasher::value, ""); + return hasher()(key); + } + + Key const& keyForValue(Key const& v) const { + return v; + } + Key const& keyForValue( + std::pair> const& p) + const { + return p.first; + } + + template + bool + beforeCopy(std::size_t /*size*/, std::size_t /*capacity*/, P const& /*rhs*/) { + return false; + } + + template + void afterCopy( + bool /*undoState*/, + bool /*success*/, + std::size_t /*size*/, + std::size_t /*capacity*/, + P const& /*rhs*/) {} + + bool beforeRehash( + std::size_t /*size*/, + std::size_t /*oldCapacity*/, + std::size_t /*newCapacity*/) { + return false; + } + + void afterRehash( + bool 
/*undoState*/, + bool /*success*/, + std::size_t /*size*/, + std::size_t /*oldCapacity*/, + std::size_t /*newCapacity*/) {} + + void beforeClear(std::size_t /*size*/, std::size_t) {} + + void afterClear(std::size_t /*capacity*/) {} + + void beforeReset(std::size_t /*size*/, std::size_t) {} + + void afterReset() {} + + void prefetchValue(Item const&) { + // Subclass should disable with prefetchBeforeRehash(), + // prefetchBeforeCopy(), and prefetchBeforeDestroy(). if they don't + // override this method, because neither gcc nor clang can figure + // out that DenseMaskIter with an empty body can be elided. + FOLLY_SAFE_DCHECK(false, "should be disabled"); + } +}; + +// BaseIter is a convenience for concrete set and map implementations +template +class BaseIter : public std::iterator< + std::forward_iterator_tag, + std::remove_const_t< + typename std::pointer_traits::element_type>, + std::ptrdiff_t, + ValuePtr, + decltype(*std::declval())> { + protected: + using Chunk = SSE2Chunk; + using ChunkPtr = + typename std::pointer_traits::template rebind; + using ItemIter = F14ItemIter; + + using ValueConstPtr = typename std::pointer_traits::template rebind< + std::add_const_t::element_type>>; +}; + +//////// ValueContainer + +template < + typename Key, + typename Mapped, + typename HasherOrVoid, + typename KeyEqualOrVoid, + typename AllocOrVoid> +class ValueContainerPolicy; + +template +using ValueContainerIteratorBase = BaseIter< + ValuePtr, + std::remove_const_t::element_type>>; + +template +class ValueContainerIterator : public ValueContainerIteratorBase { + using Super = ValueContainerIteratorBase; + using typename Super::ItemIter; + using typename Super::ValueConstPtr; + + public: + using typename Super::pointer; + using typename Super::reference; + using typename Super::value_type; + + ValueContainerIterator() = default; + ValueContainerIterator(ValueContainerIterator const&) = default; + ValueContainerIterator(ValueContainerIterator&&) = default; + ValueContainerIterator& operator=(ValueContainerIterator const&) = default; + ValueContainerIterator& operator=(ValueContainerIterator&&) = default; + ~ValueContainerIterator() = default; + + /*implicit*/ operator ValueContainerIterator() const { + return ValueContainerIterator{underlying_}; + } + + reference operator*() const { + return underlying_.item(); + } + + pointer operator->() const { + return std::pointer_traits::pointer_to(**this); + } + + ValueContainerIterator& operator++() { + underlying_.advance(); + return *this; + } + + ValueContainerIterator operator++(int) { + auto cur = *this; + ++*this; + return cur; + } + + bool operator==(ValueContainerIterator const& rhs) const { + return underlying_ == rhs.underlying_; + } + bool operator!=(ValueContainerIterator const& rhs) const { + return !(*this == rhs); + } + + private: + ItemIter underlying_; + + explicit ValueContainerIterator(ItemIter const& underlying) + : underlying_{underlying} {} + + template + friend class ValueContainerPolicy; + + template + friend class ValueContainerIterator; +}; + +template < + typename Key, + typename MappedTypeOrVoid, + typename HasherOrVoid, + typename KeyEqualOrVoid, + typename AllocOrVoid> +class ValueContainerPolicy : public BasePolicy< + Key, + MappedTypeOrVoid, + HasherOrVoid, + KeyEqualOrVoid, + AllocOrVoid, + SetOrMapValueType> { + public: + using Super = BasePolicy< + Key, + MappedTypeOrVoid, + HasherOrVoid, + KeyEqualOrVoid, + AllocOrVoid, + SetOrMapValueType>; + using typename Super::Alloc; + using typename Super::Item; + using typename 
Super::ItemIter; + using typename Super::Value; + + private: + using Super::kIsMap; + using typename Super::AllocTraits; + + public: + using ConstIter = ValueContainerIterator; + using Iter = std::conditional_t< + kIsMap, + ValueContainerIterator, + ConstIter>; + + //////// F14Table policy + + static constexpr bool prefetchBeforeRehash() { + return false; + } + + static constexpr bool prefetchBeforeCopy() { + return false; + } + + static constexpr bool prefetchBeforeDestroy() { + return false; + } + + static constexpr bool destroyItemOnClear() { + return !std::is_trivially_destructible::value || + !std::is_same>::value; + } + + // inherit constructors + using Super::Super; + + void swapPolicy(ValueContainerPolicy& rhs) { + this->swapBasePolicy(rhs); + } + + using Super::keyForValue; + static_assert( + std::is_same::value, + "Item and Value should be the same type for ValueContainerPolicy."); + + std::size_t computeItemHash(Item const& item) const { + return this->computeKeyHash(keyForValue(item)); + } + + template + bool keyMatchesItem(K const& key, Item const& item) const { + return this->keyEqual()(key, keyForValue(item)); + } + + Value const& valueAtItemForCopy(Item const& item) const { + return item; + } + + template + void + constructValueAtItem(std::size_t /*size*/, Item* itemAddr, Args&&... args) { + Alloc& a = this->alloc(); + folly::assume(itemAddr != nullptr); + AllocTraits::construct(a, itemAddr, std::forward(args)...); + } + + template + std::enable_if_t::value> + complainUnlessNothrowMove() {} + + template + FOLLY_DEPRECATED( + "use F14NodeMap/Set or mark key and mapped type move constructor nothrow") + std::enable_if_t::value> complainUnlessNothrowMove() {} + + template + void moveItemDuringRehash( + Item* itemAddr, + Item& src, + typename std::enable_if_t = 0) { + complainUnlessNothrowMove(); + complainUnlessNothrowMove(); + + // map's choice of pair as value_type is unfortunate, + // because it means we either need a proxy iterator, a pointless key + // copy when moving items during rehash, or some sort of UB hack. + // See https://fb.quip.com/kKieAEtg0Pao for much more discussion of + // the possibilities. + // + // This code implements the hack. + // Laundering in the standard is only described as a solution for + // changes to const fields due to the creation of a new object + // lifetime (destroy and then placement new in the same location), + // but it seems highly likely that it will also cause the compiler + // to drop such assumptions that are violated due to our UB const_cast. 
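+    // The underlying problem, in miniature (K and M stand for the key
+    // and mapped types; illustrative only):
+    //
+    //   std::pair<const K, M> src{...};
+    //   auto moved = std::move(src);  // src.first is const, so the key
+    //                                 // is copied rather than moved
+    //
+    // The const_cast below moves the key anyway; launder() keeps the
+    // compiler from carrying assumptions about the old object forward.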
+ constructValueAtItem( + 0, + itemAddr, + std::move(const_cast(src.first)), + std::move(src.second)); + if (destroyItemOnClear()) { + destroyItem(*folly::launder(std::addressof(src))); + } + } + + template + void moveItemDuringRehash( + Item* itemAddr, + Item& src, + typename std::enable_if_t = 0) { + complainUnlessNothrowMove(); + + constructValueAtItem(0, itemAddr, std::move(src)); + if (destroyItemOnClear()) { + destroyItem(src); + } + } + + void destroyItem(Item& item) { + Alloc& a = this->alloc(); + AllocTraits::destroy(a, std::addressof(item)); + } + + std::size_t indirectBytesUsed( + std::size_t /*size*/, + std::size_t /*capacity*/, + ItemIter /*underlying*/) const { + return 0; + } + + //////// F14BasicMap/Set policy + + Iter makeIter(ItemIter const& underlying) const { + return Iter{underlying}; + } + ConstIter makeConstIter(ItemIter const& underlying) const { + return ConstIter{underlying}; + } + ItemIter const& unwrapIter(ConstIter const& iter) const { + return iter.underlying_; + } +}; + +//////// NodeContainer + +template < + typename Key, + typename Mapped, + typename HasherOrVoid, + typename KeyEqualOrVoid, + typename AllocOrVoid> +class NodeContainerPolicy; + +template +class NodeContainerIterator : public BaseIter> { + using Super = BaseIter>; + using typename Super::ItemIter; + using typename Super::ValueConstPtr; + + public: + using typename Super::pointer; + using typename Super::reference; + using typename Super::value_type; + + NodeContainerIterator() = default; + NodeContainerIterator(NodeContainerIterator const&) = default; + NodeContainerIterator(NodeContainerIterator&&) = default; + NodeContainerIterator& operator=(NodeContainerIterator const&) = default; + NodeContainerIterator& operator=(NodeContainerIterator&&) = default; + ~NodeContainerIterator() = default; + + /*implicit*/ operator NodeContainerIterator() const { + return NodeContainerIterator{underlying_}; + } + + reference operator*() const { + return *underlying_.item(); + } + + pointer operator->() const { + return std::pointer_traits::pointer_to(**this); + } + + NodeContainerIterator& operator++() { + underlying_.advance(); + return *this; + } + + NodeContainerIterator operator++(int) { + auto cur = *this; + ++*this; + return cur; + } + + bool operator==(NodeContainerIterator const& rhs) const { + return underlying_ == rhs.underlying_; + } + bool operator!=(NodeContainerIterator const& rhs) const { + return !(*this == rhs); + } + + private: + ItemIter underlying_; + + explicit NodeContainerIterator(ItemIter const& underlying) + : underlying_{underlying} {} + + template + friend class NodeContainerPolicy; + + template + friend class NodeContainerIterator; +}; + +template < + typename Key, + typename MappedTypeOrVoid, + typename HasherOrVoid, + typename KeyEqualOrVoid, + typename AllocOrVoid> +class NodeContainerPolicy + : public BasePolicy< + Key, + MappedTypeOrVoid, + HasherOrVoid, + KeyEqualOrVoid, + AllocOrVoid, + typename std::allocator_traits::value, + Key, + MapValueType>>>>::pointer> { + public: + using Super = BasePolicy< + Key, + MappedTypeOrVoid, + HasherOrVoid, + KeyEqualOrVoid, + AllocOrVoid, + typename std::allocator_traits::value, + Key, + MapValueType>>>>::pointer>; + using typename Super::Alloc; + using typename Super::Item; + using typename Super::ItemIter; + using typename Super::Value; + + private: + using Super::kIsMap; + using typename Super::AllocTraits; + + public: + using ConstIter = NodeContainerIterator; + using Iter = std::conditional_t< + kIsMap, + NodeContainerIterator, + 
ConstIter>; + + //////// F14Table policy + + static constexpr bool prefetchBeforeRehash() { + return true; + } + + static constexpr bool prefetchBeforeCopy() { + return true; + } + + static constexpr bool prefetchBeforeDestroy() { + return !std::is_trivially_destructible::value; + } + + static constexpr bool destroyItemOnClear() { + return true; + } + + // inherit constructors + using Super::Super; + + void swapPolicy(NodeContainerPolicy& rhs) { + this->swapBasePolicy(rhs); + } + + using Super::keyForValue; + + std::size_t computeItemHash(Item const& item) const { + return this->computeKeyHash(keyForValue(*item)); + } + + template + bool keyMatchesItem(K const& key, Item const& item) const { + return this->keyEqual()(key, keyForValue(*item)); + } + + Value const& valueAtItemForCopy(Item const& item) const { + return *item; + } + + template + void + constructValueAtItem(std::size_t /*size*/, Item* itemAddr, Args&&... args) { + Alloc& a = this->alloc(); + folly::assume(itemAddr != nullptr); + new (itemAddr) Item{AllocTraits::allocate(a, 1)}; + auto p = std::addressof(**itemAddr); + folly::assume(p != nullptr); + AllocTraits::construct(a, p, std::forward(args)...); + } + + void moveItemDuringRehash(Item* itemAddr, Item& src) { + // This is basically *itemAddr = src; src = nullptr, but allowing + // for fancy pointers. + folly::assume(itemAddr != nullptr); + new (itemAddr) Item{std::move(src)}; + src = nullptr; + src.~Item(); + } + + void prefetchValue(Item const& item) { + prefetchAddr(std::addressof(*item)); + } + + void destroyItem(Item& item) { + if (item != nullptr) { + Alloc& a = this->alloc(); + AllocTraits::destroy(a, std::addressof(*item)); + AllocTraits::deallocate(a, item, 1); + } + item.~Item(); + } + + std::size_t indirectBytesUsed( + std::size_t size, + std::size_t /*capacity*/, + ItemIter /*underlying*/) const { + return size * sizeof(Value); + } + + //////// F14BasicMap/Set policy + + Iter makeIter(ItemIter const& underlying) const { + return Iter{underlying}; + } + ConstIter makeConstIter(ItemIter const& underlying) const { + return Iter{underlying}; + } + ItemIter const& unwrapIter(ConstIter const& iter) const { + return iter.underlying_; + } +}; + +//////// VectorContainer + +template < + typename Key, + typename MappedTypeOrVoid, + typename HasherOrVoid, + typename KeyEqualOrVoid, + typename AllocOrVoid> +class VectorContainerPolicy; + +template +class VectorContainerIterator : public BaseIter { + using Super = BaseIter; + using typename Super::ValueConstPtr; + + public: + using typename Super::pointer; + using typename Super::reference; + using typename Super::value_type; + + VectorContainerIterator() = default; + VectorContainerIterator(VectorContainerIterator const&) = default; + VectorContainerIterator(VectorContainerIterator&&) = default; + VectorContainerIterator& operator=(VectorContainerIterator const&) = default; + VectorContainerIterator& operator=(VectorContainerIterator&&) = default; + ~VectorContainerIterator() = default; + + /*implicit*/ operator VectorContainerIterator() const { + // can we trust that fancy pointers are implicitly convertible to + // fancy const pointers? 
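+    // The conversion this operator enables, for reference (hypothetical
+    // map m; not code from this diff):
+    //
+    //   F14VectorMap<int, int>::iterator it = m.begin();
+    //   F14VectorMap<int, int>::const_iterator cit = it;  // uses this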
+ return VectorContainerIterator{current_, lowest_}; + } + + reference operator*() const { + return *current_; + } + + pointer operator->() const { + return current_; + } + + VectorContainerIterator& operator++() { + if (UNLIKELY(current_ == lowest_)) { + current_ = nullptr; + } else { + --current_; + } + return *this; + } + + VectorContainerIterator operator++(int) { + auto cur = *this; + ++*this; + return cur; + } + + bool operator==(VectorContainerIterator const& rhs) const { + return current_ == rhs.current_; + } + bool operator!=(VectorContainerIterator const& rhs) const { + return !(*this == rhs); + } + + private: + ValuePtr current_; + ValuePtr lowest_; + + explicit VectorContainerIterator(ValuePtr current, ValuePtr lowest) + : current_(current), lowest_(lowest) {} + + std::size_t index() const { + return current_ - lowest_; + } + + template + friend class VectorContainerPolicy; + + template + friend class VectorContainerIterator; +}; + +struct VectorContainerIndexSearch { + uint32_t index_; +}; + +template < + typename Key, + typename MappedTypeOrVoid, + typename HasherOrVoid, + typename KeyEqualOrVoid, + typename AllocOrVoid> +class VectorContainerPolicy : public BasePolicy< + Key, + MappedTypeOrVoid, + HasherOrVoid, + KeyEqualOrVoid, + AllocOrVoid, + uint32_t> { + public: + using Super = BasePolicy< + Key, + MappedTypeOrVoid, + HasherOrVoid, + KeyEqualOrVoid, + AllocOrVoid, + uint32_t>; + using typename Super::Alloc; + using typename Super::Item; + using typename Super::ItemIter; + using typename Super::Value; + + private: + using Super::kIsMap; + using typename Super::AllocTraits; + + public: + using InternalSizeType = Item; + + using ConstIter = + VectorContainerIterator; + using Iter = std::conditional_t< + kIsMap, + VectorContainerIterator, + ConstIter>; + + using ValuePtr = typename AllocTraits::pointer; + + //////// F14Table policy + + static constexpr bool prefetchBeforeRehash() { + return true; + } + + static constexpr bool prefetchBeforeCopy() { + return false; + } + + static constexpr bool prefetchBeforeDestroy() { + return false; + } + + static constexpr bool destroyItemOnClear() { + return false; + } + + // inherit constructors + using Super::Super; + + VectorContainerPolicy(VectorContainerPolicy const& rhs) + : Super{rhs}, values_{nullptr} {} + + VectorContainerPolicy(VectorContainerPolicy&& rhs) noexcept + : Super{std::move(rhs)}, values_{rhs.values_} { + rhs.values_ = nullptr; + } + + VectorContainerPolicy& operator=(VectorContainerPolicy const& rhs) { + if (this != &rhs) { + FOLLY_SAFE_DCHECK(values_ == nullptr, ""); + Super::operator=(rhs); + } + return *this; + } + + VectorContainerPolicy& operator=(VectorContainerPolicy&& rhs) noexcept { + if (this != &rhs) { + Super::operator=(std::move(rhs)); + values_ = rhs.values_; + rhs.values_ = nullptr; + } + return *this; + } + + void swapPolicy(VectorContainerPolicy& rhs) { + using std::swap; + this->swapBasePolicy(rhs); + swap(values_, rhs.values_); + } + + template + std::size_t computeKeyHash(K const& key) const { + static_assert( + Super::isAvalanchingHasher() == + IsAvalanchingHasher::value, + ""); + return this->hasher()(key); + } + + std::size_t computeKeyHash(VectorContainerIndexSearch const& key) const { + return computeItemHash(key.index_); + } + + using Super::keyForValue; + + std::size_t computeItemHash(Item const& item) const { + return this->computeKeyHash(keyForValue(values_[item])); + } + + bool keyMatchesItem(VectorContainerIndexSearch const& key, Item const& item) + const { + return key.index_ == 
item; + } + + template + bool keyMatchesItem(K const& key, Item const& item) const { + return this->keyEqual()(key, keyForValue(values_[item])); + } + + Key const& keyForValue(VectorContainerIndexSearch const& arg) const { + return keyForValue(values_[arg.index_]); + } + + VectorContainerIndexSearch valueAtItemForCopy(Item const& item) const { + return {item}; + } + + void constructValueAtItem( + std::size_t /*size*/, + Item* itemAddr, + VectorContainerIndexSearch arg) { + *itemAddr = arg.index_; + } + + template + void constructValueAtItem(std::size_t size, Item* itemAddr, Args&&... args) { + Alloc& a = this->alloc(); + *itemAddr = size; + AllocTraits::construct( + a, std::addressof(values_[size]), std::forward(args)...); + } + + void moveItemDuringRehash(Item* itemAddr, Item& src) { + *itemAddr = src; + } + + void prefetchValue(Item const& item) { + prefetchAddr(std::addressof(values_[item])); + } + + void destroyItem(Item&) {} + + template + std::enable_if_t::value> + complainUnlessNothrowMove() {} + + template + FOLLY_DEPRECATED( + "use F14NodeMap/Set or mark key and mapped type move constructor nothrow") + std::enable_if_t::value> complainUnlessNothrowMove() {} + + template + void transfer( + Alloc& a, + Value* src, + Value* dst, + std::size_t n, + typename std::enable_if_t = 0) { + complainUnlessNothrowMove(); + complainUnlessNothrowMove(); + + if (std::is_same>::value && + FOLLY_IS_TRIVIALLY_COPYABLE(Value)) { + std::memcpy(dst, src, n * sizeof(Value)); + } else { + for (std::size_t i = 0; i < n; ++i, ++src, ++dst) { + // See ValueContainerPolicy::moveItemDuringRehash for an explanation + // of // the strange const_cast and launder below + folly::assume(dst != nullptr); + AllocTraits::construct( + a, + dst, + std::move(const_cast(src->first)), + std::move(src->second)); + AllocTraits::destroy(a, folly::launder(src)); + } + } + } + + template + void transfer( + Alloc& a, + Value* src, + Value* dst, + std::size_t n, + typename std::enable_if_t = 0) { + complainUnlessNothrowMove(); + + if (std::is_same>::value && + FOLLY_IS_TRIVIALLY_COPYABLE(Value)) { + std::memcpy(dst, src, n * sizeof(Value)); + } else { + for (std::size_t i = 0; i < n; ++i, ++src, ++dst) { + folly::assume(dst != nullptr); + AllocTraits::construct(a, dst, std::move(*src)); + AllocTraits::destroy(a, src); + } + } + } + + bool beforeCopy( + std::size_t size, + std::size_t /*capacity*/, + VectorContainerPolicy const& rhs) { + Alloc& a = this->alloc(); + + FOLLY_SAFE_DCHECK(values_ != nullptr, ""); + + Value const* src = std::addressof(rhs.values_[0]); + Value* dst = std::addressof(values_[0]); + + if (std::is_same>::value && + FOLLY_IS_TRIVIALLY_COPYABLE(Value)) { + std::memcpy(dst, src, size * sizeof(Value)); + } else { + for (std::size_t i = 0; i < size; ++i, ++src, ++dst) { + try { + folly::assume(dst != nullptr); + AllocTraits::construct(a, dst, *src); + } catch (...) 
{ + for (Value* cleanup = std::addressof(values_[0]); cleanup != dst; + ++cleanup) { + AllocTraits::destroy(a, cleanup); + } + throw; + } + } + } + return true; + } + + void afterCopy( + bool /*undoState*/, + bool success, + std::size_t /*size*/, + std::size_t /*capacity*/, + VectorContainerPolicy const& /*rhs*/) { + // valueAtItemForCopy can be copied trivially, no failure should occur + FOLLY_SAFE_DCHECK(success, ""); + } + + ValuePtr beforeRehash( + std::size_t size, + std::size_t oldCapacity, + std::size_t newCapacity) { + FOLLY_SAFE_DCHECK( + size <= oldCapacity && ((values_ == nullptr) == (oldCapacity == 0)) && + newCapacity > 0 && + newCapacity <= (std::numeric_limits::max)(), + ""); + + Alloc& a = this->alloc(); + ValuePtr before = values_; + ValuePtr after = AllocTraits::allocate(a, newCapacity); + + if (size > 0) { + transfer(a, std::addressof(before[0]), std::addressof(after[0]), size); + } + + values_ = after; + return before; + } + + FOLLY_NOINLINE void + afterFailedRehash(ValuePtr state, std::size_t size, std::size_t newCapacity) { + // state holds the old storage + Alloc& a = this->alloc(); + if (size > 0) { + transfer(a, std::addressof(values_[0]), std::addressof(state[0]), size); + } + AllocTraits::deallocate(a, values_, newCapacity); + values_ = state; + } + + void afterRehash( + ValuePtr state, + bool success, + std::size_t size, + std::size_t oldCapacity, + std::size_t newCapacity) { + if (!success) { + afterFailedRehash(state, size, newCapacity); + } else if (state != nullptr) { + Alloc& a = this->alloc(); + AllocTraits::deallocate(a, state, oldCapacity); + } + } + + void beforeClear(std::size_t size, std::size_t capacity) { + FOLLY_SAFE_DCHECK( + size <= capacity && ((values_ == nullptr) == (capacity == 0)), ""); + Alloc& a = this->alloc(); + for (std::size_t i = 0; i < size; ++i) { + AllocTraits::destroy(a, std::addressof(values_[i])); + } + } + + void beforeReset(std::size_t size, std::size_t capacity) { + FOLLY_SAFE_DCHECK( + size <= capacity && ((values_ == nullptr) == (capacity == 0)), ""); + if (capacity > 0) { + beforeClear(size, capacity); + Alloc& a = this->alloc(); + AllocTraits::deallocate(a, values_, capacity); + values_ = nullptr; + } + } + + std::size_t indirectBytesUsed( + std::size_t /*size*/, + std::size_t capacity, + ItemIter /*underlying*/) const { + return sizeof(Value) * capacity; + } + + // Iterator stuff + + Iter linearBegin(std::size_t size) const { + return Iter{(size > 0 ? 
values_ + size - 1 : nullptr), values_};
+  }
+
+  Iter linearEnd() const {
+    return Iter{nullptr, nullptr};
+  }
+
+  //////// F14BasicMap/Set policy
+
+  Iter makeIter(ItemIter const& underlying) const {
+    if (underlying.atEnd()) {
+      return linearEnd();
+    } else {
+      folly::assume(values_ + underlying.item() != nullptr);
+      folly::assume(values_ != nullptr);
+      return Iter{values_ + underlying.item(), values_};
+    }
+  }
+
+  ConstIter makeConstIter(ItemIter const& underlying) const {
+    return makeIter(underlying);
+  }
+
+  Item iterToIndex(ConstIter const& iter) const {
+    auto n = iter.index();
+    folly::assume(n <= std::numeric_limits<Item>::max());
+    return static_cast<Item>(n);
+  }
+
+  Iter indexToIter(Item index) const {
+    return Iter{values_ + index, values_};
+  }
+
+  ValuePtr values_{nullptr};
+};
+
+template <
+    template <typename, typename, typename, typename, typename> class Policy,
+    typename Key,
+    typename Mapped,
+    typename Hasher,
+    typename KeyEqual,
+    typename Alloc>
+using MapPolicyWithDefaults = Policy<
+    Key,
+    Mapped,
+    VoidDefault<Hasher, DefaultHasher<Key>>,
+    VoidDefault<KeyEqual, DefaultKeyEqual<Key>>,
+    VoidDefault<Alloc, DefaultAlloc<std::pair<Key const, Mapped>>>>;
+
+template <
+    template <typename, typename, typename, typename, typename> class Policy,
+    typename Key,
+    typename Hasher,
+    typename KeyEqual,
+    typename Alloc>
+using SetPolicyWithDefaults = Policy<
+    Key,
+    void,
+    VoidDefault<Hasher, DefaultHasher<Key>>,
+    VoidDefault<KeyEqual, DefaultKeyEqual<Key>>,
+    VoidDefault<Alloc, DefaultAlloc<Key>>>;
+
+} // namespace detail
+} // namespace f14
+} // namespace folly
+
+#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
diff --git a/folly/container/detail/F14Table.cpp b/folly/container/detail/F14Table.cpp
new file mode 100644
index 00000000000..0ee5c1c7068
--- /dev/null
+++ b/folly/container/detail/F14Table.cpp
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <folly/container/detail/F14Table.h>
+
+///////////////////////////////////
+#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
+///////////////////////////////////
+
+namespace folly {
+namespace f14 {
+namespace detail {
+
+__m128i kEmptyTagVector = {};
+
+} // namespace detail
+} // namespace f14
+} // namespace folly
+
+///////////////////////////////////
+#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
+///////////////////////////////////
diff --git a/folly/container/detail/F14Table.h b/folly/container/detail/F14Table.h
new file mode 100644
index 00000000000..b9d8be2904d
--- /dev/null
+++ b/folly/container/detail/F14Table.h
@@ -0,0 +1,1698 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// clang-format off +// F14 is only available on x86 with SSE2 intrinsics (so far) +#ifndef FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE +# if FOLLY_SSE >= 2 +# define FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE 1 +# else +# define FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE 0 +# pragma message \ + "Vector intrinsics unavailable on this platform, " \ + "falling back to std::unordered_map / set" +# endif +#endif +// clang-format on + +#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE +#include // __m128i intrinsics +#include // _mm_prefetch +#endif + +#ifdef _WIN32 +#include // for _mul128 +#endif + +namespace folly { + +struct F14TableStats { + char const* policy; + std::size_t size{0}; + std::size_t valueSize{0}; + std::size_t bucketCount{0}; + std::size_t chunkCount{0}; + std::vector chunkOccupancyHisto; + std::vector chunkOutboundOverflowHisto; + std::vector chunkHostedOverflowHisto; + std::vector keyProbeLengthHisto; + std::vector missProbeLengthHisto; + std::size_t totalBytes{0}; + std::size_t overheadBytes{0}; + + private: + template + static auto computeHelper(T const* m) -> decltype(m->computeStats()) { + return m->computeStats(); + } + + static F14TableStats computeHelper(...) { + return {}; + } + + public: + template + static F14TableStats compute(T const& m) { + return computeHelper(&m); + } +}; + +namespace f14 { +template +using DefaultHasher = std::hash; + +template +using DefaultKeyEqual = std::equal_to; + +template +using DefaultAlloc = std::allocator; +} // namespace f14 + +#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE +namespace f14 { +namespace detail { +template +class F14Table; +} // namespace detail +} // namespace f14 + +class F14HashToken final { + private: + using HashPair = std::pair; + + explicit F14HashToken(HashPair hp) : hp_(hp) {} + explicit operator HashPair() const { + return hp_; + } + + HashPair hp_; + + template + friend class f14::detail::F14Table; +}; + +namespace f14 { +namespace detail { +//// Defaults should be selected using void + +template +using VoidDefault = + std::conditional_t::value, void, Arg>; + +template +using Defaulted = + typename std::conditional_t::value, Default, Arg>; + +template < + typename Void, + typename Hasher, + typename KeyEqual, + typename Key, + typename T> +struct EnableIfIsTransparent {}; + +template +struct EnableIfIsTransparent< + folly::void_t< + typename Hasher::is_transparent, + typename KeyEqual::is_transparent>, + Hasher, + KeyEqual, + Key, + T> { + using type = T; +}; + +//////////////// + +template +FOLLY_ALWAYS_INLINE static void prefetchAddr(T const* ptr) { + // _mm_prefetch is x86_64-specific and comes from xmmintrin.h. + // It compiles to the same thing as __builtin_prefetch. + _mm_prefetch( + static_cast(static_cast(ptr)), _MM_HINT_T0); +} + +extern __m128i kEmptyTagVector; + +template +struct alignas(std::max_align_t) SSE2Chunk { + using Item = ItemType; + + // Assuming alignof(std::max_align_t) == 16 (and assuming alignof(Item) + // >= 4) kCapacity of 14 is always most space efficient. Slightly + // smaller or larger capacities can help with cache alignment in a + // couple of cases without wasting too much space, but once the items + // are larger then we're unlikely to get much benefit anyway. 
The only
+  // case we optimize is using kCapacity of 12 for 4 byte items, which
+  // makes the chunk take exactly 1 cache line, and adding 16 bytes of
+  // padding for 16 byte items so that a chunk takes exactly 4 cache lines.
+  static constexpr unsigned kCapacity = sizeof(Item) == 4 ? 12 : 14;
+
+  static constexpr unsigned kDesiredCapacity = kCapacity - 2;
+
+  static constexpr unsigned kAllocatedCapacity =
+      kCapacity + (sizeof(Item) == 16 ? 1 : 0);
+
+  static constexpr unsigned kFullMask =
+      static_cast<unsigned>(~(~uint64_t{0} << kCapacity));
+
+  // Non-empty tags have their top bit set
+  std::array<uint8_t, kCapacity> tags_;
+
+  // Bits 0..3 record the actual capacity of the chunk if this is chunk
+  // zero, or hold 0000 for other chunks.  Bits 4-7 are a 4-bit counter
+  // of the number of values in this chunk that were placed because they
+  // overflowed their desired chunk (hostedOverflowCount).
+  uint8_t control_;
+
+  // The number of values that would have been placed into this chunk if
+  // there had been space, including values that also overflowed previous
+  // full chunks.  This value saturates; once it becomes 255 it no longer
+  // increases nor decreases.
+  uint8_t outboundOverflowCount_;
+
+  std::array<
+      std::aligned_storage_t<sizeof(Item), alignof(Item)>,
+      kAllocatedCapacity>
+      rawItems_;
+
+  static SSE2Chunk* emptyInstance() {
+    auto rv = static_cast<SSE2Chunk*>(static_cast<void*>(&kEmptyTagVector));
+    FOLLY_SAFE_DCHECK(
+        rv->occupiedMask() == 0 && rv->chunk0Capacity() == 0 &&
+            rv->outboundOverflowCount() == 0,
+        "");
+    return rv;
+  }
+
+  void clear() {
+    // this doesn't violate strict aliasing rules because __m128i is
+    // tagged as __may_alias__
+    auto* v = static_cast<__m128i*>(static_cast<void*>(&tags_[0]));
+    _mm_store_si128(v, _mm_setzero_si128());
+    // tags_ = {}; control_ = 0; outboundOverflowCount_ = 0;
+  }
+
+  void copyOverflowInfoFrom(SSE2Chunk const& rhs) {
+    FOLLY_SAFE_DCHECK(hostedOverflowCount() == 0, "");
+    control_ += rhs.control_ & 0xf0;
+    outboundOverflowCount_ = rhs.outboundOverflowCount_;
+  }
+
+  unsigned hostedOverflowCount() const {
+    return control_ >> 4;
+  }
+
+  static constexpr uint8_t kIncrHostedOverflowCount = 0x10;
+  static constexpr uint8_t kDecrHostedOverflowCount =
+      static_cast<uint8_t>(-0x10);
+
+  void adjustHostedOverflowCount(uint8_t op) {
+    control_ += op;
+  }
+
+  bool eof() const {
+    return (control_ & 0xf) != 0;
+  }
+
+  std::size_t chunk0Capacity() const {
+    return control_ & 0xf;
+  }
+
+  void markEof(std::size_t c0c) {
+    FOLLY_SAFE_DCHECK(
+        this != emptyInstance() && control_ == 0 && c0c > 0 && c0c <= 0xf &&
+            c0c <= kCapacity,
+        "");
+    control_ = static_cast<uint8_t>(c0c);
+  }
+
+  unsigned outboundOverflowCount() const {
+    return outboundOverflowCount_;
+  }
+
+  void incrOutboundOverflowCount() {
+    if (outboundOverflowCount_ != 255) {
+      ++outboundOverflowCount_;
+    }
+  }
+
+  void decrOutboundOverflowCount() {
+    if (outboundOverflowCount_ != 255) {
+      --outboundOverflowCount_;
+    }
+  }
+
+  uint8_t tag(std::size_t index) const {
+    return tags_[index];
+  }
+
+  void setTag(std::size_t index, uint8_t tag) {
+    FOLLY_SAFE_DCHECK(this != emptyInstance() && (tag & 0x80) != 0, "");
+    tags_[index] = tag;
+  }
+
+  void clearTag(std::size_t index) {
+    tags_[index] = 0;
+  }
+
+  __m128i const* tagVector() const {
+    return static_cast<__m128i const*>(static_cast<void const*>(&tags_[0]));
+  }
+
+  unsigned tagMatchMask(uint8_t needle) const {
+    FOLLY_SAFE_DCHECK((needle & 0x80) != 0, "");
+    auto tagV = _mm_load_si128(tagVector());
+    auto needleV = _mm_set1_epi8(needle);
+    auto eqV = _mm_cmpeq_epi8(tagV, needleV);
+    return _mm_movemask_epi8(eqV) & kFullMask;
+  }
+
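+  // Editorial walk-through (not part of the original patch): suppose
+  // needle == 0x9c and tags_ holds {0x9c, 0x00, 0x83, 0x9c, 0x00, ...}.
+  // _mm_cmpeq_epi8 yields 0xff in the two matching byte lanes and 0x00
+  // elsewhere, and _mm_movemask_epi8 packs each lane's high bit into an
+  // int, so tagMatchMask(0x9c) == 0b1001 (bits 0 and 3 set).
+  // SparseMaskIter then visits only item indexes 0 and 3, so at most a
+  // couple of key comparisons happen per chunk even when it is full.
+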
+  unsigned occupiedMask() const {
+    auto tagV = _mm_load_si128(tagVector());
+    return _mm_movemask_epi8(tagV) & kFullMask;
+  }
+
+  bool occupied(std::size_t index) const {
+    FOLLY_SAFE_DCHECK(tags_[index] == 0 || (tags_[index] & 0x80) != 0, "");
+    return tags_[index] != 0;
+  }
+
+  unsigned emptyMask() const {
+    return occupiedMask() ^ kFullMask;
+  }
+
+  unsigned lastOccupiedIndex() const {
+    auto m = occupiedMask();
+    // assume + findLastSet results in optimal __builtin_clz on gcc
+    folly::assume(m != 0);
+    unsigned i = folly::findLastSet(m) - 1;
+    FOLLY_SAFE_DCHECK(occupied(i), "");
+    return i;
+  }
+
+  Item* itemAddr(std::size_t i) const {
+    return static_cast<Item*>(
+        const_cast<void*>(static_cast<void const*>(&rawItems_[i])));
+  }
+
+  Item& item(std::size_t i) {
+    FOLLY_SAFE_DCHECK(this->occupied(i), "");
+    return *folly::launder(itemAddr(i));
+  }
+
+  Item const& citem(std::size_t i) const {
+    FOLLY_SAFE_DCHECK(this->occupied(i), "");
+    return *folly::launder(itemAddr(i));
+  }
+
+  static SSE2Chunk& owner(Item& item, std::size_t index) {
+    auto rawAddr =
+        static_cast<uint8_t*>(static_cast<void*>(std::addressof(item))) -
+        offsetof(SSE2Chunk, rawItems_) - index * sizeof(Item);
+    auto chunkAddr = static_cast<SSE2Chunk*>(static_cast<void*>(rawAddr));
+    FOLLY_SAFE_DCHECK(std::addressof(item) == chunkAddr->itemAddr(index), "");
+    return *chunkAddr;
+  }
+};
+
+class SparseMaskIter {
+  unsigned mask_;
+
+ public:
+  explicit SparseMaskIter(unsigned mask) : mask_{mask} {}
+
+  bool hasNext() {
+    return mask_ != 0;
+  }
+
+  unsigned next() {
+    FOLLY_SAFE_DCHECK(hasNext(), "");
+    unsigned i = __builtin_ctz(mask_);
+    mask_ &= (mask_ - 1);
+    return i;
+  }
+};
+
+class DenseMaskIter {
+  unsigned mask_;
+  unsigned index_{0};
+
+ public:
+  explicit DenseMaskIter(unsigned mask) : mask_{mask} {}
+
+  bool hasNext() {
+    return mask_ != 0;
+  }
+
+  unsigned next() {
+    FOLLY_SAFE_DCHECK(hasNext(), "");
+    if (LIKELY((mask_ & 1) != 0)) {
+      mask_ >>= 1;
+      return index_++;
+    } else {
+      unsigned s = __builtin_ctz(mask_);
+      unsigned rv = index_ + s;
+      mask_ >>= (s + 1);
+      index_ = rv + 1;
+      return rv;
+    }
+  }
+};
+
+////////////////
+
+template <typename ChunkPtr>
+class F14ItemIter {
+ private:
+  using Chunk = typename std::pointer_traits<ChunkPtr>::element_type;
+
+ public:
+  using Item = typename Chunk::Item;
+  using ItemPtr = typename std::pointer_traits<ChunkPtr>::template rebind<Item>;
+  using ItemConstPtr =
+      typename std::pointer_traits<ChunkPtr>::template rebind<Item const>;
+
+  using Packed = TaggedPtr<ItemPtr>;
+
+  //// PUBLIC
+
+  F14ItemIter() noexcept : itemPtr_{nullptr}, index_{0} {}
+
+  // default copy and move constructors and assignment operators are correct
+
+  explicit F14ItemIter(Packed const& packed)
+      : itemPtr_{packed.ptr()}, index_{packed.extra()} {}
+
+  F14ItemIter(ChunkPtr chunk, std::size_t index)
+      : itemPtr_{std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index))},
+        index_{index} {
+    FOLLY_SAFE_DCHECK(index < Chunk::kCapacity, "");
+    folly::assume(
+        std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index)) !=
+        nullptr);
+    folly::assume(itemPtr_ != nullptr);
+  }
+
+  FOLLY_ALWAYS_INLINE void advance() {
+    auto c = chunk();
+
+    // common case is packed entries
+    while (index_ > 0) {
+      --index_;
+      --itemPtr_;
+      if (LIKELY(c->occupied(index_))) {
+        return;
+      }
+    }
+
+    // It's fairly common for an iterator to be advanced and then become
+    // dead, for example in the return value from erase(iter) or in
+    // the last step of a loop.  We'd like to make sure that the entire
+    // advance() method can be eliminated by the compiler's dead code
+    // elimination pass.
To do that it must eliminate the loops, which + // requires it to prove that they have no side effects. It's easy + // to show that there are no escaping stores, but at the moment + // compilers also consider an infinite loop to be a side effect. + // (There are parts of the standard that would allow them to treat + // this as undefined behavior, but at the moment they don't exploit + // those clauses.) + // + // The following loop should really be a while loop, which would + // save a register, some instructions, and a conditional branch, + // but by writing it as a for loop the compiler can prove to itself + // that it will eventually terminate. (No matter that even if the + // loop executed in a single cycle it would take about 200 years to + // run all 2^64 iterations.) + // + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82776 has the bug we + // filed about the issue. while (true) { + for (std::size_t i = 1; i != 0; ++i) { + // exhausted the current chunk + if (UNLIKELY(c->eof())) { + FOLLY_SAFE_DCHECK(index_ == 0, ""); + itemPtr_ = nullptr; + return; + } + --c; + auto m = c->occupiedMask(); + if (LIKELY(m != 0)) { + index_ = folly::findLastSet(m) - 1; + itemPtr_ = std::pointer_traits::pointer_to(c->item(index_)); + return; + } + } + } + + // precheckedAdvance requires knowledge that the current iterator + // position isn't the last item + void precheckedAdvance() { + auto c = chunk(); + + // common case is packed entries + while (index_ > 0) { + --index_; + --itemPtr_; + if (LIKELY(c->occupied(index_))) { + return; + } + } + + while (true) { + // exhausted the current chunk + FOLLY_SAFE_DCHECK(!c->eof(), ""); + --c; + auto m = c->occupiedMask(); + if (LIKELY(m != 0)) { + index_ = folly::findLastSet(m) - 1; + itemPtr_ = std::pointer_traits::pointer_to(c->item(index_)); + return; + } + } + } + + ChunkPtr chunk() const { + return std::pointer_traits::pointer_to( + Chunk::owner(*itemPtr_, index_)); + } + + std::size_t index() const { + return index_; + } + + Item* itemAddr() const { + return std::addressof(*itemPtr_); + } + Item& item() const { + return *itemPtr_; + } + Item const& citem() const { + return *itemPtr_; + } + + bool atEnd() const { + return itemPtr_ == nullptr; + } + + Packed pack() const { + return Packed{itemPtr_, static_cast(index_)}; + } + + bool operator==(F14ItemIter const& rhs) const { + // this form makes iter == end() into a single null check after inlining + // and constant propagation + return itemPtr_ == rhs.itemPtr_; + } + + bool operator!=(F14ItemIter const& rhs) const { + return !(*this == rhs); + } + + private: + ItemPtr itemPtr_; + std::size_t index_; +}; + +//////////////// + +template +struct SizeAndPackedBegin { + SizeType size_{0}; + + private: + typename ItemIter::Packed packedBegin_{ItemIter{}.pack()}; + + public: + typename ItemIter::Packed& packedBegin() { + return packedBegin_; + } + + typename ItemIter::Packed const& packedBegin() const { + return packedBegin_; + } +}; + +template +struct SizeAndPackedBegin { + SizeType size_{0}; + + [[noreturn]] typename ItemIter::Packed& packedBegin() { + folly::assume_unreachable(); + } + + [[noreturn]] typename ItemIter::Packed const& packedBegin() const { + folly::assume_unreachable(); + } +}; + +template +class F14Table : public Policy { + public: + using typename Policy::Item; + using value_type = typename Policy::Value; + using allocator_type = typename Policy::Alloc; + + private: + using HashPair = typename F14HashToken::HashPair; + + using Chunk = SSE2Chunk; + using ChunkAlloc = typename 
std::allocator_traits< + allocator_type>::template rebind_alloc; + using ChunkPtr = typename std::allocator_traits::pointer; + + static constexpr bool kChunkAllocIsDefault = + std::is_same>::value; + + using ByteAlloc = typename std::allocator_traits< + allocator_type>::template rebind_alloc; + using BytePtr = typename std::allocator_traits::pointer; + + public: + using ItemIter = F14ItemIter; + + private: + // emulate c++17's std::allocator_traits::is_always_equal + + template + struct AllocIsAlwaysEqual : std::is_empty {}; + + template + struct AllocIsAlwaysEqual + : A::is_always_equal {}; + + // emulate c++17 has std::is_nothrow_swappable + template + static constexpr bool isNothrowSwap() { + using std::swap; + return noexcept(swap(std::declval(), std::declval())); + } + + public: + static constexpr bool kAllocIsAlwaysEqual = + AllocIsAlwaysEqual::value; + + static constexpr bool kDefaultConstructIsNoexcept = + std::is_nothrow_default_constructible::value && + std::is_nothrow_default_constructible::value && + std::is_nothrow_default_constructible::value; + + static constexpr bool kSwapIsNoexcept = kAllocIsAlwaysEqual && + isNothrowSwap() && + isNothrowSwap(); + + private: + //////// begin fields + + ChunkPtr chunks_{Chunk::emptyInstance()}; + typename Policy::InternalSizeType chunkMask_{0}; + typename Policy::InternalSizeType size_{0}; + typename ItemIter::Packed packedBegin_{ItemIter{}.pack()}; + + //////// end fields + + void swapContents(F14Table& rhs) noexcept { + using std::swap; + swap(chunks_, rhs.chunks_); + swap(chunkMask_, rhs.chunkMask_); + swap(size_, rhs.size_); + swap(packedBegin_, rhs.packedBegin_); + } + + public: + F14Table( + std::size_t initialCapacity, + typename Policy::Hasher const& hasher, + typename Policy::KeyEqual const& keyEqual, + typename Policy::Alloc const& alloc) + : Policy{hasher, keyEqual, alloc} { + if (initialCapacity > 0) { + reserve(initialCapacity); + } + } + + F14Table(F14Table const& rhs) : Policy{rhs} { + copyFromF14Table(rhs); + } + + F14Table(F14Table const& rhs, typename Policy::Alloc const& alloc) + : Policy{rhs, alloc} { + copyFromF14Table(rhs); + } + + F14Table(F14Table&& rhs) noexcept( + std::is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible::value&& + std::is_nothrow_move_constructible::value) + : Policy{std::move(rhs)} { + swapContents(rhs); + } + + F14Table(F14Table&& rhs, typename Policy::Alloc const& alloc) noexcept( + kAllocIsAlwaysEqual) + : Policy{std::move(rhs), alloc} { + FOLLY_SAFE_CHECK( + kAllocIsAlwaysEqual || this->alloc() == rhs.alloc(), + "F14 move with unequal allocators not yet supported"); + swapContents(rhs); + } + + F14Table& operator=(F14Table const& rhs) { + if (this != &rhs) { + reset(); + static_cast(*this) = rhs; + copyFromF14Table(rhs); + } + return *this; + } + + F14Table& operator=(F14Table&& rhs) noexcept( + std::is_nothrow_move_assignable::value&& + std::is_nothrow_move_assignable::value && + (kAllocIsAlwaysEqual || + (std::allocator_traits:: + propagate_on_container_move_assignment::value && + std::is_nothrow_move_assignable::value))) { + if (this != &rhs) { + reset(); + static_cast(*this) = std::move(rhs); + FOLLY_SAFE_CHECK( + std::allocator_traits:: + propagate_on_container_move_assignment::value || + kAllocIsAlwaysEqual || this->alloc() == rhs.alloc(), + "F14 move with unequal allocators not yet supported"); + swapContents(rhs); + } + return *this; + } + + ~F14Table() { + reset(); + } + + void swap(F14Table& rhs) noexcept(kSwapIsNoexcept) { + this->swapPolicy(rhs); + 
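+    // (editorial note) swapPolicy() above exchanges the policy-owned
+    // hasher, key_equal, and allocator state; swapContents() below swaps
+    // the remaining four table fields: chunks_, chunkMask_, size_, and
+    // packedBegin_.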
swapContents(rhs); + } + + private: + //////// hash helpers + + // Hash values are used to compute the desired position, which is the + // chunk index at which we would like to place a value (if there is no + // overflow), and the tag, which is an additional 8 bits of entropy. + // + // The standard's definition of hash function quality only refers to + // the probability of collisions of the entire hash value, not to the + // probability of collisions of the results of shifting or masking the + // hash value. Some hash functions, however, provide this stronger + // guarantee (not quite the same as the definition of avalanching, + // but similar). + // + // If the user-supplied hasher is an avalanching one (each bit of the + // hash value has a 50% chance of being the same for differing hash + // inputs), then we can just take 1 byte of the hash value for the tag + // and the rest for the desired position. Avalanching hashers also + // let us map hash value to array index position with just a bitmask + // without risking clumping. (Many hash tables just accept the risk + // and do it regardless.) + // + // std::hash avalanches in all implementations we've + // examined: libstdc++-v3 uses MurmurHash2, and libc++ uses CityHash + // or MurmurHash2. The other std::hash specializations, however, do not + // have this property. std::hash for integral and pointer values is the + // identity function on libstdc++-v3 and libc++, in particular. In our + // experience it is also fairly common for user-defined specializations + // of std::hash to combine fields in an ad-hoc way that does not evenly + // distribute entropy among the bits of the result (a + 37 * b, for + // example, where a and b are integer fields). + // + // For hash functions we don't trust to avalanche, we repair things by + // applying a bit mixer to the user-supplied hash. The mixer below is + // not fully avalanching for all 64 bits of output, but looks quite + // good for bits 18..63 and puts plenty of entropy even lower when + // considering multiple bits together (like the tag). Importantly, + // when under register pressure it uses fewer registers, instructions, + // and immediate constants than the alternatives, resulting in compact + // code that is more easily inlinable. In one instantiation a modified + // Murmur mixer was 48 bytes of assembly (even after using the same + // multiplicand for both steps) and this one was 27 bytes, for example. 
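+  // Editorial illustration (not part of the original patch): for an
+  // avalanching hasher the split below reduces to
+  //
+  //   tag   = (hash >> 56) | 0x80;   // top byte, high bit forced on
+  //   index = hash;                  // callers apply & chunkMask_ later
+  //
+  // With an identity std::hash for integers, keys 0..13 would all
+  // receive the tag 0x80 and the tag filter would stop rejecting
+  // anything, which is why the multiply/xor/multiply mix below runs
+  // first for hashers that don't advertise avalanching.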
+ + static HashPair splitHash(std::size_t hash) { + uint8_t tag; + if (!Policy::isAvalanchingHasher()) { + auto const kMul = 0xc4ceb9fe1a85ec53ULL; +#ifdef _WIN32 + __int64 signedHi; + __int64 signedLo = _mul128( + static_cast<__int64>(hash), static_cast<__int64>(kMul), &signedHi); + auto hi = static_cast(signedHi); + auto lo = static_cast(signedLo); +#else + auto hi = static_cast( + (static_cast(hash) * kMul) >> 64); + auto lo = hash * kMul; +#endif + hash = hi ^ lo; + hash *= kMul; + tag = static_cast(hash >> 15); + hash >>= 22; + } else { + tag = hash >> 56; + } + tag |= 0x80; + return std::make_pair(hash, tag); + } + + //////// memory management helpers + + static std::size_t allocSize( + std::size_t chunkCount, + std::size_t maxSizeWithoutRehash) { + if (chunkCount == 1) { + auto n = offsetof(Chunk, rawItems_) + maxSizeWithoutRehash * sizeof(Item); + FOLLY_SAFE_DCHECK((maxSizeWithoutRehash % 2) == 0, ""); + if ((sizeof(Item) % 8) != 0) { + n = ((n - 1) | 15) + 1; + } + FOLLY_SAFE_DCHECK((n % 16) == 0, ""); + return n; + } else { + return sizeof(Chunk) * chunkCount; + } + } + + ChunkPtr newChunks(std::size_t chunkCount, std::size_t maxSizeWithoutRehash) { + ByteAlloc a{this->alloc()}; + uint8_t* raw = &*std::allocator_traits::allocate( + a, allocSize(chunkCount, maxSizeWithoutRehash)); + static_assert(std::is_trivial::value, "SSE2Chunk should be POD"); + auto chunks = static_cast(static_cast(raw)); + for (std::size_t i = 0; i < chunkCount; ++i) { + chunks[i].clear(); + } + chunks[0].markEof(chunkCount == 1 ? maxSizeWithoutRehash : 1); + return std::pointer_traits::pointer_to(*chunks); + } + + void deleteChunks( + ChunkPtr chunks, + std::size_t chunkCount, + std::size_t maxSizeWithoutRehash) { + ByteAlloc a{this->alloc()}; + BytePtr bp = std::pointer_traits::pointer_to( + *static_cast(static_cast(&*chunks))); + std::allocator_traits::deallocate( + a, bp, allocSize(chunkCount, maxSizeWithoutRehash)); + } + + public: + ItemIter begin() const noexcept { + return ItemIter{packedBegin_}; + } + + ItemIter end() const noexcept { + return ItemIter{}; + } + + bool empty() const noexcept { + return size() == 0; + } + + std::size_t size() const noexcept { + return size_; + } + + std::size_t max_size() const noexcept { + allocator_type a = this->alloc(); + return std::min( + (std::numeric_limits::max)(), + std::allocator_traits::max_size(a)); + } + + std::size_t bucket_count() const noexcept { + // bucket_count is just a synthetic construct for the outside world + // so that size, bucket_count, load_factor, and max_load_factor are + // all self-consistent. The only one of those that is real is size(). + if (chunkMask_ != 0) { + return (chunkMask_ + 1) * Chunk::kDesiredCapacity; + } else { + return chunks_->chunk0Capacity(); + } + } + + std::size_t max_bucket_count() const noexcept { + return max_size(); + } + + float load_factor() const noexcept { + return empty() + ? 0.0f + : static_cast(size()) / static_cast(bucket_count()); + } + + float max_load_factor() const noexcept { + return 1.0f; + } + + void max_load_factor(float) noexcept { + // Probing hash tables can't run load factors >= 1 (unlike chaining + // tables). In addition, we have measured that there is little or + // no performance advantage to running a smaller load factor (cache + // locality losses outweigh the small reduction in probe lengths, + // often making it slower). Therefore, we've decided to just fix + // max_load_factor at 1.0f regardless of what the user requests. 
+ // This has an additional advantage that we don't have to store it. + // Taking alignment into consideration this makes every F14 table + // 8 bytes smaller, and is part of the reason an empty F14NodeMap + // is almost half the size of an empty std::unordered_map (32 vs + // 56 bytes). + // + // I don't have a strong opinion on whether we should remove this + // method or leave a stub, let ngbronson or xshi know if you have a + // compelling argument either way. + } + + private: + // Our probe strategy is to advance through additional chunks with + // a stride that is key-specific. This is called double hashing, + // and is a well known and high quality probing strategy. So long as + // the stride and the chunk count are relatively prime, we will visit + // every chunk once and then return to the original chunk, letting us + // detect and end the cycle. The chunk count is a power of two, so + // we can satisfy the relatively prime part by choosing an odd stride. + // We've already computed a high quality secondary hash value for the + // tag, so we just use it for the second probe hash as well. + // + // At the maximum load factor of 12/14, expected probe length for a + // find hit is 1.041, with 99% of keys found in the first three chunks. + // Expected probe length for a find miss (or insert) is 1.275, with a + // p99 probe length of 4 (fewer than 1% of failing find look at 5 or + // more chunks). + // + // This code is structured so you can try various ways of encoding + // the current probe state. For example, at the moment the probe's + // state is the position in the cycle and the resulting chunk index is + // computed from that inside probeCurrentIndex. We could also make the + // probe state the chunk index, and then increment it by hp.second * + // 2 + 1 in probeAdvance. Wrapping can be applied early or late as + // well. This particular code seems to be easier for the optimizer + // to understand. + // + // We could also implement probing strategies that resulted in the same + // tour for every key initially assigned to a chunk (linear probing or + // quadratic), but that results in longer probe lengths. In particular, + // the cache locality wins of linear probing are not worth the increase + // in probe lengths (extra work and less branch predictability) in + // our experiments. + + std::size_t probeDelta(HashPair hp) const { + return 2 * hp.second + 1; + } + + template + FOLLY_ALWAYS_INLINE ItemIter findImpl(HashPair hp, K const& key) const { + std::size_t index = hp.first; + std::size_t step = probeDelta(hp); + for (std::size_t tries = 0; tries <= chunkMask_; ++tries) { + ChunkPtr chunk = chunks_ + (index & chunkMask_); + if (sizeof(Chunk) > 64) { + prefetchAddr(chunk->itemAddr(8)); + } + auto mask = chunk->tagMatchMask(hp.second); + SparseMaskIter hits{mask}; + while (hits.hasNext()) { + auto i = hits.next(); + if (LIKELY(this->keyMatchesItem(key, chunk->item(i)))) { + // Tag match and key match were both successful. The chance + // of a false tag match is 1/128 for each key in the chunk + // (with a proper hash function). + return ItemIter{chunk, i}; + } + } + if (LIKELY(chunk->outboundOverflowCount() == 0)) { + // No keys that wanted to be placed in this chunk were denied + // entry, so our search is over. This is the common case. + break; + } + index += step; + } + // Loop exit because tries is exhausted is rare, but possible. 
+ // That means that for every chunk there is currently a key present + // in the map that visited that chunk on its probe search but ended + // up somewhere else, and we have searched every chunk. + return ItemIter{}; + } + + public: + // Prehashing splits the work of find(key) into two calls, enabling you + // to manually implement loop pipelining for hot bulk lookups. prehash + // computes the hash and prefetches the first computed memory location, + // and the two-arg find(F14HashToken,K) performs the rest of the search. + template + F14HashToken prehash(K const& key) const { + FOLLY_SAFE_DCHECK(chunks_ != nullptr, ""); + auto hp = splitHash(this->computeKeyHash(key)); + ChunkPtr firstChunk = chunks_ + (hp.first & chunkMask_); + prefetchAddr(firstChunk); + return F14HashToken(std::move(hp)); + } + + template + FOLLY_ALWAYS_INLINE ItemIter find(K const& key) const { + auto hp = splitHash(this->computeKeyHash(key)); + return findImpl(hp, key); + } + + template + FOLLY_ALWAYS_INLINE ItemIter + find(F14HashToken const& token, K const& key) const { + FOLLY_SAFE_DCHECK( + splitHash(this->computeKeyHash(key)) == static_cast(token), + ""); + return findImpl(static_cast(token), key); + } + + private: + void adjustSizeAndBeginAfterInsert(ItemIter iter) { + // packedBegin_ is the max of all valid ItemIter::pack() + auto packed = iter.pack(); + if (packedBegin_ < packed) { + packedBegin_ = packed; + } + + ++size_; + } + + // Ignores hp if pos.chunk()->hostedOverflowCount() == 0 + void eraseBlank(ItemIter iter, HashPair hp) { + iter.chunk()->clearTag(iter.index()); + + if (iter.chunk()->hostedOverflowCount() != 0) { + // clean up + std::size_t index = hp.first; + std::size_t delta = probeDelta(hp); + uint8_t hostedOp = 0; + while (true) { + ChunkPtr chunk = chunks_ + (index & chunkMask_); + if (chunk == iter.chunk()) { + chunk->adjustHostedOverflowCount(hostedOp); + break; + } + chunk->decrOutboundOverflowCount(); + hostedOp = Chunk::kDecrHostedOverflowCount; + index += delta; + } + } + } + + void adjustSizeAndBeginBeforeErase(ItemIter iter) { + --size_; + if (iter.pack() == packedBegin_) { + if (size_ == 0) { + iter = ItemIter{}; + } else { + iter.precheckedAdvance(); + } + packedBegin_ = iter.pack(); + } + } + + template + void insertAtBlank(ItemIter pos, HashPair hp, Args&&... args) { + try { + auto dst = pos.itemAddr(); + folly::assume(dst != nullptr); + this->constructValueAtItem(size_, dst, std::forward(args)...); + } catch (...) 
{ + eraseBlank(pos, hp); + throw; + } + adjustSizeAndBeginAfterInsert(pos); + } + + ItemIter allocateTag(uint8_t* fullness, HashPair hp) { + ChunkPtr chunk; + std::size_t index = hp.first; + std::size_t delta = probeDelta(hp); + uint8_t hostedOp = 0; + while (true) { + index &= chunkMask_; + chunk = chunks_ + index; + if (LIKELY(fullness[index] < Chunk::kCapacity)) { + break; + } + chunk->incrOutboundOverflowCount(); + hostedOp = Chunk::kIncrHostedOverflowCount; + index += delta; + } + unsigned itemIndex = fullness[index]++; + FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), ""); + chunk->setTag(itemIndex, hp.second); + chunk->adjustHostedOverflowCount(hostedOp); + return ItemIter{chunk, itemIndex}; + } + + void directCopyFrom(F14Table const& src) { + FOLLY_SAFE_DCHECK(src.size() > 0 && chunkMask_ == src.chunkMask_, ""); + + Policy const& srcPolicy = src; + auto undoState = this->beforeCopy(src.size(), bucket_count(), srcPolicy); + bool success = false; + SCOPE_EXIT { + this->afterCopy( + undoState, success, src.size(), bucket_count(), srcPolicy); + }; + + // Copy can fail part-way through if a Value copy constructor throws. + // Failing afterCopy is limited in its cleanup power in this case, + // because it can't enumerate the items that were actually copied. + // Fortunately we can divide the situation into cases where all of + // the state is owned by the table itself (F14Node and F14Value), + // for which clearImpl() can do partial cleanup, and cases where all + // of the values are owned by the policy (F14Vector), in which case + // partial failure should not occur. Sorry for the subtle invariants + // in the Policy API. + + auto srcBegin = src.begin(); + std::size_t maxChunkIndex = srcBegin.chunk() - src.chunks_; + + if (FOLLY_IS_TRIVIALLY_COPYABLE(Item) && !this->destroyItemOnClear() && + bucket_count() == src.bucket_count()) { + // most happy path + auto n = allocSize(chunkMask_ + 1, bucket_count()); + std::memcpy(&chunks_[0], &src.chunks_[0], n); + size_ = src.size_; + packedBegin_ = ItemIter{chunks_ + maxChunkIndex, srcBegin.index()}.pack(); + } else { + // happy path, no rehash but pack items toward bottom of chunk and + // use copy constructor + Chunk const* srcChunk = &src.chunks_[maxChunkIndex]; + Chunk* dstChunk = &chunks_[maxChunkIndex]; + do { + dstChunk->copyOverflowInfoFrom(*srcChunk); + + auto mask = srcChunk->occupiedMask(); + if (Policy::prefetchBeforeCopy()) { + for (DenseMaskIter iter{mask}; iter.hasNext();) { + this->prefetchValue(srcChunk->citem(iter.next())); + } + } + + std::size_t dstI = 0; + for (DenseMaskIter iter{mask}; iter.hasNext(); ++dstI) { + auto srcI = iter.next(); + auto&& srcValue = src.valueAtItemForCopy(srcChunk->citem(srcI)); + auto dst = dstChunk->itemAddr(dstI); + folly::assume(dst != nullptr); + this->constructValueAtItem( + 0, dst, std::forward(srcValue)); + dstChunk->setTag(dstI, srcChunk->tag(srcI)); + ++size_; + } + + --srcChunk; + --dstChunk; + } while (size_ != src.size_); + + // reset doesn't care about packedBegin, so we don't fix it until the end + packedBegin_ = + ItemIter{chunks_ + maxChunkIndex, + folly::popcount(chunks_[maxChunkIndex].occupiedMask()) - 1} + .pack(); + } + + success = true; + } + + void rehashCopyFrom(F14Table const& src) { + FOLLY_SAFE_DCHECK(src.chunkMask_ > chunkMask_, ""); + + // 1 byte per chunk means < 1 bit per value temporary overhead + std::array stackBuf; + uint8_t* fullness; + auto cc = chunkMask_ + 1; + if (cc <= stackBuf.size()) { + fullness = stackBuf.data(); + } else { + ByteAlloc a{this->alloc()}; + 
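+      // (editorial note) fullness[] holds one byte per chunk; when the
+      // chunk count exceeds the stack buffer it borrows the table's
+      // allocator rebound to uint8_t, mirroring the deallocation in the
+      // SCOPE_EXIT guard just below.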
fullness = &*std::allocator_traits::allocate(a, cc); + } + SCOPE_EXIT { + if (cc > stackBuf.size()) { + ByteAlloc a{this->alloc()}; + std::allocator_traits::deallocate( + a, + std::pointer_traits::pointer>::pointer_to(*fullness), + cc); + } + }; + std::memset(fullness, '\0', cc); + + // Exception safety requires beforeCopy to happen after all of the + // allocate() calls. + Policy const& srcPolicy = src; + auto undoState = this->beforeCopy(src.size(), bucket_count(), srcPolicy); + bool success = false; + SCOPE_EXIT { + this->afterCopy( + undoState, success, src.size(), bucket_count(), srcPolicy); + }; + + // The current table is at a valid state at all points for policies + // in which non-trivial values are owned by the main table (F14Node + // and F14Value), so reset() will clean things up properly if we + // fail partway through. For the case that the policy manages value + // lifecycle (F14Vector) then nothing after beforeCopy can throw and + // we don't have to worry about partial failure. + + std::size_t srcChunkIndex = src.begin().chunk() - src.chunks_; + while (true) { + Chunk const* srcChunk = &src.chunks_[srcChunkIndex]; + auto mask = srcChunk->occupiedMask(); + if (Policy::prefetchBeforeRehash()) { + for (DenseMaskIter iter{mask}; iter.hasNext();) { + this->prefetchValue(srcChunk->citem(iter.next())); + } + } + if (srcChunk->hostedOverflowCount() == 0) { + // all items are in their preferred chunk (no probing), so we + // don't need to compute any hash values + for (DenseMaskIter iter{mask}; iter.hasNext();) { + auto i = iter.next(); + auto& srcItem = srcChunk->citem(i); + auto&& srcValue = src.valueAtItemForCopy(srcItem); + HashPair hp{srcChunkIndex, srcChunk->tag(i)}; + insertAtBlank( + allocateTag(fullness, hp), + hp, + std::forward(srcValue)); + } + } else { + // any chunk's items might be in here + for (DenseMaskIter iter{mask}; iter.hasNext();) { + auto i = iter.next(); + auto& srcItem = srcChunk->citem(i); + auto&& srcValue = src.valueAtItemForCopy(srcItem); + auto const& srcKey = src.keyForValue(srcValue); + auto hp = splitHash(this->computeKeyHash(srcKey)); + FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(i), ""); + insertAtBlank( + allocateTag(fullness, hp), + hp, + std::forward(srcValue)); + } + } + if (srcChunkIndex == 0) { + break; + } + --srcChunkIndex; + } + + success = true; + } + + FOLLY_NOINLINE void copyFromF14Table(F14Table const& src) { + FOLLY_SAFE_DCHECK(size() == 0, ""); + if (src.size() == 0) { + return; + } + + reserveForInsert(src.size()); + try { + if (chunkMask_ == src.chunkMask_) { + directCopyFrom(src); + } else { + rehashCopyFrom(src); + } + } catch (...) 
{ + reset(); + throw; + } + } + + FOLLY_NOINLINE void rehashImpl( + std::size_t newChunkCount, + std::size_t newMaxSizeWithoutRehash) { + FOLLY_SAFE_DCHECK(newMaxSizeWithoutRehash > 0, ""); + + auto origChunks = chunks_; + const auto origChunkCount = chunkMask_ + 1; + const auto origMaxSizeWithoutRehash = bucket_count(); + + auto undoState = this->beforeRehash( + size_, origMaxSizeWithoutRehash, newMaxSizeWithoutRehash); + bool success = false; + SCOPE_EXIT { + this->afterRehash( + std::move(undoState), + success, + size_, + origMaxSizeWithoutRehash, + newMaxSizeWithoutRehash); + }; + + chunks_ = newChunks(newChunkCount, newMaxSizeWithoutRehash); + chunkMask_ = newChunkCount - 1; + + if (size_ == 0) { + // nothing to do + } else if (origChunkCount == 1 && newChunkCount == 1) { + // no mask, no chunk scan, no hash computation, no probing + auto srcChunk = origChunks; + auto dstChunk = chunks_; + std::size_t srcI = 0; + std::size_t dstI = 0; + while (dstI < size_) { + if (LIKELY(srcChunk->occupied(srcI))) { + dstChunk->setTag(dstI, srcChunk->tag(srcI)); + this->moveItemDuringRehash( + dstChunk->itemAddr(dstI), srcChunk->item(srcI)); + ++dstI; + } + ++srcI; + } + packedBegin_ = ItemIter{dstChunk, dstI - 1}.pack(); + } else { + // 1 byte per chunk means < 1 bit per value temporary overhead + std::array stackBuf; + uint8_t* fullness; + if (newChunkCount <= stackBuf.size()) { + fullness = stackBuf.data(); + } else { + try { + ByteAlloc a{this->alloc()}; + fullness = + &*std::allocator_traits::allocate(a, newChunkCount); + } catch (...) { + deleteChunks(chunks_, newChunkCount, newMaxSizeWithoutRehash); + chunks_ = origChunks; + chunkMask_ = origChunkCount - 1; + throw; + } + } + std::memset(fullness, '\0', newChunkCount); + SCOPE_EXIT { + if (newChunkCount > stackBuf.size()) { + ByteAlloc a{this->alloc()}; + std::allocator_traits::deallocate( + a, + std::pointer_traits::pointer>::pointer_to(*fullness), + newChunkCount); + } + }; + + auto srcChunk = origChunks + origChunkCount - 1; + std::size_t remaining = size_; + while (remaining > 0) { + auto mask = srcChunk->occupiedMask(); + if (Policy::prefetchBeforeRehash()) { + for (DenseMaskIter iter{mask}; iter.hasNext();) { + this->prefetchValue(srcChunk->item(iter.next())); + } + } + for (DenseMaskIter iter{mask}; iter.hasNext();) { + --remaining; + auto srcI = iter.next(); + Item& srcItem = srcChunk->item(srcI); + auto hp = splitHash( + this->computeItemHash(const_cast(srcItem))); + FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(srcI), ""); + + auto dstIter = allocateTag(fullness, hp); + this->moveItemDuringRehash(dstIter.itemAddr(), srcItem); + } + --srcChunk; + } + + // this code replaces size_ invocations of adjustSizeAndBeginAfterInsert + std::size_t i = chunkMask_; + while (fullness[i] == 0) { + --i; + } + packedBegin_ = ItemIter{chunks_ + i, std::size_t{fullness[i]} - 1}.pack(); + } + + if (origMaxSizeWithoutRehash != 0) { + deleteChunks(origChunks, origChunkCount, origMaxSizeWithoutRehash); + } + success = true; + } + + public: + // user has no control over max_load_factor + + void rehash(std::size_t capacity) { + if (capacity < size()) { + capacity = size(); + } + + auto unroundedLimit = max_size(); + std::size_t exactLimit = Chunk::kDesiredCapacity; + while (exactLimit <= unroundedLimit / 2) { + exactLimit *= 2; + } + if (UNLIKELY(capacity > exactLimit)) { + throw_exception(); + } + + std::size_t const kInitialCapacity = 2; + std::size_t const kHalfChunkCapacity = + (Chunk::kDesiredCapacity / 2) & ~std::size_t{1}; + std::size_t 
maxSizeWithoutRehash; + std::size_t chunkCount; + if (capacity <= kInitialCapacity) { + chunkCount = 1; + maxSizeWithoutRehash = kInitialCapacity; + } else if (capacity <= kHalfChunkCapacity) { + chunkCount = 1; + maxSizeWithoutRehash = kHalfChunkCapacity; + } else { + chunkCount = 1; + while (chunkCount * Chunk::kDesiredCapacity < capacity) { + chunkCount *= 2; + } + maxSizeWithoutRehash = chunkCount * Chunk::kDesiredCapacity; + } + if (bucket_count() != maxSizeWithoutRehash) { + rehashImpl(chunkCount, maxSizeWithoutRehash); + } + } + + void reserve(std::size_t capacity) { + rehash(capacity); + } + + // Returns true iff a rehash was performed + void reserveForInsert(size_t incoming = 1) { + if (size() + incoming - 1 >= bucket_count()) { + reserveForInsertImpl(incoming); + } + } + + FOLLY_NOINLINE void reserveForInsertImpl(size_t incoming) { + rehash(size() + incoming); + } + + // Returns pos,true if construct, pos,false if found. key is only used + // during the search; all constructor args for an inserted value come + // from args... key won't be accessed after args are touched. + template + std::pair tryEmplaceValue(K const& key, Args&&... args) { + const auto hp = splitHash(this->computeKeyHash(key)); + + auto existing = findImpl(hp, key); + if (!existing.atEnd()) { + return std::make_pair(existing, false); + } + + reserveForInsert(); + + std::size_t index = hp.first; + ChunkPtr chunk = chunks_ + (index & chunkMask_); + auto emptyMask = chunk->emptyMask(); + + if (emptyMask == 0) { + std::size_t delta = probeDelta(hp); + do { + chunk->incrOutboundOverflowCount(); + index += delta; + chunk = chunks_ + (index & chunkMask_); + emptyMask = chunk->emptyMask(); + } while (emptyMask == 0); + chunk->adjustHostedOverflowCount(Chunk::kIncrHostedOverflowCount); + } + std::size_t itemIndex = __builtin_ctz(emptyMask); + + chunk->setTag(itemIndex, hp.second); + ItemIter iter{chunk, itemIndex}; + + // insertAtBlank will clear the tag if the constructor throws + insertAtBlank(iter, hp, std::forward(args)...); + return std::make_pair(iter, true); + } + + private: + template + void clearImpl() noexcept { + if (chunks_ == Chunk::emptyInstance()) { + FOLLY_SAFE_DCHECK(empty() && bucket_count() == 0, ""); + return; + } + + // turn clear into reset if the table is >= 16 chunks so that + // we don't get too low a load factor + bool willReset = Reset || chunkMask_ + 1 >= 16; + + if (willReset) { + this->beforeReset(size(), bucket_count()); + } else { + this->beforeClear(size(), bucket_count()); + } + + if (!empty()) { + if (Policy::destroyItemOnClear()) { + for (std::size_t ci = 0; ci <= chunkMask_; ++ci) { + ChunkPtr chunk = chunks_ + ci; + auto mask = chunk->occupiedMask(); + if (Policy::prefetchBeforeDestroy()) { + for (DenseMaskIter iter{mask}; iter.hasNext();) { + this->prefetchValue(chunk->item(iter.next())); + } + } + for (DenseMaskIter iter{mask}; iter.hasNext();) { + this->destroyItem(chunk->item(iter.next())); + } + } + } + if (!willReset) { + // It's okay to do this in a separate loop because we only do it + // when the chunk count is small. That avoids a branch when we + // are promoting a clear to a reset for a large table. 
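+        // (editorial note) Chunk::clear() zeroes the control_ byte, which
+        // for chunk zero encodes eof() and the single-chunk capacity, so
+        // chunk0Capacity() is saved here and markEof() restores it after
+        // the loop.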
+ auto c0c = chunks_[0].chunk0Capacity(); + for (std::size_t ci = 0; ci <= chunkMask_; ++ci) { + chunks_[ci].clear(); + } + chunks_[0].markEof(c0c); + } + packedBegin_ = ItemIter{}.pack(); + size_ = 0; + } + + if (willReset) { + deleteChunks(chunks_, chunkMask_ + 1, bucket_count()); + chunks_ = Chunk::emptyInstance(); + chunkMask_ = 0; + + this->afterReset(); + } else { + this->afterClear(bucket_count()); + } + } + + void eraseImpl(ItemIter pos, HashPair hp) { + this->destroyItem(pos.item()); + adjustSizeAndBeginBeforeErase(pos); + eraseBlank(pos, hp); + } + + public: + // The item needs to still be hashable during this call. If you want + // to intercept the item before it is destroyed (to extract it, for + // example), use erase(pos, beforeDestroy). + template + void erase(ItemIter pos, BeforeDestroy const& beforeDestroy) { + HashPair hp{}; + if (pos.chunk()->hostedOverflowCount() != 0) { + hp = splitHash(this->computeItemHash(pos.citem())); + } + beforeDestroy(pos.item()); + eraseImpl(pos, hp); + } + + // The item needs to still be hashable during this call. If you want + // to intercept the item before it is destroyed (to extract it, for + // example), use erase(pos, beforeDestroy). + void erase(ItemIter pos) { + return erase(pos, [](Item const&) {}); + } + + template + std::size_t erase(K const& key) { + if (UNLIKELY(size_ == 0)) { + return 0; + } + auto hp = splitHash(this->computeKeyHash(key)); + auto iter = findImpl(hp, key); + if (!iter.atEnd()) { + eraseImpl(iter, hp); + return 1; + } else { + return 0; + } + } + + void clear() noexcept { + clearImpl(); + } + + // Like clear(), but always frees all dynamic storage allocated + // by the table. + void reset() noexcept { + clearImpl(); + } + + private: + static std::size_t& histoAt( + std::vector& histo, + std::size_t index) { + if (histo.size() <= index) { + histo.resize(index + 1); + } + return histo.at(index); + } + + public: + // Expensive + F14TableStats computeStats() const { + F14TableStats stats; + + if (folly::kIsDebug) { + // validate iteration + std::size_t n = 0; + ItemIter prev; + for (auto iter = begin(); iter != end(); iter.advance()) { + FOLLY_SAFE_DCHECK(n == 0 || iter.pack() < prev.pack(), ""); + ++n; + prev = iter; + } + FOLLY_SAFE_DCHECK(n == size(), ""); + } + + FOLLY_SAFE_DCHECK( + (chunks_ == Chunk::emptyInstance()) == (bucket_count() == 0), ""); + + std::size_t n1 = 0; + std::size_t n2 = 0; + auto cc = bucket_count() == 0 ? 0 : chunkMask_ + 1; + for (std::size_t ci = 0; ci < cc; ++ci) { + ChunkPtr chunk = chunks_ + ci; + FOLLY_SAFE_DCHECK(chunk->eof() == (ci == 0), ""); + + auto mask = chunk->occupiedMask(); + n1 += folly::popcount(mask); + + histoAt(stats.chunkOccupancyHisto, folly::popcount(mask))++; + histoAt( + stats.chunkOutboundOverflowHisto, chunk->outboundOverflowCount())++; + histoAt(stats.chunkHostedOverflowHisto, chunk->hostedOverflowCount())++; + + for (DenseMaskIter iter{mask}; iter.hasNext();) { + auto ii = iter.next(); + ++n2; + + { + auto& item = chunk->citem(ii); + auto hp = splitHash(this->computeItemHash(item)); + FOLLY_SAFE_DCHECK(chunk->tag(ii) == hp.second, ""); + + std::size_t dist = 1; + std::size_t index = hp.first; + std::size_t delta = probeDelta(hp); + while ((index & chunkMask_) != ci) { + index += delta; + ++dist; + } + + histoAt(stats.keyProbeLengthHisto, dist)++; + } + + // misses could have any tag, so we do the dumb but accurate + // thing and just try them all + for (std::size_t ti = 0; ti < 256; ++ti) { + uint8_t tag = static_cast(ti == 0 ? 
1 : 0); + HashPair hp{ci, tag}; + + std::size_t dist = 1; + std::size_t index = hp.first; + std::size_t delta = probeDelta(hp); + for (std::size_t tries = 0; tries <= chunkMask_ && + chunks_[index & chunkMask_].outboundOverflowCount() != 0; + ++tries) { + index += delta; + ++dist; + } + + histoAt(stats.missProbeLengthHisto, dist)++; + } + } + } + + FOLLY_SAFE_DCHECK(n1 == size(), ""); + FOLLY_SAFE_DCHECK(n2 == size(), ""); + + stats.policy = typeid(Policy).name(); + stats.size = size(); + stats.valueSize = sizeof(value_type); + stats.bucketCount = bucket_count(); + stats.chunkCount = cc; + + stats.totalBytes = sizeof(*this) + + (cc == 0 ? 0 : allocSize(cc, bucket_count())) + + this->indirectBytesUsed(size(), bucket_count(), begin()); + stats.overheadBytes = stats.totalBytes - size() * sizeof(value_type); + + return stats; + } +}; +} // namespace detail +} // namespace f14 + +#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE + +} // namespace folly diff --git a/folly/container/test/EvictingCacheMapTest.cpp b/folly/container/test/EvictingCacheMapTest.cpp index baa1894d1b0..e5bfcf8c9f9 100644 --- a/folly/container/test/EvictingCacheMapTest.cpp +++ b/folly/container/test/EvictingCacheMapTest.cpp @@ -633,3 +633,28 @@ TEST(EvictingCacheMap, MoveTest) { EXPECT_EQ(i, map2.get(i)); } } + +TEST(EvictingCacheMap, CustomKeyEqual) { + const int nItems = 100; + struct Eq { + bool operator()(const int& a, const int& b) const { + return (a % mod) == (b % mod); + } + int mod; + }; + struct Hash { + size_t operator()(const int& a) const { + return std::hash()(a % mod); + } + int mod; + }; + EvictingCacheMap map( + nItems, 1 /* clearSize */, Hash{nItems}, Eq{nItems}); + for (int i = 0; i < nItems; i++) { + map.set(i, i); + EXPECT_TRUE(map.exists(i)); + EXPECT_EQ(i, map.get(i)); + EXPECT_TRUE(map.exists(i + nItems)); + EXPECT_EQ(i, map.get(i + nItems)); + } +} diff --git a/folly/container/test/F14MapTest.cpp b/folly/container/test/F14MapTest.cpp new file mode 100644 index 00000000000..1c4a519ad9f --- /dev/null +++ b/folly/container/test/F14MapTest.cpp @@ -0,0 +1,1103 @@ +/* + * Copyright 2017-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +/////////////////////////////////// +#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE +/////////////////////////////////// + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +using namespace folly; +using namespace folly::string_piece_literals; + +namespace { +std::string s(char const* p) { + return p; +} +} // namespace + +template +void runSimple() { + T h; + + EXPECT_EQ(h.size(), 0); + + h.insert(std::make_pair(s("abc"), s("ABC"))); + EXPECT_TRUE(h.find(s("def")) == h.end()); + EXPECT_FALSE(h.find(s("abc")) == h.end()); + EXPECT_EQ(h[s("abc")], s("ABC")); + h[s("ghi")] = s("GHI"); + EXPECT_EQ(h.size(), 2); + h.erase(h.find(s("abc"))); + EXPECT_EQ(h.size(), 1); + + T h2(std::move(h)); + EXPECT_EQ(h.size(), 0); + EXPECT_TRUE(h.begin() == h.end()); + EXPECT_EQ(h2.size(), 1); + + EXPECT_TRUE(h2.find(s("abc")) == h2.end()); + EXPECT_EQ(h2.begin()->first, s("ghi")); + { + auto i = h2.begin(); + EXPECT_FALSE(i == h2.end()); + ++i; + EXPECT_TRUE(i == h2.end()); + } + + T h3; + h3.try_emplace(s("xxx")); + h3.insert_or_assign(s("yyy"), s("YYY")); + h3 = std::move(h2); + EXPECT_EQ(h2.size(), 0); + EXPECT_EQ(h3.size(), 1); + EXPECT_TRUE(h3.find(s("xxx")) == h3.end()); + + for (uint64_t i = 0; i < 1000; ++i) { + h[std::to_string(i * i * i)] = s("x"); + EXPECT_EQ(h.size(), i + 1); + } + { + using std::swap; + swap(h, h2); + } + for (uint64_t i = 0; i < 1000; ++i) { + EXPECT_TRUE(h2.find(std::to_string(i * i * i)) != h2.end()); + EXPECT_EQ( + h2.find(std::to_string(i * i * i))->first, std::to_string(i * i * i)); + EXPECT_TRUE(h2.find(std::to_string(i * i * i + 2)) == h2.end()); + } + + T h4{h2}; + EXPECT_EQ(h2.size(), 1000); + EXPECT_EQ(h4.size(), 1000); + + T h5{std::move(h2)}; + T h6; + h6 = h4; + T h7 = h4; + T h8({{s("abc"), s("ABC")}, {s("def"), s("DEF")}}); + T h9({{s("abc"), s("ABD")}, {s("def"), s("DEF")}}); + EXPECT_EQ(h8.size(), 2); + EXPECT_EQ(h8.count(s("abc")), 1); + EXPECT_EQ(h8.count(s("xyz")), 0); + + EXPECT_TRUE(h7 != h8); + EXPECT_TRUE(h8 != h9); + + h8 = std::move(h7); + // h2 and h7 are moved from, h4, h5, h6, and h8 should be identical + + EXPECT_TRUE(h4 == h8); + + EXPECT_TRUE(h2.empty()); + EXPECT_TRUE(h7.empty()); + for (uint64_t i = 0; i < 1000; ++i) { + auto k = std::to_string(i * i * i); + EXPECT_EQ(h4.count(k), 1); + EXPECT_EQ(h5.count(k), 1); + EXPECT_EQ(h6.count(k), 1); + EXPECT_EQ(h8.count(k), 1); + } + + EXPECT_TRUE(h2 == h7); + EXPECT_TRUE(h4 != h7); + + EXPECT_EQ(h3.at(s("ghi")), s("GHI")); + EXPECT_THROW(h3.at(s("abc")), std::out_of_range); + + F14TableStats::compute(h); + F14TableStats::compute(h2); + F14TableStats::compute(h3); + F14TableStats::compute(h4); + F14TableStats::compute(h5); + F14TableStats::compute(h6); + F14TableStats::compute(h7); + F14TableStats::compute(h8); + F14TableStats::compute(h9); + + LOG(INFO) << "sizeof(" << typeid(T).name() << ") = " << sizeof(T); +} + +template +void runRehash() { + unsigned n = 10000; + T h; + auto b = h.bucket_count(); + for (unsigned i = 0; i < n; ++i) { + h.insert(std::make_pair(std::to_string(i), s(""))); + if (b != h.bucket_count()) { + F14TableStats::compute(h); + b = h.bucket_count(); + } + } + EXPECT_EQ(h.size(), n); + F14TableStats::compute(h); +} + +// T should be a map from uint64_t to uint64_t +template +void runRandom() { + using R = std::unordered_map; + + std::mt19937_64 gen(0); + std::uniform_int_distribution<> pctDist(0, 100); + std::uniform_int_distribution bitsBitsDist(1, 6); + T t0; + T t1; + R r0; + R r1; + + for (std::size_t reps = 0; reps 
< 10000; ++reps) { + // discardBits will be from 0 to 62 + auto discardBits = (uint64_t{1} << bitsBitsDist(gen)) - 2; + auto k = gen() >> discardBits; + auto v = gen(); + auto pct = pctDist(gen); + + EXPECT_EQ(t0.empty(), r0.empty()); + EXPECT_EQ(t0.size(), r0.size()); + if (pct < 15) { + // insert + auto t = t0.insert(std::make_pair(k, v)); + auto r = r0.insert(std::make_pair(k, v)); + EXPECT_EQ(*t.first, *r.first); + EXPECT_EQ(t.second, r.second); + } else if (pct < 25) { + // emplace + auto t = t0.emplace(k, v); + auto r = r0.emplace(k, v); + EXPECT_EQ(*t.first, *r.first); + EXPECT_EQ(t.second, r.second); + } else if (pct < 30) { + // bulk insert + t0.insert(r1.begin(), r1.end()); + r0.insert(r1.begin(), r1.end()); + } else if (pct < 40) { + // erase by key + auto t = t0.erase(k); + auto r = r0.erase(k); + EXPECT_EQ(t, r); + } else if (pct < 50) { + // erase by iterator + if (t0.size() > 0) { + auto r = r0.find(k); + if (r == r0.end()) { + r = r0.begin(); + } + k = r->first; + auto t = t0.find(k); + t = t0.erase(t); + if (t != t0.end()) { + EXPECT_NE(t->first, k); + } + r = r0.erase(r); + if (r != r0.end()) { + EXPECT_NE(r->first, k); + } + } + } else if (pct < 58) { + // find + auto t = t0.find(k); + auto r = r0.find(k); + EXPECT_EQ((t == t0.end()), (r == r0.end())); + if (t != t0.end() && r != r0.end()) { + EXPECT_EQ(*t, *r); + } + EXPECT_EQ(t0.count(k), r0.count(k)); + } else if (pct < 60) { + // equal_range + auto t = t0.equal_range(k); + auto r = r0.equal_range(k); + EXPECT_EQ((t.first == t.second), (r.first == r.second)); + if (t.first != t.second && r.first != r.second) { + EXPECT_EQ(*t.first, *r.first); + t.first++; + r.first++; + EXPECT_TRUE(t.first == t.second); + EXPECT_TRUE(r.first == r.second); + } + } else if (pct < 65) { + // iterate + uint64_t t = 0; + for (auto& e : t0) { + t += e.first * 37 + e.second + 1000; + } + uint64_t r = 0; + for (auto& e : r0) { + r += e.first * 37 + e.second + 1000; + } + EXPECT_EQ(t, r); + } else if (pct < 69) { + // swap + using std::swap; + swap(t0, t1); + swap(r0, r1); + } else if (pct < 70) { + // swap + t0.swap(t1); + r0.swap(r1); + } else if (pct < 72) { + // default construct + t0.~T(); + new (&t0) T(); + r0.~R(); + new (&r0) R(); + } else if (pct < 74) { + // default construct with capacity + std::size_t capacity = k & 0xffff; + t0.~T(); + new (&t0) T(capacity); + r0.~R(); + new (&r0) R(capacity); + } else if (pct < 80) { + // bulk iterator construct + t0.~T(); + new (&t0) T(r1.begin(), r1.end()); + r0.~R(); + new (&r0) R(r1.begin(), r1.end()); + } else if (pct < 82) { + // initializer list construct + auto k2 = gen() >> discardBits; + auto v2 = gen(); + t0.~T(); + new (&t0) T({{k, v}, {k2, v}, {k2, v2}}); + r0.~R(); + new (&r0) R({{k, v}, {k2, v}, {k2, v2}}); + } else if (pct < 88) { + // copy construct + t0.~T(); + new (&t0) T(t1); + r0.~R(); + new (&r0) R(r1); + } else if (pct < 90) { + // move construct + t0.~T(); + new (&t0) T(std::move(t1)); + r0.~R(); + new (&r0) R(std::move(r1)); + } else if (pct < 94) { + // copy assign + t0 = t1; + r0 = r1; + } else if (pct < 96) { + // move assign + t0 = std::move(t1); + r0 = std::move(r1); + } else if (pct < 98) { + // operator== + EXPECT_EQ((t0 == t1), (r0 == r1)); + } else if (pct < 99) { + // clear + F14TableStats::compute(t0); + t0.clear(); + r0.clear(); + } else if (pct < 100) { + // reserve + auto scale = std::uniform_int_distribution<>(0, 8)(gen); + auto delta = std::uniform_int_distribution<>(-2, 2)(gen); + std::ptrdiff_t target = (t0.size() * scale) / 4 + delta; + if (target >= 0) 
+        t0.reserve(static_cast<std::size_t>(target));
+        r0.reserve(static_cast<std::size_t>(target));
+      }
+    }
+  }
+}
+
+template <typename T>
+void runPrehash() {
+  T h;
+
+  EXPECT_EQ(h.size(), 0);
+
+  h.insert(std::make_pair(s("abc"), s("ABC")));
+  EXPECT_TRUE(h.find(s("def")) == h.end());
+  EXPECT_FALSE(h.find(s("abc")) == h.end());
+
+  auto t1 = h.prehash(s("def"));
+  auto t2 = h.prehash(s("abc"));
+  EXPECT_TRUE(h.find(t1, s("def")) == h.end());
+  EXPECT_FALSE(h.find(t2, s("abc")) == h.end());
+}
+
+TEST(F14ValueMap, simple) {
+  runSimple<F14ValueMap<std::string, std::string>>();
+}
+
+TEST(F14NodeMap, simple) {
+  runSimple<F14NodeMap<std::string, std::string>>();
+}
+
+TEST(F14VectorMap, simple) {
+  runSimple<F14VectorMap<std::string, std::string>>();
+}
+
+TEST(F14FastMap, simple) {
+  // F14FastMap is just a conditional typedef. Verify it compiles.
+  runRandom<F14FastMap<uint64_t, uint64_t>>();
+  runSimple<F14FastMap<std::string, std::string>>();
+}
+
+TEST(F14ValueMap, rehash) {
+  runRehash<F14ValueMap<std::string, std::string>>();
+}
+
+TEST(F14NodeMap, rehash) {
+  runRehash<F14NodeMap<std::string, std::string>>();
+}
+
+TEST(F14VectorMap, rehash) {
+  runRehash<F14VectorMap<std::string, std::string>>();
+}
+
+TEST(F14ValueMap, prehash) {
+  runPrehash<F14ValueMap<std::string, std::string>>();
+}
+
+TEST(F14NodeMap, prehash) {
+  runPrehash<F14NodeMap<std::string, std::string>>();
+}
+
+TEST(F14ValueMap, random) {
+  runRandom<F14ValueMap<uint64_t, uint64_t>>();
+}
+
+TEST(F14NodeMap, random) {
+  runRandom<F14NodeMap<uint64_t, uint64_t>>();
+}
+
+TEST(F14VectorMap, random) {
+  runRandom<F14VectorMap<uint64_t, uint64_t>>();
+}
+
+TEST(F14ValueMap, grow_stats) {
+  F14ValueMap<uint64_t, uint64_t> h;
+  for (unsigned i = 1; i <= 3072; ++i) {
+    h[i]++;
+  }
+  LOG(INFO) << "F14ValueMap just before rehash -> "
+            << F14TableStats::compute(h);
+  h[0]++;
+  LOG(INFO) << "F14ValueMap just after rehash -> " << F14TableStats::compute(h);
+}
+
+TEST(F14ValueMap, steady_state_stats) {
+  // 10k keys, 14% probability of insert, 86% chance of erase, so the
+  // table should converge to 1400 size without triggering the rehash
+  // that would occur at 1536.
+  F14ValueMap<uint64_t, uint64_t> h;
+  std::mt19937_64 gen(0);
+  std::uniform_int_distribution<> dist(0, 10000);
+  for (std::size_t i = 0; i < 100000; ++i) {
+    auto key = dist(gen);
+    if (dist(gen) < 1400) {
+      h.insert_or_assign(key, i);
+    } else {
+      h.erase(key);
+    }
+    if (((i + 1) % 10000) == 0) {
+      auto stats = F14TableStats::compute(h);
+      // Verify that average miss probe length is bounded despite continued
+      // erase + reuse. p99 of the average across 10M random steps is 4.69,
+      // average is 2.96.
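+      // A rough sketch of the sizing arithmetic (an added note, not part
+      // of the original comment): each step draws a key uniformly from
+      // [0, 10000] and inserts it with probability ~14% (dist(gen) < 1400),
+      // otherwise erases it. A key is present exactly when its most recent
+      // touch was an insert, so roughly 0.14 * 10000 = 1400 keys are
+      // present at any time, just under the 1536 rehash point noted above.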
+      EXPECT_LT(f14::expectedProbe(stats.missProbeLengthHisto), 10.0);
+    }
+  }
+  LOG(INFO) << "F14ValueMap at steady state -> " << F14TableStats::compute(h);
+}
+
+// Tracked is implicitly constructible across tags
+namespace {
+struct Counts {
+  uint64_t copyConstruct{0};
+  uint64_t moveConstruct{0};
+  uint64_t copyConvert{0};
+  uint64_t moveConvert{0};
+  uint64_t copyAssign{0};
+  uint64_t moveAssign{0};
+  uint64_t defaultConstruct{0};
+
+  explicit Counts(
+      uint64_t copConstr = 0,
+      uint64_t movConstr = 0,
+      uint64_t copConv = 0,
+      uint64_t movConv = 0,
+      uint64_t copAssign = 0,
+      uint64_t movAssign = 0,
+      uint64_t def = 0)
+      : copyConstruct{copConstr},
+        moveConstruct{movConstr},
+        copyConvert{copConv},
+        moveConvert{movConv},
+        copyAssign{copAssign},
+        moveAssign{movAssign},
+        defaultConstruct{def} {}
+
+  uint64_t dist(Counts const& rhs) const {
+    auto d = [](uint64_t x, uint64_t y) { return (x - y) * (x - y); };
+    return d(copyConstruct, rhs.copyConstruct) +
+        d(moveConstruct, rhs.moveConstruct) + d(copyConvert, rhs.copyConvert) +
+        d(moveConvert, rhs.moveConvert) + d(copyAssign, rhs.copyAssign) +
+        d(moveAssign, rhs.moveAssign) +
+        d(defaultConstruct, rhs.defaultConstruct);
+  }
+
+  bool operator==(Counts const& rhs) const {
+    return copyConstruct == rhs.copyConstruct &&
+        moveConstruct == rhs.moveConstruct && copyConvert == rhs.copyConvert &&
+        moveConvert == rhs.moveConvert && copyAssign == rhs.copyAssign &&
+        moveAssign == rhs.moveAssign &&
+        defaultConstruct == rhs.defaultConstruct;
+  }
+  bool operator!=(Counts const& rhs) const {
+    return !(*this == rhs);
+  }
+};
+
+thread_local Counts sumCounts{};
+
+template <int Tag>
+struct Tracked {
+  static thread_local Counts counts;
+
+  uint64_t val_;
+
+  Tracked() : val_{0} {
+    sumCounts.defaultConstruct++;
+    counts.defaultConstruct++;
+  }
+  /* implicit */ Tracked(uint64_t val) : val_{val} {
+    sumCounts.copyConvert++;
+    counts.copyConvert++;
+  }
+  Tracked(Tracked const& rhs) : val_{rhs.val_} {
+    sumCounts.copyConstruct++;
+    counts.copyConstruct++;
+  }
+  Tracked(Tracked&& rhs) noexcept : val_{rhs.val_} {
+    sumCounts.moveConstruct++;
+    counts.moveConstruct++;
+  }
+  Tracked& operator=(Tracked const& rhs) {
+    val_ = rhs.val_;
+    sumCounts.copyAssign++;
+    counts.copyAssign++;
+    return *this;
+  }
+  Tracked& operator=(Tracked&& rhs) noexcept {
+    val_ = rhs.val_;
+    sumCounts.moveAssign++;
+    counts.moveAssign++;
+    return *this;
+  }
+
+  template <int T>
+  /* implicit */ Tracked(Tracked<T> const& rhs) : val_{rhs.val_} {
+    sumCounts.copyConvert++;
+    counts.copyConvert++;
+  }
+
+  template <int T>
+  /* implicit */ Tracked(Tracked<T>&& rhs) : val_{rhs.val_} {
+    sumCounts.moveConvert++;
+    counts.moveConvert++;
+  }
+
+  bool operator==(Tracked const& rhs) const {
+    return val_ == rhs.val_;
+  }
+  bool operator!=(Tracked const& rhs) const {
+    return !(*this == rhs);
+  }
+};
+
+template <>
+thread_local Counts Tracked<0>::counts{};
+template <>
+thread_local Counts Tracked<1>::counts{};
+template <>
+thread_local Counts Tracked<2>::counts{};
+template <>
+thread_local Counts Tracked<3>::counts{};
+template <>
+thread_local Counts Tracked<4>::counts{};
+template <>
+thread_local Counts Tracked<5>::counts{};
+
+void resetTracking() {
+  sumCounts = Counts{};
+  Tracked<0>::counts = Counts{};
+  Tracked<1>::counts = Counts{};
+  Tracked<2>::counts = Counts{};
+  Tracked<3>::counts = Counts{};
+  Tracked<4>::counts = Counts{};
+  Tracked<5>::counts = Counts{};
+}
+} // namespace
+
+std::ostream& operator<<(std::ostream& xo, Counts const& counts) {
+  xo << "[";
+  std::string glue = "";
+  if (counts.copyConstruct > 0) {
+    xo << glue << counts.copyConstruct << " copy";
+    glue = ", ";
+  }
+  if (counts.moveConstruct > 0) {
+    xo << glue << counts.moveConstruct << " move";
+    glue = ", ";
+  }
+  if (counts.copyConvert > 0) {
+    xo << glue << counts.copyConvert << " copy convert";
+    glue = ", ";
+  }
+  if (counts.moveConvert > 0) {
+    xo << glue << counts.moveConvert << " move convert";
+    glue = ", ";
+  }
+  if (counts.copyAssign > 0) {
+    xo << glue << counts.copyAssign << " copy assign";
+    glue = ", ";
+  }
+  if (counts.moveAssign > 0) {
+    xo << glue << counts.moveAssign << " move assign";
+    glue = ", ";
+  }
+  if (counts.defaultConstruct > 0) {
+    xo << glue << counts.defaultConstruct << " default construct";
+    glue = ", ";
+  }
+  xo << "]";
+  return xo;
+}
+
+namespace std {
+template <int Tag>
+struct hash<Tracked<Tag>> {
+  size_t operator()(Tracked<Tag> const& tracked) const {
+    return tracked.val_ ^ Tag;
+  }
+};
+} // namespace std
+
+TEST(Tracked, baseline) {
+  Tracked<0> a0;
+
+  {
+    resetTracking();
+    Tracked<0> b0{a0};
+    EXPECT_EQ(a0.val_, b0.val_);
+    EXPECT_EQ(sumCounts, (Counts{1, 0, 0, 0}));
+    EXPECT_EQ(Tracked<0>::counts, (Counts{1, 0, 0, 0}));
+  }
+  {
+    resetTracking();
+    Tracked<0> b0{std::move(a0)};
+    EXPECT_EQ(a0.val_, b0.val_);
+    EXPECT_EQ(sumCounts, (Counts{0, 1, 0, 0}));
+    EXPECT_EQ(Tracked<0>::counts, (Counts{0, 1, 0, 0}));
+  }
+  {
+    resetTracking();
+    Tracked<1> b1{a0};
+    EXPECT_EQ(a0.val_, b1.val_);
+    EXPECT_EQ(sumCounts, (Counts{0, 0, 1, 0}));
+    EXPECT_EQ(Tracked<1>::counts, (Counts{0, 0, 1, 0}));
+  }
+  {
+    resetTracking();
+    Tracked<1> b1{std::move(a0)};
+    EXPECT_EQ(a0.val_, b1.val_);
+    EXPECT_EQ(sumCounts, (Counts{0, 0, 0, 1}));
+    EXPECT_EQ(Tracked<1>::counts, (Counts{0, 0, 0, 1}));
+  }
+  {
+    Tracked<0> b0;
+    resetTracking();
+    b0 = a0;
+    EXPECT_EQ(a0.val_, b0.val_);
+    EXPECT_EQ(sumCounts, (Counts{0, 0, 0, 0, 1, 0}));
+    EXPECT_EQ(Tracked<0>::counts, (Counts{0, 0, 0, 0, 1, 0}));
+  }
+  {
+    Tracked<0> b0;
+    resetTracking();
+    b0 = std::move(a0);
+    EXPECT_EQ(a0.val_, b0.val_);
+    EXPECT_EQ(sumCounts, (Counts{0, 0, 0, 0, 0, 1}));
+    EXPECT_EQ(Tracked<0>::counts, (Counts{0, 0, 0, 0, 0, 1}));
+  }
+  {
+    Tracked<1> b1;
+    resetTracking();
+    b1 = a0;
+    EXPECT_EQ(a0.val_, b1.val_);
+    EXPECT_EQ(sumCounts, (Counts{0, 0, 1, 0, 0, 1}));
+    EXPECT_EQ(Tracked<1>::counts, (Counts{0, 0, 1, 0, 0, 1}));
+  }
+  {
+    Tracked<1> b1;
+    resetTracking();
+    b1 = std::move(a0);
+    EXPECT_EQ(a0.val_, b1.val_);
+    EXPECT_EQ(sumCounts, (Counts{0, 0, 0, 1, 0, 1}));
+    EXPECT_EQ(Tracked<1>::counts, (Counts{0, 0, 0, 1, 0, 1}));
+  }
+}
+
+// M should be a map from Tracked<0> to Tracked<1>.  F should take a map
+// and a pair const& or pair&& and cause it to be inserted
+template <typename M, typename F>
+void runInsertCases(
+    std::string const& name,
+    F const& insertFunc,
+    uint64_t expectedDist = 0) {
+  static_assert(std::is_same<typename M::key_type, Tracked<0>>::value, "");
+  static_assert(std::is_same<typename M::mapped_type, Tracked<1>>::value, "");
+  {
+    typename M::value_type p{0, 0};
+    M m;
+    resetTracking();
+    insertFunc(m, p);
+    LOG(INFO) << name << ", fresh key, value_type const& -> "
+              << "key_type ops " << Tracked<0>::counts << ", mapped_type ops "
+              << Tracked<1>::counts;
+    // copy is expected
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{1, 0, 0, 0}) +
+            Tracked<1>::counts.dist(Counts{1, 0, 0, 0}),
+        expectedDist);
+  }
+  {
+    typename M::value_type p{0, 0};
+    M m;
+    resetTracking();
+    insertFunc(m, std::move(p));
+    LOG(INFO) << name << ", fresh key, value_type&& -> "
+              << "key_type ops " << Tracked<0>::counts << ", mapped_type ops "
+              << Tracked<1>::counts;
+    // key copy is unfortunate but required
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{1, 0, 0, 0}) +
+            Tracked<1>::counts.dist(Counts{0, 1, 0, 0}),
+        expectedDist);
+  }
+  {
+    std::pair<Tracked<0>, Tracked<1>> p{0, 0};
+    M m;
+    resetTracking();
+    insertFunc(m, p);
+    LOG(INFO) << name << ", fresh key, pair const& -> "
+              << "key_type ops " << Tracked<0>::counts << ", mapped_type ops "
+              << Tracked<1>::counts;
+    // 1 copy is required
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{1, 0, 0, 0}) +
+            Tracked<1>::counts.dist(Counts{1, 0, 0, 0}),
+        expectedDist);
+  }
+  {
+    std::pair<Tracked<0>, Tracked<1>> p{0, 0};
+    M m;
+    resetTracking();
+    insertFunc(m, std::move(p));
+    LOG(INFO) << name << ", fresh key, pair&& -> "
+              << "key_type ops " << Tracked<0>::counts << ", mapped_type ops "
+              << Tracked<1>::counts;
+    // this is the happy path for insert(make_pair(.., ..))
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{0, 1, 0, 0}) +
+            Tracked<1>::counts.dist(Counts{0, 1, 0, 0}),
+        expectedDist);
+  }
+  {
+    std::pair<Tracked<2>, Tracked<3>> p{0, 0};
+    M m;
+    resetTracking();
+    insertFunc(m, p);
+    LOG(INFO) << name << ", fresh key, convertible const& -> "
+              << "key_type ops " << Tracked<0>::counts << ", key_src ops "
+              << Tracked<2>::counts << ", mapped_type ops "
+              << Tracked<1>::counts << ", mapped_src ops "
+              << Tracked<3>::counts;
+
+    // There are three strategies that could be optimal for particular
+    // ratios of cost:
+    //
+    // - convert key and value in place to final position, destroy if
+    //   insert fails. This is the strategy used by std::unordered_map
+    //   and FBHashMap
+    //
+    // - convert key and default value in place to final position,
+    //   convert value only if insert succeeds. Nobody uses this strategy
+    //
+    // - convert key to a temporary, move key and convert value if
+    //   insert succeeds. This is the strategy used by F14 and what is
+    //   verified by the EXPECT_EQ here.
+
+    // The expectedDist * 3 is just a hack for the emplace-pieces-by-value
+    // test, whose test harness copies the original pair and then uses
+    // move conversion instead of copy conversion.
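+    // A sketch of that third strategy, using hypothetical helper names
+    // rather than the real F14 internals:
+    //
+    //   key_type tmp{srcPair.first};         // 1 copy convert to a temporary
+    //   auto slot = findOrAllocate(m, tmp);  // probe before constructing
+    //   if (slot.isNewEntry) {
+    //     constructAt(slot,
+    //                 std::move(tmp),        // 1 move of key_type
+    //                 srcPair.second);       // convert mapped only on success
+    //   }
+    //
+    // which is why the expected counts below are {0 copy, 1 move, 1 copy
+    // convert} for the key and a single copy convert for the mapped value.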
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{0, 1, 1, 0}) +
+            Tracked<1>::counts.dist(Counts{0, 0, 1, 0}) +
+            Tracked<2>::counts.dist(Counts{0, 0, 0, 0}) +
+            Tracked<3>::counts.dist(Counts{0, 0, 0, 0}),
+        expectedDist * 3);
+  }
+  {
+    std::pair<Tracked<2>, Tracked<3>> p{0, 0};
+    M m;
+    resetTracking();
+    insertFunc(m, std::move(p));
+    LOG(INFO) << name << ", fresh key, convertible&& -> "
+              << "key_type ops " << Tracked<0>::counts << ", key_src ops "
+              << Tracked<2>::counts << ", mapped_type ops "
+              << Tracked<1>::counts << ", mapped_src ops "
+              << Tracked<3>::counts;
+
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{0, 1, 0, 1}) +
+            Tracked<1>::counts.dist(Counts{0, 0, 0, 1}) +
+            Tracked<2>::counts.dist(Counts{0, 0, 0, 0}) +
+            Tracked<3>::counts.dist(Counts{0, 0, 0, 0}),
+        expectedDist);
+  }
+  {
+    typename M::value_type p{0, 0};
+    M m;
+    m[0] = 0;
+    resetTracking();
+    insertFunc(m, p);
+    LOG(INFO) << name << ", duplicate key, value_type const& -> "
+              << "key_type ops " << Tracked<0>::counts << ", mapped_type ops "
+              << Tracked<1>::counts;
+
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{0, 0, 0, 0}) +
+            Tracked<1>::counts.dist(Counts{0, 0, 0, 0}),
+        expectedDist);
+  }
+  {
+    typename M::value_type p{0, 0};
+    M m;
+    m[0] = 0;
+    resetTracking();
+    insertFunc(m, std::move(p));
+    LOG(INFO) << name << ", duplicate key, value_type&& -> "
+              << "key_type ops " << Tracked<0>::counts << ", mapped_type ops "
+              << Tracked<1>::counts;
+
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{0, 0, 0, 0}) +
+            Tracked<1>::counts.dist(Counts{0, 0, 0, 0}),
+        expectedDist);
+  }
+  {
+    std::pair<Tracked<0>, Tracked<1>> p{0, 0};
+    M m;
+    m[0] = 0;
+    resetTracking();
+    insertFunc(m, p);
+    LOG(INFO) << name << ", duplicate key, pair const& -> "
+              << "key_type ops " << Tracked<0>::counts << ", mapped_type ops "
+              << Tracked<1>::counts;
+
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{0, 0, 0, 0}) +
+            Tracked<1>::counts.dist(Counts{0, 0, 0, 0}),
+        expectedDist);
+  }
+  {
+    std::pair<Tracked<0>, Tracked<1>> p{0, 0};
+    M m;
+    m[0] = 0;
+    resetTracking();
+    insertFunc(m, std::move(p));
+    LOG(INFO) << name << ", duplicate key, pair&& -> "
+              << "key_type ops " << Tracked<0>::counts << ", mapped_type ops "
+              << Tracked<1>::counts;
+
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{0, 0, 0, 0}) +
+            Tracked<1>::counts.dist(Counts{0, 0, 0, 0}),
+        expectedDist);
+  }
+  {
+    std::pair<Tracked<2>, Tracked<3>> p{0, 0};
+    M m;
+    m[0] = 0;
+    resetTracking();
+    insertFunc(m, p);
+    LOG(INFO) << name << ", duplicate key, convertible const& -> "
+              << "key_type ops " << Tracked<0>::counts << ", key_src ops "
+              << Tracked<2>::counts << ", mapped_type ops "
+              << Tracked<1>::counts << ", mapped_src ops "
+              << Tracked<3>::counts;
+
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{0, 0, 1, 0}) +
+            Tracked<1>::counts.dist(Counts{0, 0, 0, 0}) +
+            Tracked<2>::counts.dist(Counts{0, 0, 0, 0}) +
+            Tracked<3>::counts.dist(Counts{0, 0, 0, 0}),
+        expectedDist * 2);
+  }
+  {
+    std::pair<Tracked<2>, Tracked<3>> p{0, 0};
+    M m;
+    m[0] = 0;
+    resetTracking();
+    insertFunc(m, std::move(p));
+    LOG(INFO) << name << ", duplicate key, convertible&& -> "
+              << "key_type ops " << Tracked<0>::counts << ", key_src ops "
+              << Tracked<2>::counts << ", mapped_type ops "
+              << Tracked<1>::counts << ", mapped_src ops "
+              << Tracked<3>::counts;
+
+    EXPECT_EQ(
+        Tracked<0>::counts.dist(Counts{0, 0, 0, 1}) +
+            Tracked<1>::counts.dist(Counts{0, 0, 0, 0}) +
+            Tracked<2>::counts.dist(Counts{0, 0, 0, 0}) +
+            Tracked<3>::counts.dist(Counts{0, 0, 0, 0}),
+        expectedDist);
+  }
+}
+
+struct DoInsert {
+  template <typename M, typename P>
+  void operator()(M& m, P&& p) const {
+    m.insert(std::forward<P>(p));
+  }
+};
+
+struct DoEmplace1 {
+  template <typename M, typename P>
+  void operator()(M& m, P&& p) const {
+    m.emplace(std::forward<P>(p));
+  }
+};
+
+struct DoEmplace2 {
+  template <typename M, typename U1, typename U2>
+  void operator()(M& m, std::pair<U1, U2> const& p) const {
+    m.emplace(p.first, p.second);
+  }
+
+  template <typename M, typename U1, typename U2>
+  void operator()(M& m, std::pair<U1, U2>&& p) const {
+    m.emplace(std::move(p.first), std::move(p.second));
+  }
+};
+
+struct DoEmplace3 {
+  template <typename M, typename U1, typename U2>
+  void operator()(M& m, std::pair<U1, U2> const& p) const {
+    m.emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(p.first),
+        std::forward_as_tuple(p.second));
+  }
+
+  template <typename M, typename U1, typename U2>
+  void operator()(M& m, std::pair<U1, U2>&& p) const {
+    m.emplace(
+        std::piecewise_construct,
+        std::forward_as_tuple(std::move(p.first)),
+        std::forward_as_tuple(std::move(p.second)));
+  }
+};
+
+// Simulates use of piecewise_construct without proper use of
+// forward_as_tuple. This code doesn't yield the normal pattern, but
+// it should have exactly 1 additional move or copy of the key and 1
+// additional move or copy of the mapped value.
+struct DoEmplace3Value {
+  template <typename M, typename U1, typename U2>
+  void operator()(M& m, std::pair<U1, U2> const& p) const {
+    m.emplace(
+        std::piecewise_construct,
+        std::tuple<U1>{p.first},
+        std::tuple<U2>{p.second});
+  }
+
+  template <typename M, typename U1, typename U2>
+  void operator()(M& m, std::pair<U1, U2>&& p) const {
+    m.emplace(
+        std::piecewise_construct,
+        std::tuple<U1>{std::move(p.first)},
+        std::tuple<U2>{std::move(p.second)});
+  }
+};
+
+template <typename M>
+void runInsertAndEmplace(std::string const& name) {
+  runInsertCases<M>(name + " insert", DoInsert{});
+  runInsertCases<M>(name + " emplace pair", DoEmplace1{});
+  runInsertCases<M>(name + " emplace k,v", DoEmplace2{});
+  runInsertCases<M>(name + " emplace pieces", DoEmplace3{});
+  runInsertCases<M>(name + " emplace pieces by value", DoEmplace3Value{}, 2);
+
+  // Calling the default pair constructor via emplace is valid, but not
+  // very useful in real life. Verify that it works.
+  M m;
+  typename M::key_type k;
+  EXPECT_EQ(m.count(k), 0);
+  m.emplace();
+  EXPECT_EQ(m.count(k), 1);
+}
+
+TEST(F14ValueMap, destructuring) {
+  runInsertAndEmplace<F14ValueMap<Tracked<0>, Tracked<1>>>("f14value");
+}
+
+TEST(F14NodeMap, destructuring) {
+  runInsertAndEmplace<F14NodeMap<Tracked<0>, Tracked<1>>>("f14node");
+}
+
+TEST(F14VectorMap, destructuring) {
+  runInsertAndEmplace<F14VectorMap<Tracked<0>, Tracked<1>>>("f14vector");
+}
+
+TEST(F14VectorMap, destructuringErase) {
+  using M = F14VectorMap<Tracked<0>, Tracked<1>>;
+  typename M::value_type p1{0, 0};
+  typename M::value_type p2{2, 2};
+  M m;
+  m.insert(p1);
+  m.insert(p2);
+
+  resetTracking();
+  m.erase(p1.first);
+  LOG(INFO) << "erase -> "
+            << "key_type ops " << Tracked<0>::counts << ", mapped_type ops "
+            << Tracked<1>::counts;
+  // deleting p1 will cause p2 to be moved to the front of the values array
+  EXPECT_EQ(
+      Tracked<0>::counts.dist(Counts{0, 1, 0, 0}) +
+          Tracked<1>::counts.dist(Counts{0, 1, 0, 0}),
+      0);
+}
+
+TEST(F14ValueMap, vectorMaxSize) {
+  F14ValueMap<int, int> m;
+  EXPECT_EQ(
+      m.max_size(),
+      std::numeric_limits<std::size_t>::max() / sizeof(std::pair<int, int>));
+}
+
+TEST(F14NodeMap, vectorMaxSize) {
+  F14NodeMap<int, int> m;
+  EXPECT_EQ(
+      m.max_size(),
+      std::numeric_limits<std::size_t>::max() / sizeof(std::pair<int, int>));
+}
+
+TEST(F14VectorMap, vectorMaxSize) {
+  F14VectorMap<int, int> m;
+  EXPECT_EQ(m.max_size(), std::numeric_limits<uint32_t>::max());
+}
+
+template <typename M>
+void runMoveOnlyTest() {
+  M t0;
+  t0[10] = 20;
+  t0.emplace(30, 40);
+  t0.insert(std::make_pair(50, 60));
+  M t1{std::move(t0)};
+  EXPECT_TRUE(t0.empty());
+  M t2;
+  EXPECT_TRUE(t2.empty());
+  t2 = std::move(t1);
+  EXPECT_EQ(t2.size(), 3);
+}
+
+TEST(F14ValueMap, moveOnly) {
+  runMoveOnlyTest<F14ValueMap<f14::MoveOnlyTestInt, int>>();
+  runMoveOnlyTest<F14ValueMap<int, f14::MoveOnlyTestInt>>();
+  runMoveOnlyTest<F14ValueMap<f14::MoveOnlyTestInt, f14::MoveOnlyTestInt>>();
+}
+
+TEST(F14NodeMap, moveOnly) {
+  runMoveOnlyTest<F14NodeMap<f14::MoveOnlyTestInt, int>>();
+  runMoveOnlyTest<F14NodeMap<int, f14::MoveOnlyTestInt>>();
+  runMoveOnlyTest<F14NodeMap<f14::MoveOnlyTestInt, f14::MoveOnlyTestInt>>();
+}
+
+TEST(F14VectorMap, moveOnly) {
+  runMoveOnlyTest<F14VectorMap<f14::MoveOnlyTestInt, int>>();
+  runMoveOnlyTest<F14VectorMap<int, f14::MoveOnlyTestInt>>();
+  runMoveOnlyTest<F14VectorMap<f14::MoveOnlyTestInt, f14::MoveOnlyTestInt>>();
+}
+
+TEST(F14FastMap, moveOnly) {
+  runMoveOnlyTest<F14FastMap<f14::MoveOnlyTestInt, int>>();
+  runMoveOnlyTest<F14FastMap<int, f14::MoveOnlyTestInt>>();
+  runMoveOnlyTest<F14FastMap<f14::MoveOnlyTestInt, f14::MoveOnlyTestInt>>();
+}
+
+TEST(F14ValueMap, heterogeneous) {
+  // note: std::string is implicitly convertible to but not from StringPiece
+  using Hasher = folly::transparent<folly::hasher<folly::StringPiece>>;
+  using KeyEqual = folly::transparent<std::equal_to<folly::StringPiece>>;
+
+  constexpr auto hello = "hello"_sp;
+  constexpr auto buddy = "buddy"_sp;
+  constexpr auto world = "world"_sp;
+
+  F14ValueMap<std::string, bool, Hasher, KeyEqual> map;
+  map.emplace(hello.str(), true);
+  map.emplace(world.str(), false);
+
+  auto checks = [hello, buddy](auto& ref) {
+    // count
+    EXPECT_EQ(0, ref.count(buddy));
+    EXPECT_EQ(1, ref.count(hello));
+
+    // find
+    EXPECT_TRUE(ref.end() == ref.find(buddy));
+    EXPECT_EQ(hello, ref.find(hello)->first);
+
+    // prehash + find
+    EXPECT_TRUE(ref.end() == ref.find(ref.prehash(buddy), buddy));
+    EXPECT_EQ(hello, ref.find(ref.prehash(hello), hello)->first);
+
+    // equal_range
+    EXPECT_TRUE(std::make_pair(ref.end(), ref.end()) == ref.equal_range(buddy));
+    EXPECT_TRUE(
+        std::make_pair(ref.find(hello), ++ref.find(hello)) ==
+        ref.equal_range(hello));
+  };
+
+  checks(map);
+  checks(folly::as_const(map));
+}
+
+///////////////////////////////////
+#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
+///////////////////////////////////
diff --git a/folly/container/test/F14SetTest.cpp b/folly/container/test/F14SetTest.cpp
new file mode 100644
index 00000000000..0ee8d4bf558
--- /dev/null
+++ b/folly/container/test/F14SetTest.cpp
@@ -0,0 +1,474 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <folly/container/F14Set.h>
+
+///////////////////////////////////
+#if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
+///////////////////////////////////
+
+#include <chrono>
+#include <random>
+#include <string>
+#include <unordered_set>
+
+#include <folly/Range.h>
+#include <folly/container/test/F14TestUtil.h>
+
+#include <folly/portability/GTest.h>
+
+using namespace folly;
+using namespace folly::string_piece_literals;
+
+namespace {
+std::string s(char const* p) {
+  return p;
+}
+} // namespace
+
+template <typename T>
+void runSimple() {
+  T h;
+
+  EXPECT_EQ(h.size(), 0);
+
+  h.insert(s("abc"));
+  EXPECT_TRUE(h.find(s("def")) == h.end());
+  EXPECT_FALSE(h.find(s("abc")) == h.end());
+  h.insert(s("ghi"));
+  EXPECT_EQ(h.size(), 2);
+  h.erase(h.find(s("abc")));
+  EXPECT_EQ(h.size(), 1);
+
+  T h2(std::move(h));
+  EXPECT_EQ(h.size(), 0);
+  EXPECT_TRUE(h.begin() == h.end());
+  EXPECT_EQ(h2.size(), 1);
+
+  EXPECT_TRUE(h2.find(s("abc")) == h2.end());
+  EXPECT_EQ(*h2.begin(), s("ghi"));
+  {
+    auto i = h2.begin();
+    EXPECT_FALSE(i == h2.end());
+    ++i;
+    EXPECT_TRUE(i == h2.end());
+  }
+
+  T h3;
+  h3.insert(s("xxx"));
+  h3.insert(s("yyy"));
+  h3 = std::move(h2);
+  EXPECT_EQ(h2.size(), 0);
+  EXPECT_EQ(h3.size(), 1);
+  EXPECT_TRUE(h3.find(s("xxx")) == h3.end());
+
+  for (uint64_t i = 0; i < 1000; ++i) {
+    h.insert(std::move(std::to_string(i * i * i)));
+    EXPECT_EQ(h.size(), i + 1);
+  }
+  {
+    using std::swap;
+    swap(h, h2);
+  }
+  for (uint64_t i = 0; i < 1000; ++i) {
+    EXPECT_TRUE(h2.find(std::to_string(i * i * i)) != h2.end());
+    EXPECT_EQ(*h2.find(std::to_string(i * i * i)), std::to_string(i * i * i));
+    EXPECT_TRUE(h2.find(std::to_string(i * i * i + 2)) == h2.end());
+  }
+
+  T h4{h2};
+  EXPECT_EQ(h2.size(), 1000);
+  EXPECT_EQ(h4.size(), 1000);
+
+  T h5{std::move(h2)};
+  T h6;
+  h6 = h4;
+  T h7 = h4;
+
+  T h8({s("abc"), s("def")});
+  T h9({s("abd"), s("def")});
+  EXPECT_EQ(h8.size(), 2);
+  EXPECT_EQ(h8.count(s("abc")), 1);
+  EXPECT_EQ(h8.count(s("xyz")), 0);
+
+  EXPECT_TRUE(h7 != h8);
+  EXPECT_TRUE(h8 != h9);
+
+  h8 = std::move(h7);
+  // h2 and h7 are moved from, h4, h5, h6, and h8 should be identical
+
+  EXPECT_TRUE(h4 == h8);
+
+  EXPECT_TRUE(h2.empty());
+  EXPECT_TRUE(h7.empty());
+  for (uint64_t i = 0; i < 1000; ++i) {
+    auto k = std::to_string(i * i * i);
+    EXPECT_EQ(h4.count(k), 1);
+    EXPECT_EQ(h5.count(k), 1);
+    EXPECT_EQ(h6.count(k), 1);
+    EXPECT_EQ(h8.count(k), 1);
+  }
+
+  F14TableStats::compute(h);
+  F14TableStats::compute(h2);
+  F14TableStats::compute(h3);
+  F14TableStats::compute(h4);
+  F14TableStats::compute(h5);
+  F14TableStats::compute(h6);
+  F14TableStats::compute(h7);
+  F14TableStats::compute(h8);
+}
+
+template <typename T>
+void runRehash() {
+  unsigned n = 10000;
+  T h;
+  for (unsigned i = 0; i < n; ++i) {
+    h.insert(std::to_string(i));
+  }
+  EXPECT_EQ(h.size(), n);
+  F14TableStats::compute(h);
+}
+
+// T should be a set of uint64_t
+template <typename T>
+void runRandom() {
+  using R = std::unordered_set<uint64_t>;
+
+  std::mt19937_64 gen(0);
+  std::uniform_int_distribution<> pctDist(0, 100);
+  std::uniform_int_distribution<uint64_t> bitsBitsDist(1, 6);
+  T t0;
+  T t1;
+  R r0;
+  R r1;
+
+  for (std::size_t reps = 0; reps < 100000; ++reps) {
+    // discardBits will be from 0 to 62
+    auto discardBits = (uint64_t{1} << bitsBitsDist(gen)) - 2;
+    auto k = gen() >> discardBits;
+    auto pct = pctDist(gen);
+
+    EXPECT_EQ(t0.size(), r0.size());
+    if (pct < 15) {
+      // insert
+      auto t = t0.insert(k);
+      auto r = r0.insert(k);
+      EXPECT_EQ(t.second, r.second);
+      EXPECT_EQ(*t.first, *r.first);
+    } else if (pct < 25) {
+      // emplace
+      auto t = t0.emplace(k);
+      auto r = r0.emplace(k);
+      EXPECT_EQ(t.second, r.second);
+      EXPECT_EQ(*t.first, *r.first);
+    } else if (pct < 30) {
+      // bulk insert
+      t0.insert(t1.begin(), t1.end());
+      r0.insert(r1.begin(), r1.end());
+    } else if (pct < 40) {
+      // erase by key
+      auto t = t0.erase(k);
+      auto r = r0.erase(k);
+      EXPECT_EQ(t, r);
+    } else if (pct < 50) {
+      // erase by iterator
+      if (t0.size() > 0) {
+        auto r = r0.find(k);
+        if (r == r0.end()) {
+          r = r0.begin();
+        }
+        k = *r;
+        auto t = t0.find(k);
+        t = t0.erase(t);
+        if (t != t0.end()) {
+          EXPECT_NE(*t, k);
+        }
+        r = r0.erase(r);
+        if (r != r0.end()) {
+          EXPECT_NE(*r, k);
+        }
+      }
+    } else if (pct < 58) {
+      // find
+      auto t = t0.find(k);
+      auto r = r0.find(k);
+      EXPECT_EQ((t == t0.end()), (r == r0.end()));
+      if (t != t0.end() && r != r0.end()) {
+        EXPECT_EQ(*t, *r);
+      }
+      EXPECT_EQ(t0.count(k), r0.count(k));
+    } else if (pct < 60) {
+      // equal_range
+      auto t = t0.equal_range(k);
+      auto r = r0.equal_range(k);
+      EXPECT_EQ((t.first == t.second), (r.first == r.second));
+      if (t.first != t.second && r.first != r.second) {
+        EXPECT_EQ(*t.first, *r.first);
+        t.first++;
+        r.first++;
+        EXPECT_TRUE(t.first == t.second);
+        EXPECT_TRUE(r.first == r.second);
+      }
+    } else if (pct < 65) {
+      // iterate
+      uint64_t t = 0;
+      for (auto& e : t0) {
+        t += e + 1000;
+      }
+      uint64_t r = 0;
+      for (auto& e : r0) {
+        r += e + 1000;
+      }
+      EXPECT_EQ(t, r);
+    } else if (pct < 69) {
+      // swap
+      using std::swap;
+      swap(t0, t1);
+      swap(r0, r1);
+    } else if (pct < 70) {
+      // swap
+      t0.swap(t1);
+      r0.swap(r1);
+    } else if (pct < 72) {
+      // default construct
+      t0.~T();
+      new (&t0) T();
+      r0.~R();
+      new (&r0) R();
+    } else if (pct < 74) {
+      // default construct with capacity
+      std::size_t capacity = k & 0xffff;
+      t0.~T();
+      new (&t0) T(capacity);
+      r0.~R();
+      new (&r0) R(capacity);
+    } else if (pct < 80) {
+      // bulk iterator construct
+      t0.~T();
+      new (&t0) T(r1.begin(), r1.end());
+      r0.~R();
+      new (&r0) R(r1.begin(), r1.end());
+    } else if (pct < 82) {
+      // initializer list construct
+      auto k2 = gen() >> discardBits;
+      t0.~T();
+      new (&t0) T({k, k, k2});
+      r0.~R();
+      new (&r0) R({k, k, k2});
+    } else if (pct < 88) {
+      // copy construct
+      t0.~T();
+      new (&t0) T(t1);
+      r0.~R();
+      new (&r0) R(r1);
+    } else if (pct < 90) {
+      // move construct
+      t0.~T();
+      new (&t0) T(std::move(t1));
+      r0.~R();
+      new (&r0) R(std::move(r1));
+    } else if (pct < 94) {
+      // copy assign
+      t0 = t1;
+      r0 = r1;
+    } else if (pct < 96) {
+      // move assign
+      t0 = std::move(t1);
+      r0 = std::move(r1);
+    } else if (pct < 98) {
+      // operator==
+      EXPECT_EQ((t0 == t1), (r0 == r1));
+    } else if (pct < 99) {
+      // clear
+      F14TableStats::compute(t0);
+      t0.clear();
+      r0.clear();
+    } else if (pct < 100) {
+      // reserve
+      auto scale = std::uniform_int_distribution<>(0, 8)(gen);
+      auto delta = std::uniform_int_distribution<>(-2, 2)(gen);
+      std::ptrdiff_t target = (t0.size() * scale) / 4 + delta;
+      if (target >= 0) {
+        t0.reserve(static_cast<std::size_t>(target));
+        r0.reserve(static_cast<std::size_t>(target));
+      }
+    }
+  }
+}
+
+TEST(F14ValueSet, simple) {
+  runSimple<F14ValueSet<std::string>>();
+}
+
+TEST(F14NodeSet, simple) {
+  runSimple<F14NodeSet<std::string>>();
+}
+
+TEST(F14VectorSet, simple) {
+  runSimple<F14VectorSet<std::string>>();
+}
+
+TEST(F14FastSet, simple) {
+  // F14FastSet is just a conditional typedef. Verify it compiles.
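+  // A sketch of the conditional-typedef idea (illustrative name and
+  // threshold, not the exact folly definition): the fast variant picks the
+  // value- or vector-storage implementation based on the element size,
+  //
+  //   template <typename K>
+  //   using FastSetSketch = std::conditional_t<
+  //       sizeof(K) <= 24,
+  //       F14ValueSet<K>,    // small keys: stored inline in chunks
+  //       F14VectorSet<K>>;  // large keys: stored in a contiguous array
+  //
+  // which is why both a uint64_t and a std::string instantiation follow.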
+  runRandom<F14FastSet<uint64_t>>();
+  runSimple<F14FastSet<std::string>>();
+}
+
+TEST(F14ValueSet, rehash) {
+  runRehash<F14ValueSet<std::string>>();
+}
+
+TEST(F14NodeSet, rehash) {
+  runRehash<F14NodeSet<std::string>>();
+}
+
+TEST(F14VectorSet, rehash) {
+  runRehash<F14VectorSet<std::string>>();
+}
+
+TEST(F14ValueSet, random) {
+  runRandom<F14ValueSet<uint64_t>>();
+}
+
+TEST(F14NodeSet, random) {
+  runRandom<F14NodeSet<uint64_t>>();
+}
+
+TEST(F14VectorSet, random) {
+  runRandom<F14VectorSet<uint64_t>>();
+}
+
+TEST(F14ValueSet, grow_stats) {
+  F14ValueSet<uint64_t> h;
+  for (unsigned i = 1; i <= 3072; ++i) {
+    h.insert(i);
+  }
+  LOG(INFO) << "F14ValueSet just before rehash -> "
+            << F14TableStats::compute(h);
+  h.insert(0);
+  LOG(INFO) << "F14ValueSet just after rehash -> " << F14TableStats::compute(h);
+}
+
+TEST(F14ValueSet, steady_state_stats) {
+  // 10k keys, 14% probability of insert, 86% chance of erase, so the
+  // table should converge to 1400 size without triggering the rehash
+  // that would occur at 1536.
+  F14ValueSet<uint64_t> h;
+  std::mt19937 gen(0);
+  std::uniform_int_distribution<> dist(0, 10000);
+  for (std::size_t i = 0; i < 100000; ++i) {
+    auto key = dist(gen);
+    if (dist(gen) < 1400) {
+      h.insert(key);
+    } else {
+      h.erase(key);
+    }
+    if (((i + 1) % 10000) == 0) {
+      auto stats = F14TableStats::compute(h);
+      // Verify that average miss probe length is bounded despite continued
+      // erase + reuse. p99 of the average across 10M random steps is 4.69,
+      // average is 2.96.
+      EXPECT_LT(f14::expectedProbe(stats.missProbeLengthHisto), 10.0);
+    }
+  }
+  LOG(INFO) << "F14ValueSet at steady state -> " << F14TableStats::compute(h);
+}
+
+TEST(F14ValueSet, vectorMaxSize) {
+  F14ValueSet<int> s;
+  EXPECT_EQ(s.max_size(), std::numeric_limits<std::size_t>::max() / sizeof(int));
+}
+
+TEST(F14NodeSet, vectorMaxSize) {
+  F14NodeSet<int> s;
+  EXPECT_EQ(s.max_size(), std::numeric_limits<std::size_t>::max() / sizeof(int));
+}
+
+TEST(F14VectorSet, vectorMaxSize) {
+  F14VectorSet<int> s;
+  EXPECT_EQ(s.max_size(), std::numeric_limits<uint32_t>::max());
+}
+
+template <typename S>
+void runMoveOnlyTest() {
+  S t0;
+  t0.emplace(10);
+  t0.insert(20);
+  S t1{std::move(t0)};
+  EXPECT_TRUE(t0.empty());
+  S t2;
+  EXPECT_TRUE(t2.empty());
+  t2 = std::move(t1);
+  EXPECT_EQ(t2.size(), 2);
+}
+
+TEST(F14ValueSet, moveOnly) {
+  runMoveOnlyTest<F14ValueSet<f14::MoveOnlyTestInt>>();
+}
+
+TEST(F14NodeSet, moveOnly) {
+  runMoveOnlyTest<F14NodeSet<f14::MoveOnlyTestInt>>();
+}
+
+TEST(F14VectorSet, moveOnly) {
+  runMoveOnlyTest<F14VectorSet<f14::MoveOnlyTestInt>>();
+}
+
+TEST(F14FastSet, moveOnly) {
+  runMoveOnlyTest<F14FastSet<f14::MoveOnlyTestInt>>();
+}
+
+TEST(F14ValueSet, heterogeneous) {
+  // note: std::string is implicitly convertible to but not from StringPiece
+  using Hasher = folly::transparent<folly::hasher<folly::StringPiece>>;
+  using KeyEqual = folly::transparent<std::equal_to<folly::StringPiece>>;
+
+  constexpr auto hello = "hello"_sp;
+  constexpr auto buddy = "buddy"_sp;
+  constexpr auto world = "world"_sp;
+
+  F14ValueSet<std::string, Hasher, KeyEqual> set;
+  set.emplace(hello.str());
+  set.emplace(world.str());
+
+  auto checks = [hello, buddy](auto& ref) {
+    // count
+    EXPECT_EQ(0, ref.count(buddy));
+    EXPECT_EQ(1, ref.count(hello));
+
+    // find
+    EXPECT_TRUE(ref.end() == ref.find(buddy));
+    EXPECT_EQ(hello, *ref.find(hello));
+
+    // prehash + find
+    EXPECT_TRUE(ref.end() == ref.find(ref.prehash(buddy), buddy));
+    EXPECT_EQ(hello, *ref.find(ref.prehash(hello), hello));
+
+    // equal_range
+    EXPECT_TRUE(std::make_pair(ref.end(), ref.end()) == ref.equal_range(buddy));
+    EXPECT_TRUE(
+        std::make_pair(ref.find(hello), ++ref.find(hello)) ==
+        ref.equal_range(hello));
+  };
+
+  checks(set);
+  checks(folly::as_const(set));
+}
+
+///////////////////////////////////
+#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
+///////////////////////////////////
diff --git a/folly/container/test/F14TestUtil.h b/folly/container/test/F14TestUtil.h
new file mode 100644
index 00000000000..f389a32f2df
--- /dev/null
+++ b/folly/container/test/F14TestUtil.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <ostream>
+#include <vector>
+
+#include <folly/Demangle.h>
+#include <folly/Portability.h>
+#include <folly/container/detail/F14Table.h>
+
+namespace folly {
+namespace f14 {
+
+struct Histo {
+  std::vector<std::size_t> const& data;
+};
+
+std::ostream& operator<<(std::ostream& xo, Histo const& histo) {
+  xo << "[";
+  size_t sum = 0;
+  for (auto v : histo.data) {
+    sum += v;
+  }
+  size_t partial = 0;
+  for (size_t i = 0; i < histo.data.size(); ++i) {
+    if (i > 0) {
+      xo << ", ";
+    }
+    partial += histo.data[i];
+    if (histo.data[i] > 0) {
+      xo << i << ": " << histo.data[i] << " (" << (partial * 100.0 / sum)
+         << "%)";
+    }
+  }
+  xo << "]";
+  return xo;
+}
+
+void accumulate(
+    std::vector<std::size_t>& a,
+    std::vector<std::size_t> const& d) {
+  if (a.size() < d.size()) {
+    a.resize(d.size());
+  }
+  for (std::size_t i = 0; i < d.size(); ++i) {
+    a[i] += d[i];
+  }
+}
+
+double expectedProbe(std::vector<std::size_t> const& probeLengths) {
+  std::size_t sum = 0;
+  std::size_t count = 0;
+  for (std::size_t i = 1; i < probeLengths.size(); ++i) {
+    sum += i * probeLengths[i];
+    count += probeLengths[i];
+  }
+  return static_cast<double>(sum) / count;
+}
+
+// Returns i such that probeLengths elements 0 to i (inclusive) account
+// for at least 99% of the samples.
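+// For example (a worked case added here, not from the original source):
+// with probeLengths = {0, 98, 1, 1} there are 100 samples. The single
+// trailing sample can be discarded while still covering 99% of the
+// samples, but discarding the last two buckets would leave only 98%,
+// so the loop below stops after dropping just that final bucket and
+// returns 3.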
+std::size_t p99Probe(std::vector<std::size_t> const& probeLengths) {
+  std::size_t count = 0;
+  for (std::size_t i = 1; i < probeLengths.size(); ++i) {
+    count += probeLengths[i];
+  }
+  std::size_t rv = probeLengths.size();
+  std::size_t suffix = 0;
+  while ((suffix + probeLengths[rv - 1]) * 100 <= count) {
+    suffix += probeLengths[rv - 1];
+    --rv;
+  }
+  return rv;
+}
+
+struct MoveOnlyTestInt {
+  int x;
+
+  MoveOnlyTestInt() noexcept : x(0) {}
+  /* implicit */ MoveOnlyTestInt(int x0) : x(x0) {}
+  MoveOnlyTestInt(MoveOnlyTestInt&& rhs) noexcept : x(rhs.x) {}
+  MoveOnlyTestInt(MoveOnlyTestInt const&) = delete;
+  MoveOnlyTestInt& operator=(MoveOnlyTestInt&& rhs) noexcept {
+    x = rhs.x;
+    return *this;
+  }
+  MoveOnlyTestInt& operator=(MoveOnlyTestInt const&) = delete;
+
+  bool operator==(MoveOnlyTestInt const& rhs) const {
+    return x == rhs.x;
+  }
+  bool operator!=(MoveOnlyTestInt const& rhs) const {
+    return !(*this == rhs);
+  }
+};
+
+} // namespace f14
+
+std::ostream& operator<<(std::ostream& xo, F14TableStats const& stats) {
+  using f14::Histo;
+
+  xo << "{ " << std::endl;
+  xo << "  policy: " << folly::demangle(stats.policy) << std::endl;
+  xo << "  size: " << stats.size << std::endl;
+  xo << "  valueSize: " << stats.valueSize << std::endl;
+  xo << "  bucketCount: " << stats.bucketCount << std::endl;
+  xo << "  chunkCount: " << stats.chunkCount << std::endl;
+  xo << "  chunkOccupancyHisto" << Histo{stats.chunkOccupancyHisto}
+     << std::endl;
+  xo << "  chunkOutboundOverflowHisto"
+     << Histo{stats.chunkOutboundOverflowHisto} << std::endl;
+  xo << "  chunkHostedOverflowHisto" << Histo{stats.chunkHostedOverflowHisto}
+     << std::endl;
+  xo << "  keyProbeLengthHisto" << Histo{stats.keyProbeLengthHisto}
+     << std::endl;
+  xo << "  missProbeLengthHisto" << Histo{stats.missProbeLengthHisto}
+     << std::endl;
+  xo << "  totalBytes: " << stats.totalBytes << std::endl;
+  xo << "  valueBytes: " << (stats.size * stats.valueSize) << std::endl;
+  xo << "  overheadBytes: " << stats.overheadBytes << std::endl;
+  if (stats.size > 0) {
+    xo << "  overheadBytesPerKey: " << (stats.overheadBytes * 1.0 / stats.size)
+       << std::endl;
+  }
+  xo << "}";
+  return xo;
+}
+
+} // namespace folly
+
+namespace std {
+template <>
+struct hash<folly::f14::MoveOnlyTestInt> {
+  std::size_t operator()(folly::f14::MoveOnlyTestInt const& val) const {
+    return val.x;
+  }
+};
+} // namespace std
diff --git a/folly/detail/MemoryIdler.h b/folly/detail/MemoryIdler.h
index 14018f6e006..802e3d7b74d 100644
--- a/folly/detail/MemoryIdler.h
+++ b/folly/detail/MemoryIdler.h
@@ -188,11 +188,8 @@ struct MemoryIdler {
     if (idleTimeout > IdleTime::zero()) {
       auto idleDeadline = Deadline::clock::now() + idleTimeout;
       if (idleDeadline < deadline) {
-        while (true) {
-          auto rv = fut.futexWaitUntil(expected, idleDeadline, waitMask);
-          if (rv == FutexResult::TIMEDOUT) {
-            break;
-          }
+        auto rv = fut.futexWaitUntil(expected, idleDeadline, waitMask);
+        if (rv != FutexResult::TIMEDOUT) {
           // finished before timeout hit, no flush
           _ret = rv;
           return true;
diff --git a/folly/detail/ThreadLocalDetail.h b/folly/detail/ThreadLocalDetail.h
index 04c85b54afa..709135f8e81 100644
--- a/folly/detail/ThreadLocalDetail.h
+++ b/folly/detail/ThreadLocalDetail.h
@@ -28,7 +28,6 @@
 #include
 #include
-#include <folly/portability/BitsFunctexcept.h>
 #include
 #include
 #include
@@ -36,6 +35,7 @@
 #include
 #include
 #include
+#include <folly/lang/Exception.h>
 #include
diff --git a/folly/dynamic.cpp b/folly/dynamic.cpp
index 3028eec741f..3fc79fb0d00 100644
--- a/folly/dynamic.cpp
+++ b/folly/dynamic.cpp
@@ -14,12 +14,14 @@
  * limitations under the License.
 */
+#include <numeric>
+
 #include
 #include
 #include
 #include
-#include <folly/portability/BitsFunctexcept.h>
+#include <folly/lang/Exception.h>
 
 namespace folly {
 
@@ -246,7 +248,7 @@ const dynamic* dynamic::get_ptr(dynamic const& idx) const& {
 [[noreturn]] static void throwOutOfRangeAtMissingKey(dynamic const& idx) {
   auto msg = sformat("couldn't find key {} in dynamic object", idx.asString());
-  std::__throw_out_of_range(msg.c_str());
+  throw_exception<std::out_of_range>(msg);
 }
 
 dynamic const& dynamic::at(dynamic const& idx) const& {
@@ -255,7 +257,7 @@ dynamic const& dynamic::at(dynamic const& idx) const& {
     throwTypeError_("int64", idx.type());
   }
   if (idx < 0 || idx >= parray->size()) {
-    std::__throw_out_of_range("out of range in dynamic array");
+    throw_exception<std::out_of_range>("out of range in dynamic array");
   }
   return (*parray)[size_t(idx.asInt())];
 } else if (auto* pobject = get_nothrow<ObjectImpl>()) {
@@ -291,10 +293,22 @@ dynamic::iterator dynamic::erase(const_iterator first, const_iterator last) {
 
 std::size_t dynamic::hash() const {
   switch (type()) {
+    case NULLT:
+      return 0xBAAAAAAD;
     case OBJECT:
+    {
+      // Accumulate using addition instead of using hash_range (as in the ARRAY
+      // case), as we need a commutative hash operation since unordered_map's
+      // iteration order is unspecified.
+      auto h = std::hash<std::pair<dynamic, dynamic>>{};
+      return std::accumulate(
+          items().begin(),
+          items().end(),
+          size_t{0x0B1EC7},
+          [&](auto acc, auto item) { return acc + h(item); });
+    }
     case ARRAY:
-    case NULLT:
-      throwTypeError_("not null/object/array", type());
+      return folly::hash::hash_range(begin(), end());
     case INT64:
      return std::hash<int64_t>()(getInt());
    case DOUBLE:
diff --git a/folly/dynamic.h b/folly/dynamic.h
index 5a0b8901905..0c344e7cbfe 100644
--- a/folly/dynamic.h
+++ b/folly/dynamic.h
@@ -403,14 +403,16 @@ struct dynamic : private boost::operators<dynamic> {
   /*
    * This works for access to both objects and arrays.
    *
-   * In the case of an array, the index must be an integer, and this will throw
-   * std::out_of_range if it is less than zero or greater than size().
+   * In the case of an array, the index must be an integer, and this
+   * will throw std::out_of_range if it is less than zero or greater
+   * than size().
    *
    * In the case of an object, the non-const overload inserts a null
    * value if the key isn't present.  The const overload will throw
    * std::out_of_range if the key is not present.
    *
-   * These functions do not invalidate iterators.
+   * These functions do not invalidate iterators except when a null value
+   * is inserted into an object as described above.
    */
   dynamic& operator[](dynamic const&) &;
   dynamic const& operator[](dynamic const&) const&;
diff --git a/folly/executors/ManualExecutor.h b/folly/executors/ManualExecutor.h
index 2163300757b..61a5f81ab1b 100644
--- a/folly/executors/ManualExecutor.h
+++ b/folly/executors/ManualExecutor.h
@@ -23,6 +23,7 @@
 #include
 #include
+#include <folly/executors/SequencedExecutor.h>
 #include
 
 namespace folly {
 
@@ -35,7 +36,8 @@ namespace folly {
 /// NB No attempt has been made to make anything other than add and schedule
 /// threadsafe.
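/// (An added note on the change below: a SequencedExecutor promises that
/// when add(A) happens-before add(B), A executes before B. ManualExecutor
/// can make that promise because run()/drive() pop its queue FIFO on the
/// driving thread. A minimal usage sketch, with hypothetical task names:
///
///   ManualExecutor ex;
///   ex.add([] { stepOne(); });
///   ex.add([] { stepTwo(); });  // guaranteed to run after stepOne
///   ex.run();
/// )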
 class ManualExecutor : public DrivableExecutor,
-                       public ScheduledExecutor {
+                       public ScheduledExecutor,
+                       public SequencedExecutor {
  public:
   void add(Func) override;
diff --git a/folly/executors/NotificationQueueExecutor.h b/folly/executors/NotificationQueueExecutor.h
index 1ea35697824..725a09cccc6 100644
--- a/folly/executors/NotificationQueueExecutor.h
+++ b/folly/executors/NotificationQueueExecutor.h
@@ -18,11 +18,13 @@
 #include
 #include
 #include
+#include <folly/executors/SequencedExecutor.h>
 #include
 
 namespace folly {
 
-class NotificationQueueExecutor : public folly::DrivableExecutor {
+class NotificationQueueExecutor : public DrivableExecutor,
+                                  public SequencedExecutor {
  public:
   using Func = folly::Func;
diff --git a/folly/executors/ScheduledExecutor.h b/folly/executors/ScheduledExecutor.h
index 634e6a3b004..bc268fb737e 100644
--- a/folly/executors/ScheduledExecutor.h
+++ b/folly/executors/ScheduledExecutor.h
@@ -21,7 +21,7 @@
 #include
 #include
-#include <folly/portability/BitsFunctexcept.h>
+#include <folly/lang/Exception.h>
 
 namespace folly {
   // An executor that supports timed scheduling. Like RxScheduler.
@@ -48,7 +48,7 @@ namespace folly {
   /// Schedule a Func to be executed at time t, or as soon afterward as
   /// possible. Expect millisecond resolution at best. Must be threadsafe.
   virtual void scheduleAt(Func&& /* a */, TimePoint const& /* t */) {
-    std::__throw_logic_error("unimplemented");
+    throw_exception<std::logic_error>("unimplemented");
   }
 
   /// Get this executor's notion of time. Must be threadsafe.
diff --git a/folly/portability/BitsFunctexcept.h b/folly/executors/SequencedExecutor.h
similarity index 53%
rename from folly/portability/BitsFunctexcept.h
rename to folly/executors/SequencedExecutor.h
index 188c41cbbcb..ce7e7de0fde 100644
--- a/folly/portability/BitsFunctexcept.h
+++ b/folly/executors/SequencedExecutor.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2013-present Facebook, Inc.
+ * Copyright 2018-present Facebook, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,28 +16,16 @@
 
 #pragma once
 
-#include <folly/portability/Config.h>
+#include <folly/Executor.h>
 
-#include
-#include
+namespace folly {
 
-#if FOLLY_HAVE_BITS_FUNCTEXCEPT_H
+// A SequencedExecutor is an executor that provides the following guarantee:
+// if add(A) and add(B) were sequenced then execution of A and B will be
+// sequenced too.
+class SequencedExecutor : public virtual Executor {
+ public:
+  ~SequencedExecutor() override = default;
+};
 
-#include <bits/functexcept.h>
-
-#else
-
-namespace std {
-
-#if _LIBCPP_VERSION < 4000
-[[noreturn]] void __throw_length_error(char const* msg);
-[[noreturn]] void __throw_logic_error(char const* msg);
-[[noreturn]] void __throw_out_of_range(char const* msg);
-#endif
-
-#if _CPPLIB_VER // msvc c++ std lib
-[[noreturn]] void __throw_bad_alloc();
-#endif
-}
-
-#endif
+} // namespace folly
diff --git a/folly/executors/SerialExecutor.h b/folly/executors/SerialExecutor.h
index 71addadcfb2..fa0e5219c9b 100644
--- a/folly/executors/SerialExecutor.h
+++ b/folly/executors/SerialExecutor.h
@@ -18,8 +18,8 @@
 #include
 
-#include
 #include
+#include <folly/executors/SequencedExecutor.h>
 
 namespace folly {
 
@@ -46,7 +46,7 @@ namespace folly {
  * parent executor is executing tasks.
  */
-class SerialExecutor : public folly::Executor {
+class SerialExecutor : public SequencedExecutor {
  public:
   ~SerialExecutor() override = default;
   SerialExecutor(SerialExecutor const&) = delete;
diff --git a/folly/experimental/JemallocNodumpAllocator.cpp b/folly/experimental/JemallocNodumpAllocator.cpp
index 7ad2870d6c8..90b733f2de0 100644
--- a/folly/experimental/JemallocNodumpAllocator.cpp
+++ b/folly/experimental/JemallocNodumpAllocator.cpp
@@ -151,7 +151,7 @@ void* JemallocNodumpAllocator::alloc(
 
 #endif // FOLLY_JEMALLOC_NODUMP_ALLOCATOR_SUPPORTED
 
-void JemallocNodumpAllocator::deallocate(void* p) {
+void JemallocNodumpAllocator::deallocate(void* p, size_t) {
   dallocx != nullptr ? dallocx(p, flags_) : free(p);
 }
 
diff --git a/folly/experimental/JemallocNodumpAllocator.h b/folly/experimental/JemallocNodumpAllocator.h
index 688bbf83665..8dd242f531b 100644
--- a/folly/experimental/JemallocNodumpAllocator.h
+++ b/folly/experimental/JemallocNodumpAllocator.h
@@ -80,7 +80,7 @@ class JemallocNodumpAllocator {
   void* allocate(size_t size);
   void* reallocate(void* p, size_t size);
-  void deallocate(void* p);
+  void deallocate(void* p, size_t = 0);
 
   unsigned getArenaIndex() const { return arena_index_; }
   int getFlags() const { return flags_; }
diff --git a/folly/experimental/coro/AwaitWrapper.h b/folly/experimental/coro/AwaitWrapper.h
new file mode 100644
index 00000000000..74240354980
--- /dev/null
+++ b/folly/experimental/coro/AwaitWrapper.h
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <experimental/coroutine>
+
+#include <folly/ExceptionString.h>
+#include <folly/Executor.h>
+
+namespace folly {
+namespace coro {
+
+template <typename Awaitable>
+class AwaitWrapper {
+ public:
+  struct promise_type {
+    std::experimental::suspend_always initial_suspend() {
+      return {};
+    }
+
+    std::experimental::suspend_never final_suspend() {
+      executor_->add(awaiter_);
+      awaitWrapper_->promise_ = nullptr;
+      return {};
+    }
+
+    void return_void() {}
+
+    void unhandled_exception() {
+      LOG(FATAL) << "Failed to schedule a task to awake a coroutine: "
+                 << exceptionStr(std::current_exception());
+    }
+
+    AwaitWrapper get_return_object() {
+      return {*this};
+    }
+
+    Executor* executor_;
+    std::experimental::coroutine_handle<> awaiter_;
+    AwaitWrapper* awaitWrapper_{nullptr};
+  };
+
+  static AwaitWrapper create(Awaitable* awaitable) {
+    return {awaitable};
+  }
+
+  static AwaitWrapper create(Awaitable* awaitable, Executor* executor) {
+    auto ret = awaitWrapper();
+    ret.awaitable_ = awaitable;
+    ret.promise_->executor_ = executor;
+    return ret;
+  }
+
+  bool await_ready() {
+    return awaitable_->await_ready();
+  }
+
+  using await_suspend_return_type =
+      decltype((*static_cast<Awaitable*>(nullptr))
+                   .await_suspend(std::experimental::coroutine_handle<>()));
+
+  await_suspend_return_type await_suspend(
+      std::experimental::coroutine_handle<> awaiter) {
+    if (promise_) {
+      promise_->awaiter_ = std::move(awaiter);
+      return awaitable_->await_suspend(
+          std::experimental::coroutine_handle<promise_type>::from_promise(
+              *promise_));
+    }
+
+    return awaitable_->await_suspend(awaiter);
+  }
+
+  decltype((*static_cast<Awaitable*>(nullptr)).await_resume()) await_resume() {
+    return awaitable_->await_resume();
+  }
+
+  ~AwaitWrapper() {
+    if (promise_) {
+      // This happens if await_ready() returns true or await_suspend() returns
+      // false.
+      std::experimental::coroutine_handle<promise_type>::from_promise(
+          *promise_)
+          .destroy();
+    }
+  }
+
+ private:
+  AwaitWrapper(Awaitable* awaitable) : awaitable_(awaitable) {}
+  AwaitWrapper(promise_type& promise) : promise_(&promise) {
+    promise.awaitWrapper_ = this;
+  }
+
+  static AwaitWrapper awaitWrapper() {
+    co_return;
+  }
+
+  promise_type* promise_{nullptr};
+  Awaitable* awaitable_{nullptr};
+};
+} // namespace coro
+} // namespace folly
diff --git a/folly/experimental/coro/Future.h b/folly/experimental/coro/Future.h
new file mode 100644
index 00000000000..87841dd3a4b
--- /dev/null
+++ b/folly/experimental/coro/Future.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <experimental/coroutine>
+
+#include <folly/Try.h>
+#include <folly/experimental/coro/Promise.h>
+#include <folly/experimental/coro/Wait.h>
+#include <glog/logging.h>
+
+namespace folly {
+namespace coro {
+
+/*
+ * Future object attached to a running coroutine. Implements the await_* APIs.
+ */
+template <typename T>
+class Future {
+ public:
+  Future(const Future&) = delete;
+  Future(Future&& other) : promise_(other.promise_) {
+    other.promise_ = nullptr;
+  }
+
+  Wait wait() {
+    co_await *this;
+    co_return;
+  }
+
+  typename std::add_lvalue_reference<T>::type get() {
+    DCHECK(promise_->state_ == Promise<T>::State::HAS_RESULT);
+    return *promise_->result_;
+  }
+
+  bool await_ready() {
+    return promise_->state_.load(std::memory_order_acquire) ==
+        Promise<T>::State::HAS_RESULT;
+  }
+
+  bool await_suspend(std::experimental::coroutine_handle<> awaiter) {
+    auto state = promise_->state_.load(std::memory_order_acquire);
+
+    if (state == Promise<T>::State::HAS_RESULT) {
+      return false;
+    }
+    DCHECK(state == Promise<T>::State::EMPTY);
+
+    promise_->awaiter_ = std::move(awaiter);
+
+    if (promise_->state_.compare_exchange_strong(
+            state,
+            Promise<T>::State::HAS_AWAITER,
+            std::memory_order_release,
+            std::memory_order_acquire)) {
+      return true;
+    }
+
+    DCHECK(promise_->state_ == Promise<T>::State::HAS_RESULT);
+    return false;
+  }
+
+  typename std::add_lvalue_reference<T>::type await_resume() {
+    return get();
+  }
+
+  ~Future() {
+    if (!promise_) {
+      return;
+    }
+
+    auto state = promise_->state_.load(std::memory_order_acquire);
+
+    do {
+      DCHECK(state != Promise<T>::State::DETACHED);
+      DCHECK(state != Promise<T>::State::HAS_AWAITER);
+
+      if (state == Promise<T>::State::HAS_RESULT) {
+        auto ch =
+            std::experimental::coroutine_handle<Promise<T>>::from_promise(
+                *promise_);
+        DCHECK(ch.done());
+        ch.destroy();
+        return;
+      }
+      DCHECK(state == Promise<T>::State::EMPTY);
+    } while (!promise_->state_.compare_exchange_weak(
+        state,
+        Promise<T>::State::DETACHED,
+        std::memory_order::memory_order_release,
+        std::memory_order::memory_order_acquire));
+  }
+
+ private:
+  friend class Task<T>;
+  template <typename U>
+  friend class Promise;
+
+  Future(Promise<T>& promise) : promise_(&promise) {}
+
+  Promise<T>* promise_;
+};
+} // namespace coro
+} // namespace folly
diff --git a/folly/experimental/coro/Promise.h b/folly/experimental/coro/Promise.h
new file mode 100644
index 00000000000..42f90e95060
--- /dev/null
+++ b/folly/experimental/coro/Promise.h
@@ -0,0 +1,203 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <experimental/coroutine>
+
+#include <folly/ExceptionWrapper.h>
+#include <folly/Executor.h>
+#include <folly/Try.h>
+#include <folly/experimental/coro/AwaitWrapper.h>
+#include <folly/experimental/coro/Utils.h>
+#include <folly/futures/Future.h>
+
+namespace folly {
+namespace coro {
+
+enum class PromiseState {
+  // Coroutine hasn't started
+  EMPTY,
+  // Coroutine is running, but Future object managing this coroutine was
+  // destroyed
+  DETACHED,
+  // Some other coroutine is waiting on this coroutine to be complete
+  HAS_AWAITER,
+  // Coroutine is finished, result is stored inside Promise
+  HAS_RESULT
+};
+
+template <typename T>
+class Future;
+
+template <typename T>
+class PromiseBase {
+ public:
+  template <typename U>
+  void return_value(U&& value) {
+    result_ = Try<T>(std::forward<U>(value));
+  }
+
+ protected:
+  folly::Try<T> result_;
+};
+
+template <>
+class PromiseBase<void> {
+ public:
+  void return_void() {}
+
+ protected:
+  folly::Try<void> result_;
+};
+
+template <typename T>
+class Promise : public PromiseBase<T> {
+ public:
+  using State = PromiseState;
+
+  Promise() {}
+
+  ~Promise() {}
+
+  Task<T> get_return_object() {
+    return {*this};
+  }
+
+  std::experimental::suspend_always initial_suspend() {
+    return {};
+  }
+
+  template <typename U>
+  auto await_transform(Task<U>&& task) {
+    return std::move(task).viaInline(executor_);
+  }
+
+  template <typename U>
+  auto await_transform(folly::SemiFuture<U>& future) {
+    return folly::detail::FutureAwaitable<U>(future.via(executor_));
+  }
+
+  template <typename U>
+  auto await_transform(folly::SemiFuture<U>&& future) {
+    return folly::detail::FutureAwaitable<U>(future.via(executor_));
+  }
+
+  template <typename U>
+  auto await_transform(folly::Future<U>& future) {
+    future = future.via(executor_);
+    return folly::detail::FutureRefAwaitable<U>(future);
+  }
+
+  template <typename U>
+  auto await_transform(folly::Future<U>&& future) {
+    future = future.via(executor_);
+    return folly::detail::FutureRefAwaitable<U>(future);
+  }
+
+  template <typename U>
+  AwaitWrapper<Future<U>> await_transform(Future<U>& future) {
+    if (future.promise_->executor_ == executor_) {
+      return AwaitWrapper<Future<U>>::create(&future);
+    }
+
+    return AwaitWrapper<Future<U>>::create(&future, executor_);
+  }
+
+  template <typename U>
+  AwaitWrapper<Future<U>> await_transform(Future<U>&& future) {
+    if (future.promise_->executor_ == executor_) {
+      return AwaitWrapper<Future<U>>::create(&future);
+    }
+
+    return AwaitWrapper<Future<U>>::create(&future, executor_);
+  }
+
+  template <typename U>
+  AwaitWrapper<U> await_transform(U&& awaitable) {
+    return AwaitWrapper<U>::create(&awaitable, executor_);
+  }
+
+  auto await_transform(getCurrentExecutor) {
+    return AwaitableReady<Executor*>(executor_);
+  }
+
+  class FinalSuspender;
+
+  FinalSuspender final_suspend() {
+    return {*this};
+  }
+
+  void unhandled_exception() {
+    this->result_ = Try<T>(
+        exception_wrapper::from_exception_ptr(std::current_exception()));
+  }
+
+  void start() {
+    std::experimental::coroutine_handle<Promise>::from_promise(*this)();
+  }
+
+ private:
+  friend class Future<T>;
+  friend class Task<T>;
+  template <typename U>
+  friend class Promise;
+
+  std::atomic<State> state_{State::EMPTY};
+
+  std::experimental::coroutine_handle<> awaiter_;
+
+  Executor* executor_{nullptr};
+};
+
+template <typename T>
+class Promise<T>::FinalSuspender {
+ public:
+  bool await_ready() {
+    return promise_.state_.load(std::memory_order_acquire) == State::DETACHED;
+  }
+
+  bool await_suspend(std::experimental::coroutine_handle<>) {
+    auto state = promise_.state_.load(std::memory_order_acquire);
+
+    do {
+      if (state == State::DETACHED) {
+        return false;
+      }
+      DCHECK(state != State::HAS_RESULT);
+    } while (!promise_.state_.compare_exchange_weak(
+        state,
+        State::HAS_RESULT,
+        std::memory_order_release,
+        std::memory_order_acquire));
+
+    if (state == State::HAS_AWAITER) {
+      promise_.awaiter_.resume();
+    }
+
+    return true;
+  }
+
+  void await_resume() {}
+
+ private:
+  friend class Promise;
+
+  FinalSuspender(Promise& promise) : promise_(promise) {}
+
+  Promise& promise_;
+};
+
+} // namespace coro
+} // namespace folly
diff --git a/folly/experimental/coro/Task.h b/folly/experimental/coro/Task.h
new file mode 100644
index 00000000000..111111581ea
--- /dev/null
+++ b/folly/experimental/coro/Task.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <experimental/coroutine>
+
+#include <folly/experimental/coro/Promise.h>
+
+namespace folly {
+namespace coro {
+
+template <typename T>
+class Promise;
+
+template <typename T>
+class Future;
+
+/*
+ * Represents an allocated but not-yet-started coroutine, which is not yet
+ * assigned to any executor.
+ */
+template <typename T>
+class Task {
+ public:
+  using promise_type = Promise<T>;
+
+  Task(const Task&) = delete;
+  Task(Task&& other) : promise_(other.promise_) {
+    other.promise_ = nullptr;
+  }
+
+  ~Task() {
+    DCHECK(!promise_);
+  }
+
+  Future<T> scheduleVia(folly::Executor* executor) && {
+    promise_->executor_ = executor;
+    promise_->executor_->add([promise = promise_] { promise->start(); });
+    return {*std::exchange(promise_, nullptr)};
+  }
+
+ private:
+  template <typename U>
+  friend class Promise;
+
+  Future<T> viaInline(folly::Executor* executor) && {
+    promise_->executor_ = executor;
+    promise_->start();
+    return {*std::exchange(promise_, nullptr)};
+  }
+
+  Task(promise_type& promise) : promise_(&promise) {}
+
+  Promise<T>* promise_;
+};
+
+} // namespace coro
+
+template <typename T>
+coro::Future<T> via(folly::Executor* executor, coro::Task<T>&& task) {
+  return std::move(task).scheduleVia(executor);
+}
+
+} // namespace folly
diff --git a/folly/experimental/coro/Utils.h b/folly/experimental/coro/Utils.h
new file mode 100644
index 00000000000..231c842076b
--- /dev/null
+++ b/folly/experimental/coro/Utils.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <experimental/coroutine>
+#include <utility>
+
+namespace folly {
+namespace coro {
+
+template <typename T>
+class AwaitableReady {
+ public:
+  explicit AwaitableReady(T value) : value_(std::move(value)) {}
+
+  bool await_ready() {
+    return true;
+  }
+
+  bool await_suspend(std::experimental::coroutine_handle<>) {
+    return false;
+  }
+
+  T await_resume() {
+    return std::move(value_);
+  }
+
+ private:
+  T value_;
+};
+
+struct getCurrentExecutor {};
+
+struct yield {
+  bool await_ready() {
+    return false;
+  }
+
+  void await_suspend(std::experimental::coroutine_handle<> ch) {
+    ch();
+  }
+
+  void await_resume() {}
+};
+
+} // namespace coro
+} // namespace folly
diff --git a/folly/experimental/coro/Wait.h b/folly/experimental/coro/Wait.h
new file mode 100644
index 00000000000..ac09f407afd
--- /dev/null
+++ b/folly/experimental/coro/Wait.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <experimental/coroutine>
+#include <future>
+
+namespace folly {
+namespace coro {
+
+class Wait {
+ public:
+  class promise_type {
+   public:
+    Wait get_return_object() {
+      return Wait(promise_.get_future());
+    }
+
+    std::experimental::suspend_never initial_suspend() {
+      return {};
+    }
+
+    std::experimental::suspend_never final_suspend() {
+      return {};
+    }
+
+    void return_void() {
+      promise_.set_value();
+    }
+
+    void unhandled_exception() {
+      promise_.set_exception(std::current_exception());
+    }
+
+   private:
+    std::promise<void> promise_;
+  };
+
+  explicit Wait(std::future<void> future) : future_(std::move(future)) {}
+
+  Wait(Wait&&) = default;
+
+  ~Wait() {
+    if (future_.valid()) {
+      future_.get();
+    }
+  }
+
+ private:
+  std::future<void> future_;
+};
+} // namespace coro
+} // namespace folly
diff --git a/folly/experimental/coro/tests/CoroTest.cpp b/folly/experimental/coro/tests/CoroTest.cpp
new file mode 100644
index 00000000000..b6ef018e9b1
--- /dev/null
+++ b/folly/experimental/coro/tests/CoroTest.cpp
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#include +#include +#include +#include + +using namespace folly; + +coro::Task<int> task42() { + co_return 42; +} + +TEST(Coro, Basic) { + ManualExecutor executor; + auto future = via(&executor, task42()); + + EXPECT_FALSE(future.await_ready()); + + executor.drive(); + + EXPECT_TRUE(future.await_ready()); + EXPECT_EQ(42, future.get()); +} + +coro::Task<void> taskVoid() { + co_await task42(); + co_return; +} + +TEST(Coro, Basic2) { + ManualExecutor executor; + auto future = via(&executor, taskVoid()); + + EXPECT_FALSE(future.await_ready()); + + executor.drive(); + + EXPECT_TRUE(future.await_ready()); +} + +coro::Task<void> taskSleep() { + co_await futures::sleep(std::chrono::seconds{1}); + co_return; +} + +TEST(Coro, Sleep) { + ScopedEventBaseThread evbThread; + + auto startTime = std::chrono::steady_clock::now(); + auto future = via(evbThread.getEventBase(), taskSleep()); + + EXPECT_FALSE(future.await_ready()); + + future.wait(); + + EXPECT_GE( + std::chrono::steady_clock::now() - startTime, std::chrono::seconds{1}); + EXPECT_TRUE(future.await_ready()); +} + +coro::Task<void> taskException() { + throw std::runtime_error("Test exception"); + co_return; +} + +TEST(Coro, Throw) { + ManualExecutor executor; + auto future = via(&executor, taskException()); + + EXPECT_FALSE(future.await_ready()); + + executor.drive(); + + EXPECT_TRUE(future.await_ready()); + EXPECT_THROW(future.get(), std::runtime_error); +} + +coro::Task<int> taskRecursion(int depth) { + if (depth > 0) { + EXPECT_EQ(depth - 1, co_await taskRecursion(depth - 1)); + } else { + co_await futures::sleep(std::chrono::seconds{1}); + } + + co_return depth; +} + +TEST(Coro, LargeStack) { + ScopedEventBaseThread evbThread; + auto future = via(evbThread.getEventBase(), taskRecursion(10000)); + + future.wait(); + EXPECT_EQ(10000, future.get()); +} + +coro::Task<void> taskThreadNested(std::thread::id threadId) { + EXPECT_EQ(threadId, std::this_thread::get_id()); + co_await futures::sleep(std::chrono::seconds{1}); + EXPECT_EQ(threadId, std::this_thread::get_id()); + co_return; +} + +coro::Task<int> taskThread() { + auto threadId = std::this_thread::get_id(); + + folly::ScopedEventBaseThread evbThread; + co_await via( + evbThread.getEventBase(), taskThreadNested(evbThread.getThreadId())); + + EXPECT_EQ(threadId, std::this_thread::get_id()); + + co_return 42; +} + +TEST(Coro, NestedThreads) { + ScopedEventBaseThread evbThread; + auto future = via(evbThread.getEventBase(), taskThread()); + + future.wait(); + EXPECT_EQ(42, future.get()); +} + +coro::Task<int> taskYield(Executor* executor) { + auto currentExecutor = co_await coro::getCurrentExecutor(); + EXPECT_EQ(executor, currentExecutor); + + auto future = via(currentExecutor, task42()); + EXPECT_FALSE(future.await_ready()); + + co_await coro::yield(); + + EXPECT_TRUE(future.await_ready()); + co_return future.get(); +} + +TEST(Coro, CurrentExecutor) { + ScopedEventBaseThread evbThread; + auto future = + via(evbThread.getEventBase(), taskYield(evbThread.getEventBase())); + + future.wait(); + EXPECT_EQ(42, future.get()); +} diff --git a/folly/experimental/exception_tracer/ExceptionCounterLib.cpp b/folly/experimental/exception_tracer/ExceptionCounterLib.cpp index b4090b1adb5..ffb3abaaebb 100644 --- a/folly/experimental/exception_tracer/ExceptionCounterLib.cpp +++ b/folly/experimental/exception_tracer/ExceptionCounterLib.cpp @@ -119,7 +119,7 @@ void throwHandler(void*, std::type_info* exType, void (*)(void*)) noexcept { auto exceptionId = folly::hash::SpookyHashV2::Hash64(frames, (n + 1) * sizeof(frames[0]), 0); - SYNCHRONIZED(holder,
gExceptionStats->statsHolder) { + gExceptionStats->statsHolder.withWLock([&](auto& holder) { auto it = holder.find(exceptionId); if (it != holder.end()) { ++it->second.count; @@ -129,7 +129,7 @@ void throwHandler(void*, std::type_info* exType, void (*)(void*)) noexcept { info.frames.assign(frames + 1, frames + 1 + n); holder.emplace(exceptionId, ExceptionStats{1, std::move(info)}); } - } + }); } struct Initializer { diff --git a/folly/experimental/exception_tracer/ExceptionTracerLib.cpp b/folly/experimental/exception_tracer/ExceptionTracerLib.cpp index 2c026dc948f..caa99b8607f 100644 --- a/folly/experimental/exception_tracer/ExceptionTracerLib.cpp +++ b/folly/experimental/exception_tracer/ExceptionTracerLib.cpp @@ -47,18 +47,15 @@ template class CallbackHolder { public: void registerCallback(Function f) { - SYNCHRONIZED(callbacks_) { - callbacks_.push_back(std::move(f)); - } + callbacks_.wlock()->push_back(std::move(f)); } // always inline to enforce kInternalFramesNumber template FOLLY_ALWAYS_INLINE void invoke(Args... args) { - SYNCHRONIZED_CONST(callbacks_) { - for (auto& cb : callbacks_) { - cb(args...); - } + auto callbacksLock = callbacks_.rlock(); + for (auto& cb : *callbacksLock) { + cb(args...); } } diff --git a/folly/experimental/gdb/deadlock.py b/folly/experimental/gdb/deadlock.py index 1d9fb472be5..343e4c13330 100644 --- a/folly/experimental/gdb/deadlock.py +++ b/folly/experimental/gdb/deadlock.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from collections import defaultdict +from enum import Enum import gdb import re @@ -230,14 +231,81 @@ def find_cycle(graph): return [] +def get_stacktrace(thread_id): + ''' + Returns the stack trace for the thread id as a list of strings. + ''' + gdb.execute('thread %d' % thread_id, from_tty=False, to_string=True) + output = gdb.execute('bt', from_tty=False, to_string=True) + stacktrace_lines = output.strip().split('\n') + return stacktrace_lines + + +def is_thread_blocked_with_frame( + thread_id, top_line, expected_top_line, expected_frame +): + ''' + Returns True if we found expected_top_line in top_line, and + we found the expected_frame in the thread's stack trace. + ''' + if expected_top_line not in top_line: + return False + stacktrace_lines = get_stacktrace(thread_id) + return any(expected_frame in line for line in stacktrace_lines) + + +class MutexType(Enum): + '''Types of mutexes that we can detect deadlocks.''' + + PTHREAD_MUTEX_T = 'pthread_mutex_t' + PTHREAD_RWLOCK_T = 'pthread_rwlock_t' + + @staticmethod + def get_mutex_type(thread_id, top_line): + ''' + Returns the probable mutex type, based on the first line + of the thread's stack. Returns None if not found. + ''' + + if is_thread_blocked_with_frame( + thread_id, top_line, '__lll_lock_wait', 'pthread_mutex' + ): + return MutexType.PTHREAD_MUTEX_T + if is_thread_blocked_with_frame( + thread_id, top_line, 'futex_wait', 'pthread_rwlock' + ): + return MutexType.PTHREAD_RWLOCK_T + return None + + @staticmethod + def get_mutex_owner_and_address_func_for_type(mutex_type): + ''' + Returns a function to resolve the mutex owner and address for + the given type. The returned function f has the following + signature: + + f: args: (map of thread lwp -> thread id), blocked thread lwp + returns: (lwp of thread owning mutex, mutex address) + or (None, None) if not found. + + Returns None if there is no function for this mutex_type. 
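Both files above follow the same mechanical migration from the deprecated SYNCHRONIZED macros to folly::Synchronized's explicit API. The general pattern, with an illustrative data type, is:

#include <folly/Synchronized.h>
#include <vector>

folly::Synchronized<std::vector<int>> values;

void examples() {
  values.wlock()->push_back(1);                       // single statement: locked pointer
  values.withWLock([](auto& v) { v.push_back(2); });  // scoped write critical section
  auto n = values.withRLock([](const auto& v) { return v.size(); });  // read lock
  (void)n;
}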
+ ''' + if mutex_type == MutexType.PTHREAD_MUTEX_T: + return get_pthread_mutex_t_owner_and_address + if mutex_type == MutexType.PTHREAD_RWLOCK_T: + return get_pthread_rwlock_t_owner_and_address + return None + + def print_cycle(graph, lwp_to_thread_id, cycle): '''Prints the threads and mutexes involved in the deadlock.''' for (m, n) in cycle: print( - 'Thread %d (LWP %d) is waiting on mutex (0x%016x) held by ' + 'Thread %d (LWP %d) is waiting on %s (0x%016x) held by ' 'Thread %d (LWP %d)' % ( - lwp_to_thread_id[m], m, graph.attributes(m, n)['mutex'], - lwp_to_thread_id[n], n + lwp_to_thread_id[m], m, + graph.attributes(m, n)['mutex_type'].value, + graph.attributes(m, n)['mutex'], lwp_to_thread_id[n], n ) ) @@ -246,28 +314,32 @@ def get_thread_info(): ''' Returns a pair of: - map of LWP -> thread ID - - set of blocked threads LWP + - map of blocked threads LWP -> potential mutex type ''' # LWP -> thread ID lwp_to_thread_id = {} - # Set of threads blocked on mutexes - blocked_threads = set() + # LWP -> potential mutex type it is blocked on + blocked_threads = {} output = gdb.execute('info threads', from_tty=False, to_string=True) lines = output.strip().split('\n')[1:] regex = re.compile(r'[\s\*]*(\d+).*Thread.*\(LWP (\d+)\).*') for line in lines: - thread_id = int(regex.match(line).group(1)) - thread_lwp = int(regex.match(line).group(2)) - lwp_to_thread_id[thread_lwp] = thread_id - if '__lll_lock_wait' in line: - blocked_threads.add(thread_lwp) + try: + thread_id = int(regex.match(line).group(1)) + thread_lwp = int(regex.match(line).group(2)) + lwp_to_thread_id[thread_lwp] = thread_id + mutex_type = MutexType.get_mutex_type(thread_id, line) + if mutex_type: + blocked_threads[thread_lwp] = mutex_type + except Exception: + continue return (lwp_to_thread_id, blocked_threads) -def get_mutex_owner_and_address(lwp_to_thread_id, thread_lwp): +def get_pthread_mutex_t_owner_and_address(lwp_to_thread_id, thread_lwp): ''' Finds the thread holding the mutex that this thread is blocked on. Returns a pair of (lwp of thread owning mutex, mutex address), @@ -291,6 +363,37 @@ def get_mutex_owner_and_address(lwp_to_thread_id, thread_lwp): return (None, None) +def get_pthread_rwlock_t_owner_and_address(lwp_to_thread_id, thread_lwp): + ''' + If the thread is waiting on a write-locked pthread_rwlock_t, this will + return the pair of: + (lwp of thread that is write-owning the mutex, mutex address) + or (None, None) if not found, or if the mutex is read-locked. + ''' + # Go up the stack to the pthread_rwlock_{rd|wr}lock frame + gdb.execute( + 'thread %d' % lwp_to_thread_id[thread_lwp], + from_tty=False, + to_string=True + ) + gdb.execute('frame 2', from_tty=False, to_string=True) + + # Get the owner of the mutex by inspecting the internal + # fields of the mutex. + try: + rwlock_info = gdb.parse_and_eval('rwlock').dereference() + rwlock_owner_lwp = int(rwlock_info['__data']['__writer']) + # We can only track the owner if it is currently write-locked. + # If it is not write-locked or if it is currently read-locked, + # possibly by multiple threads, we cannot find the owner. + if rwlock_owner_lwp != 0: + return (rwlock_owner_lwp, int(rwlock_info.address)) + else: + return (None, None) + except gdb.error: + return (None, None) + + class Deadlock(gdb.Command): '''Detects deadlocks''' @@ -307,13 +410,21 @@ def invoke(self, arg, from_tty): # Go through all the blocked threads and see which threads # they are blocked on, and build the thread wait graph. 
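Once the script is sourced into gdb, the detector runs as a user command (the command name is registered in Deadlock.__init__, which is not shown in this hunk; 'deadlock' is assumed here). A session sketch, using the message format from print_cycle above with illustrative thread numbers and addresses:

(gdb) source folly/experimental/gdb/deadlock.py
(gdb) deadlock
Thread 2 (LWP 1002) is waiting on pthread_mutex_t (0x00000000010a3c50) held by Thread 3 (LWP 1003)
Thread 3 (LWP 1003) is waiting on pthread_rwlock_t (0x00000000010a3c90) held by Thread 2 (LWP 1002)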
- for thread_lwp in blocked_threads: - mutex_owner_lwp, mutex_address = get_mutex_owner_and_address( + for thread_lwp, mutex_type in blocked_threads.items(): + get_owner_and_address_func = \ + MutexType.get_mutex_owner_and_address_func_for_type(mutex_type) + if not get_owner_and_address_func: + continue + mutex_owner_lwp, mutex_address = get_owner_and_address_func( lwp_to_thread_id, thread_lwp ) if mutex_owner_lwp and mutex_address: - graph.add_edge(thread_lwp, mutex_owner_lwp, - mutex=mutex_address) + graph.add_edge( + thread_lwp, + mutex_owner_lwp, + mutex=mutex_address, + mutex_type=mutex_type + ) # A deadlock exists if there is a cycle in the graph. cycle = find_cycle(graph) diff --git a/folly/experimental/hazptr/hazptr-impl.h b/folly/experimental/hazptr/hazptr-impl.h index e47d9c9f44d..a4cb91179e5 100644 --- a/folly/experimental/hazptr/hazptr-impl.h +++ b/folly/experimental/hazptr/hazptr-impl.h @@ -84,6 +84,7 @@ #define HAZPTR_STATS false #endif +#include #include #include #include @@ -161,8 +162,8 @@ bool hazptr_tc_enabled(); bool hazptr_priv_enabled(); hazptr_tc* hazptr_tc_tls(); -void hazptr_tc_init(); -void hazptr_tc_shutdown(); +void hazptr_tc_init(hazptr_tc& tc); +void hazptr_tc_shutdown(hazptr_tc& tc); hazptr_rec* hazptr_tc_try_get(); bool hazptr_tc_try_put(hazptr_rec* hprec); @@ -317,8 +318,8 @@ static_assert( folly::kMscVer || std::is_trivial::value, "hazptr_priv must be trivial to avoid a branch to check initialization"); -void hazptr_priv_init(); -void hazptr_priv_shutdown(); +void hazptr_priv_init(hazptr_priv& priv); +void hazptr_priv_shutdown(hazptr_priv& priv); bool hazptr_priv_try_retire(hazptr_obj* obj); inline void hazptr_priv_list::insert(hazptr_priv* rec) { @@ -364,27 +365,31 @@ inline void hazptr_priv_list::collect(hazptr_obj*& head, hazptr_obj*& tail) { } } -/** hazptr_tls_life */ +/** tls globals */ -struct hazptr_tls_life { - hazptr_tls_life(); - ~hazptr_tls_life(); -}; +struct hazptr_tls_globals_ { + hazptr_tls_state tls_state{TLS_UNINITIALIZED}; + hazptr_tc tc; + hazptr_priv priv; -void tls_life_odr_use(); + hazptr_tls_globals_() { + HAZPTR_DEBUG_PRINT(this); + tls_state = TLS_ALIVE; + hazptr_tc_init(tc); + hazptr_priv_init(priv); + } + ~hazptr_tls_globals_() { + HAZPTR_DEBUG_PRINT(this); + CHECK(tls_state == TLS_ALIVE); + hazptr_tc_shutdown(tc); + hazptr_priv_shutdown(priv); + tls_state = TLS_DESTROYED; + } +}; +FOLLY_ALWAYS_INLINE hazptr_tls_globals_& hazptr_tls_globals() { + return folly::SingletonThreadLocal::get(); +} -/** tls globals */ -#if HAZPTR_ENABLE_TLS -extern thread_local hazptr_tls_state tls_state_; -extern thread_local hazptr_tc tls_tc_data_; -extern thread_local hazptr_priv tls_priv_data_; -extern thread_local hazptr_tls_life tls_life_; // last -#else -extern hazptr_tls_state tls_state_; -extern hazptr_tc tls_tc_data_; -extern hazptr_priv tls_priv_data_; -extern hazptr_tls_life tls_life_; // last -#endif /** * hazptr_domain */ @@ -702,6 +707,7 @@ FOLLY_ALWAYS_INLINE hazptr_array::~hazptr_array() { auto count = tc.count(); if ((M <= HAZPTR_TC_SIZE) && (count + M <= HAZPTR_TC_SIZE)) { for (size_t i = 0; i < M; ++i) { + h[i].reset(); tc[count + i].hprec_ = h[i].hazptr_; HAZPTR_DEBUG_PRINT(i << " " << &h[i]); new (&h[i]) hazptr_holder(nullptr); @@ -774,7 +780,7 @@ FOLLY_ALWAYS_INLINE hazptr_local::hazptr_local() { } } // Slow path - need_destruct_ = true; + slow_path_ = true; for (size_t i = 0; i < M; ++i) { new (&h[i]) hazptr_holder; HAZPTR_DEBUG_PRINT( @@ -784,7 +790,7 @@ FOLLY_ALWAYS_INLINE hazptr_local::hazptr_local() { template 
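hazptr_tls_globals() above replaces raw thread_local globals with folly::SingletonThreadLocal, which lazily constructs one instance per thread on first access and destroys it at thread exit. The shape of that pattern, with an illustrative struct:

#include <folly/SingletonThreadLocal.h>

struct PerThreadState {
  int counter = 0;
};

int& perThreadCounter() {
  // Constructed on first use in each thread; the destructor runs at thread
  // exit, which is what lets hazptr_tls_globals_ run its shutdown hooks.
  return folly::SingletonThreadLocal<PerThreadState>::get().counter;
}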
FOLLY_ALWAYS_INLINE hazptr_local::~hazptr_local() { - if (LIKELY(!need_destruct_)) { + if (LIKELY(!slow_path_)) { if (kIsDebug) { auto ptc = hazptr_tc_tls(); DCHECK(ptc != nullptr); @@ -792,6 +798,10 @@ FOLLY_ALWAYS_INLINE hazptr_local::~hazptr_local() { DCHECK(tc.local_); tc.local_ = false; } + auto h = reinterpret_cast(&raw_); + for (size_t i = 0; i < M; ++i) { + h[i].reset(); + } return; } // Slow path @@ -1171,19 +1181,17 @@ FOLLY_ALWAYS_INLINE size_t hazptr_tc::count() { /** hazptr_tc free functions */ FOLLY_ALWAYS_INLINE hazptr_tc* hazptr_tc_tls() { - HAZPTR_DEBUG_PRINT(tls_state_); - if (LIKELY(tls_state_ == TLS_ALIVE)) { - HAZPTR_DEBUG_PRINT(tls_state_); - return &tls_tc_data_; - } else if (tls_state_ == TLS_UNINITIALIZED) { - tls_life_odr_use(); - return &tls_tc_data_; + HAZPTR_DEBUG_PRINT(hazptr_tls_globals().tls_state); + if (LIKELY(hazptr_tls_globals().tls_state == TLS_ALIVE)) { + HAZPTR_DEBUG_PRINT(hazptr_tls_globals().tls_state); + return &hazptr_tls_globals().tc; + } else if (hazptr_tls_globals().tls_state == TLS_UNINITIALIZED) { + return &hazptr_tls_globals().tc; } return nullptr; } -inline void hazptr_tc_init() { - auto& tc = tls_tc_data_; +inline void hazptr_tc_init(hazptr_tc& tc) { HAZPTR_DEBUG_PRINT(&tc); tc.count_ = 0; if (kIsDebug) { @@ -1191,8 +1199,7 @@ inline void hazptr_tc_init() { } } -inline void hazptr_tc_shutdown() { - auto& tc = tls_tc_data_; +inline void hazptr_tc_shutdown(hazptr_tc& tc) { HAZPTR_DEBUG_PRINT(&tc); for (size_t i = 0; i < tc.count_; ++i) { tc.entry_[i].evict(); @@ -1201,22 +1208,21 @@ inline void hazptr_tc_shutdown() { FOLLY_ALWAYS_INLINE hazptr_rec* hazptr_tc_try_get() { HAZPTR_DEBUG_PRINT(TLS_UNINITIALIZED << TLS_ALIVE << TLS_DESTROYED); - HAZPTR_DEBUG_PRINT(tls_state_); - if (LIKELY(tls_state_ == TLS_ALIVE)) { - HAZPTR_DEBUG_PRINT(tls_state_); - return tls_tc_data_.get(); - } else if (tls_state_ == TLS_UNINITIALIZED) { - tls_life_odr_use(); - return tls_tc_data_.get(); + HAZPTR_DEBUG_PRINT(hazptr_tls_globals().tls_state); + if (LIKELY(hazptr_tls_globals().tls_state == TLS_ALIVE)) { + HAZPTR_DEBUG_PRINT(hazptr_tls_globals().tls_state); + return hazptr_tls_globals().tc.get(); + } else if (hazptr_tls_globals().tls_state == TLS_UNINITIALIZED) { + return hazptr_tls_globals().tc.get(); } return nullptr; } FOLLY_ALWAYS_INLINE bool hazptr_tc_try_put(hazptr_rec* hprec) { - HAZPTR_DEBUG_PRINT(tls_state_); - if (LIKELY(tls_state_ == TLS_ALIVE)) { - HAZPTR_DEBUG_PRINT(tls_state_); - return tls_tc_data_.put(hprec); + HAZPTR_DEBUG_PRINT(hazptr_tls_globals().tls_state); + if (LIKELY(hazptr_tls_globals().tls_state == TLS_ALIVE)) { + HAZPTR_DEBUG_PRINT(hazptr_tls_globals().tls_state); + return hazptr_tls_globals().tc.put(hprec); } return false; } @@ -1225,14 +1231,12 @@ FOLLY_ALWAYS_INLINE bool hazptr_tc_try_put(hazptr_rec* hprec) { * hazptr_priv */ -inline void hazptr_priv_init() { - auto& priv = tls_priv_data_; +inline void hazptr_priv_init(hazptr_priv& priv) { HAZPTR_DEBUG_PRINT(&priv); priv.init(); } -inline void hazptr_priv_shutdown() { - auto& priv = tls_priv_data_; +inline void hazptr_priv_shutdown(hazptr_priv& priv) { HAZPTR_DEBUG_PRINT(&priv); DCHECK(priv.active()); priv.clear_active(); @@ -1243,46 +1247,19 @@ inline void hazptr_priv_shutdown() { } inline bool hazptr_priv_try_retire(hazptr_obj* obj) { - HAZPTR_DEBUG_PRINT(tls_state_); - if (tls_state_ == TLS_ALIVE) { - HAZPTR_DEBUG_PRINT(tls_state_); - tls_priv_data_.push(obj); + HAZPTR_DEBUG_PRINT(hazptr_tls_globals().tls_state); + if (hazptr_tls_globals().tls_state == TLS_ALIVE) { + 
HAZPTR_DEBUG_PRINT(hazptr_tls_globals().tls_state); + hazptr_tls_globals().priv.push(obj); return true; - } else if (tls_state_ == TLS_UNINITIALIZED) { - HAZPTR_DEBUG_PRINT(tls_state_); - tls_life_odr_use(); - tls_priv_data_.push(obj); + } else if (hazptr_tls_globals().tls_state == TLS_UNINITIALIZED) { + HAZPTR_DEBUG_PRINT(hazptr_tls_globals().tls_state); + hazptr_tls_globals().priv.push(obj); return true; } return false; } -/** hazptr_tls_life */ - -inline void tls_life_odr_use() { - HAZPTR_DEBUG_PRINT(tls_state_); - CHECK(tls_state_ == TLS_UNINITIALIZED); - auto volatile tlsOdrUse = &tls_life_; - CHECK(tlsOdrUse != nullptr); - HAZPTR_DEBUG_PRINT(tlsOdrUse); -} - -inline hazptr_tls_life::hazptr_tls_life() { - HAZPTR_DEBUG_PRINT(this); - CHECK(tls_state_ == TLS_UNINITIALIZED); - hazptr_tc_init(); - hazptr_priv_init(); - tls_state_ = TLS_ALIVE; -} - -inline hazptr_tls_life::~hazptr_tls_life() { - HAZPTR_DEBUG_PRINT(this); - CHECK(tls_state_ == TLS_ALIVE); - hazptr_tc_shutdown(); - hazptr_priv_shutdown(); - tls_state_ = TLS_DESTROYED; -} - /** hazptr_obj_batch */ /* Only for default domain. Supports only hazptr_obj_base_refcounted * and a thread-safe access only, for now. */ diff --git a/folly/experimental/hazptr/hazptr.cpp b/folly/experimental/hazptr/hazptr.cpp index 2623bec8642..843ffeee518 100644 --- a/folly/experimental/hazptr/hazptr.cpp +++ b/folly/experimental/hazptr/hazptr.cpp @@ -22,17 +22,6 @@ namespace hazptr { FOLLY_STATIC_CTOR_PRIORITY_MAX hazptr_domain default_domain_; hazptr_stats hazptr_stats_; -#if HAZPTR_ENABLE_TLS -thread_local hazptr_tls_state tls_state_ = TLS_UNINITIALIZED; -thread_local hazptr_tc tls_tc_data_; -thread_local hazptr_priv tls_priv_data_; -thread_local hazptr_tls_life tls_life_; // last -#else -hazptr_tls_state tls_state_ = TLS_UNINITIALIZED; -hazptr_tc tls_tc_data_; -hazptr_priv tls_priv_data_; -hazptr_tls_life tls_life_; // last -#endif bool hazptr_tc_enabled() { return HAZPTR_TC; diff --git a/folly/experimental/hazptr/hazptr.h b/folly/experimental/hazptr/hazptr.h index e4308f2b0ec..93c3d82193b 100644 --- a/folly/experimental/hazptr/hazptr.h +++ b/folly/experimental/hazptr/hazptr.h @@ -328,7 +328,7 @@ class hazptr_local { private: aligned_hazptr_holder raw_[M]; - bool need_destruct_{false}; + bool slow_path_{false}; }; } // namespace hazptr diff --git a/folly/experimental/hazptr/test/HazptrTest.cpp b/folly/experimental/hazptr/test/HazptrTest.cpp index 3e31af4ae01..93e02b65d1a 100644 --- a/folly/experimental/hazptr/test/HazptrTest.cpp +++ b/folly/experimental/hazptr/test/HazptrTest.cpp @@ -29,6 +29,8 @@ #include #include +#include + #include DEFINE_int32(num_threads, 5, "Number of threads"); @@ -639,4 +641,95 @@ TEST_F(HazptrTest, FreeFunctionCleanup) { for (auto& t : threads) { t.join(); } + { // Cleanup after using array + constructed.store(0); + destroyed.store(0); + { hazptr_array<2> h; } + { + hazptr_array<2> h; + auto p0 = new Foo(0, nullptr); + auto p1 = new Foo(0, nullptr); + h[0].reset(p0); + h[1].reset(p1); + p0->retire(); + p1->retire(); + } + CHECK_EQ(constructed.load(), 2); + hazptr_cleanup(); + CHECK_EQ(destroyed.load(), 2); + } + { // Cleanup after using local + constructed.store(0); + destroyed.store(0); + { hazptr_local<2> h; } + { + hazptr_local<2> h; + auto p0 = new Foo(0, nullptr); + auto p1 = new Foo(0, nullptr); + h[0].reset(p0); + h[1].reset(p1); + p0->retire(); + p1->retire(); + } + CHECK_EQ(constructed.load(), 2); + hazptr_cleanup(); + CHECK_EQ(destroyed.load(), 2); + } +} + +TEST_F(HazptrTest, ForkTest) { + struct Foo : 
hazptr_obj_base { + int a; + }; + std::mutex m; + std::condition_variable cv; + std::condition_variable cv2; + bool ready = false; + bool ready2 = false; + auto mkthread = [&]() { + hazptr_holder h; + auto p = new Foo; + std::atomic ap{p}; + h.get_protected(p); + p->retire(); + { + std::unique_lock lk(m); + ready = true; + cv.notify_one(); + cv2.wait(lk, [&] { return ready2; }); + } + }; + std::thread t(mkthread); + hazptr_holder h; + auto p = new Foo; + std::atomic ap{p}; + h.get_protected(p); + p->retire(); + { + std::unique_lock lk(m); + cv.wait(lk, [&] { return ready; }); + } + auto pid = fork(); + CHECK_GE(pid, 0); + if (pid) { + { + std::lock_guard g(m); + ready2 = true; + cv2.notify_one(); + } + t.join(); + int status; + wait(&status); + CHECK_EQ(status, 0); + } else { + // child + std::thread tchild(mkthread); + { + std::lock_guard g(m); + ready2 = true; + cv2.notify_one(); + } + tchild.join(); + _exit(0); // Do not print gtest results + } } diff --git a/folly/experimental/io/test/FsUtilTest.cpp b/folly/experimental/io/test/FsUtilTest.cpp index afee700e3e8..809487b0bda 100644 --- a/folly/experimental/io/test/FsUtilTest.cpp +++ b/folly/experimental/io/test/FsUtilTest.cpp @@ -23,23 +23,13 @@ using namespace folly; using namespace folly::fs; -namespace { -// We cannot use EXPECT_EQ(a, b) due to a bug in gtest 1.6.0: gtest wants -// to print path as a container even though it has operator<< defined, -// and as path is a container of path, this leads to infinite -// recursion. -void expectPathEq(const path& a, const path& b) { - EXPECT_TRUE(a == b) << "expected path=" << a << "\nactual path=" << b; -} -} // namespace - TEST(Simple, Path) { path root("/"); path abs1("/hello/world"); path rel1("meow"); EXPECT_TRUE(starts_with(abs1, root)); EXPECT_FALSE(starts_with(rel1, root)); - expectPathEq(path("hello/world"), remove_prefix(abs1, root)); + EXPECT_EQ(path("hello/world"), remove_prefix(abs1, root)); EXPECT_THROW({ remove_prefix(rel1, root); }, filesystem_error); path abs2("/hello"); @@ -52,9 +42,9 @@ TEST(Simple, Path) { EXPECT_TRUE(starts_with(abs1, abs4)); EXPECT_FALSE(starts_with(abs1, abs5)); EXPECT_FALSE(starts_with(abs1, abs6)); - expectPathEq(path("world"), remove_prefix(abs1, abs2)); - expectPathEq(path("world"), remove_prefix(abs1, abs3)); - expectPathEq(path(), remove_prefix(abs1, abs4)); + EXPECT_EQ(path("world"), remove_prefix(abs1, abs2)); + EXPECT_EQ(path("world"), remove_prefix(abs1, abs3)); + EXPECT_EQ(path(), remove_prefix(abs1, abs4)); EXPECT_THROW({ remove_prefix(abs1, abs5); }, filesystem_error); EXPECT_THROW({ remove_prefix(abs1, abs6); }, filesystem_error); } @@ -65,12 +55,12 @@ TEST(Simple, CanonicalizeParent) { path c("/usr/bin/DOES_NOT_EXIST_ASDF"); path d("/usr/lib/../bin/DOES_NOT_EXIST_ASDF"); - expectPathEq(a, canonical(a)); - expectPathEq(a, canonical_parent(b)); - expectPathEq(a, canonical(b)); - expectPathEq(a, canonical_parent(b)); + EXPECT_EQ(a, canonical(a)); + EXPECT_EQ(a, canonical_parent(b)); + EXPECT_EQ(a, canonical(b)); + EXPECT_EQ(a, canonical_parent(b)); EXPECT_THROW({ canonical(c); }, filesystem_error); EXPECT_THROW({ canonical(d); }, filesystem_error); - expectPathEq(c, canonical_parent(c)); - expectPathEq(c, canonical_parent(d)); + EXPECT_EQ(c, canonical_parent(c)); + EXPECT_EQ(c, canonical_parent(d)); } diff --git a/folly/experimental/logging/Init.cpp b/folly/experimental/logging/Init.cpp index 542a7961cf4..438b186e655 100644 --- a/folly/experimental/logging/Init.cpp +++ b/folly/experimental/logging/Init.cpp @@ -23,12 +23,24 @@ namespace folly 
{ void initLogging(StringPiece configString) { - if (configString.empty()) { + auto* const baseConfigStr = getBaseLoggingConfig(); + // Return early if we have nothing to do + if (!baseConfigStr && configString.empty()) { return; } - // Parse and apply the config string - auto config = parseLogConfig(configString); + // Parse the configuration string(s) + LogConfig config; + if (baseConfigStr) { + config = parseLogConfig(baseConfigStr); + if (!configString.empty()) { + config.update(parseLogConfig(configString)); + } + } else { + config = parseLogConfig(configString); + } + + // Apply the config settings LoggerDB::get().updateConfig(config); } diff --git a/folly/experimental/logging/Init.h b/folly/experimental/logging/Init.h index dbab8030423..e507cd1e7d0 100644 --- a/folly/experimental/logging/Init.h +++ b/folly/experimental/logging/Init.h @@ -39,4 +39,26 @@ namespace folly { */ void initLogging(folly::StringPiece configString = ""); +/** + * folly::getBaseLoggingConfig() allows individual executables to easily + * customize their default logging configuration. + * + * You can define this function in your executable and folly::initLogging() + * will call it to get the base logging configuration. The settings returned + * by getBaseLoggingConfig() will then be modified by updating them with the + * configuration string parameter passed to initLogging(). + * + * This allows the user-specified configuration passed to initLogging() to + * update the base configuration. The user-specified configuration can apply + * additional settings, and it may also override settings for categories and + * handlers defined in the base configuration. + * + * See folly/experimental/logging/example/main.cpp for an example that defines + * getBaseLoggingConfig(). + * + * If this function returns a non-null pointer, it should point to a + * null-terminated string with static storage duration. + */ +const char* getBaseLoggingConfig(); + } // namespace folly diff --git a/folly/experimental/logging/InitWeak.cpp b/folly/experimental/logging/InitWeak.cpp new file mode 100644 index 00000000000..de1fde66d77 --- /dev/null +++ b/folly/experimental/logging/InitWeak.cpp @@ -0,0 +1,33 @@ +/* + * Copyright 2004-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +namespace folly { + +// The default implementation for getBaseLoggingConfig(). +// By default this returns null, and we will use only the default settings +// applied by initializeLoggerDB(). +// +// This is defined in a separate module from initLogging() so that it can be +// placed into a separate library from the main folly logging code when linking +// as a shared library. This is required to help ensure that any +// getBaseLoggingConfig() provided by the main binary is preferred over this +// symbol. 
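An executable opts into the hook above by defining the function in namespace folly; initLogging() parses this base string first and then layers its own configString argument on top via LogConfig::update(). A sketch with illustrative category names:

namespace folly {
const char* getBaseLoggingConfig() {
  // Must point to a null-terminated string with static storage duration.
  return ".=WARNING,mylib=INFO";
}
} // namespace folly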
+FOLLY_ATTR_WEAK const char* getBaseLoggingConfig() { + return nullptr; +} + +} // namespace folly diff --git a/folly/experimental/logging/Logger.h b/folly/experimental/logging/Logger.h index cfbb76915cd..d6a4987e6b1 100644 --- a/folly/experimental/logging/Logger.h +++ b/folly/experimental/logging/Logger.h @@ -22,24 +22,6 @@ #include #include -/** - * Helper macro for implementing FB_LOG() and FB_LOGF(). - * - * This macro generally should not be used directly by end users. - */ -#define FB_LOG_IMPL(logger, level, type, ...) \ - (!(logger).getCategory()->logCheck(level)) \ - ? ::folly::logDisabledHelper( \ - std::integral_constant{}) \ - : ::folly::LogStreamVoidify<::folly::isLogLevelFatal(level)>{} & \ - ::folly::LogStreamProcessor{(logger).getCategory(), \ - (level), \ - __FILE__, \ - __LINE__, \ - (type), \ - ##__VA_ARGS__} \ - .stream() - /** * Log a message to the specified logger. * @@ -77,6 +59,82 @@ arg1, \ ##__VA_ARGS__) +/** + * FB_LOG_RAW() can be used by callers that want to pass in the log level as a + * variable, and/or who want to explicitly specify the filename and line + * number. + * + * This is useful for callers implementing their own log wrapper functions + * that want to pass in their caller's filename and line number rather than + * their own. + * + * The log level parameter must be an explicitly qualified LogLevel value, or a + * LogLevel variable. (This differs from FB_LOG() and FB_LOGF() which accept + * an unqualified LogLevel name.) + */ +#define FB_LOG_RAW(logger, level, filename, linenumber, ...) \ + FB_LOG_RAW_IMPL( \ + logger, \ + level, \ + filename, \ + linenumber, \ + ::folly::LogStreamProcessor::APPEND, \ + ##__VA_ARGS__) + +/** + * FB_LOGF_RAW() is similar to FB_LOG_RAW(), but formats the log arguments + * using folly::format(). + */ +#define FB_LOGF_RAW(logger, level, filename, linenumber, fmt, arg1, ...) \ + FB_LOG_RAW_IMPL( \ + logger, \ + level, \ + filename, \ + linenumber, \ + ::folly::LogStreamProcessor::FORMAT, \ + fmt, \ + arg1, \ + ##__VA_ARGS__) + +/** + * Helper macro for implementing FB_LOG() and FB_LOGF(). + * + * This macro generally should not be used directly by end users. + */ +#define FB_LOG_IMPL(logger, level, type, ...) \ + (!(logger).getCategory()->logCheck(level)) \ + ? ::folly::logDisabledHelper( \ + std::integral_constant{}) \ + : ::folly::LogStreamVoidify<::folly::isLogLevelFatal(level)>{} & \ + ::folly::LogStreamProcessor{(logger).getCategory(), \ + (level), \ + __FILE__, \ + __LINE__, \ + (type), \ + ##__VA_ARGS__} \ + .stream() + +/** + * Helper macro for implementing FB_LOG_RAW() and FB_LOGF_RAW(). + * + * This macro generally should not be used directly by end users. + * + * This is very similar to FB_LOG_IMPL(), but since the level may be a variable + * instead of a compile-time constant, we cannot detect at compile time if this + * is a fatal log message or not. + */ +#define FB_LOG_RAW_IMPL(logger, level, filename, line, type, ...) \ + (!(logger).getCategory()->logCheck(level)) \ + ? 
static_cast(0) \ + : ::folly::LogStreamVoidify{} & \ + ::folly::LogStreamProcessor{(logger).getCategory(), \ + (level), \ + (filename), \ + (line), \ + (type), \ + ##__VA_ARGS__} \ + .stream() + namespace folly { class LoggerDB; diff --git a/folly/experimental/logging/LoggerDB.cpp b/folly/experimental/logging/LoggerDB.cpp index 0e220009e04..1666395c416 100644 --- a/folly/experimental/logging/LoggerDB.cpp +++ b/folly/experimental/logging/LoggerDB.cpp @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -28,7 +29,6 @@ #include #include #include -#include using std::string; @@ -40,10 +40,7 @@ namespace folly { * This is defined as a weak symbol to allow programs to provide their own * alternative definition if desired. */ -#if FOLLY_HAVE_WEAK_SYMBOLS -void initializeLoggerDB(LoggerDB& db) __attribute__((weak)); -#endif -void initializeLoggerDB(LoggerDB& db) { +FOLLY_ATTR_WEAK void initializeLoggerDB(LoggerDB& db) { // Register the StreamHandlerFactory // // This is the only LogHandlerFactory that we register by default. We @@ -159,6 +156,14 @@ void LoggerDB::setLevel(LogCategory* category, LogLevel level, bool inherit) { } LogConfig LoggerDB::getConfig() const { + return getConfigImpl(/* includeAllCategories = */ false); +} + +LogConfig LoggerDB::getFullConfig() const { + return getConfigImpl(/* includeAllCategories = */ true); +} + +LogConfig LoggerDB::getConfigImpl(bool includeAllCategories) const { auto handlerInfo = handlerInfo_.rlock(); LogConfig::HandlerConfigMap handlerConfigs; @@ -193,9 +198,10 @@ LogConfig LoggerDB::getConfig() const { auto levelInfo = category->getLevelInfo(); auto handlers = category->getHandlers(); - // Don't report categories that have default settings. - if (handlers.empty() && levelInfo.first == LogLevel::MAX_LEVEL && - levelInfo.second) { + // Don't report categories that have default settings + // if includeAllCategories is false + if (!includeAllCategories && handlers.empty() && + levelInfo.first == LogLevel::MAX_LEVEL && levelInfo.second) { continue; } diff --git a/folly/experimental/logging/LoggerDB.h b/folly/experimental/logging/LoggerDB.h index 8d9b8a84a5a..fa3edd300c1 100644 --- a/folly/experimental/logging/LoggerDB.h +++ b/folly/experimental/logging/LoggerDB.h @@ -79,15 +79,18 @@ class LoggerDB { /** * Get a LogConfig object describing the current state of the LoggerDB. - * - * Note that this may not 100% accurately describe the current configuration - * if callers have manually added LogHandlers to some categories without - * using the updateConfig() or resetConfig() functions. In this case - * getConfig() will simply report these handlers as "unknown_handler" when - * returning handler names for the categories in question. */ LogConfig getConfig() const; + /** + * Get a LogConfig object fully describing the state of the LoggerDB. + * + * This is similar to getConfig(), but it returns LogCategoryConfig objects + * for all defined log categories, including ones that are using the default + * configuration settings. + */ + LogConfig getFullConfig() const; + /** * Update the current LoggerDB state with the specified LogConfig settings. * @@ -252,6 +255,7 @@ class LoggerDB { std::unordered_map>; using OldToNewHandlerMap = std:: unordered_map, std::shared_ptr>; + LogConfig getConfigImpl(bool includeAllCategories) const; void startConfigUpdate( const Synchronized::LockedPtr& handlerInfo, const LogConfig& config, @@ -312,6 +316,11 @@ class LoggerDB { * generally a good idea to defer more complicated setup until after main() * starts. 
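A sketch of the wrapper use case the FB_LOG_RAW() comments above describe: a helper that logs at a runtime-chosen level with its caller's location. The helper itself is illustrative:

void logAtCallerLocation(
    const folly::Logger& logger,
    folly::LogLevel level,
    const char* callerFile,
    int callerLine,
    const std::string& message) {
  // level may be a plain variable here; FB_LOG()/FB_LOGF() instead require
  // an unqualified compile-time LogLevel name.
  FB_LOG_RAW(logger, level, callerFile, callerLine, message);
}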
* + * In most situations it is normally better to override getBaseLoggingConfig() + * from logging/Init.h rather than overriding initializeLoggerDB(). You only + * need to override initializeLoggerDB() if you want to change the settings + * that are used for messages that get logged before initLogging() is called. + * * The default implementation configures the root log category to write all * warning and higher-level log messages to stderr, using a format similar to * that used by GLOG. diff --git a/folly/experimental/logging/example/main.cpp b/folly/experimental/logging/example/main.cpp index c0723c76bf4..2df99c29b41 100644 --- a/folly/experimental/logging/example/main.cpp +++ b/folly/experimental/logging/example/main.cpp @@ -25,16 +25,26 @@ DEFINE_string(logging, "", "Logging category configuration string"); using namespace example; using folly::LogLevel; -// Invoking code that uses XLOG() statements before main is safe, -// but will not log anywhere, since no handlers are configured yet. +// Invoking code that uses XLOG() statements before main() is safe. +// This will use default log settings defined by folly::initializeLoggerDB(). static ExampleObject staticInitialized("static"); +namespace folly { +const char* getBaseLoggingConfig() { + // Configure folly to enable INFO+ messages, and everything else to + // enable WARNING+. + // + // Set the default log handler to log asynchronously by default. + return ".=WARNING,folly=INFO; default:async=true"; +} +} // namespace folly + int main(int argc, char* argv[]) { - // Using log macros before configuring any log levels or log handlers is - // safe, but the messages will always be ignore since no handlers are defined. - XLOG(INFO, "no handlers configured yet, so this will go nowhere"); - printf("main starting\n"); - fflush(stdout); + // Using log macros before calling folly::initLogging() will use the default + // log settings defined by folly::initializeLoggerDB(). The default behavior + // is to log WARNING+ messages to stderr. + XLOG(INFO) << "log messages less than WARNING will be ignored"; + XLOG(ERR) << "error messages before initLogging() will be logged to stderr"; // Call folly::init() and then initialize log levels and handlers folly::init(&argc, &argv); @@ -42,15 +52,16 @@ int main(int argc, char* argv[]) { // All XLOG() statements in this file will log to the category // folly.experimental.logging.example.main - XLOG(INFO, "now log messages will be sent to stderr"); + XLOG(INFO, "now the normal log settings have been applied"); XLOG(DBG1, "log arguments are concatenated: ", 12345, ", ", 92.0); XLOGF(DBG1, "XLOGF supports {}-style formatting: {:.3f}", "python", 1.0 / 3); XLOG(DBG2) << "streaming syntax is also supported: " << 1234; - XLOG(DBG2, "you can even", " mix function-style") << " and streaming " - << "syntax"; + XLOG(DBG2, "if you really want, ", "you can even") + << " mix function-style and streaming syntax: " << 42; + XLOGF(DBG3, "and {} can mix {} style", "you", "format") << " and streaming"; ExampleObject("foo"); - XLOG(INFO, "main returning"); + XLOG(INFO) << "main returning"; return 0; } diff --git a/folly/experimental/logging/printf.h b/folly/experimental/logging/printf.h index f482f0f623e..03f80eb147a 100644 --- a/folly/experimental/logging/printf.h +++ b/folly/experimental/logging/printf.h @@ -40,12 +40,8 @@ std::string loggingFormatPrintf( /** * Log a message to the specified logger using a printf-style format string. */ -#define FB_LOGC(logger, level, fmt, ...) 
\ - FB_LOG_IMPL( \ - logger, \ - ::folly::LogLevel::level, \ - ::folly::LogStreamProcessor::APPEND, \ - ::folly::loggingFormatPrintf(fmt, ##__VA_ARGS__)) +#define FB_LOGC(logger, level, fmt, ...) \ + FB_LOG(logger, level, ::folly::loggingFormatPrintf(fmt, ##__VA_ARGS__)) /** * Log a message to the file's default log category using a printf-style format diff --git a/folly/experimental/logging/test/ConfigHelpers.cpp b/folly/experimental/logging/test/ConfigHelpers.cpp new file mode 100644 index 00000000000..982974c136f --- /dev/null +++ b/folly/experimental/logging/test/ConfigHelpers.cpp @@ -0,0 +1,94 @@ +/* + * Copyright 2004-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include + +#include +#include +#include +#include + +namespace folly { + +std::ostream& operator<<(std::ostream& os, const LogConfig& config) { + // We could just use folly::toPrettyJson(logConfigToDynamic(config)) + // However, the format here is much more compact and easier to read if there + // are discrepancies between configs in a test check. + + // Sort the categories by name before printing + os << "{\n categories: {\n"; + std::vector names; + const auto& catConfigs = config.getCategoryConfigs(); + for (const auto& cc : catConfigs) { + names.push_back(cc.first); + } + std::sort(names.begin(), names.end()); + for (const auto& name : names) { + os << " " << name << "=" << catConfigs.at(name) << "\n"; + } + + // Sort the handlers by name before printing + os << " }\n handlers: {\n"; + const auto& handlerConfigs = config.getHandlerConfigs(); + names.clear(); + for (const auto& cc : handlerConfigs) { + names.push_back(cc.first); + } + std::sort(names.begin(), names.end()); + for (const auto& name : names) { + os << " " << name << "=" << handlerConfigs.at(name) << "\n"; + } + + os << " }\n}"; + return os; +} + +std::ostream& operator<<(std::ostream& os, const LogCategoryConfig& config) { + // Rather than printing the JSON configuration, we print a shorter + // representation closer to the basic config string format. + os << logLevelToString(config.level); + if (!config.inheritParentLevel) { + os << "!"; + } + if (config.handlers.hasValue()) { + os << ":" << join(",", config.handlers.value()); + } + return os; +} + +std::ostream& operator<<(std::ostream& os, const LogHandlerConfig& config) { + // Rather than printing the JSON configuration, we print a shorter + // representation closer to the basic config string format. + os << (config.type ? 
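With the simplification above, FB_LOGC() is now a thin wrapper over FB_LOG() and call sites are unchanged. A usage sketch with illustrative logger and category names:

void report(int count, double elapsedMs) {
  folly::Logger logger{&folly::LoggerDB::get(), "my.category"};
  FB_LOGC(logger, INFO, "processed %d items in %.1f ms", count, elapsedMs);
}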
config.type.value() : "[no type]"); + bool first = true; + for (const auto& opt : config.options) { + if (!first) { + os << ","; + } else { + os << ":"; + first = false; + } + os << opt.first << "=" << opt.second; + } + return os; +} + +void PrintTo(const std::shared_ptr& handler, std::ostream* os) { + *os << "Handler(" << handler->getConfig() << ")"; +} + +} // namespace folly diff --git a/folly/experimental/logging/test/ConfigHelpers.h b/folly/experimental/logging/test/ConfigHelpers.h new file mode 100644 index 00000000000..6328c4ba7d9 --- /dev/null +++ b/folly/experimental/logging/test/ConfigHelpers.h @@ -0,0 +1,43 @@ +/* + * Copyright 2004-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace folly { + +class LogCategoryConfig; +class LogConfig; +class LogHandler; +class LogHandlerConfig; + +/* + * ostream<< operators so that various objects can be printed nicely in test + * failure messages and other locations. + */ + +std::ostream& operator<<(std::ostream& os, const LogConfig& config); +std::ostream& operator<<(std::ostream& os, const LogCategoryConfig& config); +std::ostream& operator<<(std::ostream& os, const LogHandlerConfig& config); + +/* + * Print std::shared_ptr nicely so that unit tests matching against + * LogCategory::getHandlers() can print output nicely. + */ +void PrintTo(const std::shared_ptr& handler, std::ostream* os); + +} // namespace folly diff --git a/folly/experimental/logging/test/ConfigParserTest.cpp b/folly/experimental/logging/test/ConfigParserTest.cpp index b3005b3c126..c2eb369f695 100644 --- a/folly/experimental/logging/test/ConfigParserTest.cpp +++ b/folly/experimental/logging/test/ConfigParserTest.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -28,34 +29,6 @@ using namespace folly; using ::testing::Pair; using ::testing::UnorderedElementsAre; -namespace folly { -std::ostream& operator<<(std::ostream& os, const LogCategoryConfig& config) { - os << logLevelToString(config.level); - if (!config.inheritParentLevel) { - os << "!"; - } - if (config.handlers.hasValue()) { - os << ":" << join(",", config.handlers.value()); - } - return os; -} - -std::ostream& operator<<(std::ostream& os, const LogHandlerConfig& config) { - os << (config.type ? 
config.type.value() : "[no type]"); - bool first = true; - for (const auto& opt : config.options) { - if (!first) { - os << ","; - } else { - os << ":"; - first = false; - } - os << opt.first << "=" << opt.second; - } - return os; -} -} // namespace folly - TEST(LogConfig, parseBasic) { auto config = parseLogConfig(""); EXPECT_THAT(config.getCategoryConfigs(), UnorderedElementsAre()); diff --git a/folly/experimental/logging/test/ConfigUpdateTest.cpp b/folly/experimental/logging/test/ConfigUpdateTest.cpp index 68c85f5e3d2..96e192999a0 100644 --- a/folly/experimental/logging/test/ConfigUpdateTest.cpp +++ b/folly/experimental/logging/test/ConfigUpdateTest.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -50,39 +51,6 @@ auto MatchLogHandler(const LogHandlerConfig& config) { } // namespace -namespace folly { -/** - * Print TestLogHandler objects nicely in test failure messages - */ -std::ostream& operator<<( - std::ostream& os, - const std::shared_ptr& handler) { - auto configHandler = std::dynamic_pointer_cast(handler); - if (!configHandler) { - os << "unknown handler type"; - return os; - } - - auto config = configHandler->getConfig(); - os << "ConfigHandler(" << (config.type ? config.type.value() : "[no type]"); - for (const auto& entry : config.options) { - os << ", " << entry.first << "=" << entry.second; - } - os << ")"; - return os; -} - -std::ostream& operator<<(std::ostream& os, const LogConfig& config) { - os << toPrettyJson(logConfigToDynamic(config)); - return os; -} - -std::ostream& operator<<(std::ostream& os, const LogHandlerConfig& config) { - os << toPrettyJson(logConfigToDynamic(config)); - return os; -} -} // namespace folly - TEST(ConfigUpdate, updateLogLevels) { LoggerDB db{LoggerDB::TESTING}; db.updateConfig(parseLogConfig("foo.bar=dbg5")); @@ -377,3 +345,44 @@ TEST(ConfigUpdate, getConfigAnonymousHandlers) { "anonymousHandler2=foo: abc=xyz"), db.getConfig()); } + +TEST(ConfigUpdate, getFullConfig) { + LoggerDB db{LoggerDB::TESTING}; + db.registerHandlerFactory( + std::make_unique("handlerA")); + db.registerHandlerFactory( + std::make_unique("handlerB")); + EXPECT_EQ(parseLogConfig(".:=ERROR:"), db.getConfig()); + + db.getCategory("src.libfoo.foo.c"); + db.getCategory("src.libfoo.foo.h"); + db.getCategory("src.libfoo.bar.h"); + db.getCategory("src.libfoo.bar.c"); + db.getCategory("test.foo.test.c"); + + db.updateConfig( + parseLogConfig(".=ERR:stdout," + "src.libfoo=dbg5; " + "stdout=handlerA:stream=stdout")); + EXPECT_EQ( + parseLogConfig(".:=ERR:stdout," + "src.libfoo=dbg5:; " + "stdout=handlerA:stream=stdout"), + db.getConfig()); + EXPECT_EQ( + parseLogConfig(".:=ERR:stdout," + "src=FATAL:, " + "src.libfoo=dbg5:, " + "src.libfoo.foo=FATAL:, " + "src.libfoo.foo.c=FATAL:, " + "src.libfoo.foo.h=FATAL:, " + "src.libfoo.bar=FATAL:, " + "src.libfoo.bar.c=FATAL:, " + "src.libfoo.bar.h=FATAL:, " + "test=FATAL:, " + "test.foo=FATAL:, " + "test.foo.test=FATAL:, " + "test.foo.test.c=FATAL:; " + "stdout=handlerA:stream=stdout"), + db.getFullConfig()); +} diff --git a/folly/experimental/logging/test/InitTest.cpp b/folly/experimental/logging/test/InitTest.cpp new file mode 100644 index 00000000000..fe41c14dc7d --- /dev/null +++ b/folly/experimental/logging/test/InitTest.cpp @@ -0,0 +1,70 @@ +/* + * Copyright 2004-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include + +#include +#include +#include +#include +#include + +using folly::LoggerDB; +using folly::parseLogConfig; + +namespace { +// A counter to help confirm that our getBaseLoggingConfigCalled() was invoked +// rather than the default implementation that folly exports as a weak symbol. +unsigned int getBaseLoggingConfigCalled; +} // namespace + +namespace folly { + +const char* getBaseLoggingConfig() { + ++getBaseLoggingConfigCalled; + return "folly=INFO; default:stream=stdout"; +} + +} // namespace folly + +TEST(Init, checkConfig) { + // Before we call initLogging(), the LoggerDB will have the default + // configuration provided by initializeLoggerDB(). + auto initialConfig = folly::LoggerDB::get().getConfig(); + EXPECT_EQ(0, getBaseLoggingConfigCalled); + EXPECT_EQ( + parseLogConfig(".:=WARN:default; " + "default=stream:stream=stderr,async=false"), + LoggerDB::get().getConfig()); + + // Call initLogging() + // Make sure it merges the supplied config argument with our custom + // base configuration. + folly::initLogging(".=ERROR,folly.logging=DBG7"); + EXPECT_EQ(1, getBaseLoggingConfigCalled); + EXPECT_EQ( + parseLogConfig(".:=ERROR:default,folly=INFO:,folly.logging=DBG7:; " + "default=stream:stream=stdout,async=false"), + LoggerDB::get().getConfig()); +} + +// We use our custom main() to ensure that folly::initLogging() has +// not been called yet when we start running the tests. 
+int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + gflags::ParseCommandLineFlags(&argc, &argv, /* remove_flags = */ true); + + return RUN_ALL_TESTS(); +} diff --git a/folly/experimental/logging/test/LoggerTest.cpp b/folly/experimental/logging/test/LoggerTest.cpp index 483ec07ddf5..8f4ac8275e2 100644 --- a/folly/experimental/logging/test/LoggerTest.cpp +++ b/folly/experimental/logging/test/LoggerTest.cpp @@ -21,6 +21,7 @@ #include #include #include +#include using namespace folly; using std::make_shared; @@ -349,3 +350,29 @@ TEST_F(LoggerTest, logMacros) { R"(format string: "whoops: \{\}, \{\}", arguments: \((.*: )?5\))")); messages.clear(); } + +TEST_F(LoggerTest, logRawMacros) { + Logger foobar{&db_, "test.foo.bar"}; + db_.setLevel("test.foo", LogLevel::DBG2); + + auto& messages = handler_->getMessages(); + + FB_LOG_RAW(foobar, LogLevel::DBG1, "src/some/file.c", 1234, "hello", ' ', 1) + << " world"; + ASSERT_EQ(1, messages.size()); + EXPECT_EQ("hello 1 world", messages[0].first.getMessage()); + EXPECT_EQ("src/some/file.c", messages[0].first.getFileName()); + EXPECT_EQ("file.c", messages[0].first.getFileBaseName()); + EXPECT_EQ(1234, messages[0].first.getLineNumber()); + messages.clear(); + + auto level = LogLevel::DBG1; + FB_LOGF_RAW(foobar, level, "test/mytest.c", 99, "{}: num={}", "test", 42) + << " plus extra stuff"; + ASSERT_EQ(1, messages.size()); + EXPECT_EQ("test: num=42 plus extra stuff", messages[0].first.getMessage()); + EXPECT_EQ("test/mytest.c", messages[0].first.getFileName()); + EXPECT_EQ("mytest.c", messages[0].first.getFileBaseName()); + EXPECT_EQ(99, messages[0].first.getLineNumber()); + messages.clear(); +} diff --git a/folly/experimental/observer/Observer.h b/folly/experimental/observer/Observer.h index c54db7bb772..2f8c3a85d50 100644 --- a/folly/experimental/observer/Observer.h +++ b/folly/experimental/observer/Observer.h @@ -140,6 +140,9 @@ class CallbackHandle { std::shared_ptr context_; }; +template +class ObserverCreator; + template class Observer { public: diff --git a/folly/fibers/FiberManagerMap.cpp b/folly/fibers/FiberManagerMap.cpp index 9d8a79f2920..f7b6887d119 100644 --- a/folly/fibers/FiberManagerMap.cpp +++ b/folly/fibers/FiberManagerMap.cpp @@ -99,14 +99,14 @@ class ThreadLocalCache { static void erase(EventBaseT& evb) { for (auto& localInstance : instance().accessAllThreads()) { - SYNCHRONIZED(info, localInstance.eraseInfo_) { + localInstance.eraseInfo_.withWLock([&](auto& info) { if (info.eraseList.size() >= kEraseListMaxSize) { info.eraseAll = true; } else { info.eraseList.push_back(&evb); } localInstance.eraseRequested_ = true; - } + }); } } @@ -143,7 +143,7 @@ class ThreadLocalCache { return; } - SYNCHRONIZED(info, eraseInfo_) { + eraseInfo_.withWLock([&](auto& info) { if (info.eraseAll) { map_.clear(); } else { @@ -155,7 +155,7 @@ class ThreadLocalCache { info.eraseList.clear(); info.eraseAll = false; eraseRequested_ = false; - } + }); } std::unordered_map map_; diff --git a/folly/fibers/GuardPageAllocator.cpp b/folly/fibers/GuardPageAllocator.cpp index 98619190ace..1829cffd942 100644 --- a/folly/fibers/GuardPageAllocator.cpp +++ b/folly/fibers/GuardPageAllocator.cpp @@ -90,9 +90,7 @@ class StackCache { auto p = freeList_.back().first; if (!freeList_.back().second) { PCHECK(0 == ::mprotect(p, pagesize(), PROT_NONE)); - SYNCHRONIZED(pages, protectedPages()) { - pages.insert(reinterpret_cast(p)); - } + protectedPages().wlock()->insert(reinterpret_cast(p)); } freeList_.pop_back(); @@ -118,12 +116,13 @@ class StackCache { 
assert(storage_); auto as = allocSize(size); - auto p = limit + size - as; - if (p < storage_ || p >= storage_ + allocSize_ * kNumGuarded) { + if (std::less_equal{}(limit, storage_) || + std::less_equal{}(storage_ + allocSize_ * kNumGuarded, limit)) { /* not mine */ return false; } + auto p = limit + size - as; assert(as == allocSize_); assert((p - storage_) % allocSize_ == 0); freeList_.emplace_back(p, /* protected= */ true); @@ -132,25 +131,25 @@ class StackCache { ~StackCache() { assert(storage_); - SYNCHRONIZED(pages, protectedPages()) { + protectedPages().withWLock([&](auto& pages) { for (const auto& item : freeList_) { pages.erase(reinterpret_cast(item.first)); } - } + }); PCHECK(0 == ::munmap(storage_, allocSize_ * kNumGuarded)); } static bool isProtected(intptr_t addr) { // Use a read lock for reading. - SYNCHRONIZED_CONST(pages, protectedPages()) { + return protectedPages().withRLock([&](auto const& pages) { for (const auto& page : pages) { intptr_t pageEnd = intptr_t(page + pagesize()); if (page <= addr && addr < pageEnd) { return true; } } - } - return false; + return false; + }); } private: diff --git a/folly/fibers/Semaphore.cpp b/folly/fibers/Semaphore.cpp index 6215a3efd12..4ae26b13c94 100644 --- a/folly/fibers/Semaphore.cpp +++ b/folly/fibers/Semaphore.cpp @@ -20,23 +20,24 @@ namespace fibers { bool Semaphore::signalSlow() { // If we signalled a release, notify the waitlist - SYNCHRONIZED(waitList_) { - auto testVal = tokens_.load(std::memory_order_acquire); - if (testVal != 0) { - return false; - } + auto waitListLock = waitList_.wlock(); + auto& waitList = *waitListLock; - if (waitList_.empty()) { - // If the waitlist is now empty, ensure the token count increments - // No need for CAS here as we will always be under the mutex - CHECK(tokens_.compare_exchange_strong( - testVal, testVal + 1, std::memory_order_relaxed)); - } else { - // trigger waiter if there is one - waitList_.front()->post(); - waitList_.pop(); - } - } // SYNCHRONIZED(waitList_) + auto testVal = tokens_.load(std::memory_order_acquire); + if (testVal != 0) { + return false; + } + + if (waitList.empty()) { + // If the waitlist is now empty, ensure the token count increments + // No need for CAS here as we will always be under the mutex + CHECK(tokens_.compare_exchange_strong( + testVal, testVal + 1, std::memory_order_relaxed)); + } else { + // trigger waiter if there is one + waitList.front()->post(); + waitList.pop(); + } return true; } @@ -59,13 +60,16 @@ bool Semaphore::waitSlow() { // Slow path, create a baton and acquire a mutex to update the wait list folly::fibers::Baton waitBaton; - SYNCHRONIZED(waitList_) { + { + auto waitListLock = waitList_.wlock(); + auto& waitList = *waitListLock; + auto testVal = tokens_.load(std::memory_order_acquire); if (testVal != 0) { return false; } // prepare baton and add to queue - waitList_.push(&waitBaton); + waitList.push(&waitBaton); } // If we managed to create a baton, wait on it // This has to be done here so the mutex has been released diff --git a/folly/functional/test/ApplyTupleTest.cpp b/folly/functional/test/ApplyTupleTest.cpp index a8bf4b4bd53..b35e2e30583 100644 --- a/folly/functional/test/ApplyTupleTest.cpp +++ b/folly/functional/test/ApplyTupleTest.cpp @@ -272,10 +272,10 @@ TEST(ApplyTuple, MemberFunctionWithUniquePtr) { MemberFunc mf; mf.x = 234; - EXPECT_EQ(folly::applyTuple(&MemberFunc::getX, - std::make_tuple(std::unique_ptr( - new MemberFunc(mf)))), - 234); + EXPECT_EQ( + folly::applyTuple(&MemberFunc::getX, + 
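The Semaphore rewrite above only swaps SYNCHRONIZED blocks for explicit wlock() scopes; the external contract is unchanged. A hedged usage sketch:

void boundedWork(folly::fibers::Semaphore& sem) {
  sem.wait();   // fast path decrements tokens_; waitSlow() parks on a Baton
  // ... work bounded by the semaphore's token count ...
  sem.signal(); // fast path increments tokens_; signalSlow() posts one waiter
}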
std::make_tuple(std::make_unique(mf))), + 234); } TEST(ApplyTuple, Array) { diff --git a/folly/futures/Future-inl.h b/folly/futures/Future-inl.h index a9fc1252e18..6f4ec265f8b 100644 --- a/folly/futures/Future-inl.h +++ b/folly/futures/Future-inl.h @@ -294,8 +294,10 @@ FutureBase::thenImplementation( p.core_->setInterruptHandlerNoLock(this->core_->getInterruptHandler()); // grab the Future now before we lose our handle on the Promise - auto f = p.getFuture(); - f.core_->setExecutorNoLock(this->getExecutor()); + auto sf = p.getSemiFuture(); + sf.core_->setExecutor(this->getExecutor()); + auto f = Future(sf.core_); + sf.core_ = nullptr; /* This is a bit tricky. @@ -340,6 +342,21 @@ FutureBase::thenImplementation( return f; } +// Pass through a simple future as it needs no deferral adaptation +template +Future chainExecutor(Executor*, Future&& f) { + return std::move(f); +} + +// Correctly chain a SemiFuture for deferral +template +Future chainExecutor(Executor* e, SemiFuture&& f) { + if (!e) { + e = &folly::InlineExecutor::instance(); + } + return std::move(f).via(e); +} + // Variant: returns a Future // e.g. f.then([](T&& t){ return makeFuture(t); }); template @@ -356,25 +373,33 @@ FutureBase::thenImplementation( p.core_->setInterruptHandlerNoLock(this->core_->getInterruptHandler()); // grab the Future now before we lose our handle on the Promise - auto f = p.getFuture(); - f.core_->setExecutorNoLock(this->getExecutor()); - - this->setCallback_( - [state = futures::detail::makeCoreCallbackState( - std::move(p), std::forward(func))](Try&& t) mutable { - if (!isTry && t.hasException()) { - state.setException(std::move(t.exception())); - } else { - auto tf2 = state.tryInvoke(t.template get()...); - if (tf2.hasException()) { - state.setException(std::move(tf2.exception())); - } else { - tf2->setCallback_([p = state.stealPromise()](Try && b) mutable { - p.setTry(std::move(b)); - }); - } - } - }); + auto sf = p.getSemiFuture(); + auto* e = this->getExecutor(); + sf.core_->setExecutor(e); + auto f = Future(sf.core_); + sf.core_ = nullptr; + + this->setCallback_([state = futures::detail::makeCoreCallbackState( + std::move(p), std::forward(func))]( + Try&& t) mutable { + if (!isTry && t.hasException()) { + state.setException(std::move(t.exception())); + } else { + // Ensure that if function returned a SemiFuture we correctly chain + // potential deferral. 
+ auto tf2 = state.tryInvoke(t.template get()...); + if (tf2.hasException()) { + state.setException(std::move(tf2.exception())); + } else { + auto statePromise = state.stealPromise(); + auto tf3 = + chainExecutor(statePromise.core_->getExecutor(), *std::move(tf2)); + tf3.setCallback_([p2 = std::move(statePromise)](Try&& b) mutable { + p2.setTry(std::move(b)); + }); + } + } + }); return f; } @@ -686,9 +711,15 @@ inline Future SemiFuture::via(Executor* executor, int8_t priority) && { return newFuture; } +template +inline Future SemiFuture::toUnsafeFuture() && { + return std::move(*this).via(&folly::InlineExecutor::instance()); +} + template template -SemiFuture::Return::value_type> +SemiFuture< + typename futures::detail::deferCallableResult::Return::value_type> SemiFuture::defer(F&& func) && { DeferredExecutor* deferredExecutor = getDeferredExecutor(); if (!deferredExecutor) { @@ -704,6 +735,93 @@ SemiFuture::defer(F&& func) && { return sf; } +template +template +SemiFuture::Return:: + value_type> +SemiFuture::deferValue(F&& func) && { + return std::move(*this).defer( + [f = std::forward(func)](folly::Try&& t) mutable { + return f(t.template get< + false, + typename futures::detail::Extract::FirstArg>()); + }); +} + +template +template +typename std::enable_if< + !futures::detail::callableWith::value && + !futures::detail::callableWith::value && + !futures::detail::Extract::ReturnsFuture::value, + SemiFuture>::type +SemiFuture::deferError(F&& func) { + using Exn = + std::remove_reference_t::FirstArg>; + return std::move(*this).defer( + [func = std::forward(func)](Try&& t) mutable { + if (auto e = t.template tryGetExceptionObject()) { + return makeSemiFuture(makeTryWith([&]() { return func(*e); })); + } else { + return makeSemiFuture(std::move(t)); + } + }); +} + +template +template +typename std::enable_if< + !futures::detail::callableWith::value && + !futures::detail::callableWith::value && + futures::detail::Extract::ReturnsFuture::value, + SemiFuture>::type +SemiFuture::deferError(F&& func) { + using Exn = + std::remove_reference_t::FirstArg>; + return std::move(*this).defer( + [func = std::forward(func)](Try&& t) mutable { + if (auto e = t.template tryGetExceptionObject()) { + return func(*e); + } else { + return makeSemiFuture(std::move(t)); + } + }); +} + +template +template +typename std::enable_if< + futures::detail::callableWith::value && + !futures::detail::Extract::ReturnsFuture::value, + SemiFuture>::type +SemiFuture::deferError(F&& func) { + return std::move(*this).defer( + [func = std::forward(func)](Try t) mutable { + if (t.hasException()) { + return makeSemiFuture(func(std::move(t.exception()))); + } else { + return makeSemiFuture(std::move(t)); + } + }); +} + +template +template +typename std::enable_if< + futures::detail::callableWith::value && + futures::detail::Extract::ReturnsFuture::value, + SemiFuture>::type +SemiFuture::deferError(F&& func) { + return std::move(*this).defer( + [func = std::forward(func)](Try t) mutable { + if (t.hasException()) { + return func(std::move(t.exception())); + } else { + return makeSemiFuture(std::move(t)); + } + }); +} + template Future Future::makeEmpty() { return Future(futures::detail::EmptyConstruct{}); @@ -780,13 +898,18 @@ template inline Future Future::via(Executor* executor, int8_t priority) & { this->throwIfInvalid(); Promise p; - auto f = p.getFuture(); + auto sf = p.getSemiFuture(); auto func = [p = std::move(p)](Try&& t) mutable { p.setTry(std::move(t)); }; using R = futures::detail::callableResult; this->template 
thenImplementation( std::move(func), typename R::Arg()); + // Construct future from semifuture manually because this may not have + // an executor set due to legacy code. This means we can bypass the executor + // check in SemiFuture::via + auto f = Future(sf.core_); + sf.core_ = nullptr; return std::move(f).via(executor, priority); } @@ -826,7 +949,7 @@ Future::onError(F&& func) { Promise p; p.core_->setInterruptHandlerNoLock(this->core_->getInterruptHandler()); - auto f = p.getFuture(); + auto sf = p.getSemiFuture(); this->setCallback_( [state = futures::detail::makeCoreCallbackState( @@ -838,7 +961,10 @@ Future::onError(F&& func) { } }); - return f; + // Allow for applying to future with null executor while this is still + // possible. + // TODO(T26801487): Should have an executor + return std::move(sf).via(&folly::InlineExecutor::instance()); } // onError where the callback returns Future @@ -859,7 +985,7 @@ Future::onError(F&& func) { Exn; Promise p; - auto f = p.getFuture(); + auto sf = p.getSemiFuture(); this->setCallback_( [state = futures::detail::makeCoreCallbackState( @@ -878,7 +1004,10 @@ Future::onError(F&& func) { } }); - return f; + // Allow for applying to future with null executor while this is still + // possible. + // TODO(T26801487): Should have an executor + return std::move(sf).via(&folly::InlineExecutor::instance()); } template @@ -910,7 +1039,7 @@ Future::onError(F&& func) { "Return type of onError callback must be T or Future"); Promise p; - auto f = p.getFuture(); + auto sf = p.getSemiFuture(); this->setCallback_( [state = futures::detail::makeCoreCallbackState( std::move(p), std::forward(func))](Try t) mutable { @@ -928,7 +1057,10 @@ Future::onError(F&& func) { } }); - return f; + // Allow for applying to future with null executor while this is still + // possible. + // TODO(T26801487): Should have an executor + return std::move(sf).via(&folly::InlineExecutor::instance()); } // onError(exception_wrapper) that returns T @@ -945,7 +1077,7 @@ Future::onError(F&& func) { "Return type of onError callback must be T or Future"); Promise p; - auto f = p.getFuture(); + auto sf = p.getSemiFuture(); this->setCallback_( [state = futures::detail::makeCoreCallbackState( std::move(p), std::forward(func))](Try&& t) mutable { @@ -957,7 +1089,10 @@ Future::onError(F&& func) { } }); - return f; + // Allow for applying to future with null executor while this is still + // possible. + // TODO(T26801487): Should have an executor + return std::move(sf).via(&folly::InlineExecutor::instance()); } template @@ -1047,6 +1182,7 @@ void mapSetCallback(InputIterator first, InputIterator last, F func) { // collectAll (variadic) +// TODO(T26439406): Make return SemiFuture template typename futures::detail::CollectAllVariadicContext< typename std::decay::type::value_type...>::type @@ -1055,11 +1191,12 @@ collectAll(Fs&&... 
fs) { typename std::decay::type::value_type...>>(); futures::detail::collectVariadicHelper< futures::detail::CollectAllVariadicContext>(ctx, std::forward(fs)...); - return ctx->p.getFuture(); + return ctx->p.getSemiFuture().via(&folly::InlineExecutor::instance()); } // collectAll (iterator) +// TODO(T26439406): Make return SemiFuture template Future< std::vector< @@ -1082,7 +1219,7 @@ collectAll(InputIterator first, InputIterator last) { mapSetCallback(first, last, [ctx](size_t i, Try&& t) { ctx->results[i] = std::move(t); }); - return ctx->p.getFuture(); + return ctx->p.getSemiFuture().via(&folly::InlineExecutor::instance()); } // collect (iterator) @@ -1130,6 +1267,7 @@ struct CollectContext { } // namespace detail } // namespace futures +// TODO(T26439406): Make return SemiFuture template Future::value_type::value_type>::Result> @@ -1148,11 +1286,12 @@ collect(InputIterator first, InputIterator last) { ctx->setPartialResult(i, t); } }); - return ctx->p.getFuture(); + return ctx->p.getSemiFuture().via(&folly::InlineExecutor::instance()); } // collect (variadic) +// TODO(T26439406): Make return SemiFuture template typename futures::detail::CollectVariadicContext< typename std::decay::type::value_type...>::type @@ -1161,11 +1300,12 @@ collect(Fs&&... fs) { typename std::decay::type::value_type...>>(); futures::detail::collectVariadicHelper< futures::detail::CollectVariadicContext>(ctx, std::forward(fs)...); - return ctx->p.getFuture(); + return ctx->p.getSemiFuture().via(&folly::InlineExecutor::instance()); } // collectAny (iterator) +// TODO(T26439406): Make return SemiFuture template Future< std::pairp.setValue(std::make_pair(i, std::move(t))); } }); - return ctx->p.getFuture(); + return ctx->p.getSemiFuture().via(&folly::InlineExecutor::instance()); } // collectAnyWithoutException (iterator) +// TODO(T26439406): Make return SemiFuture template Futurep.setException(t.exception()); } }); - return ctx->p.getFuture(); + return ctx->p.getSemiFuture().via(&folly::InlineExecutor::instance()); } // collectN (iterator) +// TODO(T26439406): Make return SemiFuture template Future::value_type::value_type>>>> @@ -1257,7 +1399,7 @@ collectN(InputIterator first, InputIterator last, size_t n) { }); } - return ctx->p.getFuture(); + return ctx->p.getSemiFuture().via(&folly::InlineExecutor::instance()); } // reduce (iterator) @@ -1350,7 +1492,7 @@ window(Executor* executor, Collection input, F func, size_t n) { std::vector> futures; futures.reserve(ctx->promises.size()); for (auto& promise : ctx->promises) { - futures.emplace_back(promise.getFuture()); + futures.emplace_back(promise.getSemiFuture().via(executor)); } return futures; @@ -1375,6 +1517,7 @@ Future Future::reduce(I&& initial, F&& func) { // unorderedReduce (iterator) +// TODO(T26439406): Make return SemiFuture template Future unorderedReduce(It first, It last, T initial, F func) { if (first == last) { @@ -1423,7 +1566,7 @@ Future unorderedReduce(It first, It last, T initial, F func) { } }); - return ctx->promise_.getFuture(); + return ctx->promise_.getSemiFuture().via(&folly::InlineExecutor::instance()); } // within @@ -1494,7 +1637,9 @@ Future Future::within(Duration dur, E e, Timekeeper* tk) { } }); - return ctx->promise.getFuture().via(this->getExecutor()); + auto* currentExecutor = this->getExecutor(); + return ctx->promise.getSemiFuture().via( + currentExecutor ? 
currentExecutor : &folly::InlineExecutor::instance()); } // delayed @@ -1524,6 +1669,19 @@ void waitImpl(FutureType& f) { assert(f.isReady()); } +template +void convertFuture(SemiFuture&& sf, Future& f) { + // Carry executor from f, inserting an inline executor if it did not have one + auto* currentExecutor = f.getExecutor(); + f = std::move(sf).via( + currentExecutor ? currentExecutor : &folly::InlineExecutor::instance()); +} + +template +void convertFuture(SemiFuture&& sf, SemiFuture& f) { + f = std::move(sf); +} + template void waitImpl(FutureType& f, Duration dur) { // short-circuit if there's nothing to do @@ -1532,13 +1690,13 @@ void waitImpl(FutureType& f, Duration dur) { } Promise promise; - auto ret = promise.getFuture(); + auto ret = promise.getSemiFuture(); auto baton = std::make_shared(); f.setCallback_([baton, promise = std::move(promise)](Try&& t) mutable { promise.setTry(std::move(t)); baton->post(); }); - f = std::move(ret); + convertFuture(std::move(ret), f); if (baton->try_wait_for(dur)) { assert(f.isReady()); } diff --git a/folly/futures/Future-pre.h b/folly/futures/Future-pre.h index bc046f91852..3ffae6b6d14 100644 --- a/folly/futures/Future-pre.h +++ b/folly/futures/Future-pre.h @@ -130,6 +130,32 @@ struct callableResult { typedef Future Return; }; +template +struct deferCallableResult { + typedef typename std::conditional< + callableWith::value, + detail::argResult, + typename std::conditional< + callableWith&&>::value, + detail::argResult&&>, + detail::argResult&>>::type>::type Arg; + typedef isFutureOrSemiFuture ReturnsFuture; + typedef Future Return; +}; + +template +struct deferValueCallableResult { + typedef typename std::conditional< + callableWith::value, + detail::argResult, + typename std::conditional< + callableWith::value, + detail::argResult, + detail::argResult>::type>::type Arg; + typedef isFutureOrSemiFuture ReturnsFuture; + typedef Future Return; +}; + template struct Extract : Extract { }; diff --git a/folly/futures/Future.h b/folly/futures/Future.h index 799573cfeef..63c83d1b9a5 100644 --- a/folly/futures/Future.h +++ b/folly/futures/Future.h @@ -48,6 +48,9 @@ class Future; template class SemiFuture; +template +class FutureSplitter; + namespace futures { namespace detail { template @@ -193,6 +196,8 @@ class FutureBase { typename std::enable_if::type thenImplementation(F&& func, futures::detail::argResult); }; +template +void convertFuture(SemiFuture&& sf, Future& f); } // namespace detail } // namespace futures @@ -310,6 +315,7 @@ class SemiFuture : private futures::detail::FutureBase { /** * Defer work to run on the consumer of the future. + * Function must take a Try as a parameter. * This work will be run either on an executor that the caller sets on the * SemiFuture, or inline with the call to .get(). * NB: This is a custom method because boost-blocking executors are a * special-case @@ -318,15 +324,82 @@ * of driveable executor here. */ template - SemiFuture::Return::value_type> + SemiFuture< + typename futures::detail::deferCallableResult::Return::value_type> defer(F&& func) &&; + /** + * Defer for functions taking a T rather than a Try. + */ + template + SemiFuture::Return:: + value_type> + deferValue(F&& func) &&; + + /// Set an error callback for this SemiFuture. The callback should take a + /// single argument of the type that you want to catch, and should return a + /// value of the same type as this SemiFuture, or a SemiFuture of that type + /// (see overload below).
For instance, + /// + /// makeSemiFuture() + /// .defer([] { + /// throw std::runtime_error("oh no!"); + /// return 42; + /// }) + /// .deferError([] (std::runtime_error& e) { + /// LOG(INFO) << "std::runtime_error: " << e.what(); + /// return -1; // or makeSemiFuture(-1) + /// }); + template + typename std::enable_if< + !futures::detail::callableWith::value && + !futures::detail::callableWith::value && + !futures::detail::Extract::ReturnsFuture::value, + SemiFuture>::type + deferError(F&& func); + + /// Overload of deferError where the error callback returns a Future + template + typename std::enable_if< + !futures::detail::callableWith::value && + !futures::detail::callableWith::value && + futures::detail::Extract::ReturnsFuture::value, + SemiFuture>::type + deferError(F&& func); + + /// Overload of deferError that takes exception_wrapper and returns T + template + typename std::enable_if< + futures::detail::callableWith::value && + !futures::detail::Extract::ReturnsFuture::value, + SemiFuture>::type + deferError(F&& func); + + /// Overload of deferError that takes exception_wrapper and returns Future + template + typename std::enable_if< + futures::detail::callableWith::value && + futures::detail::Extract::ReturnsFuture::value, + SemiFuture>::type + deferError(F&& func); + + /// Return a future that completes inline, as if the future had no executor. + /// Intended for porting legacy code without behavioural change, and for rare + /// cases where this is really the intended behaviour. + /// Future is unsafe in the sense that the executor it completes on is + /// non-deterministic in the standard case. + /// For new code, or to update code that temporarily uses this, please + /// use via and pass a meaningful executor. + inline Future toUnsafeFuture() &&; + private: friend class Promise; template friend class futures::detail::FutureBase; template friend class SemiFuture; + template + friend class Future; using typename Base::corePtr; using Base::setExecutor; @@ -777,6 +850,8 @@ class Future : private futures::detail::FutureBase { friend class Future; template friend class SemiFuture; + template + friend class FutureSplitter; using Base::setExecutor; using Base::throwIfInvalid; @@ -812,8 +887,70 @@ class Future : private futures::detail::FutureBase { /// predicate behaves like std::function template friend Future whileDo(P&& predicate, F&& thunk); + + template + friend void futures::detail::convertFuture( + SemiFuture&& sf, + Future& f); }; } // namespace folly +#if FOLLY_HAS_COROUTINES +#include + +namespace folly { +namespace detail { +template +class FutureAwaitable { + public: + explicit FutureAwaitable(folly::Future&& future) + : future_(std::move(future)) {} + + bool await_ready() const { + return future_.isReady(); + } + + T await_resume() { + return std::move(future_.value()); + } + + void await_suspend(std::experimental::coroutine_handle<> h) { + future_.setCallback_([h](Try&&) mutable { h(); }); + } + + private: + folly::Future future_; +}; + +template +class FutureRefAwaitable { + public: + explicit FutureRefAwaitable(folly::Future& future) : future_(future) {} + + bool await_ready() const { + return future_.isReady(); + } + + T await_resume() { + return std::move(future_.value()); + } + + void await_suspend(std::experimental::coroutine_handle<> h) { + future_.setCallback_([h](Try&&) mutable { h(); }); + } + + private: + folly::Future& future_; +}; +} // namespace detail +} // namespace folly + +template +folly::detail::FutureAwaitable +/* implicit */ operator 
co_await(folly::Future& future) { + return folly::detail::FutureRefAwaitable(future); +} +#endif + #include diff --git a/folly/futures/FutureSplitter.h b/folly/futures/FutureSplitter.h index 6e6cc7ce550..84b60be19f3 100644 --- a/folly/futures/FutureSplitter.h +++ b/folly/futures/FutureSplitter.h @@ -45,8 +45,9 @@ class FutureSplitter { * Provide a way to split a Future. */ explicit FutureSplitter(Future&& future) - : promise_(std::make_shared>()) { - future.then([promise = promise_](Try && theTry) { + : promise_(std::make_shared>()), + e_(getExecutorFrom(future)) { + future.then([promise = promise_](Try&& theTry) { promise->setTry(std::move(theTry)); }); } @@ -58,11 +59,29 @@ class FutureSplitter { if (promise_ == nullptr) { throwNoFutureInSplitter(); } - return promise_->getFuture(); + return promise_->getSemiFuture().via(e_); + } + + /** + * This can be called an unlimited number of times per FutureSplitter. + */ + SemiFuture getSemiFuture() { + if (promise_ == nullptr) { + throwNoFutureInSplitter(); + } + return promise_->getSemiFuture(); } private: std::shared_ptr> promise_; + Executor* e_ = nullptr; + + static Executor* getExecutorFrom(Future& f) { + // If the passed future had a null executor, use an inline executor + // to ensure that .via is safe + auto* e = f.getExecutor(); + return e ? e : &folly::InlineExecutor::instance(); + } }; /** diff --git a/folly/futures/detail/Core.h b/folly/futures/detail/Core.h index 6adc841b373..48abec367c2 100644 --- a/folly/futures/detail/Core.h +++ b/folly/futures/detail/Core.h @@ -24,14 +24,14 @@ #include #include -#include #include #include #include #include #include #include -#include +#include +#include #include @@ -161,7 +161,7 @@ class Core final { case State::OnlyCallback: case State::Armed: case State::Done: - std::__throw_logic_error("setCallback called twice"); + throw_exception("setCallback called twice"); FSM_END // we could always call this, it is an optimization to only call it when @@ -188,7 +188,7 @@ class Core final { case State::OnlyResult: case State::Armed: case State::Done: - std::__throw_logic_error("setResult called twice"); + throw_exception("setResult called twice"); FSM_END if (transitionToArmed) { @@ -226,17 +226,12 @@ class Core final { /// May call from any thread bool isActive() { return active_.load(std::memory_order_acquire); } - /// Call only from Future thread + /// Call only from Future thread, either before attaching a callback or after + /// the callback has already been invoked, but not concurrently with anything + /// which might trigger invocation of the callback void setExecutor(Executor* x, int8_t priority = Executor::MID_PRI) { - if (!executorLock_.try_lock()) { - executorLock_.lock(); - } - executor_ = x; - priority_ = priority; - executorLock_.unlock(); - } - - void setExecutorNoLock(Executor* x, int8_t priority = Executor::MID_PRI) { + auto s = fsm_.getState(); + DCHECK(s == State::Start || s == State::OnlyResult || s == State::Done); executor_ = x; priority_ = priority; } @@ -337,16 +332,7 @@ class Core final { void doCallback() { Executor* x = executor_; - // initialize, solely to appease clang's -Wconditional-uninitialized - int8_t priority = 0; - if (x) { - if (!executorLock_.try_lock()) { - executorLock_.lock(); - } - x = executor_; - priority = priority_; - executorLock_.unlock(); - } + int8_t priority = priority_; if (x) { exception_wrapper ew; @@ -426,7 +412,6 @@ class Core final { std::atomic active_ {true}; std::atomic interruptHandlerSet_ {false}; folly::MicroSpinLock interruptLock_ {0}; - 
folly::MicroSpinLock executorLock_ {0}; int8_t priority_ {-1}; Executor* executor_ {nullptr}; std::shared_ptr context_ {nullptr}; diff --git a/folly/futures/detail/FSM.h b/folly/futures/detail/FSM.h index 04c57d118c4..cecb6f4aeda 100644 --- a/folly/futures/detail/FSM.h +++ b/folly/futures/detail/FSM.h @@ -19,7 +19,7 @@ #include #include -#include +#include namespace folly { namespace futures { diff --git a/folly/futures/test/FutureSplitterTest.cpp b/folly/futures/test/FutureSplitterTest.cpp index 4cca54a40ef..bdd55111911 100644 --- a/folly/futures/test/FutureSplitterTest.cpp +++ b/folly/futures/test/FutureSplitterTest.cpp @@ -21,7 +21,40 @@ using namespace folly; TEST(FutureSplitter, splitFutureSuccess) { Promise p; - FutureSplitter sp(p.getFuture()); + folly::FutureSplitter sp( + p.getSemiFuture().via(&folly::InlineExecutor::instance())); + auto f1 = sp.getFuture(); + EXPECT_FALSE(f1.isReady()); + p.setValue(1); + EXPECT_TRUE(f1.isReady()); + EXPECT_TRUE(f1.hasValue()); + auto f2 = sp.getFuture(); + EXPECT_TRUE(f2.isReady()); + EXPECT_TRUE(f2.hasValue()); +} + +TEST(FutureSplitter, splitFutureSuccessSemiFuture) { + Promise p; + folly::FutureSplitter sp( + p.getSemiFuture().via(&folly::InlineExecutor::instance())); + auto f1 = sp.getSemiFuture(); + EXPECT_FALSE(f1.isReady()); + p.setValue(1); + EXPECT_TRUE(f1.isReady()); + EXPECT_TRUE(f1.hasValue()); + auto f2 = sp.getSemiFuture(); + EXPECT_TRUE(f2.isReady()); + EXPECT_TRUE(f2.hasValue()); +} + +TEST(FutureSplitter, splitFutureSuccessNullExecutor) { + Promise p; + auto sf = p.getSemiFuture(); + // Double via because a null executor to SemiFuture.via is invalid but we + // are testing a situation where we have a FutureSplitter from a future with + // a null executor to account for legacy code. + auto f = std::move(sf).via(&folly::InlineExecutor::instance()).via(nullptr); + folly::FutureSplitter sp(std::move(f)); auto f1 = sp.getFuture(); EXPECT_FALSE(f1.isReady()); p.setValue(1); @@ -34,8 +67,9 @@ TEST(FutureSplitter, splitFutureSuccess) { TEST(FutureSplitter, splitFutureCopyable) { Promise p; - FutureSplitter sp1(p.getFuture()); - FutureSplitter sp2(sp1); + folly::FutureSplitter sp1( + p.getSemiFuture().via(&folly::InlineExecutor::instance())); + folly::FutureSplitter sp2(sp1); auto f1 = sp1.getFuture(); EXPECT_FALSE(f1.isReady()); p.setValue(1); @@ -44,7 +78,7 @@ TEST(FutureSplitter, splitFutureCopyable) { auto f2 = sp2.getFuture(); EXPECT_TRUE(f2.isReady()); EXPECT_TRUE(f2.hasValue()); - FutureSplitter sp3(sp1); + folly::FutureSplitter sp3(sp1); auto f3 = sp3.getFuture(); EXPECT_TRUE(f3.isReady()); EXPECT_TRUE(f3.hasValue()); @@ -52,9 +86,10 @@ TEST(FutureSplitter, splitFutureCopyable) { TEST(FutureSplitter, splitFutureMovable) { Promise p; - FutureSplitter sp1(p.getFuture()); + folly::FutureSplitter sp1( + p.getSemiFuture().via(&folly::InlineExecutor::instance())); auto f1 = sp1.getFuture(); - FutureSplitter sp2(std::move(sp1)); + folly::FutureSplitter sp2(std::move(sp1)); EXPECT_FALSE(f1.isReady()); p.setValue(1); EXPECT_TRUE(f1.isReady()); @@ -62,7 +97,7 @@ TEST(FutureSplitter, splitFutureMovable) { auto f2 = sp2.getFuture(); EXPECT_TRUE(f2.isReady()); EXPECT_TRUE(f2.hasValue()); - FutureSplitter sp3(std::move(sp2)); + folly::FutureSplitter sp3(std::move(sp2)); auto f3 = sp3.getFuture(); EXPECT_TRUE(f3.isReady()); EXPECT_TRUE(f3.hasValue()); @@ -70,8 +105,9 @@ TEST(FutureSplitter, splitFutureMovable) { TEST(FutureSplitter, splitFutureCopyAssignable) { Promise p; - FutureSplitter sp1(p.getFuture()); - FutureSplitter sp2{}; + 
folly::FutureSplitter sp1( + p.getSemiFuture().via(&folly::InlineExecutor::instance())); + folly::FutureSplitter sp2{}; sp2 = sp1; auto f1 = sp1.getFuture(); EXPECT_FALSE(f1.isReady()); @@ -81,7 +117,7 @@ TEST(FutureSplitter, splitFutureCopyAssignable) { auto f2 = sp2.getFuture(); EXPECT_TRUE(f2.isReady()); EXPECT_TRUE(f2.hasValue()); - FutureSplitter sp3(sp1); + folly::FutureSplitter sp3(sp1); auto f3 = sp3.getFuture(); EXPECT_TRUE(f3.isReady()); EXPECT_TRUE(f3.hasValue()); @@ -89,9 +125,10 @@ TEST(FutureSplitter, splitFutureCopyAssignable) { TEST(FutureSplitter, splitFutureMoveAssignable) { Promise p; - FutureSplitter sp1(p.getFuture()); + folly::FutureSplitter sp1( + p.getSemiFuture().via(&folly::InlineExecutor::instance())); auto f1 = sp1.getFuture(); - FutureSplitter sp2{}; + folly::FutureSplitter sp2{}; sp2 = std::move(sp1); EXPECT_FALSE(f1.isReady()); p.setValue(1); @@ -100,7 +137,7 @@ TEST(FutureSplitter, splitFutureMoveAssignable) { auto f2 = sp2.getFuture(); EXPECT_TRUE(f2.isReady()); EXPECT_TRUE(f2.hasValue()); - FutureSplitter sp3(std::move(sp2)); + folly::FutureSplitter sp3(std::move(sp2)); auto f3 = sp3.getFuture(); EXPECT_TRUE(f3.isReady()); EXPECT_TRUE(f3.hasValue()); @@ -108,7 +145,8 @@ TEST(FutureSplitter, splitFutureMoveAssignable) { TEST(FutureSplitter, splitFutureScope) { Promise p; - auto pSP = std::make_unique>(p.getFuture()); + auto pSP = std::make_unique>( + p.getSemiFuture().via(&folly::InlineExecutor::instance())); auto f1 = pSP->getFuture(); EXPECT_FALSE(f1.isReady()); pSP.reset(); @@ -121,7 +159,8 @@ TEST(FutureSplitter, splitFutureScope) { TEST(FutureSplitter, splitFutureFailure) { Promise p; - FutureSplitter sp(p.getFuture()); + folly::FutureSplitter sp( + p.getSemiFuture().via(&folly::InlineExecutor::instance())); auto f1 = sp.getFuture(); EXPECT_FALSE(f1.isReady()); try { diff --git a/folly/futures/test/SemiFutureTest.cpp b/folly/futures/test/SemiFutureTest.cpp index c2acb7801b7..990f842f198 100644 --- a/folly/futures/test/SemiFutureTest.cpp +++ b/folly/futures/test/SemiFutureTest.cpp @@ -365,7 +365,7 @@ TEST(SemiFuture, SimpleResultThrow) { TEST(SemiFuture, SimpleDefer) { std::atomic innerResult{0}; Promise p; - auto f = p.getFuture(); + auto f = p.getSemiFuture().toUnsafeFuture(); auto sf = std::move(f).semi().defer([&]() { innerResult = 17; }); p.setValue(); // Run "F" here inline in the calling thread @@ -376,7 +376,7 @@ TEST(SemiFuture, SimpleDefer) { TEST(SemiFuture, DeferWithDelayedSetValue) { EventBase e2; Promise p; - auto f = p.getFuture(); + auto f = p.getSemiFuture().toUnsafeFuture(); auto sf = std::move(f).semi().defer([&]() { return 17; }); // Start thread and have it blocking in the semifuture before we satisfy the @@ -396,7 +396,7 @@ TEST(SemiFuture, DeferWithDelayedSetValue) { TEST(SemiFuture, DeferWithViaAndDelayedSetValue) { EventBase e2; Promise p; - auto f = p.getFuture(); + auto f = p.getSemiFuture().toUnsafeFuture(); auto sf = std::move(f).semi().defer([&]() { return 17; }).via(&e2); // Start thread and have it blocking in the semifuture before we satisfy the // promise. 
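The defer tests above all follow the same shape: work attached to a SemiFuture must not run when the promise is fulfilled, only once a consumer drives the chain. A minimal sketch of that flow (illustrative only, not part of the patch; assumes <folly/futures/Future.h> and the deferValue overload introduced in this change):

    folly::Promise<int> p;
    auto sf = p.getSemiFuture().deferValue([](int v) {
      // Deferred work: runs when the consumer drives the SemiFuture.
      return v + 1;
    });
    p.setValue(41);              // does not run the deferred callback
    int r = std::move(sf).get(); // drives the chain inline; r == 42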
@@ -417,7 +417,7 @@ TEST(SemiFuture, DeferWithViaAndDelayedSetValue) { TEST(SemiFuture, DeferWithGetTimedGet) { std::atomic innerResult{0}; Promise p; - auto f = p.getFuture(); + auto f = p.getSemiFuture().toUnsafeFuture(); auto sf = std::move(f).semi().defer([&]() { innerResult = 17; }); EXPECT_THROW(std::move(sf).get(std::chrono::milliseconds(100)), TimedOut); ASSERT_EQ(innerResult, 0); @@ -425,7 +425,7 @@ TEST(SemiFuture, DeferWithGetTimedGet) { TEST(SemiFuture, DeferWithGetTimedWait) { Promise p; - auto f = p.getFuture(); + auto f = p.getSemiFuture().toUnsafeFuture(); auto sf = std::move(f).semi().defer([&]() { return 17; }); ASSERT_FALSE(sf.isReady()); sf.wait(std::chrono::milliseconds(100)); @@ -436,7 +436,7 @@ TEST(SemiFuture, DeferWithGetTimedWait) { TEST(SemiFuture, DeferWithGetMultipleTimedWait) { Promise p; - auto f = p.getFuture(); + auto f = p.getSemiFuture().toUnsafeFuture(); auto sf = std::move(f).semi().defer([&]() { return 17; }); sf.wait(std::chrono::milliseconds(100)); sf.wait(std::chrono::milliseconds(100)); @@ -450,7 +450,7 @@ TEST(SemiFuture, DeferWithVia) { std::atomic innerResult{0}; EventBase e2; Promise p; - auto f = p.getFuture(); + auto f = p.getSemiFuture().toUnsafeFuture(); auto sf = std::move(f).semi().defer([&]() { innerResult = 17; }); // Run "F" here inline in the calling thread auto tf = std::move(sf).via(&e2); @@ -464,7 +464,7 @@ TEST(SemiFuture, ChainingDefertoThen) { std::atomic result{0}; EventBase e2; Promise p; - auto f = p.getFuture(); + auto f = p.getSemiFuture().toUnsafeFuture(); auto sf = std::move(f).semi().defer([&]() { innerResult = 17; }); // Run "F" here inline in a task running on the eventbase auto tf = std::move(sf).via(&e2).then([&]() { result = 42; }); @@ -477,8 +477,8 @@ TEST(SemiFuture, ChainingDefertoThen) { TEST(SemiFuture, SimpleDeferWithValue) { std::atomic innerResult{0}; Promise p; - auto f = p.getFuture(); - auto sf = std::move(f).semi().defer([&](int a) { innerResult = a; }); + auto f = p.getSemiFuture().toUnsafeFuture(); + auto sf = std::move(f).semi().deferValue([&](int a) { innerResult = a; }); p.setValue(7); // Run "F" here inline in the calling thread std::move(sf).get(); @@ -490,8 +490,8 @@ TEST(SemiFuture, ChainingDefertoThenWithValue) { std::atomic result{0}; EventBase e2; Promise p; - auto f = p.getFuture(); - auto sf = std::move(f).semi().defer([&](int a) { + auto f = p.getSemiFuture().toUnsafeFuture(); + auto sf = std::move(f).semi().deferValue([&](int a) { innerResult = a; return a; }); @@ -505,7 +505,7 @@ TEST(SemiFuture, ChainingDefertoThenWithValue) { TEST(SemiFuture, MakeSemiFutureFromFutureWithTry) { Promise p; - auto f = p.getFuture(); + auto f = p.getSemiFuture().toUnsafeFuture(); auto sf = std::move(f).semi().defer([&](Try t) { if (auto err = t.tryGetExceptionObject()) { return Try(err->what()); @@ -517,3 +517,300 @@ TEST(SemiFuture, MakeSemiFutureFromFutureWithTry) { auto tryResult = std::move(sf).get(); ASSERT_EQ(tryResult.value(), "Try"); } + +TEST(SemiFuture, DeferWithinContinuation) { + std::atomic innerResult{0}; + std::atomic result{0}; + EventBase e2; + Promise p; + Promise p2; + auto f = p.getSemiFuture().via(&e2); + auto resultF = std::move(f).then([&, p3 = std::move(p2)](int outer) mutable { + result = outer; + return makeSemiFuture(std::move(outer)) + .deferValue([&, p4 = std::move(p3)](int inner) mutable { + innerResult = inner; + p4.setValue(inner); + return inner; + }); + }); + p.setValue(7); + auto r = resultF.getVia(&e2); + ASSERT_EQ(r, 7); + ASSERT_EQ(innerResult, 7); + ASSERT_EQ(result, 
7); +} + +TEST(SemiFuture, onError) { + bool theFlag = false; + auto flag = [&] { theFlag = true; }; +#define EXPECT_FLAG() \ + do { \ + EXPECT_TRUE(theFlag); \ + theFlag = false; \ + } while (0); + +#define EXPECT_NO_FLAG() \ + do { \ + EXPECT_FALSE(theFlag); \ + theFlag = false; \ + } while (0); + + // By reference + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](eggs_t& /* e */) { flag(); }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](eggs_t& /* e */) { + flag(); + return makeSemiFuture(); + }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + // By value + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](eggs_t /* e */) { flag(); }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](eggs_t /* e */) { + flag(); + return makeSemiFuture(); + }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + // Polymorphic + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](std::exception& /* e */) { flag(); }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](std::exception& /* e */) { + flag(); + return makeSemiFuture(); + }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + // Non-exceptions + { + auto f = + makeSemiFuture().defer([] { throw - 1; }).deferError([&](int /* e */) { + flag(); + }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + { + auto f = + makeSemiFuture().defer([] { throw - 1; }).deferError([&](int /* e */) { + flag(); + return makeSemiFuture(); + }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + // Mutable lambda + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](eggs_t& /* e */) mutable { flag(); }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](eggs_t& /* e */) mutable { + flag(); + return makeSemiFuture(); + }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + // Function pointer + { + auto f = makeSemiFuture() + .defer([]() -> int { throw eggs; }) + .deferError(onErrorHelperEggs) + .deferError(onErrorHelperGeneric); + EXPECT_EQ(10, std::move(f).get()); + } + { + auto f = makeSemiFuture() + .defer([]() -> int { throw std::runtime_error("test"); }) + .deferError(onErrorHelperEggs) + .deferError(onErrorHelperGeneric); + EXPECT_EQ(20, std::move(f).get()); + } + { + auto f = makeSemiFuture() + .defer([]() -> int { throw std::runtime_error("test"); }) + .deferError(onErrorHelperEggs); + EXPECT_THROW(std::move(f).get(), std::runtime_error); + } + + // No throw + { + auto f = makeSemiFuture() + .defer([] { return 42; }) + .deferError([&](eggs_t& /* e */) { + flag(); + return -1; + }); + EXPECT_NO_FLAG(); + EXPECT_EQ(42, std::move(f).get()); + EXPECT_NO_FLAG(); + } + + { + auto f = makeSemiFuture() + .defer([] { return 42; }) + .deferError([&](eggs_t& /* e */) { + flag(); + return makeSemiFuture(-1); + }); + EXPECT_EQ(42, std::move(f).get()); + EXPECT_NO_FLAG(); + } + + // Catch different exception + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](std::runtime_error& /* e */) { flag(); }); + EXPECT_THROW(std::move(f).get(), eggs_t); + EXPECT_NO_FLAG(); + } + + { + auto f = 
makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](std::runtime_error& /* e */) { + flag(); + return makeSemiFuture(); + }); + EXPECT_THROW(std::move(f).get(), eggs_t); + EXPECT_NO_FLAG(); + } + + // Returned value propagates + { + auto f = makeSemiFuture() + .defer([]() -> int { throw eggs; }) + .deferError([&](eggs_t& /* e */) { return 42; }); + EXPECT_EQ(42, std::move(f).get()); + } + + // Returned future propagates + { + auto f = makeSemiFuture() + .defer([]() -> int { throw eggs; }) + .deferError( + [&](eggs_t& /* e */) { return makeSemiFuture(42); }); + EXPECT_EQ(42, std::move(f).get()); + } + + // Throw in callback + { + auto f = makeSemiFuture() + .defer([]() -> int { throw eggs; }) + .deferError([&](eggs_t& e) -> int { throw e; }); + EXPECT_THROW(std::move(f).get(), eggs_t); + } + + { + auto f = makeSemiFuture() + .defer([]() -> int { throw eggs; }) + .deferError([&](eggs_t& e) -> SemiFuture { throw e; }); + EXPECT_THROW(std::move(f).get(), eggs_t); + } + + // exception_wrapper, return Future + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](exception_wrapper /* e */) { + flag(); + return makeSemiFuture(); + }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } + + // exception_wrapper, return Future but throw + { + auto f = makeSemiFuture() + .defer([]() -> int { throw eggs; }) + .deferError([&](exception_wrapper /* e */) -> SemiFuture { + flag(); + throw eggs; + }); + EXPECT_THROW(std::move(f).get(), eggs_t); + EXPECT_FLAG(); + } + + // exception_wrapper, return T + { + auto f = makeSemiFuture() + .defer([]() -> int { throw eggs; }) + .deferError([&](exception_wrapper /* e */) { + flag(); + return -1; + }); + EXPECT_EQ(-1, std::move(f).get()); + EXPECT_FLAG(); + } + + // exception_wrapper, return T but throw + { + auto f = makeSemiFuture() + .defer([]() -> int { throw eggs; }) + .deferError([&](exception_wrapper /* e */) -> int { + flag(); + throw eggs; + }); + EXPECT_THROW(std::move(f).get(), eggs_t); + EXPECT_FLAG(); + } + + // const exception_wrapper& + { + auto f = makeSemiFuture() + .defer([] { throw eggs; }) + .deferError([&](const exception_wrapper& /* e */) { + flag(); + return makeSemiFuture(); + }); + EXPECT_NO_THROW(std::move(f).get()); + EXPECT_FLAG(); + } +} diff --git a/folly/gen/Base.h b/folly/gen/Base.h index 250b862e9af..fd887d7a5ea 100644 --- a/folly/gen/Base.h +++ b/folly/gen/Base.h @@ -238,6 +238,15 @@ class To { } }; +template +class TryTo { + public: + template + Expected operator()(Value&& value) const { + return ::folly::tryTo(std::forward(value)); + } +}; + // Specialization to allow String->StringPiece conversion template <> class To { @@ -643,11 +652,18 @@ Visit visit(Visitor visitor = Visitor()) { return Visit(std::move(visitor)); } -template > +template > Until until(Predicate pred = Predicate()) { return Until(std::move(pred)); } +template < + class Predicate = Identity, + class TakeWhile = detail::Until>> +TakeWhile takeWhile(Predicate pred = Predicate()) { + return TakeWhile(Negate(std::move(pred))); +} + template < class Selector = Identity, class Comparer = Less, @@ -689,9 +705,15 @@ Cast eachAs() { } // call folly::to on each value -template >> -To eachTo() { - return To(); +template >> +EachTo eachTo() { + return EachTo(); +} + +// call folly::tryTo on each value +template >> +EachTryTo eachTryTo() { + return EachTryTo(); } template diff --git a/folly/gen/test/BaseTest.cpp b/folly/gen/test/BaseTest.cpp index f57eeaac4b0..c907e34b9a9 100644 --- a/folly/gen/test/BaseTest.cpp +++ 
b/folly/gen/test/BaseTest.cpp @@ -1225,6 +1225,21 @@ TEST(Gen, Guard) { runtime_error); } +TEST(Gen, eachTryTo) { + using std::runtime_error; + EXPECT_EQ(4, + from({"1", "a", "3"}) + | eachTryTo() + | dereference + | sum); + EXPECT_EQ(1, + from({"1", "a", "3"}) + | eachTryTo() + | takeWhile() + | dereference + | sum); +} + TEST(Gen, Batch) { EXPECT_EQ((vector> { {1} }), seq(1, 1) | batch(5) | as()); diff --git a/folly/hash/Hash.h b/folly/hash/Hash.h index ae6c1dd0564..31a72cfb2f6 100644 --- a/folly/hash/Hash.h +++ b/folly/hash/Hash.h @@ -33,7 +33,8 @@ * Various hashing functions. */ -namespace folly { namespace hash { +namespace folly { +namespace hash { // This is a general-purpose way to create a single hash from multiple // hashable objects. hash_combine_generic takes a class Hasher implementing // hash<T>(); hash_combine_generic hashes each argument and combines those hashes in // an order-dependent way to yield a new hash. - // This is the Hash128to64 function from Google's cityhash (available // under the MIT License). We use it to reduce multiple 64 bit hashes // into a single hash. @@ -65,10 +65,8 @@ inline size_t hash_combine_generic() { template < class Iter, class Hash = std::hash::value_type>> -uint64_t hash_range(Iter begin, - Iter end, - uint64_t hash = 0, - Hash hasher = Hash()) { +uint64_t +hash_range(Iter begin, Iter end, uint64_t hash = 0, Hash hasher = Hash()) { for (; begin != end; ++begin) { hash = hash_128_to_64(hash, hasher(*begin)); } @@ -115,13 +113,13 @@ size_t hash_combine(const T& t, const Ts&... ts) { */ inline uint64_t twang_mix64(uint64_t key) { - key = (~key) + (key << 21); // key *= (1 << 21) - 1; key -= 1; + key = (~key) + (key << 21); // key *= (1 << 21) - 1; key -= 1; key = key ^ (key >> 24); - key = key + (key << 3) + (key << 8); // key *= 1 + (1 << 3) + (1 << 8) + key = key + (key << 3) + (key << 8); // key *= 1 + (1 << 3) + (1 << 8) key = key ^ (key >> 14); - key = key + (key << 2) + (key << 4); // key *= 1 + (1 << 2) + (1 << 4) + key = key + (key << 2) + (key << 4); // key *= 1 + (1 << 2) + (1 << 4) key = key ^ (key >> 28); - key = key + (key << 31); // key *= 1 + (1 << 31) + key = key + (key << 31); // key *= 1 + (1 << 31) return key; } @@ -155,7 +153,7 @@ inline uint32_t twang_32from64(uint64_t key) { key = key ^ (key >> 11); key = key + (key << 6); key = key ^ (key >> 22); - return (uint32_t) key; + return (uint32_t)key; } /* @@ -163,11 +161,11 @@ inline uint32_t jenkins_rev_mix32(uint32_t key) { - key += (key << 12); // key *= (1 + (1 << 12)) + key += (key << 12); // key *= (1 + (1 << 12)) key ^= (key >> 22); - key += (key << 4); // key *= (1 + (1 << 4)) + key += (key << 4); // key *= (1 + (1 << 4)) key ^= (key >> 9); - key += (key << 10); // key *= (1 + (1 << 10)) + key += (key << 10); // key *= (1 + (1 << 10)) key ^= (key >> 2); // key *= (1 + (1 << 7)) * (1 + (1 << 12)) key += (key << 7); @@ -194,11 +192,9 @@ inline uint32_t jenkins_rev_unmix32(uint32_t key) { // for (int i = n; i < 32; i += n) { // b ^= (a >> i); // } - key ^= - (key >> 2) ^ (key >> 4) ^ (key >> 6) ^ (key >> 8) ^ - (key >> 10) ^ (key >> 12) ^ (key >> 14) ^ (key >> 16) ^ - (key >> 18) ^ (key >> 20) ^ (key >> 22) ^ (key >> 24) ^ - (key >> 26) ^ (key >> 28) ^ (key >> 30); + key ^= (key >> 2) ^ (key >> 4) ^ (key >> 6) ^ (key >> 8) ^ (key >> 10) ^ + (key >> 12) ^ (key >> 14) ^ (key >> 16) ^ (key >> 18) ^ (key >> 20) ^ + (key >> 22) ^ (key >> 24) ^ (key >> 26) ^ (key >> 28) ^ (key >> 30);
key *= 3222273025U; key ^= (key >> 9) ^ (key >> 18) ^ (key >> 27); key *= 4042322161U; @@ -221,30 +217,30 @@ inline uint32_t fnv32(const char* buf, uint32_t hash = FNV_32_HASH_START) { const signed char* s = reinterpret_cast(buf); for (; *s; ++s) { - hash += (hash << 1) + (hash << 4) + (hash << 7) + - (hash << 8) + (hash << 24); + hash += + (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); hash ^= *s; } return hash; } -inline uint32_t fnv32_buf(const void* buf, - size_t n, - uint32_t hash = FNV_32_HASH_START) { +inline uint32_t +fnv32_buf(const void* buf, size_t n, uint32_t hash = FNV_32_HASH_START) { // forcing signed char, since other platforms can use unsigned const signed char* char_buf = reinterpret_cast(buf); for (size_t i = 0; i < n; ++i) { - hash += (hash << 1) + (hash << 4) + (hash << 7) + - (hash << 8) + (hash << 24); + hash += + (hash << 1) + (hash << 4) + (hash << 7) + (hash << 8) + (hash << 24); hash ^= char_buf[i]; } return hash; } -inline uint32_t fnv32(const std::string& str, - uint32_t hash = FNV_32_HASH_START) { +inline uint32_t fnv32( + const std::string& str, + uint32_t hash = FNV_32_HASH_START) { return fnv32_buf(str.data(), str.size(), hash); } @@ -254,46 +250,46 @@ inline uint64_t fnv64(const char* buf, uint64_t hash = FNV_64_HASH_START) { for (; *s; ++s) { hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + - (hash << 8) + (hash << 40); + (hash << 8) + (hash << 40); hash ^= *s; } return hash; } -inline uint64_t fnv64_buf(const void* buf, - size_t n, - uint64_t hash = FNV_64_HASH_START) { +inline uint64_t +fnv64_buf(const void* buf, size_t n, uint64_t hash = FNV_64_HASH_START) { // forcing signed char, since other platforms can use unsigned const signed char* char_buf = reinterpret_cast(buf); for (size_t i = 0; i < n; ++i) { hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + - (hash << 8) + (hash << 40); + (hash << 8) + (hash << 40); hash ^= char_buf[i]; } return hash; } -inline uint64_t fnv64(const std::string& str, - uint64_t hash = FNV_64_HASH_START) { +inline uint64_t fnv64( + const std::string& str, + uint64_t hash = FNV_64_HASH_START) { return fnv64_buf(str.data(), str.size(), hash); } -inline uint64_t fnva64_buf(const void* buf, - size_t n, - uint64_t hash = FNVA_64_HASH_START) { +inline uint64_t +fnva64_buf(const void* buf, size_t n, uint64_t hash = FNVA_64_HASH_START) { const uint8_t* char_buf = reinterpret_cast(buf); for (size_t i = 0; i < n; ++i) { hash ^= char_buf[i]; hash += (hash << 1) + (hash << 4) + (hash << 5) + (hash << 7) + - (hash << 8) + (hash << 40); + (hash << 8) + (hash << 40); } return hash; } -inline uint64_t fnva64(const std::string& str, - uint64_t hash = FNVA_64_HASH_START) { +inline uint64_t fnva64( + const std::string& str, + uint64_t hash = FNVA_64_HASH_START) { return fnva64_buf(str.data(), str.size(), hash); } @@ -318,31 +314,31 @@ inline uint32_t hsieh_hash32_buf(const void* buf, size_t len) { len >>= 2; /* Main loop */ - for (;len > 0; len--) { - hash += get16bits (s); - tmp = (get16bits (s+2) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - s += 2*sizeof (uint16_t); - hash += hash >> 11; + for (; len > 0; len--) { + hash += get16bits(s); + tmp = (get16bits(s + 2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + s += 2 * sizeof(uint16_t); + hash += hash >> 11; } /* Handle end cases */ switch (rem) { - case 3: - hash += get16bits(s); - hash ^= hash << 16; - hash ^= s[sizeof (uint16_t)] << 18; - hash += hash >> 11; - break; - case 2: - hash += get16bits(s); - hash ^= hash << 11; - hash += hash 
>> 17; - break; - case 1: - hash += *s; - hash ^= hash << 10; - hash += hash >> 1; + case 3: + hash += get16bits(s); + hash ^= hash << 16; + hash ^= s[sizeof(uint16_t)] << 18; + hash += hash >> 11; + break; + case 2: + hash += get16bits(s); + hash ^= hash << 11; + hash += hash >> 17; + break; + case 1: + hash += *s; + hash ^= hash << 10; + hash += hash >> 1; } /* Force "avalanching" of final 127 bits */ @@ -372,23 +368,32 @@ inline uint32_t hsieh_hash32_str(const std::string& str) { namespace detail { +template struct integral_hasher { - template size_t operator()(I const& i) const { - static_assert(sizeof(I) <= 8, "Input type is too wide"); + static_assert(sizeof(I) <= 16, "Input type is too wide"); /* constexpr */ if (sizeof(I) <= 4) { auto const i32 = static_cast(i); // impl accident: sign-extends auto const u32 = static_cast(i32); return static_cast(hash::jenkins_rev_mix32(u32)); - } else { + } else if (sizeof(I) <= 8) { auto const u64 = static_cast(i); return static_cast(hash::twang_mix64(u64)); + } else { + auto const u = to_unsigned(i); + auto const hi = static_cast(u >> sizeof(I) * 4); + auto const lo = static_cast(u); + return hash::hash_128_to_64(hi, lo); } } }; +template +using integral_hasher_avalanches = + std::integral_constant= 8 || sizeof(size_t) == 4>; + +template struct float_hasher { - template size_t operator()(F const& f) const { static_assert(sizeof(F) <= 8, "Input type is too wide"); @@ -396,15 +401,9 @@ struct float_hasher { return 0; } - /* constexpr */ if (sizeof(F) <= 4) { - uint32_t u32 = 0; - memcpy(&u32, &f, sizeof(F)); - return static_cast(hash::jenkins_rev_mix32(u32)); - } else { - uint64_t u64 = 0; - memcpy(&u64, &f, sizeof(F)); - return static_cast(hash::twang_mix64(u64)); - } + uint64_t u64 = 0; + memcpy(&u64, &f, sizeof(F)); + return static_cast(hash::twang_mix64(u64)); } }; @@ -425,6 +424,44 @@ struct Hash { } }; +// IsAvalanchingHasher<H, K> extends std::integral_constant<bool, V>. +// V will be true if it is known that when a hasher of type H computes +// the hash of a key of type K, any subset of B bits from the resulting +// hash value is usable in a context that can tolerate a collision rate +// of about 1/2^B. (Input bits lost implicitly converting between K and +// the argument of H::operator() are not considered here; K is separate +// to handle the case of generic hashers like folly::Hash). +// +// The standard's definition of hash quality is based on the chance of hash +// collisions when using the entire hash value. No requirement is made that +// this property holds for subsets of the bits. In addition, hashed keys +// in real-world workloads are not chosen uniformly from the entire domain +// of keys, which can further increase the collision rate for a subset +// of bits. For example, std::hash<uint64_t> in libstdc++-v3 and libc++ +// is the identity function. This hash function has no collisions when +// considering hash values in their entirety, but for real-world workloads +// the high bits are likely to always be zero. +// +// Some hash functions provide a stronger guarantee -- the standard's +// collision property is also preserved for subsets of the output bits and +// for sub-domains of keys. Another way to say this is that each bit of +// the hash value contains entropy from the entire input; changes to the +// input avalanche across all of the bits of the output. The distinction +// is useful when mapping the hash value onto a smaller space efficiently +// (such as when implementing a hash table).
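To make the distinction concrete, here is a hypothetical sketch (indexFor and mask are illustrative names, not folly APIs; a 64-bit size_t is assumed) of how a power-of-two hash table could consult the trait defined below before truncating a hash value to a bucket index:

    template <typename Hasher, typename Key>
    size_t indexFor(Hasher const& hasher, Key const& key, size_t mask) {
      size_t h = hasher(key);
      if (!folly::IsAvalanchingHasher<Hasher, Key>::value) {
        // Low bits may carry little entropy (e.g. an identity hash); mix first.
        h = folly::hash::twang_mix64(h);
      }
      return h & mask; // mask == bucketCount - 1 for a power-of-two table
    }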
+template +struct IsAvalanchingHasher : std::false_type {}; + +template +struct IsAvalanchingHasher, K> + : std::conditional< + std::is_enum::value || std::is_integral::value, + detail::integral_hasher_avalanches, + std::is_floating_point>::type {}; + +template +struct IsAvalanchingHasher : IsAvalanchingHasher, K> {}; + template <> struct hasher { size_t operator()(bool key) const { @@ -432,81 +469,105 @@ struct hasher { return key ? std::numeric_limits::max() : 0; } }; +template +struct IsAvalanchingHasher, K> : std::true_type {}; template <> -struct hasher : detail::integral_hasher {}; +struct hasher + : detail::integral_hasher {}; template <> -struct hasher : detail::integral_hasher {}; +struct hasher : detail::integral_hasher {}; template <> -struct hasher : detail::integral_hasher {}; +struct hasher : detail::integral_hasher {}; template <> -struct hasher : detail::integral_hasher {}; +struct hasher : detail::integral_hasher {}; template <> -struct hasher : detail::integral_hasher {}; +struct hasher : detail::integral_hasher {}; template <> -struct hasher : detail::integral_hasher {}; +struct hasher : detail::integral_hasher {}; template <> -struct hasher : detail::integral_hasher {}; +struct hasher : detail::integral_hasher {}; template <> -struct hasher : detail::integral_hasher {}; +struct hasher : detail::integral_hasher {}; template <> -struct hasher : detail::integral_hasher {}; +struct hasher : detail::integral_hasher {}; template <> -struct hasher : detail::integral_hasher {}; +struct hasher : detail::integral_hasher {}; template <> // char is a different type from both signed char and unsigned char -struct hasher : detail::integral_hasher {}; +struct hasher : detail::integral_hasher {}; + +#if FOLLY_HAVE_INT128_T +template <> +struct hasher : detail::integral_hasher {}; template <> -struct hasher : detail::float_hasher {}; +struct hasher : detail::integral_hasher { +}; +#endif + +template <> +struct hasher : detail::float_hasher {}; template <> -struct hasher : detail::float_hasher {}; +struct hasher : detail::float_hasher {}; -template <> struct hasher { +template <> +struct hasher { size_t operator()(const std::string& key) const { return static_cast( hash::SpookyHashV2::Hash64(key.data(), key.size(), 0)); } }; +template +struct IsAvalanchingHasher, K> : std::true_type {}; -template +template struct hasher::value, void>::type> { size_t operator()(T key) const { return Hash()(static_cast::type>(key)); } }; -template +template struct hasher> { size_t operator()(const std::pair& key) const { return Hash()(key.first, key.second); } }; +template +struct IsAvalanchingHasher>, K> : std::true_type {}; template struct hasher> { - size_t operator() (const std::tuple& key) const { + size_t operator()(const std::tuple& key) const { return applyTuple(Hash(), key); } }; +// combiner for multi-arg tuple also mixes bits +template +struct IsAvalanchingHasher>, K> + : IsAvalanchingHasher, K> {}; +template +struct IsAvalanchingHasher>, K> + : std::true_type {}; + // recursion template struct TupleHasher { size_t operator()(std::tuple const& key) const { return hash::hash_combine( - TupleHasher()(key), - std::get(key)); + TupleHasher()(key), std::get(key)); } }; @@ -524,25 +585,65 @@ struct TupleHasher<0, Ts...> { // Custom hash functions. namespace std { - // Hash function for pairs. Requires default hash functions for both - // items in the pair. 
- template - struct hash > { - public: - size_t operator()(const std::pair& x) const { - return folly::hash::hash_combine(x.first, x.second); - } - }; +#if FOLLY_SUPPLY_MISSING_INT128_TRAITS +template <> +struct hash<__int128> : folly::detail::integral_hasher<__int128> {}; - // Hash function for tuples. Requires default hash functions for all types. - template - struct hash> { - size_t operator()(std::tuple const& key) const { - folly::TupleHasher< +template <> +struct hash + : folly::detail::integral_hasher {}; +#endif + +// Hash function for pairs. Requires default hash functions for both +// items in the pair. +template +struct hash> { + public: + size_t operator()(const std::pair& x) const { + return folly::hash::hash_combine(x.first, x.second); + } +}; + +// Hash function for tuples. Requires default hash functions for all types. +template +struct hash> { + size_t operator()(std::tuple const& key) const { + folly::TupleHasher< std::tuple_size>::value - 1, // start index - Ts...> hasher; + Ts...> + hasher; + + return hasher(key); + } +}; - return hasher(key); - } - }; } // namespace std + +namespace folly { + +// These IsAvalanchingHasher<std::hash<...>> specializations refer to the +// std::hash specializations defined in this file + +template +struct IsAvalanchingHasher>, K> : std::true_type {}; + +template +struct IsAvalanchingHasher>, K> + : IsAvalanchingHasher, U> {}; + +template +struct IsAvalanchingHasher>, K> + : std::true_type {}; + +// std::hash<std::string> is avalanching on libstdc++-v3 (code checked), +// libc++ (code checked), and MSVC (based on online information). +// std::hash for float and double on libstdc++-v3 are avalanching, +// but they are not on libc++. std::hash for integral types is not +// avalanching for libstdc++-v3 or libc++. We're conservative here and +// just mark std::string as avalanching. std::string_view will also be +// so, once it exists. +template +struct IsAvalanchingHasher>, K> + : std::true_type {}; + +} // namespace folly diff --git a/folly/hash/test/HashTest.cpp b/folly/hash/test/HashTest.cpp index 422673de434..d33dba179ef 100644 --- a/folly/hash/test/HashTest.cpp +++ b/folly/hash/test/HashTest.cpp @@ -15,13 +15,17 @@ */ #include -#include -#include + #include + #include #include #include +#include +#include +#include + using namespace folly::hash; TEST(Hash, Fnv32) { @@ -76,17 +80,12 @@ int32_t t4_c = 0xAB12CD34; const char* t4_d = "Unum"; uint64_t t4_res = 15571330457339273965ULL; - uint64_t t4_hash1 = fnv64_buf(t4_a, - strlen(t4_a)); - uint64_t t4_hash2 = fnv64_buf(reinterpret_cast(&t4_b), - sizeof(int64_t), - t4_hash1); - uint64_t t4_hash3 = fnv64_buf(reinterpret_cast(&t4_c), - sizeof(int32_t), - t4_hash2); - uint64_t t4_hash4 = fnv64_buf(t4_d, - strlen(t4_d), - t4_hash3); + uint64_t t4_hash1 = fnv64_buf(t4_a, strlen(t4_a)); + uint64_t t4_hash2 = + fnv64_buf(reinterpret_cast(&t4_b), sizeof(int64_t), t4_hash1); + uint64_t t4_hash3 = + fnv64_buf(reinterpret_cast(&t4_c), sizeof(int32_t), t4_hash2); + uint64_t t4_hash4 = fnv64_buf(t4_d, strlen(t4_d), t4_hash3); EXPECT_EQ(t4_hash4, t4_res); // note: These are probabilistic, not deterministic, but c'mon. // These hash values should be different, or something's not @@ -181,7 +180,7 @@ TEST(Hash, Jenkins_Rev_Unmix32) { TEST(Hash, hasher) { // Basically just confirms that things compile ok.
- std::unordered_map> m; + std::unordered_map> m; m.insert(std::make_pair(4, 5)); EXPECT_EQ(get_default(m, 4), 5); } @@ -214,9 +213,36 @@ TEST(Hash, integral_types) { hashes.insert(hasher((int64_t)22)); hashes.insert(hasher((uint64_t)23)); hashes.insert(hasher((size_t)24)); - EXPECT_EQ(24, hashes.size()); + + size_t setSize = 24; +#if FOLLY_HAVE_INT128_T + hashes.insert(hasher((__int128_t)25)); + hashes.insert(hasher((__uint128_t)26)); + setSize += 2; +#endif + EXPECT_EQ(setSize, hashes.size()); +} + +TEST(Hash, integer_conversion) { + folly::hasher h; + uint64_t k = 10; + EXPECT_EQ(h(k), h(10)); } +#if FOLLY_HAVE_INT128_T +TEST(Hash, int128_std_hash) { + std::unordered_set<__int128> hs; + hs.insert(__int128_t{1}); + hs.insert(__int128_t{2}); + EXPECT_EQ(2, hs.size()); + + std::set s; + s.insert(static_cast(1)); + s.insert(static_cast(2)); + EXPECT_EQ(2, s.size()); +} +#endif + TEST(Hash, float_types) { folly::Hash hasher; @@ -253,38 +279,27 @@ TEST(Hash, pair) { auto b = std::make_pair(3, 4); auto c = std::make_pair(1, 2); auto d = std::make_pair(2, 1); - EXPECT_EQ(hash_combine(a), - hash_combine(c)); - EXPECT_NE(hash_combine(b), - hash_combine(c)); - EXPECT_NE(hash_combine(d), - hash_combine(c)); + EXPECT_EQ(hash_combine(a), hash_combine(c)); + EXPECT_NE(hash_combine(b), hash_combine(c)); + EXPECT_NE(hash_combine(d), hash_combine(c)); // With composition - EXPECT_EQ(hash_combine(a, b), - hash_combine(c, b)); + EXPECT_EQ(hash_combine(a, b), hash_combine(c, b)); // Test order dependence - EXPECT_NE(hash_combine(a, b), - hash_combine(b, a)); + EXPECT_NE(hash_combine(a, b), hash_combine(b, a)); // Test with custom hasher - EXPECT_EQ(hash_combine_test(a), - hash_combine_test(c)); + EXPECT_EQ(hash_combine_test(a), hash_combine_test(c)); // 3 + 4 != 1 + 2 - EXPECT_NE(hash_combine_test(b), - hash_combine_test(c)); + EXPECT_NE(hash_combine_test(b), hash_combine_test(c)); // This time, thanks to a terrible hash function, these are equal - EXPECT_EQ(hash_combine_test(d), - hash_combine_test(c)); + EXPECT_EQ(hash_combine_test(d), hash_combine_test(c)); // With composition - EXPECT_EQ(hash_combine_test(a, b), - hash_combine_test(c, b)); + EXPECT_EQ(hash_combine_test(a, b), hash_combine_test(c, b)); // Test order dependence - EXPECT_NE(hash_combine_test(a, b), - hash_combine_test(b, a)); + EXPECT_NE(hash_combine_test(a, b), hash_combine_test(b, a)); // Again, 1 + 2 == 2 + 1 - EXPECT_EQ(hash_combine_test(a, b), - hash_combine_test(d, b)); + EXPECT_EQ(hash_combine_test(a, b), hash_combine_test(d, b)); } TEST(Hash, hash_combine) { @@ -392,18 +407,15 @@ TEST(Hash, std_tuple_different_hash) { tuple3 t2(9, "bar", 3); tuple3 t3(42, "foo", 3); - EXPECT_NE(std::hash()(t1), - std::hash()(t2)); - EXPECT_NE(std::hash()(t1), - std::hash()(t3)); + EXPECT_NE(std::hash()(t1), std::hash()(t2)); + EXPECT_NE(std::hash()(t1), std::hash()(t3)); } TEST(Hash, Strings) { using namespace folly; - StringPiece a1 = "10050517", b1 = "51107032", - a2 = "10050518", b2 = "51107033", - a3 = "10050519", b3 = "51107034", + StringPiece a1 = "10050517", b1 = "51107032", a2 = "10050518", + b2 = "51107033", a3 = "10050519", b3 = "51107034", a4 = "10050525", b4 = "51107040"; Range w1 = range(L"10050517"), w2 = range(L"51107032"), w3 = range(L"10050518"), w4 = range(L"51107033"); @@ -435,8 +447,8 @@ struct FNVTestParam { class FNVTest : public ::testing::TestWithParam {}; TEST_P(FNVTest, Fnva64Buf) { - EXPECT_EQ(GetParam().out, - fnva64_buf(GetParam().in.data(), GetParam().in.size())); + EXPECT_EQ( + GetParam().out, 
fnva64_buf(GetParam().in.data(), GetParam().in.size())); } TEST_P(FNVTest, Fnva64) { @@ -447,9 +459,10 @@ TEST_P(FNVTest, Fnva64Partial) { size_t partialLen = GetParam().in.size() / 2; auto data = GetParam().in.data(); auto partial = fnva64_buf(data, partialLen); - EXPECT_EQ(GetParam().out, - fnva64_buf( - data + partialLen, GetParam().in.size() - partialLen, partial)); + EXPECT_EQ( + GetParam().out, + fnva64_buf( + data + partialLen, GetParam().in.size() - partialLen, partial)); } // Taken from http://www.isthe.com/chongo/src/fnv/test_fnv.c @@ -468,3 +481,265 @@ INSTANTIATE_TEST_CASE_P( 0xd9b957fb7fe794c5}, (FNVTestParam){"http://norvig.com/21-days.html", // 136 0x07aaa640476e0b9a})); + +namespace { +enum class TestEnum { + MIN = 0, + ITEM = 1, + MAX = 2, +}; + +enum class TestBigEnum : uint64_t { + ITEM = 1, +}; + +struct TestStruct {}; +} // namespace + +namespace std { +template <> +struct hash : hash {}; + +template <> +struct hash { + std::size_t operator()(TestStruct const&) const { + return 0; + } +}; +} // namespace std + +//////// static checks + +static_assert(!folly::IsAvalanchingHasher, int>::value, ""); +static_assert( + !folly::IsAvalanchingHasher, char const*>::value, + ""); +static_assert(!folly::IsAvalanchingHasher, float>::value, ""); +static_assert( + !folly::IsAvalanchingHasher, double>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher, long double>::value, + ""); +static_assert( + folly::IsAvalanchingHasher, std::string>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher, TestEnum>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher, TestStruct>::value, + ""); + +// these come from folly/hash/Hash.h +static_assert( + folly::IsAvalanchingHasher< + std::hash>, + std::pair>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher>, std::tuple>:: + value, + ""); +static_assert( + folly::IsAvalanchingHasher< + std::hash>, + std::tuple>::value, + ""); +static_assert( + folly::IsAvalanchingHasher< + std::hash>, + std::tuple>::value, + ""); +static_assert( + folly::IsAvalanchingHasher< + std::hash>, + std::tuple>::value, + ""); + +static_assert(!folly::IsAvalanchingHasher::value, ""); +static_assert(!folly::IsAvalanchingHasher::value, ""); +static_assert(!folly::IsAvalanchingHasher::value, ""); +static_assert(!folly::IsAvalanchingHasher::value, ""); +static_assert(!folly::IsAvalanchingHasher::value, ""); +static_assert(!folly::IsAvalanchingHasher::value, ""); +static_assert(folly::IsAvalanchingHasher::value, ""); +static_assert(folly::IsAvalanchingHasher::value, ""); +static_assert( + folly::IsAvalanchingHasher::value, + ""); +static_assert(folly::IsAvalanchingHasher::value, ""); +static_assert(!folly::IsAvalanchingHasher::value, ""); +static_assert(folly::IsAvalanchingHasher::value, ""); + +static_assert( + !folly::IsAvalanchingHasher, uint8_t>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher, char>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher, uint16_t>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher, int16_t>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher, uint32_t>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher, int32_t>::value, + ""); +static_assert( + folly::IsAvalanchingHasher, uint64_t>::value, + ""); +static_assert( + folly::IsAvalanchingHasher, int64_t>::value, + ""); +static_assert( + folly::IsAvalanchingHasher, float>::value, + ""); +static_assert( + folly::IsAvalanchingHasher, double>::value, + ""); +static_assert( + folly::IsAvalanchingHasher, 
std::string>::value, + ""); +static_assert( + folly::IsAvalanchingHasher, std::string>:: + value, + ""); + +static_assert( + folly::IsAvalanchingHasher, std::string>::value, + ""); +static_assert( + folly::IsAvalanchingHasher< + folly::hasher>, + std::pair>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher< + folly::hasher>, + std::tuple>::value, + ""); +static_assert( + folly::IsAvalanchingHasher< + folly::hasher>, + std::tuple>::value, + ""); +static_assert( + folly::IsAvalanchingHasher< + folly::hasher>, + std::tuple>::value, + ""); +static_assert( + folly::IsAvalanchingHasher< + folly::hasher>, + std::tuple>::value, + ""); +static_assert( + !folly::IsAvalanchingHasher, TestEnum>::value, + ""); +static_assert( + folly::IsAvalanchingHasher, TestBigEnum>::value, + ""); + +//////// dynamic checks + +namespace { +template +void verifyAvalanching(T initialValue, F const& advance) { + // This doesn't check probabilities, but does verify that every bit + // changed independently of every other bit, in both directions, when + // traversing a sequence of dependent changes. Note that it is NOT + // sufficient to just use a random sequence here, because even the + // identity function will pass. As constructed this will require + // 2^63 steps to complete for an identity hash, because none of the + // transitions with on == 63 will occur until then. + H const hasher; + constexpr std::size_t N = sizeof(decltype(hasher(initialValue))) * 8; + + // seen[i][j] if we have seen i flip on at the same time as j went off + bool seen[N][N] = {}; + std::size_t unseenCount = N * (N - 1); + auto v = initialValue; + auto h = hasher(v); + std::size_t steps = 0; + // wait for 95% coverage + while (unseenCount > (N * (N - 1)) / 95) { + ++steps; + auto hPrev = h; + advance(v); + h = hasher(v); + + uint64_t delta = hPrev ^ h; + for (std::size_t i = 0; i < N - 1; ++i) { + if (((delta >> i) & 1) == 0) { + continue; + } + // we know i flipped + for (std::size_t j = i + 1; j < N; ++j) { + if (((delta >> j) & 1) == 0) { + continue; + } + // we know j flipped + bool iOn = ((hPrev >> i) & 1) == 0; + bool jOn = ((hPrev >> j) & 1) == 0; + if (iOn != jOn) { + auto on = iOn ? i : j; + auto off = iOn ? 
j : i; + if (!seen[on][off]) { + seen[on][off] = true; + --unseenCount; + } + } + } + } + + // we should actually only need a couple hundred + ASSERT_LT(steps, 1000) << unseenCount << " of " << (N * (N - 1)) + << " pair transitions unseen"; + } +} +} // namespace + +TEST(Traits, stdHashPairAvalances) { + verifyAvalanching>>( + std::make_pair(0, 0), [](std::pair& v) { v.first++; }); +} + +TEST(Traits, stdHashTuple2Avalances) { + verifyAvalanching>>( + std::make_tuple(0, 0), + [](std::tuple& v) { std::get<0>(v) += 1; }); +} + +TEST(Traits, stdHashStringAvalances) { + verifyAvalanching, std::string>( + "00000000000000000000000000000", [](std::string& str) { + std::size_t i = 0; + while (str[i] == '1') { + str[i] = '0'; + ++i; + } + str[i] = '1'; + }); +} + +TEST(Traits, follyHashUint64Avalances) { + verifyAvalanching(uint64_t{0}, [](uint64_t& v) { v++; }); +} + +TEST(Traits, follyHasherInt64Avalances) { + verifyAvalanching>( + int64_t{0}, [](int64_t& v) { v++; }); +} + +TEST(Traits, follyHasherFloatAvalanches) { + verifyAvalanching>(0.0f, [](float& v) { v += 1; }); +} + +TEST(Traits, follyHasherDoubleAvalanches) { + verifyAvalanching>(0.0, [](double& v) { v += 1; }); +} diff --git a/folly/io/Cursor-inl.h b/folly/io/Cursor-inl.h index c562ea7a666..0d137f7064d 100644 --- a/folly/io/Cursor-inl.h +++ b/folly/io/Cursor-inl.h @@ -60,7 +60,7 @@ std::string CursorBase::readTerminatedString( auto result = readWhile(keepReading); // skip over the terminator character if (isAtEnd()) { - std::__throw_out_of_range("terminator not found"); + throw_exception("terminator not found"); } skip(1); diff --git a/folly/io/Cursor.h b/folly/io/Cursor.h index be231d9b624..2448f2af177 100644 --- a/folly/io/Cursor.h +++ b/folly/io/Cursor.h @@ -29,7 +29,7 @@ #include #include #include -#include +#include /** * Cursor class for fast iteration over IOBuf chains. @@ -413,6 +413,9 @@ class CursorBase { } void pull(void* buf, size_t len) { + if (UNLIKELY(len == 0)) { + return; + } dcheckIntegrity(); if (LIKELY(crtPos_ + len <= crtEnd_)) { memcpy(buf, data(), len); @@ -449,13 +452,13 @@ class CursorBase { void clone(std::unique_ptr& buf, size_t len) { if (UNLIKELY(cloneAtMost(buf, len) != len)) { - std::__throw_out_of_range("underflow"); + throw_exception("underflow"); } } void clone(folly::IOBuf& buf, size_t len) { if (UNLIKELY(cloneAtMost(buf, len) != len)) { - std::__throw_out_of_range("underflow"); + throw_exception("underflow"); } } @@ -526,13 +529,13 @@ class CursorBase { } if (otherBuf == other.buffer_) { - std::__throw_out_of_range("wrap-around"); + throw_exception("wrap-around"); } len += crtPos_ - crtBegin_; } else { if (crtPos_ < other.crtPos_) { - std::__throw_out_of_range("underflow"); + throw_exception("underflow"); } len += crtPos_ - other.crtPos_; @@ -552,7 +555,7 @@ class CursorBase { len += curBuf->length(); curBuf = curBuf->next(); if (curBuf == buf || curBuf == buffer_) { - std::__throw_out_of_range("wrap-around"); + throw_exception("wrap-around"); } } @@ -629,7 +632,7 @@ class CursorBase { for (size_t available; (available = length()) < len; ) { str->append(reinterpret_cast(data()), available); if (UNLIKELY(!tryAdvanceBuffer())) { - std::__throw_out_of_range("string underflow"); + throw_exception("string underflow"); } len -= available; } @@ -639,6 +642,12 @@ class CursorBase { } size_t pullAtMostSlow(void* buf, size_t len) { + // If the length of this buffer is 0 try advancing it. + // Otherwise on the first iteration of the following loop memcpy is called + // with a null source pointer. 
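[Editor's note: every std::__throw_out_of_range call site in Cursor now routes through folly::throw_exception, introduced later in this diff in folly/lang/Exception.h. It throws normally when exceptions are enabled and falls back to std::terminate() under -fno-exceptions, without relying on libstdc++ internals. A small sketch of the call pattern; checkedIndex is a hypothetical function, not folly API.

#include <cstddef>
#include <stdexcept>

#include <folly/lang/Exception.h>

// Hypothetical bounds check illustrating the pattern Cursor uses:
// throws std::out_of_range when exceptions are enabled, terminates
// when the translation unit is built with -fno-exceptions.
size_t checkedIndex(size_t i, size_t size) {
  if (i >= size) {
    folly::throw_exception<std::out_of_range>("index out of range");
  }
  return i;
}]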
+ if (UNLIKELY(length() == 0 && !tryAdvanceBuffer())) { + return 0; + } uint8_t* p = reinterpret_cast(buf); size_t copied = 0; for (size_t available; (available = length()) < len; ) { @@ -658,7 +667,7 @@ class CursorBase { void pullSlow(void* buf, size_t len) { if (UNLIKELY(pullAtMostSlow(buf, len) != len)) { - std::__throw_out_of_range("underflow"); + throw_exception("underflow"); } } @@ -678,7 +687,7 @@ class CursorBase { void skipSlow(size_t len) { if (UNLIKELY(skipAtMostSlow(len) != len)) { - std::__throw_out_of_range("underflow"); + throw_exception("underflow"); } } @@ -697,7 +706,7 @@ class CursorBase { void retreatSlow(size_t len) { if (UNLIKELY(retreatAtMostSlow(len) != len)) { - std::__throw_out_of_range("underflow"); + throw_exception("underflow"); } } @@ -745,13 +754,13 @@ class Writable { void push(const uint8_t* buf, size_t len) { Derived* d = static_cast(this); if (d->pushAtMost(buf, len) != len) { - std::__throw_out_of_range("overflow"); + throw_exception("overflow"); } } void push(ByteRange buf) { if (this->pushAtMost(buf) != buf.size()) { - std::__throw_out_of_range("overflow"); + throw_exception("overflow"); } } @@ -767,7 +776,7 @@ class Writable { */ void push(Cursor cursor, size_t len) { if (this->pushAtMost(cursor, len) != len) { - std::__throw_out_of_range("overflow"); + throw_exception("overflow"); } } @@ -996,7 +1005,7 @@ class Appender : public detail::Writable { // Waste the rest of the current buffer and allocate a new one. // Don't make it too small, either. if (growth_ == 0) { - std::__throw_out_of_range("can't grow buffer chain"); + throw_exception("can't grow buffer chain"); } n = std::max(n, growth_); @@ -1014,6 +1023,13 @@ class Appender : public detail::Writable { return 0; } + // If the length of this buffer is 0 try growing it. + // Otherwise on the first iteration of the following loop memcpy is called + // with a null source pointer. + if (UNLIKELY(length() == 0 && !tryGrowChain())) { + return 0; + } + size_t copied = 0; for (;;) { // Fast path: it all fits in one buffer. diff --git a/folly/io/IOBufQueue.cpp b/folly/io/IOBufQueue.cpp index 5384e3c9673..f685c7aa0a4 100644 --- a/folly/io/IOBufQueue.cpp +++ b/folly/io/IOBufQueue.cpp @@ -47,9 +47,8 @@ appendToChain(unique_ptr& dst, unique_ptr&& src, bool pack) { // joining two IOBufQueues together. 
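[Editor's note: for context, appendToChain's pack mode copies small source buffers into the tail's tailroom rather than linking them into the chain; the added n > 0 condition keeps empty buffers from reaching memcpy with a possibly-null data pointer. A rough caller-side sketch of packing, assuming IOBufQueue's append(buf, pack) overload.

#include <folly/io/IOBuf.h>
#include <folly/io/IOBufQueue.h>

int main() {
  folly::IOBufQueue queue{folly::IOBufQueue::cacheChainLength()};
  // With pack = true, a small appended buffer may be memcpy'd into the
  // existing tail's tailroom (when room allows) instead of being linked
  // as a new chain element; empty buffers are now skipped by the
  // n > 0 guard above.
  queue.append(folly::IOBuf::copyBuffer("hello "), true);
  queue.append(folly::IOBuf::copyBuffer("world"), true);
  auto buf = queue.move();
  return buf->computeChainDataLength() == 11 ? 0 : 1;
}]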
size_t copyRemaining = MAX_PACK_COPY; uint64_t n; - while (src && - (n = src->length()) < copyRemaining && - n < tail->tailroom()) { + while (src && (n = src->length()) < copyRemaining && + n < tail->tailroom() && n > 0) { memcpy(tail->writableTail(), src->data(), n); tail->append(n); copyRemaining -= n; diff --git a/folly/io/async/AsyncSSLSocket.cpp b/folly/io/async/AsyncSSLSocket.cpp index 3ca4a86784e..7b3c88f2ade 100644 --- a/folly/io/async/AsyncSSLSocket.cpp +++ b/folly/io/async/AsyncSSLSocket.cpp @@ -19,7 +19,6 @@ #include #include -#include #include #include #include @@ -365,9 +364,11 @@ void AsyncSSLSocket::shutdownWriteNow() { } bool AsyncSSLSocket::good() const { - return (AsyncSocket::good() && - (sslState_ == STATE_ACCEPTING || sslState_ == STATE_CONNECTING || - sslState_ == STATE_ESTABLISHED || sslState_ == STATE_UNENCRYPTED)); + return ( + AsyncSocket::good() && + (sslState_ == STATE_ACCEPTING || sslState_ == STATE_CONNECTING || + sslState_ == STATE_ESTABLISHED || sslState_ == STATE_UNENCRYPTED || + sslState_ == STATE_UNINIT)); } // The TAsyncTransport definition of 'good' states that the transport is @@ -823,6 +824,9 @@ const SSL* AsyncSSLSocket::getSSL() const { } void AsyncSSLSocket::setSSLSession(SSL_SESSION *session, bool takeOwnership) { + if (sslSession_) { + SSL_SESSION_free(sslSession_); + } sslSession_ = session; if (!takeOwnership && session != nullptr) { // Increment the reference count diff --git a/folly/io/async/AsyncUDPSocket.cpp b/folly/io/async/AsyncUDPSocket.cpp index 5ed0204de84..6dfad74edd1 100644 --- a/folly/io/async/AsyncUDPSocket.cpp +++ b/folly/io/async/AsyncUDPSocket.cpp @@ -132,6 +132,35 @@ void AsyncUDPSocket::bind(const folly::SocketAddress& address) { } } +void AsyncUDPSocket::dontFragment(bool df) { + (void)df; // to avoid potential unused variable warning +#if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DO) && \ + defined(IP_PMTUDISC_WANT) + if (address().getFamily() == AF_INET) { + int v4 = df ? IP_PMTUDISC_DO : IP_PMTUDISC_WANT; + if (fsp::setsockopt(fd_, IPPROTO_IP, IP_MTU_DISCOVER, &v4, sizeof(v4))) { + throw AsyncSocketException( + AsyncSocketException::NOT_OPEN, + "Failed to set DF with IP_MTU_DISCOVER", + errno); + } + } +#endif +#if defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DO) && \ + defined(IPV6_PMTUDISC_WANT) + if (address().getFamily() == AF_INET6) { + int v6 = df ? IPV6_PMTUDISC_DO : IPV6_PMTUDISC_WANT; + if (fsp::setsockopt( + fd_, IPPROTO_IPV6, IPV6_MTU_DISCOVER, &v6, sizeof(v6))) { + throw AsyncSocketException( + AsyncSocketException::NOT_OPEN, + "Failed to set DF with IPV6_MTU_DISCOVER", + errno); + } + } +#endif +} + void AsyncUDPSocket::setFD(int fd, FDOwnership ownership) { CHECK_EQ(-1, fd_) << "Already bound to another FD"; diff --git a/folly/io/async/AsyncUDPSocket.h b/folly/io/async/AsyncUDPSocket.h index 0f390384f97..6e953efc9ea 100644 --- a/folly/io/async/AsyncUDPSocket.h +++ b/folly/io/async/AsyncUDPSocket.h @@ -162,6 +162,18 @@ class AsyncUDPSocket : public EventHandler { return eventBase_; } + /** + * Enable or disable fragmentation on the socket. + * + * On Linux, this sets IP(V6)_MTU_DISCOVER to IP(V6)_PMTUDISC_DO when enabled, + * and to IP(V6)_PMTUDISC_WANT when disabled. IP(V6)_PMTUDISC_WANT will use + * per-route setting to set DF bit. It may be more desirable to use + * IP(V6)_PMTUDISC_PROBE as opposed to IP(V6)_PMTUDISC_DO for apps that has + * its own PMTU Discovery mechanism. + * Note this doesn't work on Apple. 
+ */ + virtual void dontFragment(bool df); + protected: virtual ssize_t sendmsg(int socket, const struct msghdr* message, int flags) { return ::sendmsg(socket, message, flags); diff --git a/folly/io/async/EventBase.cpp b/folly/io/async/EventBase.cpp index a14cdbe1552..ccb3a3410c9 100644 --- a/folly/io/async/EventBase.cpp +++ b/folly/io/async/EventBase.cpp @@ -701,6 +701,9 @@ bool EventBase::scheduleTimeout(AsyncTimeout* obj, tv.tv_usec = long((timeout.count() % 1000LL) * 1000LL); struct event* ev = obj->getEvent(); + + DCHECK(ev->ev_base); + if (event_add(ev, &tv) < 0) { LOG(ERROR) << "EventBase: failed to schedule timeout: " << strerror(errno); return false; @@ -732,6 +735,13 @@ const std::string& EventBase::getName() { return name_; } +void EventBase::scheduleAt(Func&& fn, TimePoint const& timeout) { + auto duration = timeout - now(); + timer().scheduleTimeoutFn( + std::move(fn), + std::chrono::duration_cast(duration)); +} + const char* EventBase::getLibeventVersion() { return event_get_version(); } const char* EventBase::getLibeventMethod() { return event_get_method(); } @@ -743,5 +753,9 @@ VirtualEventBase& EventBase::getVirtualEventBase() { return *virtualEventBase_; } +EventBase* EventBase::getEventBase() { + return this; +} + constexpr std::chrono::milliseconds EventBase::SmoothLoopTime::buffer_interval_; } // namespace folly diff --git a/folly/io/async/EventBase.h b/folly/io/async/EventBase.h index a7662d5b3b0..73b80db7d85 100644 --- a/folly/io/async/EventBase.h +++ b/folly/io/async/EventBase.h @@ -39,6 +39,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -126,7 +129,10 @@ class VirtualEventBase; */ class EventBase : private boost::noncopyable, public TimeoutManager, - public DrivableExecutor { + public DrivableExecutor, + public IOExecutor, + public SequencedExecutor, + public ScheduledExecutor { public: using Func = folly::Function; @@ -624,6 +630,9 @@ class EventBase : private boost::noncopyable, loopOnce(); } + // Implements the ScheduledExecutor interface + void scheduleAt(Func&& fn, TimePoint const& timeout) override; + /// Returns you a handle which make loop() behave like loopForever() until /// destroyed. loop() will return to its original behavior only when all /// loop keep-alives are released. @@ -657,6 +666,9 @@ class EventBase : private boost::noncopyable, // don't need to manage the life time of the VirtualEventBase used. 
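[Editor's note: EventBase::scheduleAt is the ScheduledExecutor hook implemented above; it converts the absolute deadline into a relative HHWheelTimer timeout measured against now(). A minimal usage sketch, not taken from the diff.

#include <chrono>
#include <cstdio>

#include <folly/io/async/EventBase.h>

int main() {
  folly::EventBase evb;
  // Schedule a function ~50ms from now via the ScheduledExecutor
  // interface; the deadline is evaluated against evb.now().
  evb.scheduleAt(
      [] { std::printf("fired\n"); },
      evb.now() + std::chrono::milliseconds(50));
  evb.loop(); // returns once the timeout has fired and the loop drains
  return 0;
}]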
folly::VirtualEventBase& getVirtualEventBase(); + /// Implements the IOExecutor interface + EventBase* getEventBase() override; + protected: void keepAliveAcquire() override { if (inRunningEventBaseThread()) { diff --git a/folly/io/async/EventBaseLocal.cpp b/folly/io/async/EventBaseLocal.cpp index 8b21fc18b78..2e6b782873f 100644 --- a/folly/io/async/EventBaseLocal.cpp +++ b/folly/io/async/EventBaseLocal.cpp @@ -43,17 +43,13 @@ void EventBaseLocalBase::erase(EventBase& evb) { evb.localStorage_.erase(key_); evb.localStorageToDtor_.erase(this); - SYNCHRONIZED(eventBases_) { - eventBases_.erase(&evb); - } + eventBases_.wlock()->erase(&evb); } void EventBaseLocalBase::onEventBaseDestruction(EventBase& evb) { evb.dcheckIsInEventBaseThread(); - SYNCHRONIZED(eventBases_) { - eventBases_.erase(&evb); - } + eventBases_.wlock()->erase(&evb); } void EventBaseLocalBase::setVoid(EventBase& evb, std::shared_ptr&& ptr) { diff --git a/folly/io/async/SSLContext.cpp b/folly/io/async/SSLContext.cpp index 548f64dc3f3..c8404e9a9b3 100644 --- a/folly/io/async/SSLContext.cpp +++ b/folly/io/async/SSLContext.cpp @@ -680,7 +680,7 @@ std::string SSLContext::getErrors(int errnoCopy) { } const char* reason = ERR_reason_error_string(errorCode); if (reason == nullptr) { - snprintf(message, sizeof(message) - 1, "SSL error # %lu", errorCode); + snprintf(message, sizeof(message) - 1, "SSL error # %08lX", errorCode); reason = message; } errors += reason; diff --git a/folly/io/async/ssl/OpenSSLUtils.cpp b/folly/io/async/ssl/OpenSSLUtils.cpp index 9aa6444b18d..1a4bc3e396c 100644 --- a/folly/io/async/ssl/OpenSSLUtils.cpp +++ b/folly/io/async/ssl/OpenSSLUtils.cpp @@ -48,8 +48,8 @@ bool OpenSSLUtils::getTLSMasterKey( return true; } #else - (SSL_SESSION*)session; - (MutableByteRange) keyOut; + (void)session; + (void)keyOut; #endif return false; } @@ -65,8 +65,8 @@ bool OpenSSLUtils::getTLSClientRandom( return true; } #else - (SSL*)ssl; - (MutableByteRange) randomOut; + (void)ssl; + (void)randomOut; #endif return false; } diff --git a/folly/io/async/test/EventBaseTest.cpp b/folly/io/async/test/EventBaseTest.cpp index de1149c148d..429beef3300 100644 --- a/folly/io/async/test/EventBaseTest.cpp +++ b/folly/io/async/test/EventBaseTest.cpp @@ -1073,6 +1073,55 @@ TEST(EventBaseTest, DestroyTimeout) { T_CHECK_TIMEOUT(start, end, milliseconds(10)); } +/** + * Test the scheduled executor impl + */ +TEST(EventBaseTest, ScheduledFn) { + EventBase eb; + + TimePoint timestamp1(false); + TimePoint timestamp2(false); + TimePoint timestamp3(false); + eb.schedule(std::bind(&TimePoint::reset, ×tamp1), milliseconds(9)); + eb.schedule(std::bind(&TimePoint::reset, ×tamp2), milliseconds(19)); + eb.schedule(std::bind(&TimePoint::reset, ×tamp3), milliseconds(39)); + + TimePoint start; + eb.loop(); + TimePoint end; + + T_CHECK_TIMEOUT(start, timestamp1, milliseconds(9)); + T_CHECK_TIMEOUT(start, timestamp2, milliseconds(19)); + T_CHECK_TIMEOUT(start, timestamp3, milliseconds(39)); + T_CHECK_TIMEOUT(start, end, milliseconds(39)); +} + +TEST(EventBaseTest, ScheduledFnAt) { + EventBase eb; + + TimePoint timestamp0(false); + TimePoint timestamp1(false); + TimePoint timestamp2(false); + TimePoint timestamp3(false); + eb.scheduleAt( + std::bind(&TimePoint::reset, ×tamp1), eb.now() - milliseconds(5)); + eb.scheduleAt( + std::bind(&TimePoint::reset, ×tamp1), eb.now() + milliseconds(9)); + eb.scheduleAt( + std::bind(&TimePoint::reset, ×tamp2), eb.now() + milliseconds(19)); + eb.scheduleAt( + std::bind(&TimePoint::reset, ×tamp3), eb.now() + milliseconds(39)); + + 
TimePoint start; + eb.loop(); + TimePoint end; + + T_CHECK_TIME_LT(start, timestamp0, milliseconds(0)); + T_CHECK_TIMEOUT(start, timestamp1, milliseconds(9)); + T_CHECK_TIMEOUT(start, timestamp2, milliseconds(19)); + T_CHECK_TIMEOUT(start, timestamp3, milliseconds(39)); + T_CHECK_TIMEOUT(start, end, milliseconds(39)); +} /////////////////////////////////////////////////////////////////////////// // Test for runInThreadTestFunc() @@ -1917,6 +1966,13 @@ TEST(EventBaseTest, DrivableExecutorTest) { t.join(); } +TEST(EventBaseTest, IOExecutorTest) { + EventBase base; + + // Ensure EventBase manages itself as an IOExecutor. + EXPECT_EQ(base.getEventBase(), &base); +} + TEST(EventBaseTest, RequestContextTest) { EventBase evb; auto defaultCtx = RequestContext::get(); diff --git a/folly/io/async/test/MockAsyncUDPSocket.h b/folly/io/async/test/MockAsyncUDPSocket.h index 847a0ca209f..c82a5620932 100644 --- a/folly/io/async/test/MockAsyncUDPSocket.h +++ b/folly/io/async/test/MockAsyncUDPSocket.h @@ -39,6 +39,7 @@ struct MockAsyncUDPSocket : public AsyncUDPSocket { MOCK_CONST_METHOD0(getFD, int()); MOCK_METHOD1(setReusePort, void(bool)); MOCK_METHOD1(setReuseAddr, void(bool)); + MOCK_METHOD1(dontFragment, void(bool)); }; }} diff --git a/folly/io/async/test/NotificationQueueBenchmark.cpp b/folly/io/async/test/NotificationQueueBenchmark.cpp new file mode 100644 index 00000000000..d48b81aaa54 --- /dev/null +++ b/folly/io/async/test/NotificationQueueBenchmark.cpp @@ -0,0 +1,90 @@ +/* + * Copyright 2018-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include + +using namespace folly; + +static size_t constexpr kMaxRead = 20; + +void runTest(int iters, int numThreads) { + BenchmarkSuspender susp; + EventBase evb; + evb.setMaxReadAtOnce(kMaxRead); + + std::mutex m; + std::condition_variable cv; + int numRunning = 0; + int numProcessed = 0; + int numTotal = iters * numThreads; + + std::vector threads; + for (int i = 0; i < numThreads; i++) { + threads.push_back(std::thread([&]() mutable { + // wait for all the threads to start up + bool notifyAll = false; + { + std::lock_guard lk(m); + if (++numRunning == numThreads) { + notifyAll = true; + susp.dismiss(); + } + } + + if (notifyAll) { + cv.notify_all(); + } else { + std::unique_lock lk(m); + cv.wait(lk, [&]() { return numRunning == numThreads; }); + } + + for (auto j = 0; j < iters; j++) { + evb.runInEventBaseThread([&]() mutable { + if (++numProcessed == numTotal) { + evb.terminateLoopSoon(); + ; + } + }); + } + })); + } + + evb.loopForever(); + susp.rehire(); + + for (auto& t : threads) { + t.join(); + } +} + +BENCHMARK_PARAM(runTest, 1); +BENCHMARK_PARAM(runTest, 2); +BENCHMARK_PARAM(runTest, 4); +BENCHMARK_PARAM(runTest, 8); +BENCHMARK_PARAM(runTest, 16); +BENCHMARK_PARAM(runTest, 32); + +int main(int argc, char* argv[]) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + folly::runBenchmarks(); + + return 0; +} diff --git a/folly/lang/Exception.h b/folly/lang/Exception.h new file mode 100644 index 00000000000..d2eecb4c015 --- /dev/null +++ b/folly/lang/Exception.h @@ -0,0 +1,48 @@ +/* + * Copyright 2018-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include + +namespace folly { + +/// throw_exception +/// +/// Throw an exception if exceptions are enabled, or terminate if compiled with +/// -fno-exceptions. +template +[[noreturn]] FOLLY_NOINLINE FOLLY_COLD void throw_exception(Ex&& ex) { +#if (__GNUC__ && !__EXCEPTIONS) + std::terminate(); +#else + throw static_cast(ex); +#endif +} + +/// throw_exception +/// +/// Construct and throw an exception if exceptions are enabled, or terminate if +/// compiled with -fno-exceptions. +template +[[noreturn]] FOLLY_NOINLINE FOLLY_COLD void throw_exception(Args&&... 
args) { + throw_exception(Ex(static_cast(args)...)); +} + +} // namespace folly diff --git a/folly/lang/Launder.h b/folly/lang/Launder.h index b54d7e68b30..90192cd0400 100644 --- a/folly/lang/Launder.h +++ b/folly/lang/Launder.h @@ -26,7 +26,7 @@ * * std::launder */ -#if __cpp_lib_launder >= 201606 +#if __cpp_lib_launder >= 201606 || _MSC_FULL_VER >= 191426310 namespace folly { diff --git a/folly/lang/PropagateConst.h b/folly/lang/PropagateConst.h index 61c80d96726..71d88cbf676 100644 --- a/folly/lang/PropagateConst.h +++ b/folly/lang/PropagateConst.h @@ -64,8 +64,8 @@ class propagate_const { _t())>>; constexpr propagate_const() = default; - constexpr propagate_const(propagate_const&&) = default; - constexpr propagate_const(propagate_const const&) = delete; + FOLLY_CPP14_CONSTEXPR propagate_const(propagate_const&&) = default; + propagate_const(propagate_const const&) = delete; template < typename OtherPointer, @@ -105,8 +105,8 @@ class propagate_const { constexpr propagate_const(OtherPointer&& other) : pointer_(static_cast(other)) {} - constexpr propagate_const& operator=(propagate_const&&) = default; - constexpr propagate_const& operator=(propagate_const const&) = delete; + FOLLY_CPP14_CONSTEXPR propagate_const& operator=(propagate_const&&) = default; + propagate_const& operator=(propagate_const const&) = delete; template < typename OtherPointer, @@ -134,7 +134,7 @@ class propagate_const { detail::propagate_const_adl::adl_swap(pointer_, other.pointer_); } - constexpr element_type* get() { + FOLLY_CPP14_CONSTEXPR element_type* get() { return get_(pointer_); } @@ -146,7 +146,7 @@ class propagate_const { return static_cast(pointer_); } - constexpr element_type& operator*() { + FOLLY_CPP14_CONSTEXPR element_type& operator*() { return *get(); } @@ -154,7 +154,7 @@ class propagate_const { return *get(); } - constexpr element_type* operator->() { + FOLLY_CPP14_CONSTEXPR element_type* operator->() { return get(); } @@ -167,7 +167,7 @@ class propagate_const { typename = _t::value || std::is_convertible::value>>> - constexpr operator element_type*() { + FOLLY_CPP14_CONSTEXPR operator element_type*() { return get(); } @@ -183,13 +183,15 @@ class propagate_const { private: friend Pointer& get_underlying<>(propagate_const&); friend Pointer const& get_underlying<>(propagate_const const&); + template + friend class propagate_const; template - static T* get_(T* t) { + constexpr static T* get_(T* t) { return t; } template - static auto get_(T& t) -> decltype(t.get()) { + constexpr static auto get_(T& t) -> decltype(t.get()) { return t.get(); } diff --git a/folly/memory/Arena-inl.h b/folly/memory/Arena-inl.h index 5deebb0e62e..2e7bb435b80 100644 --- a/folly/memory/Arena-inl.h +++ b/folly/memory/Arena-inl.h @@ -30,14 +30,14 @@ Arena::Block::allocate(Alloc& alloc, size_t size, bool allowSlack) { allocSize = ArenaAllocatorTraits::goodSize(alloc, allocSize); } - void* mem = alloc.allocate(allocSize); + void* mem = std::allocator_traits::allocate(alloc, allocSize); return std::make_pair(new (mem) Block(), allocSize - sizeof(Block)); } template void Arena::Block::deallocate(Alloc& alloc) { this->~Block(); - alloc.deallocate(this); + std::allocator_traits::deallocate(alloc, this, 1); } template diff --git a/folly/memory/Arena.h b/folly/memory/Arena.h index 189954504cb..f20480e5708 100644 --- a/folly/memory/Arena.h +++ b/folly/memory/Arena.h @@ -36,16 +36,10 @@ namespace folly { * Simple arena: allocate memory which gets freed when the arena gets * destroyed. 
* - * The arena itself allocates memory using a custom allocator which provides - * the following interface (same as required by StlAllocator in StlAllocator.h) + * The arena itself allocates memory using a custom allocator which conforms + * to the C++ concept Allocator. * - * void* allocate(size_t size); - * Allocate a block of size bytes, properly aligned to the maximum - * alignment required on your system; throw std::bad_alloc if the - * allocation can't be satisfied. - * - * void deallocate(void* ptr); - * Deallocate a previously allocated block. + * http://en.cppreference.com/w/cpp/concept/Allocator * * You may also specialize ArenaAllocatorTraits for your allocator type to * provide: @@ -101,7 +95,7 @@ class Arena { return r; } - void deallocate(void* /* p */) { + void deallocate(void* /* p */, size_t = 0) { // Deallocate? Never! } @@ -121,13 +115,11 @@ class Arena { return bytesUsed_; } - // not copyable + // not copyable or movable Arena(const Arena&) = delete; Arena& operator=(const Arena&) = delete; - - // movable - Arena(Arena&&) = default; - Arena& operator=(Arena&&) = default; + Arena(Arena&&) = delete; + Arena& operator=(Arena&&) = delete; private: struct Block; @@ -184,7 +176,7 @@ class Arena { void* allocateSlow(size_t size); // Empty member optimization: package Alloc with a non-empty member - // in case Alloc is empty (as it is in the case of SysAlloc). + // in case Alloc is empty (as it is in the case of SysAllocator). struct AllocAndSize : public Alloc { explicit AllocAndSize(const Alloc& a, size_t s) : Alloc(a), minBlockSize(s) { @@ -210,7 +202,7 @@ class Arena { }; template -struct IsArenaAllocator> : std::true_type { }; +struct AllocatorHasTrivialDeallocate> : std::true_type {}; /** * By default, don't pad the given size. @@ -221,8 +213,8 @@ struct ArenaAllocatorTraits { }; template <> -struct ArenaAllocatorTraits { - static size_t goodSize(const SysAlloc& /* alloc */, size_t size) { +struct ArenaAllocatorTraits> { + static size_t goodSize(const SysAllocator& /* alloc */, size_t size) { return goodMallocSize(size); } }; @@ -230,17 +222,23 @@ struct ArenaAllocatorTraits { /** * Arena that uses the system allocator (malloc / free) */ -class SysArena : public Arena { +class SysArena : public Arena> { public: - explicit SysArena(size_t minBlockSize = kDefaultMinBlockSize, - size_t sizeLimit = kNoSizeLimit, - size_t maxAlign = kDefaultMaxAlign) - : Arena(SysAlloc(), minBlockSize, sizeLimit, maxAlign) { - } + explicit SysArena( + size_t minBlockSize = kDefaultMinBlockSize, + size_t sizeLimit = kNoSizeLimit, + size_t maxAlign = kDefaultMaxAlign) + : Arena>({}, minBlockSize, sizeLimit, maxAlign) {} }; template <> -struct IsArenaAllocator : std::true_type { }; +struct AllocatorHasTrivialDeallocate : std::true_type {}; + +template +using ArenaAllocator = CxxAllocatorAdaptor>; + +template +using SysArenaAllocator = ArenaAllocator>; } // namespace folly diff --git a/folly/memory/Malloc.h b/folly/memory/Malloc.h index 7e6bd2dc71d..46e0e8e9f9b 100644 --- a/folly/memory/Malloc.h +++ b/folly/memory/Malloc.h @@ -46,8 +46,8 @@ // includes and uses fbstring. 
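[Editor's note: the new SysArenaAllocator alias replaces the old StlAllocator wrapper; it adapts an arena to the standard Allocator concept so containers can allocate from it directly, with all reclamation deferred to arena destruction. A short sketch under that assumption.

#include <vector>

#include <folly/memory/Arena.h>

int main() {
  folly::SysArena arena;
  // Adapt the arena to the standard Allocator concept; deallocate()
  // is a no-op, and all memory is reclaimed when `arena` dies.
  folly::SysArenaAllocator<size_t> alloc(arena);
  std::vector<size_t, folly::SysArenaAllocator<size_t>> vec(alloc);
  for (size_t i = 0; i < 1000; ++i) {
    vec.push_back(i);
  }
  return vec.size() == 1000 ? 0 : 1;
}

Because deallocation is trivial by design, per-element frees cost nothing; that is exactly what the AllocatorHasTrivialDeallocate trait above advertises.]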
#if defined(_GLIBCXX_USE_FB) && !defined(_LIBSTDCXX_FBSTRING) +#include #include -#include #include @@ -93,14 +93,12 @@ extern "C" int mallctlbymib(const size_t*, size_t, void*, size_t*, void*, size_t) __attribute__((__weak__)); -#include - #define FOLLY_HAVE_MALLOC_H 1 #else // !defined(_LIBSTDCXX_FBSTRING) +#include /* nolint */ #include /* nolint */ -#include /* nolint */ #endif @@ -229,7 +227,7 @@ static const size_t jemallocMinInPlaceExpandable = 4096; inline void* checkedMalloc(size_t size) { void* p = malloc(size); if (!p) { - std::__throw_bad_alloc(); + throw_exception(); } return p; } @@ -237,7 +235,7 @@ inline void* checkedMalloc(size_t size) { inline void* checkedCalloc(size_t n, size_t size) { void* p = calloc(n, size); if (!p) { - std::__throw_bad_alloc(); + throw_exception(); } return p; } @@ -245,7 +243,7 @@ inline void* checkedCalloc(size_t n, size_t size) { inline void* checkedRealloc(void* ptr, size_t size) { void* p = realloc(ptr, size); if (!p) { - std::__throw_bad_alloc(); + throw_exception(); } return p; } diff --git a/folly/memory/ThreadCachedArena.h b/folly/memory/ThreadCachedArena.h index 67e5a6ab8b4..0f4a432a446 100644 --- a/folly/memory/ThreadCachedArena.h +++ b/folly/memory/ThreadCachedArena.h @@ -51,7 +51,7 @@ class ThreadCachedArena { return arena->allocate(size); } - void deallocate(void* /* p */) { + void deallocate(void* /* p */, size_t = 0) { // Deallocate? Never! } @@ -82,6 +82,9 @@ class ThreadCachedArena { }; template <> -struct IsArenaAllocator : std::true_type { }; +struct AllocatorHasTrivialDeallocate : std::true_type {}; + +template +using ThreadCachedArenaAllocator = CxxAllocatorAdaptor; } // namespace folly diff --git a/folly/memory/test/ArenaTest.cpp b/folly/memory/test/ArenaTest.cpp index 46f4dbb142c..d7c06f33fb4 100644 --- a/folly/memory/test/ArenaTest.cpp +++ b/folly/memory/test/ArenaTest.cpp @@ -25,7 +25,7 @@ using namespace folly; -static_assert(IsArenaAllocator::value, ""); +static_assert(AllocatorHasTrivialDeallocate::value, ""); TEST(Arena, SizeSanity) { std::set allocatedItems; @@ -79,7 +79,7 @@ TEST(Arena, SizeSanity) { // Nuke 'em all for (const auto& item : allocatedItems) { - arena.deallocate(item); + arena.deallocate(item, 0 /* unused */); } //The total size should be the same EXPECT_TRUE(arena.totalSize() >= minimum_size); @@ -134,8 +134,8 @@ TEST(Arena, Vector) { EXPECT_EQ(arena.totalSize(), sizeof(SysArena)); - std::vector> - vec { {}, StlAllocator(&arena) }; + std::vector> vec{ + {}, SysArenaAllocator(arena)}; for (size_t i = 0; i < 1000; i++) { vec.push_back(i); @@ -157,17 +157,6 @@ TEST(Arena, SizeLimit) { EXPECT_THROW(arena.allocate(maxSize + 1), std::bad_alloc); } -TEST(Arena, MoveArena) { - SysArena arena(sizeof(size_t) * 2); - arena.allocate(sizeof(size_t)); - auto totalSize = arena.totalSize(); - auto bytesUsed = arena.bytesUsed(); - - SysArena moved(std::move(arena)); - EXPECT_EQ(totalSize, moved.totalSize()); - EXPECT_EQ(bytesUsed, moved.bytesUsed()); -} - int main(int argc, char *argv[]) { testing::InitGoogleTest(&argc, argv); gflags::ParseCommandLineFlags(&argc, &argv, true); diff --git a/folly/memory/test/ThreadCachedArenaTest.cpp b/folly/memory/test/ThreadCachedArenaTest.cpp index b8ed76c50c6..31a85e51cf3 100644 --- a/folly/memory/test/ThreadCachedArenaTest.cpp +++ b/folly/memory/test/ThreadCachedArenaTest.cpp @@ -154,16 +154,21 @@ TEST(ThreadCachedArena, MultiThreaded) { mainTester.verify(); } -TEST(ThreadCachedArena, StlAllocator) { - typedef std::unordered_map< - int, int, std::hash, std::equal_to, - 
StlAllocator>> Map; +TEST(ThreadCachedArena, ThreadCachedArenaAllocator) { + using Map = std::unordered_map< + int, + int, + std::hash, + std::equal_to, + ThreadCachedArenaAllocator>>; static const size_t requestedBlockSize = 64; ThreadCachedArena arena(requestedBlockSize); - Map map {0, std::hash(), std::equal_to(), - StlAllocator>(&arena)}; + Map map{0, + std::hash(), + std::equal_to(), + ThreadCachedArenaAllocator>(arena)}; for (int i = 0; i < 1000; i++) { map[i] = i; @@ -179,7 +184,7 @@ namespace { static const int kNumValues = 10000; BENCHMARK(bmUMStandard, iters) { - typedef std::unordered_map Map; + using Map = std::unordered_map; while (iters--) { Map map {0}; @@ -190,16 +195,20 @@ BENCHMARK(bmUMStandard, iters) { } BENCHMARK(bmUMArena, iters) { - typedef std::unordered_map< - int, int, std::hash, std::equal_to, - StlAllocator>> Map; + using Map = std::unordered_map< + int, + int, + std::hash, + std::equal_to, + ThreadCachedArenaAllocator>>; while (iters--) { ThreadCachedArena arena; - Map map {0, std::hash(), std::equal_to(), - StlAllocator>( - &arena)}; + Map map{0, + std::hash(), + std::equal_to(), + ThreadCachedArenaAllocator>(arena)}; for (int i = 0; i < kNumValues; i++) { map[i] = i; @@ -210,7 +219,7 @@ BENCHMARK(bmUMArena, iters) { BENCHMARK_DRAW_LINE() BENCHMARK(bmMStandard, iters) { - typedef std::map Map; + using Map = std::map; while (iters--) { Map map; @@ -223,16 +232,17 @@ BENCHMARK(bmMStandard, iters) { BENCHMARK_DRAW_LINE() BENCHMARK(bmMArena, iters) { - typedef std::map< - int, int, std::less, - StlAllocator>> Map; + using Map = std::map< + int, + int, + std::less, + ThreadCachedArenaAllocator>>; while (iters--) { ThreadCachedArena arena; - Map map {std::less(), - StlAllocator>( - &arena)}; + Map map{std::less(), + ThreadCachedArenaAllocator>(arena)}; for (int i = 0; i < kNumValues; i++) { map[i] = i; diff --git a/folly/portability/BitsFunctexcept.cpp b/folly/portability/BitsFunctexcept.cpp deleted file mode 100644 index 787f8da8def..00000000000 --- a/folly/portability/BitsFunctexcept.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2013-present Facebook, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include - -#if FOLLY_HAVE_BITS_FUNCTEXCEPT_H - -// for symmetry with the header; this section intentionally left blank - -#else - -namespace std { - -#if _LIBCPP_VERSION < 4000 -void __throw_length_error(char const* msg) { - throw std::length_error(msg); -} - -void __throw_logic_error(char const* msg) { - throw std::logic_error(msg); -} - -void __throw_out_of_range(char const* msg) { - throw std::out_of_range(msg); -} -#endif - -#if _CPPLIB_VER // msvc c++ std lib -void __throw_bad_alloc() { - throw std::bad_alloc(); -} -#endif -} - -#endif diff --git a/folly/portability/Builtins.h b/folly/portability/Builtins.h index 28448e1c4f6..e77e39117b1 100644 --- a/folly/portability/Builtins.h +++ b/folly/portability/Builtins.h @@ -55,6 +55,15 @@ FOLLY_ALWAYS_INLINE int __builtin_clzll(unsigned long long x) { return int(_BitScanReverse64(&index, x) ? 63 - index : 64); } +FOLLY_ALWAYS_INLINE int __builtin_ctz(unsigned int x) { + unsigned long index; + return int(_BitScanForward(&index, (unsigned long)x) ? index : 32); +} + +FOLLY_ALWAYS_INLINE int __builtin_ctzl(unsigned long x) { + return __builtin_ctz((unsigned int)x); +} + FOLLY_ALWAYS_INLINE int __builtin_ctzll(unsigned long long x) { unsigned long index; return int(_BitScanForward64(&index, x) ? index : 64); diff --git a/folly/portability/GFlags.h b/folly/portability/GFlags.h index 23a0fb5b2f6..694e32cac99 100644 --- a/folly/portability/GFlags.h +++ b/folly/portability/GFlags.h @@ -37,6 +37,7 @@ #define DECLARE_double(_name) FOLLY_DECLARE_FLAG(double, D, _name) #define DECLARE_int32(_name) FOLLY_DECLARE_FLAG(int, I, _name) #define DECLARE_int64(_name) FOLLY_DECLARE_FLAG(long long, I64, _name) +#define DECLARE_uint32(_name) FOLLY_DECLARE_FLAG(unsigned long, U32, _name) #define DECLARE_uint64(_name) FOLLY_DECLARE_FLAG(unsigned long long, U64, _name) #define DECLARE_string(_name) FOLLY_DECLARE_FLAG(std::string, S, _name) @@ -54,6 +55,8 @@ FOLLY_DEFINE_FLAG(int, I, _name, _default) #define DEFINE_int64(_name, _default, _description) \ FOLLY_DEFINE_FLAG(long long, I64, _name, _default) +#define DEFINE_uint32(_name, _default, _description) \ + FOLLY_DEFINE_FLAG(unsigned long, U32, _name, _default) #define DEFINE_uint64(_name, _default, _description) \ FOLLY_DEFINE_FLAG(unsigned long long, U64, _name, _default) #define DEFINE_string(_name, _default, _description) \ diff --git a/folly/portability/OpenSSL.cpp b/folly/portability/OpenSSL.cpp index a1d47837842..4ee6640dbde 100644 --- a/folly/portability/OpenSSL.cpp +++ b/folly/portability/OpenSSL.cpp @@ -64,6 +64,10 @@ int X509_up_ref(X509* x) { return CRYPTO_add(&x->references, 1, CRYPTO_LOCK_X509); } +int X509_STORE_up_ref(X509_STORE* v) { + return CRYPTO_add(&v->references, 1, CRYPTO_LOCK_X509_STORE); +} + int EVP_PKEY_up_ref(EVP_PKEY* evp) { return CRYPTO_add(&evp->references, 1, CRYPTO_LOCK_EVP_PKEY); } diff --git a/folly/portability/OpenSSL.h b/folly/portability/OpenSSL.h index 553a088a51f..387ac41a9f8 100644 --- a/folly/portability/OpenSSL.h +++ b/folly/portability/OpenSSL.h @@ -110,6 +110,7 @@ int X509_get_signature_nid(X509* cert); int SSL_CTX_up_ref(SSL_CTX* session); int SSL_SESSION_up_ref(SSL_SESSION* session); int X509_up_ref(X509* x); +int X509_STORE_up_ref(X509_STORE* v); int EVP_PKEY_up_ref(EVP_PKEY* evp); void RSA_get0_key( const RSA* r, diff --git a/folly/small_vector.h b/folly/small_vector.h index f8333a26785..42bed9e4570 100644 --- a/folly/small_vector.h +++ b/folly/small_vector.h @@ -50,11 +50,20 @@ #include #include #include +#include #include -#include #include 
#include -#include + +#if (FOLLY_X64 || FOLLY_PPC64) +#define FOLLY_SV_PACK_ATTR FOLLY_PACK_ATTR +#define FOLLY_SV_PACK_PUSH FOLLY_PACK_PUSH +#define FOLLY_SV_PACK_POP FOLLY_PACK_POP +#else +#define FOLLY_SV_PACK_ATTR +#define FOLLY_SV_PACK_PUSH +#define FOLLY_SV_PACK_POP +#endif // Ignore shadowing warnings within this file, so includers can use -Wshadow. FOLLY_PUSH_WARNING @@ -381,7 +390,7 @@ inline void* shiftPointer(void* p, size_t sizeBytes) { } // namespace detail ////////////////////////////////////////////////////////////////////// -FOLLY_PACK_PUSH +FOLLY_SV_PACK_PUSH template < class Value, std::size_t RequestedMaxInline = 1, @@ -872,14 +881,14 @@ class small_vector : public detail::small_vector_base< reference at(size_type i) { if (i >= size()) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } return (*this)[i]; } const_reference at(size_type i) const { if (i >= size()) { - std::__throw_out_of_range("index out of range"); + throw_exception("index out of range"); } return (*this)[i]; } @@ -1094,7 +1103,7 @@ class small_vector : public detail::small_vector_base< void setCapacity(InternalSizeType c) { capacity_ = c; } - } FOLLY_PACK_ATTR; + } FOLLY_SV_PACK_ATTR; struct HeapPtr { // Lower order bit of heap_ is used as flag to indicate whether capacity is @@ -1108,7 +1117,7 @@ class small_vector : public detail::small_vector_base< void setCapacity(InternalSizeType c) { *static_cast(detail::pointerFlagClear(heap_)) = c; } - } FOLLY_PACK_ATTR; + } FOLLY_SV_PACK_ATTR; #if (FOLLY_X64 || FOLLY_PPC64) typedef unsigned char InlineStorageDataType[sizeof(value_type) * MaxInline]; @@ -1180,9 +1189,9 @@ class small_vector : public detail::small_vector_base< auto vp = detail::pointerFlagClear(pdata_.heap_); free(vp); } - } FOLLY_PACK_ATTR u; -} FOLLY_PACK_ATTR; -FOLLY_PACK_POP + } FOLLY_SV_PACK_ATTR u; +} FOLLY_SV_PACK_ATTR; +FOLLY_SV_PACK_POP ////////////////////////////////////////////////////////////////////// @@ -1209,3 +1218,7 @@ struct IndexableTraits> } // namespace folly FOLLY_POP_WARNING + +#undef FOLLY_SV_PACK_ATTR +#undef FOLLY_SV_PACK_PUSH +#undef FOLLY_SV_PACK_POP diff --git a/folly/sorted_vector_types.h b/folly/sorted_vector_types.h index fb12f50c366..c5da16b0642 100644 --- a/folly/sorted_vector_types.h +++ b/folly/sorted_vector_types.h @@ -72,7 +72,7 @@ #include #include -#include +#include namespace folly { @@ -737,7 +737,7 @@ class sorted_vector_map if (it != end()) { return it->second; } - std::__throw_out_of_range("sorted_vector_map::at"); + throw_exception("sorted_vector_map::at"); } const mapped_type& at(const key_type& key) const { @@ -745,7 +745,7 @@ class sorted_vector_map if (it != end()) { return it->second; } - std::__throw_out_of_range("sorted_vector_map::at"); + throw_exception("sorted_vector_map::at"); } size_type count(const key_type& key) const { diff --git a/folly/ssl/OpenSSLVersionFinder.h b/folly/ssl/OpenSSLVersionFinder.h index 98705fed8c3..d0110d72f36 100644 --- a/folly/ssl/OpenSSLVersionFinder.h +++ b/folly/ssl/OpenSSLVersionFinder.h @@ -21,22 +21,24 @@ // This is used to find the OpenSSL version at runtime. 
Just returning // OPENSSL_VERSION_NUMBER is insufficient as runtime version may be different // from the compile-time version -struct OpenSSLVersionFinder { - static std::string getOpenSSLLongVersion() { +namespace folly { +namespace ssl { +inline std::string getOpenSSLLongVersion() { #ifdef OPENSSL_VERSION_TEXT - return SSLeay_version(SSLEAY_VERSION); + return SSLeay_version(SSLEAY_VERSION); #elif defined(OPENSSL_VERSION_NUMBER) - return folly::format("0x{:x}", OPENSSL_VERSION_NUMBER).str(); + return folly::format("0x{:x}", OPENSSL_VERSION_NUMBER).str(); #else - return ""; + return ""; #endif - } +} - uint64_t getOpenSSLNumericVersion() { +inline uint64_t getOpenSSLNumericVersion() { #ifdef OPENSSL_VERSION_NUMBER - return SSLeay(); + return SSLeay(); #else - return 0; + return 0; #endif - } -}; +} +} // namespace ssl +} // namespace folly diff --git a/folly/synchronization/AsymmetricMemoryBarrier.h b/folly/synchronization/AsymmetricMemoryBarrier.h index 0289eb99ab0..6a0c66ad3e5 100644 --- a/folly/synchronization/AsymmetricMemoryBarrier.h +++ b/folly/synchronization/AsymmetricMemoryBarrier.h @@ -35,5 +35,5 @@ FOLLY_ALWAYS_INLINE void asymmetricLightBarrier() { } } -void asymmetricHeavyBarrier(AMBFlags flags = AMBFlags::NORMAL); +void asymmetricHeavyBarrier(AMBFlags flags = AMBFlags::EXPEDITED); } // namespace folly diff --git a/folly/synchronization/Baton.h b/folly/synchronization/Baton.h index 2c9a131c780..4628926cf84 100644 --- a/folly/synchronization/Baton.h +++ b/folly/synchronization/Baton.h @@ -263,13 +263,24 @@ class Baton { FOLLY_NOINLINE bool tryWaitSlow( const std::chrono::time_point& deadline, const WaitOptions& opt) noexcept { - if (detail::spin_pause_until(deadline, opt, [=] { return ready(); })) { - assert(ready()); - return true; + switch (detail::spin_pause_until(deadline, opt, [=] { return ready(); })) { + case detail::spin_result::success: + return true; + case detail::spin_result::timeout: + return false; + case detail::spin_result::advance: + break; } if (!MayBlock) { - return detail::spin_yield_until(deadline, [=] { return ready(); }); + switch (detail::spin_yield_until(deadline, [=] { return ready(); })) { + case detail::spin_result::success: + return true; + case detail::spin_result::timeout: + return false; + case detail::spin_result::advance: + break; + } } // guess we have to block :( diff --git a/folly/synchronization/LifoSem.h b/folly/synchronization/LifoSem.h index e83c0f16c27..0a0b7031446 100644 --- a/folly/synchronization/LifoSem.h +++ b/folly/synchronization/LifoSem.h @@ -26,14 +26,15 @@ #include #include #include +#include #include -#include +#include namespace folly { template < template class Atom = std::atomic, - class BatonType = Baton> + class BatonType = SaturatingSemaphore> struct LifoSemImpl; /// LifoSem is a semaphore that wakes its waiters in a manner intended to @@ -71,6 +72,11 @@ struct LifoSemImpl; /// /// -- wait() -- waits until tryWait() can succeed. Compare to sem_wait(). /// +/// -- timed wait variants - will wait until timeout. Note when these +/// timeout, the current implementation takes a lock, blocking +/// concurrent pushes and pops. (If timed wait calls are +/// substantial, consider re-working this code to be lock-free). +/// /// LifoSem also has the notion of a shutdown state, in which any calls /// that would block (or are already blocked) throw ShutdownSemError. 
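[Editor's note: the timed-wait additions give LifoSem the same try_wait_for / try_wait_until shape as folly's other semaphores; a waiter that times out removes its node under the new head lock described below. A small usage sketch, not taken from the diff.

#include <chrono>

#include <folly/synchronization/LifoSem.h>

int main() {
  folly::LifoSem sem;
  sem.post();

  // Succeeds immediately: a value is available, no blocking needed.
  bool got = sem.try_wait_for(std::chrono::milliseconds(10));

  // Times out: no post is coming, so this returns false after ~1ms
  // instead of blocking forever the way wait() would.
  bool timedOut = !sem.try_wait_for(std::chrono::milliseconds(1));

  return (got && timedOut) ? 0 : 1;
}]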
/// Note the difference between a call to wait() and a call to wait() @@ -81,11 +87,10 @@ struct LifoSemImpl; /// you can just check isShutdown() yourself (preferrably wrapped in /// an UNLIKELY). This fast-stop behavior is easy to add, but difficult /// to remove if you want the draining behavior, which is why we have -/// chosen the former. Since wait() is the only method that can block, -/// it is the only one that is affected by the shutdown state. +/// chosen the former. /// -/// All LifoSem operations operations except valueGuess() are guaranteed -/// to be linearizable. +/// All LifoSem operations except valueGuess() are guaranteed to be +/// linearizable. typedef LifoSemImpl<> LifoSem; @@ -212,11 +217,13 @@ class LifoSemHead { enum { IsNodeIdxShift = 32, IsShutdownShift = 33, - SeqShift = 34, + IsLockedShift = 34, + SeqShift = 35, }; enum : uint64_t { IsNodeIdxMask = uint64_t(1) << IsNodeIdxShift, IsShutdownMask = uint64_t(1) << IsShutdownShift, + IsLockedMask = uint64_t(1) << IsLockedShift, SeqIncr = uint64_t(1) << SeqShift, SeqMask = ~(SeqIncr - 1), }; @@ -242,6 +249,9 @@ class LifoSemHead { inline constexpr bool isShutdown() const { return (bits & IsShutdownMask) != 0; } + inline constexpr bool isLocked() const { + return (bits & IsLockedMask) != 0; + } inline constexpr uint32_t seq() const { return uint32_t(bits >> SeqShift); } @@ -257,6 +267,7 @@ class LifoSemHead { /// Returns the LifoSemHead that results from popping a waiter node, /// given the current waiter node's next ptr inline LifoSemHead withPop(uint32_t idxNext) const { + assert(!isLocked()); assert(isNodeIdx()); if (idxNext == 0) { // no isNodeIdx bit or data bits. Wraparound of seq bits is okay @@ -272,6 +283,7 @@ class LifoSemHead { /// Returns the LifoSemHead that results from pushing a new waiter node inline LifoSemHead withPush(uint32_t _idx) const { + assert(!isLocked()); assert(isNodeIdx() || value() == 0); assert(!isShutdown()); assert(_idx != 0); @@ -281,6 +293,7 @@ class LifoSemHead { /// Returns the LifoSemHead with value increased by delta, with /// saturation if the maximum value is reached inline LifoSemHead withValueIncr(uint32_t delta) const { + assert(!isLocked()); assert(!isNodeIdx()); auto rv = LifoSemHead{ bits + SeqIncr + delta }; if (UNLIKELY(rv.isNodeIdx())) { @@ -292,6 +305,7 @@ class LifoSemHead { /// Returns the LifoSemHead that results from decrementing the value inline LifoSemHead withValueDecr(uint32_t delta) const { + assert(!isLocked()); assert(delta > 0 && delta <= value()); return LifoSemHead{ bits + SeqIncr - delta }; } @@ -302,6 +316,20 @@ class LifoSemHead { return LifoSemHead{ bits | IsShutdownMask }; } + // Returns LifoSemHead with lock bit set, but rest of bits unchanged. + inline LifoSemHead withLock() const { + assert(!isLocked()); + return LifoSemHead{bits | IsLockedMask}; + } + + // Returns LifoSemHead with lock bit unset, and updated seqno based + // on idx. + inline LifoSemHead withoutLock(uint32_t idxNext) const { + assert(isLocked()); + // We need to treat this as a pop, as we may change the list head. 
+ return LifoSemHead{bits & ~IsLockedMask}.withPop(idxNext); + } + inline constexpr bool operator== (const LifoSemHead& rhs) const { return bits == rhs.bits; } @@ -377,6 +405,11 @@ struct LifoSemBase { // now wake up any waiters while (h.isNodeIdx()) { + if (h.isLocked()) { + std::this_thread::yield(); + h = head_->load(std::memory_order_acquire); + continue; + } auto& node = idxToNode(h.idx()); auto repl = h.withPop(node.next); if (head_->compare_exchange_strong(h, repl)) { @@ -424,10 +457,23 @@ struct LifoSemBase { /// Note that wait() doesn't throw during shutdown if tryWait() would /// return true void wait() { + auto const deadline = std::chrono::steady_clock::time_point::max(); + auto res = try_wait_until(deadline); + FOLLY_SAFE_DCHECK(res, "infinity time has passed"); + } + + template + bool try_wait_for(const std::chrono::duration& timeout) { + return try_wait_until(timeout + std::chrono::steady_clock::now()); + } + + template + bool try_wait_until( + const std::chrono::time_point& deadline) { // early check isn't required for correctness, but is an important // perf win if we can avoid allocating and deallocating a node if (tryWait()) { - return; + return true; } // allocateNode() won't compile unless Handoff has a default @@ -441,10 +487,23 @@ struct LifoSemBase { } if (rv == WaitResult::PUSH) { - node->handoff().wait(); + if (!node->handoff().try_wait_until(deadline)) { + if (tryRemoveNode(*node)) { + return false; + } else { + // We could not remove our node. Return to waiting. + // + // This only happens if we lose a removal race with post(), + // so we are not likely to wait long. This is only + // necessary to ensure we don't return node's memory back to + // IndexedMemPool before post() has had a chance to post to + // handoff(). In a stronger memory reclamation scheme, such + // as hazptr or rcu, this would not be necessary. + node->handoff().wait(); + } + } if (UNLIKELY(node->isShutdownNotice())) { // this wait() didn't consume a value, it was triggered by shutdown - assert(isShutdown()); throw ShutdownSemError( "blocking wait() interrupted by semaphore shutdown"); } @@ -454,6 +513,7 @@ struct LifoSemBase { // recycle the node now } // else node wasn't pushed, so it is safe to recycle + return true; } /// Returns a guess at the current value, designed for debugging. @@ -522,6 +582,57 @@ struct LifoSemBase { return LifoSemRawNode::pool().locateElem(&node); } + // Locks the list head (blocking concurrent pushes and pops) + // and attempts to remove this node. Returns true if node was + // found and removed, false if not found. + bool tryRemoveNode(const LifoSemNode& removenode) { + auto removeidx = nodeToIdx(removenode); + auto head = head_->load(std::memory_order_acquire); + // Try to lock the head. + while (true) { + if (head.isLocked()) { + std::this_thread::yield(); + head = head_->load(std::memory_order_acquire); + continue; + } + if (!head.isNodeIdx()) { + return false; + } + if (head_->compare_exchange_weak( + head, + head.withLock(), + std::memory_order_acquire, + std::memory_order_relaxed)) { + break; + } + } + // Update local var to what head_ is, for better assert() checking. + head = head.withLock(); + bool result = false; + auto idx = head.idx(); + if (idx == removeidx) { + // pop from head. Head seqno is updated. + head_->store( + head.withoutLock(removenode.next), std::memory_order_release); + return true; + } + auto node = &idxToNode(idx); + idx = node->next; + while (idx) { + if (idx == removeidx) { + // Pop from mid-list. 
+ node->next = removenode.next; + result = true; + break; + } + node = &idxToNode(idx); + idx = node->next; + } + // Unlock and return result + head_->store(head.withoutLock(head.idx()), std::memory_order_release); + return result; + } + /// Either increments by n and returns 0, or pops a node and returns it. /// If n + the stripe's value overflows, then the stripe's value /// saturates silently at 2^32-1 @@ -530,6 +641,10 @@ struct LifoSemBase { assert(n > 0); auto head = head_->load(std::memory_order_acquire); + if (head.isLocked()) { + std::this_thread::yield(); + continue; + } if (head.isNodeIdx()) { auto& node = idxToNode(head.idx()); if (head_->compare_exchange_strong(head, head.withPop(node.next))) { @@ -560,6 +675,11 @@ struct LifoSemBase { while (true) { auto head = head_->load(std::memory_order_acquire); + if (head.isLocked()) { + std::this_thread::yield(); + continue; + } + if (!head.isNodeIdx() && head.value() > 0) { // decr auto delta = std::min(n, head.value()); diff --git a/folly/synchronization/MicroSpinLock.h b/folly/synchronization/MicroSpinLock.h new file mode 100644 index 00000000000..17d181f79dc --- /dev/null +++ b/folly/synchronization/MicroSpinLock.h @@ -0,0 +1,158 @@ +/* + * Copyright 2015-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * N.B. You most likely do _not_ want to use MicroSpinLock or any + * other kind of spinlock. Consider MicroLock instead. + * + * In short, spinlocks in preemptive multi-tasking operating systems + * have serious problems and fast mutexes like std::mutex are almost + * certainly the better choice, because letting the OS scheduler put a + * thread to sleep is better for system responsiveness and throughput + * than wasting a timeslice repeatedly querying a lock held by a + * thread that's blocked, and you can't prevent userspace + * programs blocking. + * + * Spinlocks in an operating system kernel make much more sense than + * they do in userspace. + */ + +#pragma once + +/* + * @author Keith Adams + * @author Jordan DeLong + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace folly { + +/* + * A really, *really* small spinlock for fine-grained locking of lots + * of teeny-tiny data. + * + * Zero initializing these is guaranteed to be as good as calling + * init(), since the free state is guaranteed to be all-bits zero. + * + * This class should be kept a POD, so we can used it in other packed + * structs (gcc does not allow __attribute__((__packed__)) on structs that + * contain non-POD data). This means avoid adding a constructor, or + * making some members private, etc. + */ +struct MicroSpinLock { + enum { FREE = 0, LOCKED = 1 }; + // lock_ can't be std::atomic<> to preserve POD-ness. + uint8_t lock_; + + // Initialize this MSL. It is unnecessary to call this if you + // zero-initialize the MicroSpinLock. 
+ void init() { + payload()->store(FREE); + } + + bool try_lock() { + return cas(FREE, LOCKED); + } + + void lock() { + detail::Sleeper sleeper; + do { + while (payload()->load() != FREE) { + sleeper.wait(); + } + } while (!try_lock()); + assert(payload()->load() == LOCKED); + } + + void unlock() { + assert(payload()->load() == LOCKED); + payload()->store(FREE, std::memory_order_release); + } + + private: + std::atomic* payload() { + return reinterpret_cast*>(&this->lock_); + } + + bool cas(uint8_t compare, uint8_t newVal) { + return std::atomic_compare_exchange_strong_explicit(payload(), &compare, newVal, + std::memory_order_acquire, + std::memory_order_relaxed); + } +}; +static_assert( + std::is_pod::value, + "MicroSpinLock must be kept a POD type."); + +////////////////////////////////////////////////////////////////////// + +/** + * Array of spinlocks where each one is padded to prevent false sharing. + * Useful for shard-based locking implementations in environments where + * contention is unlikely. + */ + +// TODO: generate it from configure (`getconf LEVEL1_DCACHE_LINESIZE`) +#define FOLLY_CACHE_LINE_SIZE 64 + +template +struct alignas(max_align_v) SpinLockArray { + T& operator[](size_t i) { + return data_[i].lock; + } + + const T& operator[](size_t i) const { + return data_[i].lock; + } + + constexpr size_t size() const { return N; } + + private: + struct PaddedSpinLock { + PaddedSpinLock() : lock() {} + T lock; + char padding[FOLLY_CACHE_LINE_SIZE - sizeof(T)]; + }; + static_assert(sizeof(PaddedSpinLock) == FOLLY_CACHE_LINE_SIZE, + "Invalid size of PaddedSpinLock"); + + // Check if T can theoretically cross a cache line. + static_assert( + max_align_v > 0 && FOLLY_CACHE_LINE_SIZE % max_align_v == 0 && + sizeof(T) <= max_align_v, + "T can cross cache line boundaries"); + + char padding_[FOLLY_CACHE_LINE_SIZE]; + std::array data_; +}; + +////////////////////////////////////////////////////////////////////// + +typedef std::lock_guard MSLGuard; + +////////////////////////////////////////////////////////////////////// + +} // namespace folly diff --git a/folly/synchronization/SaturatingSemaphore.h b/folly/synchronization/SaturatingSemaphore.h index 96b167bda60..e983ed31b0d 100644 --- a/folly/synchronization/SaturatingSemaphore.h +++ b/folly/synchronization/SaturatingSemaphore.h @@ -280,12 +280,24 @@ template FOLLY_NOINLINE bool SaturatingSemaphore::tryWaitSlow( const std::chrono::time_point& deadline, const WaitOptions& opt) noexcept { - if (detail::spin_pause_until(deadline, opt, [=] { return ready(); })) { - return true; + switch (detail::spin_pause_until(deadline, opt, [=] { return ready(); })) { + case detail::spin_result::success: + return true; + case detail::spin_result::timeout: + return false; + case detail::spin_result::advance: + break; } if (!MayBlock) { - return detail::spin_yield_until(deadline, [=] { return ready(); }); + switch (detail::spin_yield_until(deadline, [=] { return ready(); })) { + case detail::spin_result::success: + return true; + case detail::spin_result::timeout: + return false; + case detail::spin_result::advance: + break; + } } auto before = state_.load(std::memory_order_relaxed); diff --git a/folly/synchronization/SmallLocks.h b/folly/synchronization/SmallLocks.h index e2bf7adfd0e..93e1141b3b9 100644 --- a/folly/synchronization/SmallLocks.h +++ b/folly/synchronization/SmallLocks.h @@ -34,7 +34,7 @@ */ #include -#include +#include #include #if FOLLY_X64 || FOLLY_AARCH64 || FOLLY_PPC64 diff --git a/folly/synchronization/detail/Spin.h 
b/folly/synchronization/detail/Spin.h index d372fbe173f..0d0a1bb7fe2 100644
--- a/folly/synchronization/detail/Spin.h
+++ b/folly/synchronization/detail/Spin.h
@@ -26,30 +26,36 @@ namespace folly {
 namespace detail {

+enum class spin_result {
+ success, // condition passed
+ timeout, // exceeded deadline
+ advance, // exceeded current wait-options component timeout
+};
+
 template <typename Clock, typename Duration, typename F>
-bool spin_pause_until(
+spin_result spin_pause_until(
 std::chrono::time_point<Clock, Duration> const& deadline,
 WaitOptions const& opt,
 F f) {
 if (opt.spin_max() <= opt.spin_max().zero()) {
- return false;
+ return spin_result::advance;
 }

 auto tbegin = Clock::now();
 while (true) {
 if (f()) {
- return true;
+ return spin_result::success;
 }

 auto const tnow = Clock::now();
 if (tnow >= deadline) {
- return false;
+ return spin_result::timeout;
 }

 // Backward time discontinuity in Clock? revise pre_block starting point
 tbegin = std::min(tbegin, tnow);
 if (tnow >= tbegin + opt.spin_max()) {
- return false;
+ return spin_result::advance;
 }

 // The pause instruction is the polite way to spin, but it doesn't
@@ -61,17 +67,17 @@ bool spin_pause_until(
 }

 template <typename Clock, typename Duration, typename F>
-bool spin_yield_until(
+spin_result spin_yield_until(
 std::chrono::time_point<Clock, Duration> const& deadline,
 F f) {
 while (true) {
 if (f()) {
- return true;
+ return spin_result::success;
 }

 auto const max = std::chrono::time_point<Clock, Duration>::max();
 if (deadline != max && Clock::now() >= deadline) {
- return false;
+ return spin_result::timeout;
 }

 std::this_thread::yield();
diff --git a/folly/synchronization/test/LifoSemTests.cpp b/folly/synchronization/test/LifoSemTests.cpp
index cce205aab09..fe089a2d4fb 100644
--- a/folly/synchronization/test/LifoSemTests.cpp
+++ b/folly/synchronization/test/LifoSemTests.cpp
@@ -286,6 +286,52 @@ TEST(LifoSem, multi_try_wait) {
 ASSERT_EQ(NPOSTS, consumed);
 }

+TEST(LifoSem, timeout) {
+ long seed = folly::randomNumberSeed() % 10000;
+ LOG(INFO) << "seed=" << seed;
+ DSched sched(DSched::uniform(seed));
+ DeterministicAtomic<int> handoffs{0};
+
+ for (int pass = 0; pass < 10; ++pass) {
+ DLifoSem a;
+ std::vector<std::thread> threads;
+ while (threads.size() < 20) {
+ threads.push_back(DSched::thread([&] {
+ for (int i = 0; i < 10; i++) {
+ try {
+ if (a.try_wait_for(std::chrono::milliseconds(1))) {
+ handoffs--;
+ }
+ } catch (ShutdownSemError&) {
+ // expected
+ EXPECT_TRUE(a.isShutdown());
+ }
+ }
+ }));
+ }
+ std::vector<std::thread> threads2;
+ while (threads2.size() < 20) {
+ threads2.push_back(DSched::thread([&] {
+ for (int i = 0; i < 10; i++) {
+ a.post();
+ handoffs++;
+ }
+ }));
+ }
+ if (pass > 5) {
+ a.shutdown();
+ }
+ for (auto& thr : threads) {
+ DSched::join(thr);
+ }
+ for (auto& thr : threads2) {
+ DSched::join(thr);
+ }
+ // At least one timeout must occur.
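Editor's note on spin_result, introduced in the Spin.h hunk above (the EXPECT_GT assertion guarded by the comment above follows right after this note): it turns the old boolean spin helpers into a three-way protocol. success and timeout are terminal, while advance means the current phase's budget is spent and the caller should fall through to the next, cheaper-on-CPU phase. A sketch of how a caller chains the phases, in the spirit of tryWaitSlow; waitUntil and the atomic flag are hypothetical, and the blocking phase a real primitive would add is elided:

#include <folly/synchronization/WaitOptions.h>
#include <folly/synchronization/detail/Spin.h>

#include <atomic>
#include <chrono>

template <typename Clock, typename Duration>
bool waitUntil(
    std::atomic<bool>& flag,
    std::chrono::time_point<Clock, Duration> const& deadline,
    folly::WaitOptions const& opt) {
  auto const ready = [&] { return flag.load(std::memory_order_acquire); };

  // Phase 1: pause-loop spinning, bounded by opt.spin_max().
  switch (folly::detail::spin_pause_until(deadline, opt, ready)) {
    case folly::detail::spin_result::success:
      return true;
    case folly::detail::spin_result::timeout:
      return false;
    case folly::detail::spin_result::advance:
      break; // spin budget exhausted; fall through to yielding
  }
  // Phase 2: yield to the scheduler between polls. This helper polls
  // until the deadline, so advance cannot come back from it.
  return folly::detail::spin_yield_until(deadline, ready) ==
      folly::detail::spin_result::success;
}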
+ EXPECT_GT(handoffs.load(), 0); + } +} + BENCHMARK(lifo_sem_pingpong, iters) { LifoSem a; LifoSem b; diff --git a/folly/test/ArenaSmartPtrTest.cpp b/folly/test/ArenaSmartPtrTest.cpp index aade81131e4..d7d688f0f68 100644 --- a/folly/test/ArenaSmartPtrTest.cpp +++ b/folly/test/ArenaSmartPtrTest.cpp @@ -24,11 +24,6 @@ using namespace folly; -static_assert( - is_simple_allocator::value, - "SysArena should be a simple allocator" -); - struct global_counter { global_counter(): count_(0) {} @@ -61,7 +56,7 @@ struct Foo { template void unique_ptr_test(Allocator& allocator) { - typedef typename AllocatorUniquePtr::type ptr_type; + using ptr_type = std::unique_ptr>; global_counter counter; EXPECT_EQ(counter.count(), 0); @@ -95,18 +90,13 @@ void unique_ptr_test(Allocator& allocator) { } EXPECT_EQ(counter.count(), 1); - StlAllocator().destroy(foo); + std::allocator_traits::destroy(allocator, foo); EXPECT_EQ(counter.count(), 0); } TEST(ArenaSmartPtr, unique_ptr_SysArena) { SysArena arena; - unique_ptr_test(arena); -} - -TEST(ArenaSmartPtr, unique_ptr_StlAlloc_SysArena) { - SysArena arena; - StlAllocator alloc(&arena); + SysArenaAllocator alloc(arena); unique_ptr_test(alloc); } @@ -122,7 +112,7 @@ void shared_ptr_test(Allocator& allocator) { EXPECT_EQ(foo.use_count(), 0); { - auto p = folly::allocate_shared(allocator, counter); + auto p = std::allocate_shared(allocator, counter); EXPECT_EQ(counter.count(), 1); EXPECT_EQ(p.use_count(), 1); @@ -130,7 +120,7 @@ void shared_ptr_test(Allocator& allocator) { EXPECT_EQ(counter.count(), 0); EXPECT_EQ(p.use_count(), 0); - p = folly::allocate_shared(allocator, counter); + p = std::allocate_shared(allocator, counter); EXPECT_EQ(counter.count(), 1); EXPECT_EQ(p.use_count(), 1); @@ -167,12 +157,7 @@ void shared_ptr_test(Allocator& allocator) { TEST(ArenaSmartPtr, shared_ptr_SysArena) { SysArena arena; - shared_ptr_test(arena); -} - -TEST(ArenaSmartPtr, shared_ptr_StlAlloc_SysArena) { - SysArena arena; - StlAllocator alloc(&arena); + SysArenaAllocator alloc(arena); shared_ptr_test(alloc); } diff --git a/folly/test/AtomicLinkedListTest.cpp b/folly/test/AtomicLinkedListTest.cpp index 455f8794530..2e8ae3afcfa 100644 --- a/folly/test/AtomicLinkedListTest.cpp +++ b/folly/test/AtomicLinkedListTest.cpp @@ -51,8 +51,10 @@ TEST(AtomicIntrusiveLinkedList, Basic) { EXPECT_FALSE(list.empty()); size_t id = 0; - list.sweep( - [&](TestIntrusiveObject* obj) mutable { EXPECT_EQ(++id, obj->id()); }); + list.sweep([&](TestIntrusiveObject* obj) mutable { + ++id; + EXPECT_EQ(id, obj->id()); + }); EXPECT_TRUE(list.empty()); } @@ -65,8 +67,10 @@ TEST(AtomicIntrusiveLinkedList, Basic) { EXPECT_FALSE(list.empty()); size_t id = 1; - list.sweep( - [&](TestIntrusiveObject* obj) mutable { EXPECT_EQ(++id, obj->id()); }); + list.sweep([&](TestIntrusiveObject* obj) mutable { + ++id; + EXPECT_EQ(id, obj->id()); + }); EXPECT_TRUE(list.empty()); } @@ -82,7 +86,8 @@ TEST(AtomicIntrusiveLinkedList, ReverseSweep) { list.insertHead(&c); size_t next_expected_id = 3; list.reverseSweep([&](TestIntrusiveObject* obj) { - EXPECT_EQ(next_expected_id--, obj->id()); + auto const expected = next_expected_id--; + EXPECT_EQ(expected, obj->id()); }); EXPECT_TRUE(list.empty()); // Test that we can still insert @@ -116,8 +121,10 @@ TEST(AtomicIntrusiveLinkedList, Move) { EXPECT_FALSE(list3.empty()); size_t id = 0; - list3.sweep( - [&](TestIntrusiveObject* obj) mutable { EXPECT_EQ(++id, obj->id()); }); + list3.sweep([&](TestIntrusiveObject* obj) mutable { + ++id; + EXPECT_EQ(id, obj->id()); + }); } 
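Editor's note on the ArenaSmartPtrTest rewrite above: it tracks folly's allocator overhaul, in which folly::allocate_shared and StlAllocator give way to a standard-conforming allocator handed directly to std::allocate_shared and std::allocator_traits. A compressed sketch of the same pattern with any C++11-conforming allocator; Widget and exercise are stand-in names:

#include <memory>
#include <string>

struct Widget {
  explicit Widget(std::string name) : name_(std::move(name)) {}
  std::string name_;
};

template <typename Allocator>
void exercise(Allocator& alloc) {
  // Construction routed through the allocator; the control block and
  // the object share one allocation.
  auto sp = std::allocate_shared<Widget>(alloc, "w1");

  // Manual construct/destroy goes through allocator_traits, which
  // supplies defaults when the allocator omits construct()/destroy().
  using Traits = typename std::allocator_traits<Allocator>::
      template rebind_traits<Widget>;
  typename Traits::allocator_type rebound(alloc);
  Widget* raw = Traits::allocate(rebound, 1);
  Traits::construct(rebound, raw, "w2");
  Traits::destroy(rebound, raw);
  Traits::deallocate(rebound, raw, 1);
}

int main() {
  std::allocator<Widget> alloc;
  exercise(alloc);
}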
TEST(AtomicIntrusiveLinkedList, Stress) { diff --git a/folly/test/CachelinePaddedTest.cpp b/folly/test/CachelinePaddedTest.cpp index 159e672933b..a3a7f184128 100644 --- a/folly/test/CachelinePaddedTest.cpp +++ b/folly/test/CachelinePaddedTest.cpp @@ -42,7 +42,8 @@ struct alignas(alignment) SizedData { void doModifications() { size_t i = 0; for (auto& datum : data) { - EXPECT_EQ(static_cast(i++), datum); + EXPECT_EQ(static_cast(i), datum); + ++i; ++datum; } } @@ -50,7 +51,8 @@ struct alignas(alignment) SizedData { ~SizedData() { size_t i = 1; for (auto& datum : data) { - EXPECT_EQ(static_cast(i++), datum); + EXPECT_EQ(static_cast(i), datum); + ++i; } } diff --git a/folly/test/ConcurrentSkipListTest.cpp b/folly/test/ConcurrentSkipListTest.cpp index 2c393fbeaf8..4aea7159778 100644 --- a/folly/test/ConcurrentSkipListTest.cpp +++ b/folly/test/ConcurrentSkipListTest.cpp @@ -40,29 +40,34 @@ namespace { template struct ParanoidArenaAlloc { - explicit ParanoidArenaAlloc(ParentAlloc* arena) : arena_(arena) {} + explicit ParanoidArenaAlloc(ParentAlloc& arena) : arena_(arena) {} + ParanoidArenaAlloc(ParanoidArenaAlloc const&) = delete; + ParanoidArenaAlloc(ParanoidArenaAlloc&&) = delete; + ParanoidArenaAlloc& operator=(ParanoidArenaAlloc const&) = delete; + ParanoidArenaAlloc& operator=(ParanoidArenaAlloc&&) = delete; void* allocate(size_t size) { - void* result = arena_->allocate(size); + void* result = arena_.get().allocate(size); allocated_.insert(result); return result; } - void deallocate(void* ptr) { + void deallocate(void* ptr, size_t n) { EXPECT_EQ(1, allocated_.erase(ptr)); - arena_->deallocate(ptr); + arena_.get().deallocate(ptr, n); } bool isEmpty() const { return allocated_.empty(); } - ParentAlloc* arena_; + std::reference_wrapper arena_; std::set allocated_; }; } // namespace namespace folly { -template <> -struct IsArenaAllocator> : std::true_type {}; +template +struct AllocatorHasTrivialDeallocate> + : AllocatorHasTrivialDeallocate {}; } // namespace folly namespace { @@ -472,29 +477,37 @@ void TestNonTrivialDeallocation(SkipListPtrType& list) { } template -void NonTrivialDeallocationWithParanoid() { - using Alloc = ParanoidArenaAlloc; +void NonTrivialDeallocationWithParanoid(ParentAlloc& parentAlloc) { + using ParanoidAlloc = ParanoidArenaAlloc; + using Alloc = CxxAllocatorAdaptor; using ParanoidSkipListType = ConcurrentSkipList, Alloc>; - ParentAlloc parentAlloc; - Alloc paranoidAlloc(&parentAlloc); - auto list = ParanoidSkipListType::createInstance(10, paranoidAlloc); + ParanoidAlloc paranoidAlloc(parentAlloc); + Alloc alloc(paranoidAlloc); + auto list = ParanoidSkipListType::createInstance(10, alloc); TestNonTrivialDeallocation(list); EXPECT_TRUE(paranoidAlloc.isEmpty()); } TEST(ConcurrentSkipList, NonTrivialDeallocationWithParanoidSysAlloc) { - NonTrivialDeallocationWithParanoid(); + SysAllocator alloc; + NonTrivialDeallocationWithParanoid(alloc); } TEST(ConcurrentSkipList, NonTrivialDeallocationWithParanoidSysArena) { - NonTrivialDeallocationWithParanoid(); + SysArena arena; + SysArenaAllocator alloc(arena); + NonTrivialDeallocationWithParanoid(alloc); } TEST(ConcurrentSkipList, NonTrivialDeallocationWithSysArena) { - using SysArenaSkipListType = - ConcurrentSkipList, SysArena>; - auto list = SysArenaSkipListType::createInstance(10); + using SysArenaSkipListType = ConcurrentSkipList< + NonTrivialValue, + std::less, + SysArenaAllocator>; + SysArena arena; + SysArenaAllocator alloc(arena); + auto list = SysArenaSkipListType::createInstance(10, alloc); 
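Editor's note on ParanoidArenaAlloc, reworked above: it wraps a parent allocator and records every live allocation so the test can assert that the skip list returns each block exactly once. The same checking idea as a standalone minimal standard allocator, so it plugs into std containers without folly's CxxAllocatorAdaptor; all names here are illustrative:

#include <cassert>
#include <cstddef>
#include <memory>
#include <set>
#include <vector>

template <typename T>
struct CheckingAlloc {
  using value_type = T;

  CheckingAlloc() : live_(std::make_shared<std::set<void*>>()) {}
  template <typename U>
  CheckingAlloc(const CheckingAlloc<U>& other) : live_(other.live_) {}

  T* allocate(std::size_t n) {
    T* p = static_cast<T*>(::operator new(n * sizeof(T)));
    live_->insert(p);
    return p;
  }
  void deallocate(T* p, std::size_t) {
    // Exactly-once return of a pointer we actually handed out.
    assert(live_->erase(p) == 1);
    ::operator delete(p);
  }
  bool empty() const { return live_->empty(); }

  // Shared across rebound copies so the whole container is tracked.
  std::shared_ptr<std::set<void*>> live_;
};

template <typename T, typename U>
bool operator==(const CheckingAlloc<T>& a, const CheckingAlloc<U>& b) {
  return a.live_ == b.live_;
}
template <typename T, typename U>
bool operator!=(const CheckingAlloc<T>& a, const CheckingAlloc<U>& b) {
  return !(a == b);
}

int main() {
  CheckingAlloc<int> alloc;
  {
    std::vector<int, CheckingAlloc<int>> v(alloc);
    v.assign(1000, 7);
  }
  assert(alloc.empty()); // every block came back
}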
TestNonTrivialDeallocation(list); } diff --git a/folly/test/ConstexprMathBenchmark.cpp b/folly/test/ConstexprMathBenchmark.cpp new file mode 100644 index 00000000000..cd88de51022 --- /dev/null +++ b/folly/test/ConstexprMathBenchmark.cpp @@ -0,0 +1,196 @@ +/* + * Copyright 2018-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +template +constexpr ValueT UBSafeAdd(ValueT a, ValueT b) { + using UnsignedT = typename std::make_unsigned::type; + return static_cast( + static_cast(a) + static_cast(b)); +} + +template +constexpr ValueT UBSafeSub(ValueT a, ValueT b) { + using UnsignedT = typename std::make_unsigned::type; + return static_cast( + static_cast(a) - static_cast(b)); +} + +template +void Run(size_t iterations, ValueT kMin, ValueT kMax, Op&& op) { + auto kMid = (kMin + kMax) / 2; + + for (size_t round = 0; round < iterations; round++) { + for (ValueT a = kMin; a < kMin + 100; a++) { + for (ValueT b = kMin; b < kMin + 100; b++) { + auto a1 = a, b1 = b; + folly::makeUnpredictable(a1); + folly::makeUnpredictable(b1); + ValueT c = op(a1, b1); + folly::doNotOptimizeAway(c); + } + } + for (ValueT a = kMin; a < kMin + 100; a++) { + for (ValueT b = kMid - 50; b < kMid + 50; b++) { + auto a1 = a, b1 = b; + folly::makeUnpredictable(a1); + folly::makeUnpredictable(b1); + ValueT c = op(a1, b1); + folly::doNotOptimizeAway(c); + } + } + for (ValueT a = kMin; a < kMin + 100; a++) { + for (ValueT b = kMax - 100; b < kMax; b++) { + auto a1 = a, b1 = b; + folly::makeUnpredictable(a1); + folly::makeUnpredictable(b1); + ValueT c = op(a1, b1); + folly::doNotOptimizeAway(c); + } + } + for (ValueT a = kMid - 50; a < kMid + 50; a++) { + for (ValueT b = kMin; b < kMin + 100; b++) { + auto a1 = a, b1 = b; + folly::makeUnpredictable(a1); + folly::makeUnpredictable(b1); + ValueT c = op(a1, b1); + folly::doNotOptimizeAway(c); + } + } + for (ValueT a = kMid - 50; a < kMid + 50; a++) { + for (ValueT b = kMid - 50; b < kMid + 50; b++) { + auto a1 = a, b1 = b; + folly::makeUnpredictable(a1); + folly::makeUnpredictable(b1); + ValueT c = op(a1, b1); + folly::doNotOptimizeAway(c); + } + } + for (ValueT a = kMid - 50; a < kMid + 50; a++) { + for (ValueT b = kMax - 100; b < kMax; b++) { + auto a1 = a, b1 = b; + folly::makeUnpredictable(a1); + folly::makeUnpredictable(b1); + ValueT c = op(a1, b1); + folly::doNotOptimizeAway(c); + } + } + for (ValueT a = kMax - 100; a < kMax; a++) { + for (ValueT b = kMin; b < kMin + 100; b++) { + auto a1 = a, b1 = b; + folly::makeUnpredictable(a1); + folly::makeUnpredictable(b1); + ValueT c = op(a1, b1); + folly::doNotOptimizeAway(c); + } + } + for (ValueT a = kMax - 100; a < kMax; a++) { + for (ValueT b = kMid - 50; b < kMid + 50; b++) { + auto a1 = a, b1 = b; + folly::makeUnpredictable(a1); + folly::makeUnpredictable(b1); + ValueT c = op(a1, b1); + folly::doNotOptimizeAway(c); + } + } + for (ValueT a = kMax - 100; a < kMax; a++) { + for (ValueT b = kMax - 100; b < kMax; b++) { + auto a1 = a, b1 = b; + 
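Editor's note on UBSafeAdd/UBSafeSub, defined above (the Run() harness resumes immediately below): they exist because signed overflow is undefined behavior in C++, while unsigned arithmetic wraps modulo 2^N by definition, so round-tripping through the unsigned type gives two's-complement wrapping without UB. A self-contained illustration of the trick; wrappingAdd is a stand-in name:

#include <cstdint>
#include <type_traits>

template <typename ValueT>
constexpr ValueT wrappingAdd(ValueT a, ValueT b) {
  using U = typename std::make_unsigned<ValueT>::type;
  // Unsigned addition is defined to wrap; the conversion back to the
  // signed type yields two's-complement wrapping on mainstream
  // compilers (implementation-defined before C++20, but never UB).
  // For types narrower than int, the wrap happens in that final
  // narrowing conversion rather than in the unsigned addition itself.
  return static_cast<ValueT>(static_cast<U>(a) + static_cast<U>(b));
}

static_assert(
    wrappingAdd<int32_t>(INT32_MAX, 1) == INT32_MIN,
    "wraps around instead of invoking signed-overflow UB");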
folly::makeUnpredictable(a1); + folly::makeUnpredictable(b1); + ValueT c = op(a1, b1); + folly::doNotOptimizeAway(c); + } + } + } +} + +template +void Add(size_t iterations, ValueT kMin, ValueT kMax) { + Run(iterations, kMin, kMax, [](ValueT a, ValueT b) { + return UBSafeAdd(a, b); + }); +} + +template +void NoOverflowAdd(size_t iterations, ValueT kMin, ValueT kMax) { + Run(iterations, kMin, kMax, [](ValueT a, ValueT b) { + return folly::constexpr_add_overflow_clamped(a, b); + }); +} + +template +void Sub(size_t iterations, ValueT kMin, ValueT kMax) { + Run(iterations, kMin, kMax, [](ValueT a, ValueT b) { + return UBSafeSub(a, b); + }); +} + +template +void NoOverflowSub(size_t iterations, ValueT kMin, ValueT kMax) { + Run(iterations, kMin, kMax, [](ValueT a, ValueT b) { + return folly::constexpr_sub_overflow_clamped(a, b); + }); +} + +#define GENERATE_BENCHMARKS_FOR_TYPE(ValueT) \ + BENCHMARK_NAMED_PARAM( \ + Add, \ + ValueT, \ + std::numeric_limits::min(), \ + std::numeric_limits::max()); \ + BENCHMARK_RELATIVE_NAMED_PARAM( \ + NoOverflowAdd, \ + ValueT, \ + std::numeric_limits::min(), \ + std::numeric_limits::max()); \ + BENCHMARK_NAMED_PARAM( \ + Sub, \ + ValueT, \ + std::numeric_limits::min(), \ + std::numeric_limits::max()); \ + BENCHMARK_RELATIVE_NAMED_PARAM( \ + NoOverflowSub, \ + ValueT, \ + std::numeric_limits::min(), \ + std::numeric_limits::max()) + +GENERATE_BENCHMARKS_FOR_TYPE(int8_t); +BENCHMARK_DRAW_LINE() +GENERATE_BENCHMARKS_FOR_TYPE(uint8_t); +BENCHMARK_DRAW_LINE() +GENERATE_BENCHMARKS_FOR_TYPE(int16_t); +BENCHMARK_DRAW_LINE() +GENERATE_BENCHMARKS_FOR_TYPE(uint16_t); +BENCHMARK_DRAW_LINE() +GENERATE_BENCHMARKS_FOR_TYPE(int32_t); +BENCHMARK_DRAW_LINE() +GENERATE_BENCHMARKS_FOR_TYPE(uint32_t); +BENCHMARK_DRAW_LINE() +GENERATE_BENCHMARKS_FOR_TYPE(int64_t); +BENCHMARK_DRAW_LINE() +GENERATE_BENCHMARKS_FOR_TYPE(uint64_t); + +int main(int argc, char** argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + folly::runBenchmarks(); + return 0; +} diff --git a/folly/test/ConstexprMathTest.cpp b/folly/test/ConstexprMathTest.cpp index c2258527832..03bc1a8d727 100644 --- a/folly/test/ConstexprMathTest.cpp +++ b/folly/test/ConstexprMathTest.cpp @@ -193,3 +193,110 @@ TEST_F(ConstexprMathTest, constexpr_pow) { EXPECT_EQ(64, a); } } + +constexpr auto kInt64Max = std::numeric_limits::max(); +constexpr auto kInt64Min = std::numeric_limits::min(); +constexpr auto kUInt64Max = std::numeric_limits::max(); +constexpr auto kInt8Max = std::numeric_limits::max(); +constexpr auto kInt8Min = std::numeric_limits::min(); +constexpr auto kUInt8Max = std::numeric_limits::max(); + +TEST_F(ConstexprMathTest, constexpr_add_overflow_clamped) { + for (int a = kInt8Min; a <= kInt8Max; a++) { + for (int b = kInt8Min; b <= kInt8Max; b++) { + int c = folly::constexpr_clamp(a + b, int(kInt8Min), int(kInt8Max)); + int8_t a1 = a; + int8_t b1 = b; + int8_t c1 = folly::constexpr_add_overflow_clamped(a1, b1); + ASSERT_LE(c1, kInt8Max); + ASSERT_GE(c1, kInt8Min); + ASSERT_EQ(c1, c); + } + } + + for (int a = 0; a <= kUInt8Max; a++) { + for (int b = 0; b <= kUInt8Max; b++) { + int c = folly::constexpr_clamp(a + b, 0, int(kUInt8Max)); + uint8_t a1 = a; + uint8_t b1 = b; + uint8_t c1 = folly::constexpr_add_overflow_clamped(a1, b1); + ASSERT_LE(c1, kUInt8Max); + ASSERT_GE(c1, 0); + ASSERT_EQ(c1, c); + } + } + + constexpr auto v1 = + folly::constexpr_add_overflow_clamped(int64_t(23), kInt64Max - 12); + EXPECT_EQ(kInt64Max, v1); + + constexpr auto v2 = + folly::constexpr_add_overflow_clamped(int64_t(23), int64_t(12)); 
+ EXPECT_EQ(int64_t(35), v2); + + constexpr auto v3 = + folly::constexpr_add_overflow_clamped(int64_t(-23), int64_t(12)); + EXPECT_EQ(int64_t(-11), v3); + + constexpr auto v4 = + folly::constexpr_add_overflow_clamped(int64_t(-23), int64_t(-12)); + EXPECT_EQ(int64_t(-35), v4); + + constexpr auto v5 = + folly::constexpr_add_overflow_clamped(uint64_t(23), kUInt64Max - 12); + EXPECT_EQ(kUInt64Max, v5); +} + +TEST_F(ConstexprMathTest, constexpr_sub_overflow_clamped) { + for (int a = kInt8Min; a <= kInt8Max; a++) { + for (int b = kInt8Min; b <= kInt8Max; b++) { + int c = folly::constexpr_clamp(a - b, int(kInt8Min), int(kInt8Max)); + int8_t a1 = a; + int8_t b1 = b; + int8_t c1 = folly::constexpr_sub_overflow_clamped(a1, b1); + ASSERT_LE(c1, kInt8Max); + ASSERT_GE(c1, kInt8Min); + ASSERT_EQ(c1, c); + } + } + + for (int a = 0; a <= kUInt8Max; a++) { + for (int b = 0; b <= kUInt8Max; b++) { + int c = folly::constexpr_clamp(a - b, 0, int(kUInt8Max)); + uint8_t a1 = a; + uint8_t b1 = b; + uint8_t c1 = folly::constexpr_sub_overflow_clamped(a1, b1); + ASSERT_LE(c1, kUInt8Max); + ASSERT_GE(c1, 0); + ASSERT_EQ(c1, c); + } + } + + constexpr auto v1 = + folly::constexpr_sub_overflow_clamped(int64_t(23), int64_t(12)); + EXPECT_EQ(int64_t(11), v1); + + constexpr auto v2 = + folly::constexpr_sub_overflow_clamped(int64_t(-23), int64_t(-12)); + EXPECT_EQ(int64_t(-11), v2); + + constexpr auto v3 = + folly::constexpr_sub_overflow_clamped(int64_t(23), int64_t(-12)); + EXPECT_EQ(int64_t(35), v3); + + constexpr auto v4 = + folly::constexpr_sub_overflow_clamped(int64_t(23), kInt64Min); + EXPECT_EQ(kInt64Max, v4); + + constexpr auto v5 = + folly::constexpr_sub_overflow_clamped(int64_t(-23), kInt64Min); + EXPECT_EQ(kInt64Max - 22, v5); + + constexpr auto v6 = + folly::constexpr_sub_overflow_clamped(uint64_t(23), uint64_t(12)); + EXPECT_EQ(uint64_t(11), v6); + + constexpr auto v7 = + folly::constexpr_sub_overflow_clamped(uint64_t(12), uint64_t(23)); + EXPECT_EQ(uint64_t(0), v7); +} diff --git a/folly/test/DynamicConverterTest.cpp b/folly/test/DynamicConverterTest.cpp index fd0d88b1431..f02874b54ea 100644 --- a/folly/test/DynamicConverterTest.cpp +++ b/folly/test/DynamicConverterTest.cpp @@ -463,3 +463,10 @@ TEST(DynamicConverter, double_destroy) { EXPECT_THROW(convertTo>(d), B::BException); EXPECT_EQ(constructB, destroyB); } + +TEST(DynamicConverter, simple_vector_bool) { + std::vector bools{true, false}; + auto d = toDynamic(bools); + auto actual = convertTo(d); + EXPECT_EQ(bools, actual); +} diff --git a/folly/test/DynamicTest.cpp b/folly/test/DynamicTest.cpp index be588c7b957..e51b8cf0adb 100644 --- a/folly/test/DynamicTest.cpp +++ b/folly/test/DynamicTest.cpp @@ -101,8 +101,19 @@ TEST(Dynamic, ObjectBasics) { EXPECT_EQ(objInsert.find("1")->second.size(), 1); - // We don't allow objects as keys in objects. 
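Editor's note on the clamped-arithmetic tests above: the loops validate both operations exhaustively over the 8-bit domains against a reference computed in a wider type, and the v1..v7 constants pin the edge cases. The semantics under test are ordinary saturating arithmetic; below is a minimal C++14 sketch of a saturating add, plus the one subtraction edge worth calling out. This is an illustration of the tested contract, not folly's implementation:

#include <cstdint>
#include <limits>

template <typename T>
constexpr T saturating_add(T a, T b) {
  constexpr T kMax = std::numeric_limits<T>::max();
  constexpr T kMin = std::numeric_limits<T>::min();
  if (b >= T(0)) {
    return a > kMax - b ? kMax : T(a + b); // can only overflow upward
  }
  return a < kMin - b ? kMin : T(a + b);   // can only overflow downward
}

static_assert(saturating_add<int8_t>(100, 100) == 127, "clamps high");
static_assert(saturating_add<int8_t>(-100, -100) == -128, "clamps low");
static_assert(saturating_add<uint8_t>(200, 100) == 255, "clamps high");

// Subtraction cannot simply be saturating_add(a, -b): negating b is UB
// exactly when b == kInt64Min, the case v4 and v5 exercise. Clamping
// the mathematical result gives the expected values:
//    23 - kInt64Min = 23 + 2^63 > kInt64Max -> clamps to kInt64Max  (v4)
//   -23 - kInt64Min = 2^63 - 23 = kInt64Max - 22, no clamping       (v5)
static_assert(
    -(std::numeric_limits<int64_t>::min() + 23) ==
        std::numeric_limits<int64_t>::max() - 22,
    "2^63 - 23 == (2^63 - 1) - 22");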
- EXPECT_ANY_THROW(newObject[d3] = 12); + // Looking up objects as keys + dynamic objDefinedInOneOrder = folly::dynamic::object + ("bar", "987") + ("baz", folly::dynamic::array(1, 2, 3)) + ("foo2", folly::dynamic::object("1", "2")); + dynamic sameObjInDifferentOrder = folly::dynamic::object + ("bar", "987") + ("foo2", folly::dynamic::object("1", "2")) + ("baz", folly::dynamic::array(1, 2, 3)); + + newObject[objDefinedInOneOrder] = 12; + EXPECT_EQ(newObject.at(objDefinedInOneOrder).getInt(), 12); + EXPECT_EQ(newObject.at(sameObjInDifferentOrder).getInt(), 12); // Merge two objects dynamic origMergeObj1 = folly::dynamic::object(); diff --git a/folly/test/FBStringTest.cpp b/folly/test/FBStringTest.cpp index ffe8ccd04b9..03641cc49ff 100644 --- a/folly/test/FBStringTest.cpp +++ b/folly/test/FBStringTest.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -269,6 +270,9 @@ template void clause11_21_4_5(String & test) { EXPECT_EQ(test[i], test.at(i)); test = test[i]; } + + EXPECT_THROW(test.at(test.size()), std::out_of_range); + EXPECT_THROW(as_const(test).at(test.size()), std::out_of_range); } template void clause11_21_4_6_1(String & test) { diff --git a/folly/test/FBVectorBenchmark.cpp b/folly/test/FBVectorBenchmark.cpp index eaa0bc90357..463d7896681 100644 --- a/folly/test/FBVectorBenchmark.cpp +++ b/folly/test/FBVectorBenchmark.cpp @@ -17,95 +17,366 @@ // // Author: andrei.alexandrescu@fb.com +#include #include #include +#include #include -#include -#include #include -#include #include #include #include -#include +#include +#include using namespace std; using namespace folly; +using namespace folly::test::detail; -auto static const seed = randomNumberSeed(); -typedef boost::mt19937 RandomT; -static RandomT rng(seed); +using IntVector = vector; +using IntFBVector = fbvector; +using IntList = list; +using IntDeque = deque; +using IntSmallVector = small_vector; -template -Integral2 random(Integral1 low, Integral2 up) { - boost::uniform_int<> range(low, up); - return range(rng); -} - -template -void randomString(String* toFill, unsigned int maxSize = 1000) { - assert(toFill); - toFill->resize(random(0, maxSize)); - FOR_EACH (i, *toFill) { - *i = random('a', 'z'); - } -} - -template -void Num2String(String& str, Integral /* n */) { - str.resize(10, '\0'); - sprintf(&str[0], "%ul", 10); - str.resize(strlen(str.c_str())); -} - -std::list RandomList(unsigned int maxSize) { - std::list lst(random(0u, maxSize)); - std::list::iterator i = lst.begin(); - for (; i != lst.end(); ++i) { - *i = random('a', 'z'); - } - return lst; -} - -template T randomObject(); - -template <> int randomObject() { - return random(0, 1024); -} - -template <> folly::fbstring randomObject() { - folly::fbstring result; - randomString(&result); - return result; -} - -#define CONCAT(A, B) CONCAT_HELPER(A, B) -#define CONCAT_HELPER(A, B) A##B -#define BENCHFUN(F) CONCAT(CONCAT(BM_, F), CONCAT(_, VECTOR)) -#define TESTFUN(F) TEST(fbvector, CONCAT(F, VECTOR)) +using StringVector = vector; +using StringFBVector = fbvector; +using StringList = list; +using StringDeque = deque; +using StringSmallVector = small_vector; -typedef vector IntVector; -typedef fbvector IntFBVector; -typedef vector FBStringVector; -typedef fbvector FBStringFBVector; +using FBStringVector = vector; +using FBStringFBVector = fbvector; #define VECTOR IntVector -#include // nolint +#include // nolint #undef VECTOR #define VECTOR IntFBVector -#include // nolint +#include // nolint #undef VECTOR +#define VECTOR IntSmallVector +#include // 
nolint +#undef VECTOR +#define VECTOR IntList +#define SKIP_RESERVE +#include // nolint +#undef SKIP_RESERVE +#undef VECTOR +#define VECTOR IntDeque +#define SKIP_RESERVE +#include // nolint +#undef SKIP_RESERVE +#undef VECTOR + +#define VECTOR StringVector +#include // nolint +#undef VECTOR +#define VECTOR StringFBVector +#include // nolint +#undef VECTOR +#define VECTOR StringSmallVector +#include // nolint +#undef VECTOR +#define VECTOR StringList +#define SKIP_RESERVE +#include // nolint +#undef SKIP_RESERVE +#undef VECTOR +#define VECTOR StringDeque +#define SKIP_RESERVE +#include // nolint +#undef SKIP_RESERVE +#undef VECTOR + #define VECTOR FBStringVector -#include // nolint +#include // nolint #undef VECTOR #define VECTOR FBStringFBVector -#include // nolint +#include // nolint #undef VECTOR int main(int argc, char** argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); + gflags::SetCommandLineOptionWithMode( + "bm_max_iters", "1000000", gflags::SET_FLAG_IF_DEFAULT); + gflags::SetCommandLineOptionWithMode( + "bm_min_iters", "100000", gflags::SET_FLAG_IF_DEFAULT); + gflags::SetCommandLineOptionWithMode( + "bm_max_secs", "1", gflags::SET_FLAG_IF_DEFAULT); + folly::runBenchmarks(); return 0; } + +/* +============================================================================ +buck-out/opt/gen/folly/test/fbvector_benchmark#gcc-5-glibc-2.23,private-headers/folly/test/FBVectorBenchmarks.cpp.hrelative time/iter iters/s +============================================================================ +BM_zzInitRNG_IntVector 1.05us 951.24K +BM_defaultCtor_IntVector 1.31ns 765.93M +BM_sizeCtor_IntVector(16) 19.33ns 51.73M +BM_sizeCtor_IntVector(128) 42.11ns 23.75M +BM_sizeCtor_IntVector(1024) 60.90ns 16.42M +BM_fillCtor_IntVector(16) 30.67ns 32.61M +BM_fillCtor_IntVector(128) 41.22ns 24.26M +BM_fillCtor_IntVector(1024) 133.70ns 7.48M +BM_reserve_IntVector(16) 40.27ns 24.83M +BM_reserve_IntVector(128) 40.20ns 24.88M +BM_reserve_IntVector(1024) 40.17ns 24.90M +BM_insertFront_IntVector(16) 7.90us 126.52K +BM_insertFront_IntVector(128) 8.12us 123.09K +BM_insertFront_IntVector(1024) 8.30us 120.46K +BM_insertFront_IntVector(10240) 10.14us 98.67K +BM_insertFront_IntVector(102400) 30.71us 32.56K +BM_insertFront_IntVector(1024000) 220.69us 4.53K +BM_pushBack_IntVector(16) 776.38ps 1.29G +BM_pushBack_IntVector(128) 775.89ps 1.29G +BM_pushBack_IntVector(1024) 742.50ps 1.35G +BM_pushBack_IntVector(10240) 787.75ps 1.27G +BM_pushBack_IntVector(102400) 714.07ps 1.40G +BM_pushBack_IntVector(1024000) 3.15ns 317.26M +BM_zzInitRNG_IntFBVector 1.17us 853.35K +BM_defaultCtor_IntFBVector 989.76ps 1.01G +BM_sizeCtor_IntFBVector(16) 27.19ns 36.78M +BM_sizeCtor_IntFBVector(128) 46.73ns 21.40M +BM_sizeCtor_IntFBVector(1024) 69.03ns 14.49M +BM_fillCtor_IntFBVector(16) 35.97ns 27.80M +BM_fillCtor_IntFBVector(128) 55.11ns 18.15M +BM_fillCtor_IntFBVector(1024) 147.89ns 6.76M +BM_reserve_IntFBVector(16) 54.18ns 18.46M +BM_reserve_IntFBVector(128) 54.24ns 18.44M +BM_reserve_IntFBVector(1024) 54.24ns 18.44M +BM_insertFront_IntFBVector(16) 8.41us 118.86K +BM_insertFront_IntFBVector(128) 8.45us 118.41K +BM_insertFront_IntFBVector(1024) 8.56us 116.80K +BM_insertFront_IntFBVector(10240) 10.72us 93.32K +BM_insertFront_IntFBVector(102400) 30.83us 32.43K +BM_insertFront_IntFBVector(1024000) 217.31us 4.60K +BM_pushBack_IntFBVector(16) 2.05ns 488.26M +BM_pushBack_IntFBVector(128) 1.99ns 503.65M +BM_pushBack_IntFBVector(1024) 2.16ns 462.50M +BM_pushBack_IntFBVector(10240) 2.13ns 468.48M +BM_pushBack_IntFBVector(102400) 1.93ns 
517.23M +BM_pushBack_IntFBVector(1024000) 1.89ns 529.29M +BM_zzInitRNG_IntSmallVector 1.17us 855.04K +BM_defaultCtor_IntSmallVector 698.82ps 1.43G +BM_sizeCtor_IntSmallVector(16) 37.59ns 26.60M +BM_sizeCtor_IntSmallVector(128) 85.90ns 11.64M +BM_sizeCtor_IntSmallVector(1024) 401.37ns 2.49M +BM_fillCtor_IntSmallVector(16) 48.22ns 20.74M +BM_fillCtor_IntSmallVector(128) 99.99ns 10.00M +BM_fillCtor_IntSmallVector(1024) 458.71ns 2.18M +BM_reserve_IntSmallVector(16) 44.30ns 22.57M +BM_reserve_IntSmallVector(128) 44.29ns 22.58M +BM_reserve_IntSmallVector(1024) 45.15ns 22.15M +BM_insertFront_IntSmallVector(16) 8.40us 119.11K +BM_insertFront_IntSmallVector(128) 7.74us 129.25K +BM_insertFront_IntSmallVector(1024) 8.17us 122.47K +BM_insertFront_IntSmallVector(10240) 10.17us 98.34K +BM_insertFront_IntSmallVector(102400) 29.60us 33.79K +BM_insertFront_IntSmallVector(1024000) 208.82us 4.79K +BM_pushBack_IntSmallVector(16) 2.92ns 342.66M +BM_pushBack_IntSmallVector(128) 2.91ns 343.36M +BM_pushBack_IntSmallVector(1024) 2.76ns 362.74M +BM_pushBack_IntSmallVector(10240) 2.71ns 369.18M +BM_pushBack_IntSmallVector(102400) 3.04ns 329.36M +BM_pushBack_IntSmallVector(1024000) 4.90ns 204.21M +BM_zzInitRNG_IntList 1.04us 958.67K +BM_defaultCtor_IntList 911.25ps 1.10G +BM_sizeCtor_IntList(16) 264.10ns 3.79M +BM_sizeCtor_IntList(128) 2.08us 481.87K +BM_sizeCtor_IntList(1024) 35.52us 28.15K +BM_fillCtor_IntList(16) 269.86ns 3.71M +BM_fillCtor_IntList(128) 2.12us 470.70K +BM_fillCtor_IntList(1024) 46.59us 21.47K +BM_insertFront_IntList(16) 18.88ns 52.95M +BM_insertFront_IntList(128) 19.67ns 50.85M +BM_insertFront_IntList(1024) 18.79ns 53.22M +BM_insertFront_IntList(10240) 20.47ns 48.85M +BM_insertFront_IntList(102400) 17.43ns 57.37M +BM_insertFront_IntList(1024000) 17.65ns 56.65M +BM_pushBack_IntList(16) 20.45ns 48.89M +BM_pushBack_IntList(128) 21.54ns 46.42M +BM_pushBack_IntList(1024) 20.14ns 49.64M +BM_pushBack_IntList(10240) 21.21ns 47.15M +BM_pushBack_IntList(102400) 18.53ns 53.98M +BM_pushBack_IntList(1024000) 22.16ns 45.12M +BM_zzInitRNG_IntDeque 1.14us 879.33K +BM_defaultCtor_IntDeque 33.14ns 30.18M +BM_sizeCtor_IntDeque(16) 44.34ns 22.56M +BM_sizeCtor_IntDeque(128) 81.28ns 12.30M +BM_sizeCtor_IntDeque(1024) 338.93ns 2.95M +BM_fillCtor_IntDeque(16) 52.18ns 19.16M +BM_fillCtor_IntDeque(128) 76.01ns 13.16M +BM_fillCtor_IntDeque(1024) 329.99ns 3.03M +BM_insertFront_IntDeque(16) 2.56ns 390.51M +BM_insertFront_IntDeque(128) 2.48ns 403.57M +BM_insertFront_IntDeque(1024) 2.31ns 432.60M +BM_insertFront_IntDeque(10240) 2.30ns 434.90M +BM_insertFront_IntDeque(102400) 2.32ns 431.00M +BM_insertFront_IntDeque(1024000) 2.36ns 423.26M +BM_pushBack_IntDeque(16) 935.50ps 1.07G +BM_pushBack_IntDeque(128) 935.72ps 1.07G +BM_pushBack_IntDeque(1024) 942.23ps 1.06G +BM_pushBack_IntDeque(10240) 934.27ps 1.07G +BM_pushBack_IntDeque(102400) 947.61ps 1.06G +BM_pushBack_IntDeque(1024000) 993.47ps 1.01G +BM_zzInitRNG_StringVector 1.03us 966.54K +BM_defaultCtor_StringVector 911.27ps 1.10G +BM_sizeCtor_StringVector(16) 35.94ns 27.83M +BM_sizeCtor_StringVector(128) 233.07ns 4.29M +BM_sizeCtor_StringVector(1024) 1.83us 546.61K +BM_fillCtor_StringVector(16) 10.30us 97.07K +BM_fillCtor_StringVector(128) 21.56us 46.37K +BM_fillCtor_StringVector(1024) 128.63us 7.77K +BM_reserve_StringVector(16) 45.76ns 21.85M +BM_reserve_StringVector(128) 60.52ns 16.52M +BM_reserve_StringVector(1024) 59.59ns 16.78M +BM_insertFront_StringVector(16) 124.99us 8.00K +BM_insertFront_StringVector(128) 120.57us 8.29K +BM_insertFront_StringVector(1024) 126.47us 7.91K 
+BM_insertFront_StringVector(10240) 153.43us 6.52K +BM_insertFront_StringVector(102400) 380.73us 2.63K +BM_insertFront_StringVector(1024000) 3.96ms 252.31 +BM_pushBack_StringVector(16) 40.16ns 24.90M +BM_pushBack_StringVector(128) 41.94ns 23.85M +BM_pushBack_StringVector(1024) 36.92ns 27.08M +BM_pushBack_StringVector(10240) 18.19ns 54.99M +BM_pushBack_StringVector(102400) 41.21ns 24.27M +BM_pushBack_StringVector(1024000) 234.95ns 4.26M +BM_zzInitRNG_StringFBVector 1.05us 956.06K +BM_defaultCtor_StringFBVector 911.25ps 1.10G +BM_sizeCtor_StringFBVector(16) 38.40ns 26.04M +BM_sizeCtor_StringFBVector(128) 202.10ns 4.95M +BM_sizeCtor_StringFBVector(1024) 1.68us 593.56K +BM_fillCtor_StringFBVector(16) 6.65us 150.29K +BM_fillCtor_StringFBVector(128) 14.76us 67.76K +BM_fillCtor_StringFBVector(1024) 117.60us 8.50K +BM_reserve_StringFBVector(16) 60.40ns 16.56M +BM_reserve_StringFBVector(128) 62.28ns 16.06M +BM_reserve_StringFBVector(1024) 66.76ns 14.98M +BM_insertFront_StringFBVector(16) 126.51us 7.90K +BM_insertFront_StringFBVector(128) 121.29us 8.24K +BM_insertFront_StringFBVector(1024) 129.81us 7.70K +BM_insertFront_StringFBVector(10240) 148.77us 6.72K +BM_insertFront_StringFBVector(102400) 380.46us 2.63K +BM_insertFront_StringFBVector(1024000) 3.73ms 268.02 +BM_pushBack_StringFBVector(16) 11.89ns 84.13M +BM_pushBack_StringFBVector(128) 20.32ns 49.20M +BM_pushBack_StringFBVector(1024) 47.91ns 20.87M +BM_pushBack_StringFBVector(10240) 39.74ns 25.16M +BM_pushBack_StringFBVector(102400) 36.86ns 27.13M +BM_pushBack_StringFBVector(1024000) 285.22ns 3.51M +BM_zzInitRNG_StringSmallVector 1.04us 965.73K +BM_defaultCtor_StringSmallVector 607.54ps 1.65G +BM_sizeCtor_StringSmallVector(16) 44.30ns 22.57M +BM_sizeCtor_StringSmallVector(128) 234.40ns 4.27M +BM_sizeCtor_StringSmallVector(1024) 1.96us 510.33K +BM_fillCtor_StringSmallVector(16) 6.12us 163.46K +BM_fillCtor_StringSmallVector(128) 18.65us 53.63K +BM_fillCtor_StringSmallVector(1024) 132.36us 7.56K +BM_reserve_StringSmallVector(16) 43.86ns 22.80M +BM_reserve_StringSmallVector(128) 51.03ns 19.60M +BM_reserve_StringSmallVector(1024) 48.61ns 20.57M +BM_insertFront_StringSmallVector(16) 127.32us 7.85K +BM_insertFront_StringSmallVector(128) 118.93us 8.41K +BM_insertFront_StringSmallVector(1024) 130.04us 7.69K +BM_insertFront_StringSmallVector(10240) 143.89us 6.95K +BM_insertFront_StringSmallVector(102400) 386.40us 2.59K +BM_insertFront_StringSmallVector(1024000) 3.74ms 267.73 +BM_pushBack_StringSmallVector(16) 50.77ns 19.70M +BM_pushBack_StringSmallVector(128) 44.12ns 22.67M +BM_pushBack_StringSmallVector(1024) 45.62ns 21.92M +BM_pushBack_StringSmallVector(10240) 69.06ns 14.48M +BM_pushBack_StringSmallVector(102400) 139.62ns 7.16M +BM_pushBack_StringSmallVector(1024000) 445.65ns 2.24M +BM_zzInitRNG_StringList 1.17us 854.00K +BM_defaultCtor_StringList 911.39ps 1.10G +BM_sizeCtor_StringList(16) 309.90ns 3.23M +BM_sizeCtor_StringList(128) 3.18us 314.57K +BM_sizeCtor_StringList(1024) 41.72us 23.97K +BM_fillCtor_StringList(16) 7.12us 140.54K +BM_fillCtor_StringList(128) 19.22us 52.04K +BM_fillCtor_StringList(1024) 160.20us 6.24K +BM_insertFront_StringList(16) 27.71ns 36.09M +BM_insertFront_StringList(128) 51.34ns 19.48M +BM_insertFront_StringList(1024) 55.53ns 18.01M +BM_insertFront_StringList(10240) 24.62ns 40.62M +BM_insertFront_StringList(102400) 25.63ns 39.02M +BM_insertFront_StringList(1024000) 341.85ns 2.93M +BM_pushBack_StringList(16) 28.69ns 34.85M +BM_pushBack_StringList(128) 29.11ns 34.36M +BM_pushBack_StringList(1024) 33.28ns 30.05M 
+BM_pushBack_StringList(10240) 26.47ns 37.78M +BM_pushBack_StringList(102400) 48.51ns 20.62M +BM_pushBack_StringList(1024000) 75.97ns 13.16M +BM_zzInitRNG_StringDeque 1.17us 852.21K +BM_defaultCtor_StringDeque 39.44ns 25.36M +BM_sizeCtor_StringDeque(16) 88.29ns 11.33M +BM_sizeCtor_StringDeque(128) 444.53ns 2.25M +BM_sizeCtor_StringDeque(1024) 6.20us 161.17K +BM_fillCtor_StringDeque(16) 6.82us 146.73K +BM_fillCtor_StringDeque(128) 16.95us 58.99K +BM_fillCtor_StringDeque(1024) 121.97us 8.20K +BM_insertFront_StringDeque(16) 10.75ns 92.98M +BM_insertFront_StringDeque(128) 40.83ns 24.49M +BM_insertFront_StringDeque(1024) 10.26ns 97.43M +BM_insertFront_StringDeque(10240) 37.85ns 26.42M +BM_insertFront_StringDeque(102400) 34.75ns 28.78M +BM_insertFront_StringDeque(1024000) 39.31ns 25.44M +BM_pushBack_StringDeque(16) 11.32ns 88.31M +BM_pushBack_StringDeque(128) 11.93ns 83.80M +BM_pushBack_StringDeque(1024) 10.41ns 96.02M +BM_pushBack_StringDeque(10240) 9.83ns 101.72M +BM_pushBack_StringDeque(102400) 64.98ns 15.39M +BM_pushBack_StringDeque(1024000) 33.45ns 29.89M +BM_zzInitRNG_FBStringVector 1.17us 855.50K +BM_defaultCtor_FBStringVector 989.77ps 1.01G +BM_sizeCtor_FBStringVector(16) 35.38ns 28.26M +BM_sizeCtor_FBStringVector(128) 180.30ns 5.55M +BM_sizeCtor_FBStringVector(1024) 1.21us 823.15K +BM_fillCtor_FBStringVector(16) 6.42us 155.85K +BM_fillCtor_FBStringVector(128) 8.90us 112.32K +BM_fillCtor_FBStringVector(1024) 36.57us 27.35K +BM_reserve_FBStringVector(16) 50.12ns 19.95M +BM_reserve_FBStringVector(128) 50.09ns 19.96M +BM_reserve_FBStringVector(1024) 53.58ns 18.66M +BM_insertFront_FBStringVector(16) 105.90us 9.44K +BM_insertFront_FBStringVector(128) 102.06us 9.80K +BM_insertFront_FBStringVector(1024) 103.67us 9.65K +BM_insertFront_FBStringVector(10240) 122.63us 8.15K +BM_insertFront_FBStringVector(102400) 312.48us 3.20K +BM_insertFront_FBStringVector(1024000) 2.30ms 434.80 +BM_pushBack_FBStringVector(16) 10.18ns 98.26M +BM_pushBack_FBStringVector(128) 10.13ns 98.75M +BM_pushBack_FBStringVector(1024) 10.14ns 98.62M +BM_pushBack_FBStringVector(10240) 11.60ns 86.19M +BM_pushBack_FBStringVector(102400) 8.47ns 118.02M +BM_pushBack_FBStringVector(1024000) 88.01ns 11.36M +BM_zzInitRNG_FBStringFBVector 1.03us 971.03K +BM_defaultCtor_FBStringFBVector 911.25ps 1.10G +BM_sizeCtor_FBStringFBVector(16) 33.53ns 29.82M +BM_sizeCtor_FBStringFBVector(128) 135.17ns 7.40M +BM_sizeCtor_FBStringFBVector(1024) 951.05ns 1.05M +BM_fillCtor_FBStringFBVector(16) 5.71us 175.27K +BM_fillCtor_FBStringFBVector(128) 8.11us 123.37K +BM_fillCtor_FBStringFBVector(1024) 37.95us 26.35K +BM_reserve_FBStringFBVector(16) 54.53ns 18.34M +BM_reserve_FBStringFBVector(128) 51.41ns 19.45M +BM_reserve_FBStringFBVector(1024) 55.52ns 18.01M +BM_insertFront_FBStringFBVector(16) 58.80us 17.01K +BM_insertFront_FBStringFBVector(128) 58.45us 17.11K +BM_insertFront_FBStringFBVector(1024) 59.08us 16.93K +BM_insertFront_FBStringFBVector(10240) 69.85us 14.32K +BM_insertFront_FBStringFBVector(102400) 176.99us 5.65K +BM_insertFront_FBStringFBVector(1024000) 4.07ms 245.84 +BM_pushBack_FBStringFBVector(16) 4.19ns 238.39M +BM_pushBack_FBStringFBVector(128) 3.76ns 265.90M +BM_pushBack_FBStringFBVector(1024) 4.68ns 213.66M +BM_pushBack_FBStringFBVector(10240) 3.24ns 309.08M +BM_pushBack_FBStringFBVector(102400) 3.17ns 315.07M +BM_pushBack_FBStringFBVector(1024000) 25.88ns 38.65M +============================================================================ +*/ diff --git a/folly/test/FBVectorBenchmarks.cpp.h b/folly/test/FBVectorBenchmarks.cpp.h new file 
mode 100644 index 00000000000..045dc0d8376 --- /dev/null +++ b/folly/test/FBVectorBenchmarks.cpp.h @@ -0,0 +1,96 @@ +/* + * Copyright 2011-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This file is supposed to be included from within + * FBVectorBenchmark. Do not use otherwise. + */ + +BENCHMARK(BENCHFUN(zzInitRNG)) { + srand(seed); +} + +BENCHMARK(BENCHFUN(defaultCtor), iters) { + FOR_EACH_RANGE (i, 0, iters) { + VECTOR v; + doNotOptimizeAway(&v); + } +} + +void BENCHFUN(sizeCtor)(int iters, int size) { + FOR_EACH_RANGE (i, 0, iters) { + VECTOR v(size); + doNotOptimizeAway(&v); + } +} +BENCHMARK_PARAM(BENCHFUN(sizeCtor), 16); +BENCHMARK_PARAM(BENCHFUN(sizeCtor), 128); +BENCHMARK_PARAM(BENCHFUN(sizeCtor), 1024); + +void BENCHFUN(fillCtor)(int iters, int size) { + FOR_EACH_RANGE (i, 0, iters) { + VECTOR v(size_t(size), randomObject()); + doNotOptimizeAway(&v); + } +} +BENCHMARK_PARAM(BENCHFUN(fillCtor), 16); +BENCHMARK_PARAM(BENCHFUN(fillCtor), 128); +BENCHMARK_PARAM(BENCHFUN(fillCtor), 1024); + +#ifndef SKIP_RESERVE +void BENCHFUN(reserve)(int iters, int size) { + auto const obj = randomObject(); + FOR_EACH_RANGE (i, 0, iters) { + VECTOR v(random(0U, 1U), obj); + v.reserve(size); + } +} +BENCHMARK_PARAM(BENCHFUN(reserve), 16); +BENCHMARK_PARAM(BENCHFUN(reserve), 128); +BENCHMARK_PARAM(BENCHFUN(reserve), 1024); +#endif + +void BENCHFUN(insertFront)(int iters, int initialSize) { + BenchmarkSuspender braces; + auto const obj = randomObject(); + VECTOR v(initialSize, obj); + braces.dismissing([&]() { + FOR_EACH_RANGE (i, 0, iters) { v.insert(v.begin(), obj); } + }); +} + +BENCHMARK_PARAM(BENCHFUN(insertFront), 16); +BENCHMARK_PARAM(BENCHFUN(insertFront), 128); +BENCHMARK_PARAM(BENCHFUN(insertFront), 1024); +BENCHMARK_PARAM(BENCHFUN(insertFront), 10240); +BENCHMARK_PARAM(BENCHFUN(insertFront), 102400); +BENCHMARK_PARAM(BENCHFUN(insertFront), 1024000); + +void BENCHFUN(pushBack)(int iters, int initialSize) { + BenchmarkSuspender braces; + auto const obj = randomObject(); + VECTOR v(initialSize, obj); + braces.dismissing([&]() { + FOR_EACH_RANGE (i, 0, iters) { v.push_back(obj); } + }); +} + +BENCHMARK_PARAM(BENCHFUN(pushBack), 16); +BENCHMARK_PARAM(BENCHFUN(pushBack), 128); +BENCHMARK_PARAM(BENCHFUN(pushBack), 1024); +BENCHMARK_PARAM(BENCHFUN(pushBack), 10240); +BENCHMARK_PARAM(BENCHFUN(pushBack), 102400); +BENCHMARK_PARAM(BENCHFUN(pushBack), 1024000); diff --git a/folly/test/FBVectorTest.cpp b/folly/test/FBVectorTest.cpp index f0594e96a07..4af628292cb 100644 --- a/folly/test/FBVectorTest.cpp +++ b/folly/test/FBVectorTest.cpp @@ -30,48 +30,21 @@ #include #include #include +#include using namespace std; using namespace folly; +using namespace folly::test::detail; -namespace { - -auto static const seed = randomNumberSeed(); -typedef boost::mt19937 RandomT; -static RandomT rng(seed); - -template -Integral2 random(Integral1 low, Integral2 up) { - boost::uniform_int<> range(low, up); - return range(rng); -} - -template -void randomString(String* 
toFill, unsigned int maxSize = 1000) { - assert(toFill); - toFill->resize(random(0, maxSize)); - FOR_EACH (i, *toFill) { - *i = random('a', 'z'); - } -} - -template -void Num2String(String& str, Integral /* n */) { - str.resize(10, '\0'); - sprintf(&str[0], "%ul", 10); - str.resize(strlen(str.c_str())); -} - -template T randomObject(); - -template <> int randomObject() { - return random(0, 1024); -} -} // namespace +using IntFBVector = fbvector; +using FBStringFBVector = fbvector; -//////////////////////////////////////////////////////////////////////////////// -// Tests begin here -//////////////////////////////////////////////////////////////////////////////// +#define VECTOR IntFBVector +#include // nolint +#undef VECTOR +#define VECTOR FBStringFBVector +#include // nolint +#undef VECTOR TEST(fbvector, clause_23_3_6_1_3_ambiguity) { fbvector v(10, 20); diff --git a/folly/test/FBVectorTestUtil.h b/folly/test/FBVectorTestUtil.h new file mode 100644 index 00000000000..0af45d19f8a --- /dev/null +++ b/folly/test/FBVectorTestUtil.h @@ -0,0 +1,93 @@ +/* + * Copyright 2012-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// +// Author: andrei.alexandrescu@fb.com + +#include +#include +#include +#include + +namespace folly { +namespace test { +namespace detail { + +auto static const seed = randomNumberSeed(); +typedef boost::mt19937 RandomT; +static RandomT rng(seed); + +template +Integral2 random(Integral1 low, Integral2 up) { + boost::uniform_int<> range(low, up); + return range(rng); +} + +template +void randomString(String* toFill, unsigned int maxSize = 1000) { + assert(toFill); + toFill->resize(random(0, maxSize)); + for (auto& c : *toFill) { + c = random('a', 'z'); + } +} + +template +void Num2String(String& str, Integral /* n */) { + str.resize(10, '\0'); + sprintf(&str[0], "%ul", 10); + str.resize(strlen(str.c_str())); +} + +std::list RandomList(unsigned int maxSize) { + std::list lst(random(0u, maxSize)); + std::list::iterator i = lst.begin(); + for (; i != lst.end(); ++i) { + *i = random('a', 'z'); + } + return lst; +} + +template +T randomObject(); + +template <> +int randomObject() { + return random(0, 1024); +} + +template <> +std::string randomObject() { + std::string result; + randomString(&result); + return result; +} + +template <> +folly::fbstring randomObject() { + folly::fbstring result; + randomString(&result); + return result; +} + +#define CONCAT(A, B) CONCAT_HELPER(A, B) +#define CONCAT_HELPER(A, B) A##B +#define BENCHFUN(F) CONCAT(CONCAT(BM_, F), CONCAT(_, VECTOR)) +#define TESTFUN(F) TEST(fbvector, CONCAT(F, VECTOR)) + +} // namespace detail +} // namespace test +} // namespace folly diff --git a/folly/test/FBVectorTestBenchmarks.cpp.h b/folly/test/FBVectorTests.cpp.h similarity index 66% rename from folly/test/FBVectorTestBenchmarks.cpp.h rename to folly/test/FBVectorTests.cpp.h index ab76da59794..efe92fa7d2a 100644 --- a/folly/test/FBVectorTestBenchmarks.cpp.h +++ b/folly/test/FBVectorTests.cpp.h @@ -31,9 +31,7 @@ 
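Editor's note on the CONCAT/BENCHFUN/TESTFUN machinery collected into FBVectorTestUtil.h above (the renamed FBVectorTests.cpp.h hunk continues right below): it is a classic two-step token paste. Routing through CONCAT_HELPER forces VECTOR to be macro-expanded before ## fires, so each #define VECTOR X / #include pair stamps out uniquely named functions. A self-contained sketch of the expansion; MAKEFUN stands in for BENCHFUN:

#include <cstdio>

#define CONCAT(A, B) CONCAT_HELPER(A, B)
#define CONCAT_HELPER(A, B) A##B
// Mirrors BENCHFUN: pastes a prefix, the function tag, and the
// current VECTOR macro into one identifier.
#define MAKEFUN(F) CONCAT(CONCAT(run_, F), CONCAT(_, VECTOR))

#define VECTOR IntVector
void MAKEFUN(push)() { std::puts("run_push_IntVector"); }
#undef VECTOR

#define VECTOR IntFBVector
void MAKEFUN(push)() { std::puts("run_push_IntFBVector"); }
#undef VECTOR

int main() {
  run_push_IntVector(); // both instantiations coexist by name
  run_push_IntFBVector();
}

// Without the CONCAT_HELPER indirection, A##B would paste the literal
// token VECTOR instead of its current replacement, colliding on
// run_push_VECTOR for every container.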
TESTFUN(clause_23_3_6_1_3) { auto const n = random(0U, 10000U); VECTOR v(n); EXPECT_EQ(v.size(), n); - FOR_EACH (i, v) { - EXPECT_EQ(*i, VECTOR::value_type()); - } + FOR_EACH (i, v) { EXPECT_EQ(*i, VECTOR::value_type()); } } TESTFUN(clause_23_3_6_1_9) { @@ -87,9 +85,7 @@ TESTFUN(clause_23_3_6_1_12) { auto const obj = randomObject(); v.assign(n, obj); EXPECT_EQ(v.size(), n); - FOR_EACH (i, v) { - EXPECT_EQ(*i, obj); - } + FOR_EACH (i, v) { EXPECT_EQ(*i, obj); } } TESTFUN(clause_23_3_6_2_1) { @@ -108,24 +104,16 @@ TESTFUN(clause_23_3_6_2_7) { v1.swap(v2); EXPECT_EQ(v1.size(), n2); EXPECT_EQ(v2.size(), n1); - FOR_EACH (i, v1) { - EXPECT_EQ(*i, obj2); - } - FOR_EACH (i, v2) { - EXPECT_EQ(*i, obj1); - } + FOR_EACH (i, v1) { EXPECT_EQ(*i, obj2); } + FOR_EACH (i, v2) { EXPECT_EQ(*i, obj1); } } TESTFUN(clause_23_3_6_2_9) { VECTOR v; auto const n1 = random(0U, 10000U); v.resize(n1); - FOR_EACH (i, v) { - EXPECT_EQ(*i, VECTOR::value_type()); - } - FOR_EACH (i, v) { - EXPECT_EQ(*i, VECTOR::value_type()); - } + FOR_EACH (i, v) { EXPECT_EQ(*i, VECTOR::value_type()); } + FOR_EACH (i, v) { EXPECT_EQ(*i, VECTOR::value_type()); } } TESTFUN(clause_23_3_6_2_11) { @@ -133,16 +121,12 @@ TESTFUN(clause_23_3_6_2_11) { auto const n1 = random(0U, 10000U); auto const obj1 = randomObject(); v.resize(n1, obj1); - FOR_EACH (i, v) { - EXPECT_EQ(*i, obj1); - } + FOR_EACH (i, v) { EXPECT_EQ(*i, obj1); } auto const n2 = random(0U, 10000U); auto const obj2 = randomObject(); v.resize(n2, obj2); if (n1 < n2) { - FOR_EACH_RANGE (i, n1, n2) { - EXPECT_EQ(v[i], obj2); - } + FOR_EACH_RANGE (i, n1, n2) { EXPECT_EQ(v[i], obj2); } } } @@ -188,12 +172,8 @@ TESTFUN(clause_23_3_6_4_1_a) { EXPECT_EQ(v.size(), w.size() + 1); EXPECT_EQ(r - v.begin(), n2); EXPECT_EQ(*r, obj2); - FOR_EACH_RANGE (i, 0, r - v.begin()) { - EXPECT_EQ(v[i], w[i]); - } - FOR_EACH_RANGE (i, r - v.begin() + 1, v.size()) { - EXPECT_EQ(v[i], w[i - 1]); - } + FOR_EACH_RANGE (i, 0, r - v.begin()) { EXPECT_EQ(v[i], w[i]); } + FOR_EACH_RANGE (i, r - v.begin() + 1, v.size()) { EXPECT_EQ(v[i], w[i - 1]); } } TESTFUN(clause_23_3_6_4_1_c) { @@ -205,7 +185,7 @@ TESTFUN(clause_23_3_6_4_1_c) { v.push_back(obj1); w.push_back(obj1); } - auto const n2 = random(0U, n1-1); + auto const n2 = random(0U, n1 - 1); auto pos = v.begin() + n2; auto const obj2 = randomObject(); auto const n3 = random(0U, 10000U); @@ -214,9 +194,7 @@ TESTFUN(clause_23_3_6_4_1_c) { EXPECT_EQ(v.size(), w.size() + n3); EXPECT_EQ(r - v.begin(), n2); - FOR_EACH_RANGE (i, 0, r - v.begin()) { - EXPECT_EQ(v[i], w[i]); - } + FOR_EACH_RANGE (i, 0, r - v.begin()) { EXPECT_EQ(v[i], w[i]); } FOR_EACH_RANGE (i, r - v.begin(), r - v.begin() + n3) { EXPECT_EQ(v[i], obj2); } @@ -240,9 +218,7 @@ TESTFUN(clause_23_3_6_4_1_d) { EXPECT_EQ(v.back(), obj2); EXPECT_EQ(v.size(), w.size() + 1); - FOR_EACH_RANGE (i, 0, w.size()) { - EXPECT_EQ(v[i], w[i]); - } + FOR_EACH_RANGE (i, 0, w.size()) { EXPECT_EQ(v[i], w[i]); } } TESTFUN(clause_23_3_6_4_3) { @@ -259,13 +235,9 @@ TESTFUN(clause_23_3_6_4_3) { auto it = v.erase(v.begin() + n2); EXPECT_EQ(v.size() + 1, w.size()); - FOR_EACH_RANGE (i, 0, it - v.begin()) { - EXPECT_EQ(v[i], w[i]); - } + FOR_EACH_RANGE (i, 0, it - v.begin()) { EXPECT_EQ(v[i], w[i]); } - FOR_EACH_RANGE (i, it - v.begin(), v.size()) { - EXPECT_EQ(v[i], w[i + 1]); - } + FOR_EACH_RANGE (i, it - v.begin(), v.size()) { EXPECT_EQ(v[i], w[i + 1]); } } TESTFUN(clause_23_3_6_4_4) { @@ -283,9 +255,7 @@ TESTFUN(clause_23_3_6_4_4) { auto it = v.erase(v.begin() + n2, v.begin() + n3); EXPECT_EQ(v.size() + (n3 - n2), w.size()); - 
FOR_EACH_RANGE (i, 0, it - v.begin()) { - EXPECT_EQ(v[i], w[i]); - } + FOR_EACH_RANGE (i, 0, it - v.begin()) { EXPECT_EQ(v[i], w[i]); } FOR_EACH_RANGE (i, it - v.begin(), v.size()) { EXPECT_EQ(v[i], w[i + (n3 - n2)]); @@ -302,83 +272,3 @@ TESTFUN(clause_23_3_6_4_clear) { EXPECT_TRUE(v.empty()); EXPECT_EQ(v.capacity(), c); } - -BENCHMARK(BENCHFUN(zzInitRNG)) { - //LOG(INFO) << "\nTesting with type " << typeid(VECTOR).name() << "\n"; - srand(seed); -} - -BENCHMARK(BENCHFUN(defaultCtor), iters) { - FOR_EACH_RANGE (i, 0, iters) { - VECTOR v[4096]; - doNotOptimizeAway(&v); - } -} - -void BENCHFUN(sizeCtor)(int iters, int size) { - FOR_EACH_RANGE (i, 0, iters) { - VECTOR v(size); - doNotOptimizeAway(&v); - } -} -BENCHMARK_PARAM(BENCHFUN(sizeCtor), 128); -BENCHMARK_PARAM(BENCHFUN(sizeCtor), 1024); -BENCHMARK_PARAM(BENCHFUN(sizeCtor), 1048576); - -void BENCHFUN(fillCtor)(int iters, int size) { - FOR_EACH_RANGE (i, 0, iters) { - VECTOR v(size_t(size), randomObject()); - doNotOptimizeAway(&v); - } -} -BENCHMARK_PARAM(BENCHFUN(fillCtor), 128); -BENCHMARK_PARAM(BENCHFUN(fillCtor), 1024); -BENCHMARK_PARAM(BENCHFUN(fillCtor), 10240); - -void BENCHFUN(pushBack)(int iters, int size) { - auto const obj = randomObject(); - FOR_EACH_RANGE (i, 0, iters) { - VECTOR v; - FOR_EACH_RANGE (j, 0, size) { - v.push_back(obj); - } - } -} -BENCHMARK_PARAM(BENCHFUN(pushBack), 128); -BENCHMARK_PARAM(BENCHFUN(pushBack), 1024); -BENCHMARK_PARAM(BENCHFUN(pushBack), 10240); -BENCHMARK_PARAM(BENCHFUN(pushBack), 102400); -BENCHMARK_PARAM(BENCHFUN(pushBack), 512000); - -void BENCHFUN(reserve)(int iters, int /* size */) { - auto const obj = randomObject(); - VECTOR v(random(0U, 10000U), obj); - FOR_EACH_RANGE (i, 0, iters) { - v.reserve(random(0U, 100000U)); - } -} -BENCHMARK_PARAM(BENCHFUN(reserve), 128); -BENCHMARK_PARAM(BENCHFUN(reserve), 1024); -BENCHMARK_PARAM(BENCHFUN(reserve), 10240); - -void BENCHFUN(insert)(int iters, int /* size */) { - auto const obj1 = randomObject(); - auto const obj2 = randomObject(); - VECTOR v(random(0U, 1U), obj1); - FOR_EACH_RANGE (i, 0, iters / 100) { - v.insert(v.begin(), obj2); - } -} -BENCHMARK_PARAM(BENCHFUN(insert), 100); - -void BENCHFUN(erase)(int iters, int /* size */) { - auto const obj1 = randomObject(); - VECTOR v(random(0U, 100U), obj1); - FOR_EACH_RANGE (i, 0, iters) { - if (v.empty()) { - continue; - } - v.erase(v.begin()); - } -} -BENCHMARK_PARAM(BENCHFUN(erase), 1024); diff --git a/folly/test/Makefile.am b/folly/test/Makefile.am index c478d5c41b9..a1b922b03d4 100644 --- a/folly/test/Makefile.am +++ b/folly/test/Makefile.am @@ -39,7 +39,8 @@ thread_local_test_lib_la_LDFLAGS = -module -rpath /force_shared thread_local_test_lib_la_LIBADD = $(top_builddir)/libfolly.la noinst_HEADERS = FBStringTestBenchmarks.cpp.h \ - FBVectorTestBenchmarks.cpp.h + FBVectorBenchmarks.cpp.h \ + FBVectorTests.cpp.h spin_lock_test_SOURCES = SpinLockTest.cpp spin_lock_test_LDADD = libfollytestmain.la diff --git a/folly/test/MemoryTest.cpp b/folly/test/MemoryTest.cpp index 747d6ac7b2c..b5536bfbbb3 100644 --- a/folly/test/MemoryTest.cpp +++ b/folly/test/MemoryTest.cpp @@ -23,6 +23,7 @@ #include #include +#include #include using namespace folly; @@ -57,72 +58,113 @@ TEST(to_weak_ptr, example) { EXPECT_EQ(3, (to_weak_ptr(decltype(s)(s)).lock(), s.use_count())) << "rvalue"; } +TEST(SysAllocator, equality) { + using Alloc = SysAllocator; + Alloc const a, b; + EXPECT_TRUE(a == b); + EXPECT_FALSE(a != b); +} + +TEST(SysAllocator, allocate_unique) { + using Alloc = SysAllocator; + Alloc const alloc; + auto ptr = 
allocate_unique(alloc, 3.); + EXPECT_EQ(3., *ptr); +} + +TEST(SysAllocator, vector) { + using Alloc = SysAllocator; + Alloc const alloc; + std::vector nums(alloc); + nums.push_back(3.); + nums.push_back(5.); + EXPECT_THAT(nums, testing::ElementsAreArray({3., 5.})); +} + +TEST(SysAllocator, bad_alloc) { + using Alloc = SysAllocator; + Alloc const alloc; + std::vector nums(alloc); + if (!kIsSanitize) { + EXPECT_THROW(nums.reserve(1ull << 50), std::bad_alloc); + } +} + +TEST(AlignedSysAllocator, equality_fixed) { + using Alloc = AlignedSysAllocator>; + Alloc const a, b; + EXPECT_TRUE(a == b); + EXPECT_FALSE(a != b); +} + +TEST(AlignedSysAllocator, allocate_unique_fixed) { + using Alloc = AlignedSysAllocator>; + Alloc const alloc; + auto ptr = allocate_unique(alloc, 3.); + EXPECT_EQ(3., *ptr); + EXPECT_EQ(0, std::uintptr_t(ptr.get()) % 1024); +} + +TEST(AlignedSysAllocator, vector_fixed) { + using Alloc = AlignedSysAllocator>; + Alloc const alloc; + std::vector nums(alloc); + nums.push_back(3.); + nums.push_back(5.); + EXPECT_THAT(nums, testing::ElementsAreArray({3., 5.})); + EXPECT_EQ(0, std::uintptr_t(nums.data()) % 1024); +} + +TEST(AlignedSysAllocator, bad_alloc_fixed) { + using Alloc = AlignedSysAllocator>; + Alloc const alloc; + std::vector nums(alloc); + if (!kIsSanitize) { + EXPECT_THROW(nums.reserve(1ull << 50), std::bad_alloc); + } +} + +TEST(AlignedSysAllocator, equality_default) { + using Alloc = AlignedSysAllocator; + Alloc const a(1024), b(1024), c(512); + EXPECT_TRUE(a == b); + EXPECT_FALSE(a != b); + EXPECT_FALSE(a == c); + EXPECT_TRUE(a != c); +} + +TEST(AlignedSysAllocator, allocate_unique_default) { + using Alloc = AlignedSysAllocator; + Alloc const alloc(1024); + auto ptr = allocate_unique(alloc, 3.); + EXPECT_EQ(3., *ptr); + EXPECT_EQ(0, std::uintptr_t(ptr.get()) % 1024); +} + +TEST(AlignedSysAllocator, vector_default) { + using Alloc = AlignedSysAllocator; + Alloc const alloc(1024); + std::vector nums(alloc); + nums.push_back(3.); + nums.push_back(5.); + EXPECT_THAT(nums, testing::ElementsAreArray({3., 5.})); + EXPECT_EQ(0, std::uintptr_t(nums.data()) % 1024); +} + +TEST(AlignedSysAllocator, bad_alloc_default) { + using Alloc = AlignedSysAllocator; + Alloc const alloc(1024); + std::vector nums(alloc); + if (!kIsSanitize) { + EXPECT_THROW(nums.reserve(1ull << 50), std::bad_alloc); + } +} + TEST(allocate_sys_buffer, compiles) { auto buf = allocate_sys_buffer(256); // Freed at the end of the scope. 
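Editor's note on the AlignedSysAllocator tests above (the test's closing brace follows this note): they cover its two modes, alignment baked into the type via FixedAlign, or carried as a runtime constructor argument with DefaultAlign, in which case allocators with different alignments compare unequal. A usage sketch, assuming folly/Memory.h as in the test:

#include <folly/Memory.h>

#include <cstdint>
#include <vector>

int main() {
  // Compile-time alignment: part of the allocator's type.
  folly::AlignedSysAllocator<float, folly::FixedAlign<64>> fixed;
  std::vector<float, decltype(fixed)> a(fixed);
  a.resize(1024);
  // Storage is 64-byte aligned, e.g. for cache-line or SIMD use.
  bool ok1 = reinterpret_cast<std::uintptr_t>(a.data()) % 64 == 0;

  // Runtime alignment: passed to the constructor; equality compares it.
  folly::AlignedSysAllocator<float> dyn(4096);
  std::vector<float, decltype(dyn)> b(dyn);
  b.resize(16);
  bool ok2 = reinterpret_cast<std::uintptr_t>(b.data()) % 4096 == 0;
  return ok1 && ok2 ? 0 : 1;
}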
diff --git a/folly/test/StringTest.cpp b/folly/test/StringTest.cpp
index 5b2d6598844..b8dd9d4c9a7 100644
--- a/folly/test/StringTest.cpp
+++ b/folly/test/StringTest.cpp
@@ -27,6 +27,7 @@
 #include
 #include
+#include <folly/test/TestUtils.h>
 
 using namespace folly;
 using namespace std;
@@ -184,13 +185,20 @@ TEST(Escape, cUnescape) {
   EXPECT_EQ("hello\nworld", cUnescape<std::string>("hello\\x0aworld"));
   EXPECT_EQ("hello\xff\xfe", cUnescape<std::string>("hello\\377\\376"));
   EXPECT_EQ("hello\xff\xfe", cUnescape<std::string>("hello\\xff\\xfe"));
-
-  EXPECT_THROW({cUnescape<std::string>("hello\\");},
-               std::invalid_argument);
-  EXPECT_THROW({cUnescape<std::string>("hello\\x");},
-               std::invalid_argument);
-  EXPECT_THROW({cUnescape<std::string>("hello\\q");},
-               std::invalid_argument);
+  EXPECT_EQ("hello\\", cUnescape<std::string>("hello\\", false));
+
+  EXPECT_THROW_RE(
+      cUnescape<std::string>("hello\\"),
+      std::invalid_argument,
+      "incomplete escape sequence");
+  EXPECT_THROW_RE(
+      cUnescape<std::string>("hello\\x"),
+      std::invalid_argument,
+      "incomplete hex escape sequence");
+  EXPECT_THROW_RE(
+      cUnescape<std::string>("hello\\q"),
+      std::invalid_argument,
+      "invalid escape sequence");
 }
 
 TEST(Escape, uriEscape) {
diff --git a/folly/test/UtilityTest.cpp b/folly/test/UtilityTest.cpp
index d27f4675392..a717261e024 100644
--- a/folly/test/UtilityTest.cpp
+++ b/folly/test/UtilityTest.cpp
@@ -119,3 +119,29 @@ TEST_F(UtilityTest, MoveOnly) {
       std::is_nothrow_move_constructible<FooBar>::value,
       "Should have noexcept move constructor");
 }
+
+TEST_F(UtilityTest, to_signed) {
+  {
+    constexpr auto actual = folly::to_signed(int32_t(-12));
+    EXPECT_TRUE(std::is_signed<decltype(actual)>::value);
+    EXPECT_EQ(-12, actual);
+  }
+  {
+    constexpr auto actual = folly::to_signed(uint32_t(-12));
+    EXPECT_TRUE(std::is_signed<decltype(actual)>::value);
+    EXPECT_EQ(-12, actual);
+  }
+}
+
+TEST_F(UtilityTest, to_unsigned) {
+  {
+    constexpr auto actual = folly::to_unsigned(int32_t(-12));
+    EXPECT_TRUE(!std::is_signed<decltype(actual)>::value);
+    EXPECT_EQ(-12, actual);
+  }
+  {
+    constexpr auto actual = folly::to_unsigned(uint32_t(-12));
+    EXPECT_TRUE(!std::is_signed<decltype(actual)>::value);
+    EXPECT_EQ(-12, actual);
+  }
+}
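One subtlety in the to_unsigned tests above: EXPECT_EQ(-12, actual) still passes when actual is unsigned, because the usual arithmetic conversions convert -12 to the same wrapped value before the comparison. A standalone sketch of that arithmetic, with a plain static_cast standing in for folly::to_unsigned:

#include <cstdint>
#include <type_traits>

int main() {
  // folly::to_unsigned(int32_t(-12)) is morally this cast:
  auto u = static_cast<std::make_unsigned<std::int32_t>::type>(-12);
  static_assert(std::is_same<decltype(u), std::uint32_t>::value, "uint32_t");

  // In `-12 == u`, the usual arithmetic conversions convert -12 to
  // unsigned, yielding the same wrapped value 0xFFFFFFF4.
  return (-12 == u && u == 0xFFFFFFF4u) ? 0 : 1;
}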
diff --git a/folly/test/common/TestMain.cpp b/folly/test/common/TestMain.cpp
index 53c29650020..d722c439129 100644
--- a/folly/test/common/TestMain.cpp
+++ b/folly/test/common/TestMain.cpp
@@ -25,7 +25,7 @@
  * The Makefile links it into all of the test programs so that tests do not need
  * to - and indeed should typically not - define their own main() functions
  */
-int main(int argc, char** argv) __attribute__((__weak__));
+FOLLY_ATTR_WEAK int main(int argc, char** argv);
 
 int main(int argc, char** argv) {
 #if FOLLY_HAVE_LIBGFLAGS
diff --git a/folly/test/stl_tests/StlVectorTest.cpp b/folly/test/stl_tests/StlVectorTest.cpp
index b9dde02485a..87bf4875357 100644
--- a/folly/test/stl_tests/StlVectorTest.cpp
+++ b/folly/test/stl_tests/StlVectorTest.cpp
@@ -195,6 +195,8 @@ the cascade of errors will be enormous. They are, therefore, tested first.
 FOLLY_PUSH_WARNING
 FOLLY_GCC_DISABLE_WARNING("-Wunused-parameter")
 FOLLY_GCC_DISABLE_WARNING("-Wunused-variable")
+// Using SCOPED_TRACE repeatedly from within a macro violates -Wshadow
+FOLLY_GCC_DISABLE_WARNING("-Wshadow-compatible-local")
 
 using namespace std;
 using namespace folly;
@@ -2012,6 +2014,7 @@ STL_TEST("23.2.1 Table 99.5", copyWithAllocator, is_copy_constructible, a, m) {
 
 STL_TEST("23.2.1 Table 99.6", moveConstructionWithAllocator, is_destructible, a) {
+  (void)a;
   // there is nothing new to test here
 }
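The TestMain.cpp change above replaces a raw __attribute__((__weak__)) with FOLLY_ATTR_WEAK, so the weak-symbol annotation stays portable across folly's supported compilers. The linker mechanism it relies on, in a self-contained sketch; default_hook is an illustrative name, and this assumes FOLLY_ATTR_WEAK comes from folly/CPortability.h, as the patch's usage suggests:

#include <folly/CPortability.h>

#include <cstdio>

// A weak definition survives only if no other translation unit provides
// a strong definition of the same symbol; this is how the shared test
// main() yields to a test binary's own main() without a duplicate-symbol
// error at link time.
FOLLY_ATTR_WEAK void default_hook() {
  std::puts("weak default");
}

int main() {
  default_hook(); // "weak default", unless another TU overrides default_hook()
  return 0;
}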