Skip to content
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ add_benchmark(fill src/fill.cpp)
# Each call passes a benchmark name and the source file that implements it.
# NOTE(review): add_benchmark is not defined in this excerpt; entries appear to be kept in
# alphabetical order by name - confirm before inserting a new one elsewhere.
add_benchmark(find_and_count src/find_and_count.cpp)
add_benchmark(find_first_of src/find_first_of.cpp)
add_benchmark(has_single_bit src/has_single_bit.cpp)
add_benchmark(includes src/includes.cpp)
add_benchmark(iota src/iota.cpp)
add_benchmark(is_sorted_until src/is_sorted_until.cpp)
add_benchmark(locale_classic src/locale_classic.cpp)
Expand Down
124 changes: 124 additions & 0 deletions benchmarks/src/includes.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <algorithm>
#include <benchmark/benchmark.h>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <random>
#include <ranges>
#include <type_traits>
#include <vector>

#include "skewed_allocator.hpp"
#include "utility.hpp"

using namespace std;

// Selects which overload of `includes` a benchmark instantiation exercises.
enum class alg_type {
    std_fn, // iterator-pair `includes(first1, last1, first2, last2)`
    rng, // `ranges::includes(haystack, needle)`
};

// Selects how the needle's elements are picked out of the sorted haystack.
enum class needle_spread {
    dense, // one contiguous run straddling the haystack's midpoint
    dense_random, // ascending positions around the midpoint, separated by random capped gaps
    sparse, // evenly spaced positions across the whole haystack
    sparse_random, // a random sample of the haystack
};

// Benchmarks `includes` on a haystack produced by `random_vector` and then sorted.
//
// Benchmark arguments, read from `state.range`:
//   0 - number of haystack elements
//   1 - number of needle elements
//   2 - a `needle_spread` value choosing how the needle is drawn from the haystack
//   3 - whether the needle is expected to be found (nonzero) or not (zero)
//
// `Alg` picks between the iterator-pair `includes` and `ranges::includes`. The result of every timed call is
// compared with the expectation; a mismatch prints a message to `cerr` and aborts the process.
template <class T, alg_type Alg>
void bm_includes(benchmark::State& state) {
    const auto haystack_len = static_cast<size_t>(state.range(0));
    const auto needle_len   = static_cast<size_t>(state.range(1));
    const auto layout       = static_cast<needle_spread>(state.range(2));
    const auto should_match = static_cast<bool>(state.range(3));

    auto haystack = random_vector<T, not_highly_aligned_allocator>(haystack_len);
    ranges::sort(haystack);

    vector<T, not_highly_aligned_allocator<T>> needle;
    switch (layout) {
    case needle_spread::dense:
        {
            // A contiguous slice of `needle_len` elements straddling the haystack's midpoint.
            const auto center = haystack.begin() + haystack_len / 2;
            needle.assign(center - needle_len / 2, center + (needle_len + 1) / 2);
        }
        break;

    case needle_spread::dense_random:
        {
            // Choose strictly ascending haystack positions growing outward from the midpoint. Each step is a
            // geometrically distributed gap of at least 1, capped at `haystack_len / needle_len`.
            mt19937 rng{};
            geometric_distribution<size_t> gap_dist{};
            vector<size_t> positions(needle_len);
            const size_t pivot = needle_len / 2;
            positions[pivot]   = haystack_len / 2;

            const size_t gap_cap = haystack_len / needle_len;
            const auto next_gap  = [&rng, &gap_dist, gap_cap] { return min(gap_dist(rng) + 1, gap_cap); };

            // Fill leftwards from the pivot first, then rightwards; the generator is consumed in that order.
            for (size_t i = pivot; i > 0; --i) {
                positions[i - 1] = positions[i] - next_gap();
            }

            for (size_t i = pivot + 1; i < needle_len; ++i) {
                positions[i] = positions[i - 1] + next_gap();
            }

            const auto at_position = [&haystack](const size_t pos) { return haystack[pos]; };
            needle.assign_range(views::transform(positions, at_position));
        }
        break;

    case needle_spread::sparse:
        // One element taken from the middle of each of `needle_len` equal-width haystack segments.
        needle.resize(needle_len);
        for (size_t k = 0; k < needle_len; ++k) {
            const size_t pos = haystack_len * k / needle_len + haystack_len / (needle_len * 2);
            needle[k]        = haystack[pos];
        }
        break;

    case needle_spread::sparse_random:
        // A sample of the haystack drawn with a default-seeded generator.
        needle.resize(needle_len);
        ranges::sample(haystack, needle.begin(), needle_len, mt19937{});
        break;
    }

    if (!should_match) {
        // Force a mismatch: rewrite every haystack occurrence of the needle's middle value to the next value,
        // then restore sorted order. The increment goes through the unsigned type so it wraps at the maximum.
        const T removed     = needle[needle_len / 2];
        const T replacement = static_cast<T>(static_cast<make_unsigned_t<T>>(removed) + 1);
        ranges::replace(haystack, removed, replacement);
        ranges::sort(haystack);
    }

    for (auto _ : state) {
        benchmark::DoNotOptimize(haystack);
        benchmark::DoNotOptimize(needle);
        bool found;
        if constexpr (Alg == alg_type::std_fn) {
            found = includes(haystack.begin(), haystack.end(), needle.begin(), needle.end());
        } else {
            found = ranges::includes(haystack, needle);
        }
        benchmark::DoNotOptimize(found);
        if (found != should_match) {
            cerr << "Unexpected 'includes' result: " << found << '\n';
            abort();
        }
    }
}

void common_args(auto bm) {
for (const auto& spread :
{needle_spread::dense, needle_spread::dense_random, needle_spread::sparse, needle_spread::sparse_random}) {
for (const auto& expected_match : {true, false}) {
for (const auto& needle_size : {3, 22, 105, 1504, 2750}) {
bm->Args({3000, needle_size, static_cast<underlying_type_t<needle_spread>>(spread), expected_match});
}

for (const auto& needle_size : {3, 22, 105, 290}) {
bm->Args({300, needle_size, static_cast<underlying_type_t<needle_spread>>(spread), expected_match});
}
}
}
}

// Iterator-pair `includes`, for 8-, 16-, 32- and 64-bit signed elements, each with the shared argument sets.
BENCHMARK(bm_includes<int8_t, alg_type::std_fn>)->Apply(common_args);
BENCHMARK(bm_includes<int16_t, alg_type::std_fn>)->Apply(common_args);
BENCHMARK(bm_includes<int32_t, alg_type::std_fn>)->Apply(common_args);
BENCHMARK(bm_includes<int64_t, alg_type::std_fn>)->Apply(common_args);

// `ranges::includes`, for the same element types and argument sets.
BENCHMARK(bm_includes<int8_t, alg_type::rng>)->Apply(common_args);
BENCHMARK(bm_includes<int16_t, alg_type::rng>)->Apply(common_args);
BENCHMARK(bm_includes<int32_t, alg_type::rng>)->Apply(common_args);
BENCHMARK(bm_includes<int64_t, alg_type::rng>)->Apply(common_args);

// Google Benchmark macro that supplies the program's `main`.
BENCHMARK_MAIN();
28 changes: 21 additions & 7 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -10158,17 +10158,31 @@ _NODISCARD _CONSTEXPR20 bool includes(_InIt1 _First1, _InIt1 _Last1, _InIt2 _Fir
const auto _ULast2 = _STD _Get_unwrapped(_Last2);
_DEBUG_ORDER_SET_UNWRAPPED(_InIt2, _UFirst1, _ULast1, _Pred);
_DEBUG_ORDER_SET_UNWRAPPED(_InIt1, _UFirst2, _ULast2, _Pred);
for (; _UFirst1 != _ULast1 && _UFirst2 != _ULast2; ++_UFirst1) {
if (_DEBUG_LT_PRED(_Pred, *_UFirst2, *_UFirst1)) {
return false;
}

if (!_Pred(*_UFirst1, *_UFirst2)) {
if (_UFirst2 == _ULast2) {
return true;
} else if (_UFirst1 == _ULast1) {
return false;
}

for (;;) {
if (_DEBUG_LT_PRED(_Pred, *_UFirst1, *_UFirst2)) {
++_UFirst1;
if (_UFirst1 == _ULast1) {
return false;
}
} else if (_Pred(*_UFirst2, *_UFirst1)) {
return false;
} else {
++_UFirst1;
++_UFirst2;
if (_UFirst2 == _ULast2) {
return true;
} else if (_UFirst1 == _ULast1) {
return false;
}
}
}

return _UFirst2 == _ULast2;
}

_EXPORT_STD template <class _InIt1, class _InIt2>
Expand Down