Skip to content

Commit 0d484dd

Browse files
Optimize all_of / any_of / none_of for vector<bool> with some predicates (#5802)
Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
1 parent 116f1cb commit 0d484dd

6 files changed

Lines changed: 288 additions & 15 deletions

File tree

benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,5 +144,6 @@ add_benchmark(unique src/unique.cpp)
144144
add_benchmark(vector_bool_copy src/vector_bool_copy.cpp)
145145
add_benchmark(vector_bool_copy_n src/vector_bool_copy_n.cpp)
146146
add_benchmark(vector_bool_count src/vector_bool_count.cpp)
147+
add_benchmark(vector_bool_meow_of src/vector_bool_meow_of.cpp)
147148
add_benchmark(vector_bool_move src/vector_bool_move.cpp)
148149
add_benchmark(vector_bool_transform src/vector_bool_transform.cpp)
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
// Copyright (c) Microsoft Corporation.
2+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
3+
4+
#include <benchmark/benchmark.h>
5+
//
6+
#include <algorithm>
7+
#include <cstddef>
8+
#include <functional>
9+
#include <vector>
10+
11+
#include "skewed_allocator.hpp"
12+
13+
using namespace std;
14+
15+
enum class alg { all_, any_, none_ };
16+
enum class content { ones_then_zeros, zeros_then_ones };
17+
18+
template <alg Alg, content Content, class Pred = identity>
19+
void meow_of(benchmark::State& state) {
20+
const auto n = static_cast<size_t>(state.range(0));
21+
vector<bool, not_highly_aligned_allocator<bool>> source(n);
22+
23+
if constexpr (Content == content::ones_then_zeros) {
24+
fill(source.begin(), source.begin() + source.size() / 2, true);
25+
} else {
26+
fill(source.begin() + source.size() / 2, source.end(), true);
27+
}
28+
29+
for (auto _ : state) {
30+
benchmark::DoNotOptimize(source);
31+
bool result;
32+
if constexpr (Alg == alg::all_) {
33+
result = all_of(source.begin(), source.end(), Pred{});
34+
} else if constexpr (Alg == alg::any_) {
35+
result = any_of(source.begin(), source.end(), Pred{});
36+
} else {
37+
result = none_of(source.begin(), source.end(), Pred{});
38+
}
39+
benchmark::DoNotOptimize(result);
40+
}
41+
}
42+
43+
// Shared argument setup for every benchmark in this file:
// sizes run from 64 to 64 << 10 elements, growing by a factor of 64.
void common_args(benchmark::Benchmark* bm) {
    constexpr int smallest = 64;
    constexpr int largest  = 64 << 10;
    bm->RangeMultiplier(64)->Range(smallest, largest);
}
46+
47+
using not_ = logical_not<>;
48+
49+
BENCHMARK(meow_of<alg::all_, content::ones_then_zeros>)->Apply(common_args);
50+
BENCHMARK(meow_of<alg::any_, content::zeros_then_ones>)->Apply(common_args);
51+
BENCHMARK(meow_of<alg::any_, content::ones_then_zeros, not_>)->Apply(common_args);
52+
BENCHMARK(meow_of<alg::none_, content::zeros_then_ones>)->Apply(common_args);
53+
54+
BENCHMARK_MAIN();

stl/inc/algorithm

Lines changed: 36 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1619,18 +1619,30 @@ namespace ranges {
16191619
} // namespace ranges
16201620
#endif // _HAS_CXX20
16211621

1622+
struct _All_of_vbool_traits; // traits tag passed to _Meow_of_vbool by all_of; only declared here
1623+
struct _Any_of_vbool_traits; // traits tag passed to _Meow_of_vbool by any_of; only declared here
1624+
struct _None_of_vbool_traits; // traits tag passed to _Meow_of_vbool by none_of; only declared here
1625+
1626+
template <class _Traits, class _VbIt, class _Mapped_fn> // declaration only; the algorithms below call it for vector<bool> iterators
1627+
_NODISCARD _CONSTEXPR20 bool _Meow_of_vbool(_VbIt _First, _VbIt _Last, _Mapped_fn _Mapped_func);
1628+
16221629
_EXPORT_STD template <class _InIt, class _Pr>
16231630
_NODISCARD _CONSTEXPR20 bool all_of(_InIt _First, _InIt _Last, _Pr _Pred) { // test if all elements satisfy _Pred
16241631
_STD _Adl_verify_range(_First, _Last);
16251632
auto _UFirst = _STD _Get_unwrapped(_First);
16261633
const auto _ULast = _STD _Get_unwrapped(_Last);
1627-
for (; _UFirst != _ULast; ++_UFirst) {
1628-
if (!_Pred(*_UFirst)) {
1629-
return false;
1634+
1635+
if constexpr (_Is_vb_iterator<decltype(_UFirst)> && !is_void_v<_Map_vb_functor_t<_Pr>>) {
1636+
return _Meow_of_vbool<_All_of_vbool_traits>(_UFirst, _ULast, _Map_vb_functor_t<_Pr>{});
1637+
} else {
1638+
for (; _UFirst != _ULast; ++_UFirst) {
1639+
if (!_Pred(*_UFirst)) {
1640+
return false;
1641+
}
16301642
}
1631-
}
16321643

1633-
return true;
1644+
return true;
1645+
}
16341646
}
16351647

16361648
#if _HAS_CXX17
@@ -1686,13 +1698,18 @@ _NODISCARD _CONSTEXPR20 bool any_of(const _InIt _First, const _InIt _Last, _Pr _
16861698
_STD _Adl_verify_range(_First, _Last);
16871699
auto _UFirst = _STD _Get_unwrapped(_First);
16881700
const auto _ULast = _STD _Get_unwrapped(_Last);
1689-
for (; _UFirst != _ULast; ++_UFirst) {
1690-
if (_Pred(*_UFirst)) {
1691-
return true;
1701+
1702+
if constexpr (_Is_vb_iterator<decltype(_UFirst)> && !is_void_v<_Map_vb_functor_t<_Pr>>) {
1703+
return _Meow_of_vbool<_Any_of_vbool_traits>(_UFirst, _ULast, _Map_vb_functor_t<_Pr>{});
1704+
} else {
1705+
for (; _UFirst != _ULast; ++_UFirst) {
1706+
if (_Pred(*_UFirst)) {
1707+
return true;
1708+
}
16921709
}
1693-
}
16941710

1695-
return false;
1711+
return false;
1712+
}
16961713
}
16971714

16981715
#if _HAS_CXX17
@@ -1748,13 +1765,17 @@ _NODISCARD _CONSTEXPR20 bool none_of(const _InIt _First, const _InIt _Last, _Pr
17481765
_STD _Adl_verify_range(_First, _Last);
17491766
auto _UFirst = _STD _Get_unwrapped(_First);
17501767
const auto _ULast = _STD _Get_unwrapped(_Last);
1751-
for (; _UFirst != _ULast; ++_UFirst) {
1752-
if (_Pred(*_UFirst)) {
1753-
return false;
1768+
if constexpr (_Is_vb_iterator<decltype(_UFirst)> && !is_void_v<_Map_vb_functor_t<_Pr>>) {
1769+
return _Meow_of_vbool<_None_of_vbool_traits>(_UFirst, _ULast, _Map_vb_functor_t<_Pr>{});
1770+
} else {
1771+
for (; _UFirst != _ULast; ++_UFirst) {
1772+
if (_Pred(*_UFirst)) {
1773+
return false;
1774+
}
17541775
}
1755-
}
17561776

1757-
return true;
1777+
return true;
1778+
}
17581779
}
17591780

17601781
#if _HAS_CXX17

stl/inc/vector

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4051,6 +4051,79 @@ _CONSTEXPR20 _OutIt _Transform_vbool_aligned(
40514051
return _Dest;
40524052
}
40534053

4054+
struct _All_of_vbool_traits {
4055+
static constexpr bool _Default_result = true;
4056+
4057+
static _CONSTEXPR20 bool _Check(const _Vbase _Value) {
4058+
return _Value != ~_Vbase{0};
4059+
}
4060+
4061+
static _CONSTEXPR20 bool _Check(const _Vbase _Value, const _Vbase _Mask) {
4062+
return (_Value & _Mask) != _Mask;
4063+
}
4064+
};
4065+
4066+
struct _Any_of_vbool_traits_base {
4067+
static _CONSTEXPR20 bool _Check(const _Vbase _Value) {
4068+
return _Value != 0;
4069+
}
4070+
4071+
static _CONSTEXPR20 bool _Check(const _Vbase _Value, const _Vbase _Mask) {
4072+
return (_Value & _Mask) != 0;
4073+
}
4074+
};
4075+
4076+
struct _Any_of_vbool_traits : _Any_of_vbool_traits_base { // traits for any_of; inherits the "some bit is set" checks
4077+
static constexpr bool _Default_result = false; // returned by _Meow_of_vbool for an empty range or when no word trips _Check
4078+
};
4079+
4080+
struct _None_of_vbool_traits : _Any_of_vbool_traits_base { // traits for none_of; inherits the "some bit is set" checks
4081+
static constexpr bool _Default_result = true; // returned by _Meow_of_vbool for an empty range or when no word trips _Check
4082+
};
4083+
4084+
template <class _Traits, class _VbIt, class _Mapped_fn>
4085+
_NODISCARD _CONSTEXPR20 bool _Meow_of_vbool(const _VbIt _First, const _VbIt _Last, const _Mapped_fn _Mapped_func) {
4086+
constexpr bool _Early_result = !_Traits::_Default_result;
4087+
auto _First_ptr = _First._Myptr;
4088+
const auto _Last_ptr = _Last._Myptr;
4089+
4090+
if (_First_ptr == _Last_ptr) {
4091+
if (_First._Myoff == _Last._Myoff) { // empty, can't read the word
4092+
return _Traits::_Default_result;
4093+
}
4094+
4095+
const _Vbase _Mask = (_Vbase{1} << _Last._Myoff) - (_Vbase{1} << _First._Myoff); // handle partial single word
4096+
if (_Traits::_Check(_Mapped_func(*_First_ptr), _Mask)) {
4097+
return _Early_result;
4098+
}
4099+
return _Traits::_Default_result;
4100+
}
4101+
4102+
if (_First._Myoff != 0) { // if we have a partial first word, handle it
4103+
const _Vbase _Mask = static_cast<_Vbase>(-1) << _First._Myoff;
4104+
if (_Traits::_Check(_Mapped_func(*_First_ptr), _Mask)) {
4105+
return _Early_result;
4106+
}
4107+
4108+
++_First_ptr;
4109+
}
4110+
4111+
for (; _First_ptr != _Last_ptr; ++_First_ptr) { // handle full words
4112+
if (_Traits::_Check(_Mapped_func(*_First_ptr))) {
4113+
return _Early_result;
4114+
}
4115+
}
4116+
4117+
if (_Last._Myoff != 0) { // if we have a partial last word, handle it
4118+
const _Vbase _Mask = (_Vbase{1} << _Last._Myoff) - 1;
4119+
if (_Traits::_Check(_Mapped_func(*_First_ptr), _Mask)) {
4120+
return _Early_result;
4121+
}
4122+
}
4123+
4124+
return _Traits::_Default_result;
4125+
}
4126+
40544127
#undef _ASAN_VECTOR_MODIFY
40554128
#undef _ASAN_VECTOR_REMOVE
40564129
#undef _ASAN_VECTOR_CREATE

stl/inc/xutility

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5028,6 +5028,13 @@ struct _Map_vb_functor {
50285028
using type = void;
50295029
};
50305030

5031+
#if _HAS_CXX20 // std::identity is a C++20 addition
5032+
template <>
5033+
struct _Map_vb_functor<identity> { // opts identity into the vector<bool> paths that test for a non-void mapped type
5034+
using type = identity; // mapped to itself; callers default-construct this type and apply it to storage words
5035+
};
5036+
#endif // _HAS_CXX20
5037+
50315038
template <class _Fn>
50325039
using _Map_vb_functor_t = typename _Map_vb_functor<_Fn>::type;
50335040

tests/std/tests/GH_000625_vector_bool_optimization/test.cpp

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,121 @@ CONSTEXPR20 bool test_transform() {
233233
return true;
234234
}
235235

236+
// Exercises all_of / any_of / none_of on a sub-range of three vector<bool>s.
// Each vector holds length_before + length + length_after elements and the tested range is the
// middle `length` elements:
//   zeros - range is all false; everything outside it is true
//   ones  - range is all true; everything outside it is false
//   mix   - first length / 2 elements of the range are false and the rest true;
//           the prefix is true and the suffix is false
// Always returns true so that it can be used in a constant expression.
CONSTEXPR20 bool test_meow_of_helper(const size_t length_before, const size_t length, const size_t length_after) {
    const auto prefix         = static_cast<ptrdiff_t>(length_before);
    const auto suffix         = static_cast<ptrdiff_t>(length_after);
    const size_t total_length = length_before + length + length_after;

    vector<bool> zeros(total_length);
    vector<bool> ones(total_length);
    vector<bool> mix(total_length);

    const auto zeros_first = zeros.begin() + prefix;
    const auto zeros_last  = zeros.end() - suffix;
    const auto ones_first  = ones.begin() + prefix;
    const auto ones_last   = ones.end() - suffix;
    const auto mix_first   = mix.begin() + prefix;
    const auto mix_last    = mix.end() - suffix;

    fill(zeros.begin(), zeros_first, true);
    fill(zeros_last, zeros.end(), true);
    fill(ones_first, ones_last, true);
    fill(mix.begin(), mix_first, true);
    fill(mix_first + static_cast<ptrdiff_t>(length / 2), mix_last, true);

    // Verifies the three algorithms on one range; none_of is always expected to be the opposite of any_of.
    const auto expect = [](const auto first, const auto last, const auto pred, const bool all_expected,
                            const bool any_expected) {
        assert(all_of(first, last, pred) == all_expected);
        assert(any_of(first, last, pred) == any_expected);
        assert(none_of(first, last, pred) == !any_expected);
    };

    if (length == 0) {
        // Empty ranges: the contents of the vector are irrelevant.
#if _HAS_CXX20
        expect(zeros_first, zeros_last, identity{}, true, false);
        expect(ones_first, ones_last, identity{}, true, false);
        expect(mix_first, mix_last, identity{}, true, false);
#endif // _HAS_CXX20

        expect(zeros_first, zeros_last, logical_not<>{}, true, false);
        expect(ones_first, ones_last, logical_not<>{}, true, false);
        expect(mix_first, mix_last, logical_not<>{}, true, false);
    } else {
        assert(length != 1); // [mix_first, mix_last) needs to contain both true and false

#if _HAS_CXX20
        expect(zeros_first, zeros_last, identity{}, false, false);
        expect(ones_first, ones_last, identity{}, true, true);
        expect(mix_first, mix_last, identity{}, false, true);
#endif // _HAS_CXX20

        expect(zeros_first, zeros_last, logical_not<>{}, true, true);
        expect(ones_first, ones_last, logical_not<>{}, false, false);
        expect(mix_first, mix_last, logical_not<>{}, false, true);
    }

    return true;
}
314+
315+
// Runs the all_of / any_of / none_of checks for value-initialized iterators and for a set of
// range shapes expressed relative to blockSize. Always returns true so that it can be used in a
// constant expression.
CONSTEXPR20 bool test_meow_of() {
    { // Super empty range
        const vector<bool>::const_iterator singular{}; // value-initialized, compares equal to itself

#if _HAS_CXX20
        assert(all_of(singular, singular, identity{}) == true);
        assert(any_of(singular, singular, identity{}) == false);
        assert(none_of(singular, singular, identity{}) == true);
#endif // _HAS_CXX20

        assert(all_of(singular, singular, logical_not<>{}) == true);
        assert(any_of(singular, singular, logical_not<>{}) == false);
        assert(none_of(singular, singular, logical_not<>{}) == true);
    }

    const size_t block = blockSize;

    // Each row is { length_before, length, length_after }.
    const size_t shapes[][3] = {
        // Empty range
        {0, 0, 3},
        {3, 0, 3},
        // One block, ends within block
        {0, 10, 3},
        {3, 10, 3},
        // One block, exactly
        {0, block, 0},
        // Multiple blocks, spanning
        {3, block - 2, 3},
        {3, block + 2, 3},
        // Many blocks, exactly
        {block, 4 * block, block},
    };

    for (const auto& shape : shapes) {
        test_meow_of_helper(shape[0], shape[1], shape[2]);
    }

    return true;
}
350+
236351
CONSTEXPR20 void test_fill_helper(const size_t length) {
237352
// No offset
238353
{
@@ -1590,6 +1705,7 @@ static_assert(test_fill());
15901705
static_assert(test_find());
15911706
static_assert(test_count());
15921707
static_assert(test_transform());
1708+
static_assert(test_meow_of());
15931709

15941710
#if defined(__clang__) || defined(__EDG__) // TRANSITION, VSO-2574489
15951711
static_assert(test_copy_part_1());
@@ -1602,6 +1718,7 @@ int main() {
16021718
test_find();
16031719
test_count();
16041720
test_transform();
1721+
test_meow_of();
16051722
test_copy_part_1();
16061723
test_copy_part_2();
16071724

0 commit comments

Comments
 (0)