Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unique vectorization #5092

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ add_benchmark(search src/search.cpp)
add_benchmark(std_copy src/std_copy.cpp)
add_benchmark(sv_equal src/sv_equal.cpp)
add_benchmark(swap_ranges src/swap_ranges.cpp)
add_benchmark(unique src/unique.cpp)

add_benchmark(vector_bool_copy src/std/containers/sequences/vector.bool/copy/test.cpp)
add_benchmark(vector_bool_copy_n src/std/containers/sequences/vector.bool/copy_n/test.cpp)
Expand Down
47 changes: 47 additions & 0 deletions benchmarks/src/unique.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <algorithm>
#include <benchmark/benchmark.h>
#include <cstdint>
#include <random>
#include <type_traits>
#include <vector>

#include "skewed_allocator.hpp"

// Selects the algorithm a benchmark instantiation runs: std_fn -> std::unique, rng -> std::ranges::unique.
enum class alg_type { std_fn, rng };

// Benchmark body for one flavor of `unique` over elements of type T.
//
// Type picks the algorithm under test: alg_type::std_fn calls std::unique on the iterator pair,
// alg_type::rng calls std::ranges::unique on the vector itself.
//
// Uses std::conditional_t, so this translation unit must include <type_traits>.
template <alg_type Type, class T>
void u(benchmark::State& state) {
    // Fixed seed: every call starts from the same generator state.
    std::mt19937_64 gen(22033);

    // std::binomial_distribution only accepts short/int/long/long long (and their unsigned
    // counterparts) as its result type, so 1-byte T draws int values that are narrowed below.
    using TD = std::conditional_t<sizeof(T) == 1, int, T>;
    std::binomial_distribution<TD> dis(5);

    using container = std::vector<T, not_highly_aligned_allocator<T>>;

    // Pristine input that each iteration starts from.
    container src;
    src.resize(2552);
    std::generate(src.begin(), src.end(), [&] { return static_cast<T>(dis(gen)); });

    container v;
    v.reserve(src.size()); // reserve capacity before entering the timed loop
    for (auto _ : state) {
        // unique rewrites its input in place, so restore the original data on every iteration.
        v = src;
        benchmark::DoNotOptimize(v);
        if constexpr (Type == alg_type::std_fn) {
            benchmark::DoNotOptimize(std::unique(v.begin(), v.end()));
        } else {
            benchmark::DoNotOptimize(std::ranges::unique(v));
        }
    }
}

// std::unique over 1-, 2-, 4-, and 8-byte unsigned element types.
BENCHMARK(u<alg_type::std_fn, std::uint8_t>);
BENCHMARK(u<alg_type::std_fn, std::uint16_t>);
BENCHMARK(u<alg_type::std_fn, std::uint32_t>);
BENCHMARK(u<alg_type::std_fn, std::uint64_t>);

// std::ranges::unique over the same element types.
BENCHMARK(u<alg_type::rng, std::uint8_t>);
BENCHMARK(u<alg_type::rng, std::uint16_t>);
BENCHMARK(u<alg_type::rng, std::uint32_t>);
BENCHMARK(u<alg_type::rng, std::uint64_t>);

BENCHMARK_MAIN();
61 changes: 61 additions & 0 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ __declspec(noalias) void __stdcall __std_replace_4(
void* _First, void* _Last, uint32_t _Old_val, uint32_t _New_val) noexcept;
__declspec(noalias) void __stdcall __std_replace_8(
void* _First, void* _Last, uint64_t _Old_val, uint64_t _New_val) noexcept;

// Separately compiled unique routines, one per element size in bytes (the numeric suffix).
// _Unique_vectorized picks among them by sizeof(_Ty) and converts the returned void* back to the
// element pointer type.
void* __stdcall __std_unique_1(void* _First, void* _Last) noexcept;
void* __stdcall __std_unique_2(void* _First, void* _Last) noexcept;
void* __stdcall __std_unique_4(void* _First, void* _Last) noexcept;
void* __stdcall __std_unique_8(void* _First, void* _Last) noexcept;
} // extern "C"

_STD_BEGIN
Expand Down Expand Up @@ -207,6 +212,21 @@ __declspec(noalias) void _Replace_vectorized(
}
}

// Forwards [_First, _Last) to the __std_unique_N routine whose suffix equals sizeof(_Ty) and
// returns that routine's result converted back to _Ty*.
// Only element sizes of 1, 2, 4, and 8 bytes are handled; any other size hits the internal
// static assertion.
template <class _Ty>
_Ty* _Unique_vectorized(_Ty* const _First, _Ty* const _Last) noexcept {
    constexpr auto _Elem_size = sizeof(_Ty);

    if constexpr (_Elem_size == 8) {
        return static_cast<_Ty*>(::__std_unique_8(_First, _Last));
    } else if constexpr (_Elem_size == 4) {
        return static_cast<_Ty*>(::__std_unique_4(_First, _Last));
    } else if constexpr (_Elem_size == 2) {
        return static_cast<_Ty*>(::__std_unique_2(_First, _Last));
    } else if constexpr (_Elem_size == 1) {
        return static_cast<_Ty*>(::__std_unique_1(_First, _Last));
    } else {
        _STL_INTERNAL_STATIC_ASSERT(false); // Unexpected size
    }
}

// Can we activate the vector algorithms for find_first_of?
template <class _It1, class _It2, class _Pr>
constexpr bool _Vector_alg_in_find_first_of_is_safe = _Equal_memcmp_is_safe<_It1, _It2, _Pr>;
Expand All @@ -221,6 +241,10 @@ template <class _Iter, class _Ty1, class _Ty2>
constexpr bool _Vector_alg_in_ranges_replace_is_safe =
_Vector_alg_in_replace_is_safe<_Iter, _Ty1> // can search and replace
&& _Vector_alg_in_find_is_safe_elem<_Ty2, _Iter_value_t<_Iter>>; // replacement fits

// Can we activate the vector algorithms for unique?
// Reuses the _Equal_memcmp_is_safe check with _Iter on both sides, because unique applies the
// predicate _Pr to pairs of elements taken from one and the same range.
template <class _Iter, class _Pr>
constexpr bool _Vector_alg_in_unique_is_safe = _Equal_memcmp_is_safe<_Iter, _Iter, _Pr>;
_STD_END
#endif // _USE_STD_VECTOR_ALGORITHMS

Expand Down Expand Up @@ -4853,6 +4877,25 @@ _NODISCARD_UNIQUE_ALG _CONSTEXPR20 _FwdIt unique(_FwdIt _First, _FwdIt _Last, _P
_STD _Adl_verify_range(_First, _Last);
auto _UFirst = _STD _Get_unwrapped(_First);
const auto _ULast = _STD _Get_unwrapped(_Last);

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Vector_alg_in_unique_is_safe<decltype(_UFirst), _Pr>) {
if (!_STD _Is_constant_evaluated()) {
const auto _First_ptr = _STD _To_address(_UFirst);
const auto _Result = _STD _Unique_vectorized(_First_ptr, _STD _To_address(_ULast));

if constexpr (is_pointer_v<decltype(_UFirst)>) {
_UFirst = _Result;
} else {
_UFirst += _Result - _First_ptr;
}

_STD _Seek_wrapped(_Last, _UFirst);
return _Last;
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

if (_UFirst != _ULast) {
for (auto _UFirstb = _UFirst; ++_UFirst != _ULast; _UFirstb = _UFirst) {
if (_Pred(*_UFirstb, *_UFirst)) { // copy down
Expand Down Expand Up @@ -4929,6 +4972,24 @@ namespace ranges {
_STL_INTERNAL_STATIC_ASSERT(sentinel_for<_Se, _It>);
_STL_INTERNAL_STATIC_ASSERT(indirect_equivalence_relation<_Pr, projected<_It, _Pj>>);

#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (is_same_v<_Pj, identity> && sized_sentinel_for<_Se, _It>
&& _Vector_alg_in_unique_is_safe<_It, _Pr>) {
if (!_STD is_constant_evaluated()) {
const auto _Size = _Last - _First;
const auto _First_ptr = _STD to_address(_First);
const auto _Last_ptr = _First_ptr + static_cast<size_t>(_Size);
const auto _Result = _STD _Unique_vectorized(_First_ptr, _Last_ptr);

if constexpr (is_pointer_v<_It>) {
return {_Result, _Last_ptr};
} else {
return {_First + (_Result - _First_ptr), _First + _Size};
}
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS

auto _Current = _First;
if (_First == _Last) {
return {_STD move(_Current), _STD move(_First)};
Expand Down
Loading