Skip to content

Commit 5eba555

Browse files
committed
Make __find_first_set endianness aware
1 parent 9bf257d commit 5eba555

File tree

2 files changed

+16
-43
lines changed

2 files changed

+16
-43
lines changed

libcxx/include/__algorithm/mismatch.h

Lines changed: 6 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
#include <__type_traits/is_constant_evaluated.h>
2323
#include <__type_traits/is_equality_comparable.h>
2424
#include <__type_traits/is_integral.h>
25-
#include <__utility/integer_sequence.h>
2625
#include <__utility/move.h>
2726
#include <__utility/pair.h>
2827
#include <__utility/unreachable.h>
@@ -56,32 +55,7 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
5655
}
5756

5857
#if _LIBCPP_VECTORIZE_ALGORITHMS
59-
template <class _ValueType, size_t _Np>
60-
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<long long, _Np>
61-
__reverse_vector(__simd_vector<long long, _Np> __cmp_res) {
62-
return [&]<size_t... _Indices>(index_sequence<_Indices...>) {
63-
return __builtin_shufflevector(__cmp_res, __cmp_res, (_Np - _Indices - 1)...);
64-
}(make_index_sequence<_Np>{});
65-
}
66-
template <class _ValueType, size_t _Np>
67-
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<long, _Np> __reverse_vector(__simd_vector<long, _Np> __cmp_res) {
68-
return [&]<size_t... _Indices>(index_sequence<_Indices...>) {
69-
return __builtin_shufflevector(__cmp_res, __cmp_res, (_Np - _Indices - 1)...);
70-
}(make_index_sequence<_Np>{});
71-
}
72-
template <class _ValueType, size_t _Np>
73-
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<int, _Np> __reverse_vector(__simd_vector<int, _Np> __cmp_res) {
74-
return [&]<size_t... _Indices>(index_sequence<_Indices...>) {
75-
return __builtin_shufflevector(__cmp_res, __cmp_res, (_Np - _Indices - 1)...);
76-
}(make_index_sequence<_Np>{});
77-
}
78-
template <class _ValueType, size_t _Np>
79-
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI __simd_vector<_ValueType, _Np>
80-
__reverse_vector(__simd_vector<_ValueType, _Np> __cmp_res) {
81-
return [&]<size_t... _Indices>(index_sequence<_Indices...>) {
82-
return __builtin_shufflevector(__cmp_res, __cmp_res, (_Np - _Indices - 1)...);
83-
}(make_index_sequence<_Np>{});
84-
}
58+
8559
template <class _Iter>
8660
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter>
8761
__mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
@@ -103,11 +77,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
10377
}
10478

10579
for (size_t __i = 0; __i != __unroll_count; ++__i) {
106-
auto __cmp_res = __lhs[__i] == __rhs[__i];
107-
# if defined(_LIBCPP_BIG_ENDIAN)
108-
__cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
109-
# endif
110-
if (!std::__all_of(__cmp_res)) {
80+
if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of(__cmp_res)) {
11181
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
11282
return {__first1 + __offset, __first2 + __offset};
11383
}
@@ -119,11 +89,8 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
11989

12090
// check the remaining 0-3 vectors
12191
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
122-
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
123-
# if defined(_LIBCPP_BIG_ENDIAN)
124-
__cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
125-
# endif
126-
if (!std::__all_of(__cmp_res)) {
92+
if (auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
93+
!std::__all_of(__cmp_res)) {
12794
auto __offset = std::__find_first_not_set(__cmp_res);
12895
return {__first1 + __offset, __first2 + __offset};
12996
}
@@ -139,11 +106,8 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
139106
if (static_cast<size_t>(__first1 - __orig_first1) >= __vec_size) {
140107
__first1 = __last1 - __vec_size;
141108
__first2 = __last2 - __vec_size;
142-
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
143-
# if defined(_LIBCPP_BIG_ENDIAN)
144-
__cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
145-
# endif
146-
auto __offset = std::__find_first_not_set(__cmp_res);
109+
auto __offset =
110+
std::__find_first_not_set(std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
147111
return {__first1 + __offset, __first2 + __offset};
148112
} // else loop over the elements individually
149113
}

libcxx/include/__algorithm/simd_utils.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,11 @@
1111

1212
#include <__algorithm/min.h>
1313
#include <__bit/bit_cast.h>
14-
#include <__bit/countr.h>
14+
#if defined(_LIBCPP_BIG_ENDIAN)
15+
# include <__bit/countl.h>
16+
#else
17+
# include <__bit/countr.h>
18+
#endif
1519
#include <__config>
1620
#include <__type_traits/is_arithmetic.h>
1721
#include <__type_traits/is_same.h>
@@ -126,8 +130,13 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_T
126130

127131
// This has MSan disabled due to https://github.com/llvm/llvm-project/issues/85876
128132
auto __impl = [&]<class _MaskT>(_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept {
133+
# if defined(_LIBCPP_BIG_ENDIAN)
134+
return std::min<size_t>(
135+
_Np, std::__countl_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
136+
# else
129137
return std::min<size_t>(
130138
_Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))));
139+
# endif
131140
};
132141

133142
if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) {

0 commit comments

Comments
 (0)