Skip to content

Commit b4ce872

Browse files
committed
Add more __reverse_vector overloads
1 parent 05395e0 commit b4ce872

File tree

1 file changed

+55
-15
lines changed

1 file changed

+55
-15
lines changed

libcxx/include/__algorithm/mismatch.h

Lines changed: 55 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -56,31 +56,71 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
5656

5757
#if _LIBCPP_VECTORIZE_ALGORITHMS
5858

59-
template <class _Value_type>
60-
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 8>
61-
__reverse_vector(__simd_vector<_Value_type, 8>& __cmp_res) {
59+
// Overload of __reverse_vector for a 2-lane vector of `long`.
//
// When _LIBCPP_BIG_ENDIAN is defined, the two lanes of `__cmp_res` are swapped
// (shuffle indices 1, 0) and the result is written back into `__cmp_res`, so
// the caller's vector is modified through the reference as well as returned.
// Otherwise `__cmp_res` is returned unchanged.
//
// `_ValueType` does not appear in the parameter type, so it has to be supplied
// explicitly; the call sites visible in this diff use
// `std::__reverse_vector<__value_type>(__cmp_res)`.
//
// NOTE(review): presumably the swap exists so that __find_first_not_set, which
// the callers run on the result, sees lanes in memory order - confirm.
template <class _ValueType>
60+
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long, 2>
61+
__reverse_vector(__simd_vector<long, 2>& __cmp_res) {
6262
# if defined(_LIBCPP_BIG_ENDIAN)
63-
// NOTE(review): the next statement sits under a "63-" marker, i.e. it looks
// like the removed (pre-change) side of this diff view; the "63+" statement
// after it appears to be its replacement - confirm against the real header.
static_assert(__native_vector_size<_Value_type> == 8, "The __native_vector_size has to be 8");
63+
// NOTE(review): this condition does not depend on `_ValueType`, so it may be
// checked when the template is parsed rather than when it is instantiated.
// The sibling `__simd_vector<long, 4>` overload asserts `== 4` for the same
// `__native_vector_size<long>`; both cannot hold at once - confirm this
// compiles on a big-endian target.
static_assert(__native_vector_size<long> == 2, "The __native_vector_size has to be 2");
64+
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 1, 0);
65+
# endif
66+
return __cmp_res;
67+
}
68+
69+
// Overload of __reverse_vector for a 4-lane vector of `long`.
//
// When _LIBCPP_BIG_ENDIAN is defined, the lane order of `__cmp_res` is reversed
// (shuffle indices 3, 2, 1, 0) and written back into `__cmp_res`, so the
// caller's vector is modified through the reference as well as returned.
// Otherwise `__cmp_res` is returned unchanged.
//
// `_ValueType` is not used in the parameter type and must be given explicitly
// by the caller.
template <class _ValueType>
70+
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long, 4>
71+
__reverse_vector(__simd_vector<long, 4>& __cmp_res) {
72+
# if defined(_LIBCPP_BIG_ENDIAN)
73+
// NOTE(review): non-dependent condition - it may be evaluated at template
// definition time. The `__simd_vector<long, 2>` overload asserts `== 2` for
// the same `__native_vector_size<long>`, so one of the two assertions looks
// bound to fail on big-endian builds - confirm.
static_assert(__native_vector_size<long> == 4, "The __native_vector_size has to be 4");
74+
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 3, 2, 1, 0);
75+
# endif
76+
return __cmp_res;
77+
}
78+
79+
// Overload of __reverse_vector for an 8-lane vector of `int`.
//
// When _LIBCPP_BIG_ENDIAN is defined, the lane order of `__cmp_res` is reversed
// (shuffle indices 7..0) and written back into `__cmp_res`, so the caller's
// vector is modified through the reference as well as returned. Otherwise
// `__cmp_res` is returned unchanged.
//
// `_ValueType` is not used in the parameter type and must be given explicitly
// by the caller.
//
// NOTE(review): a generic `__simd_vector<_ValueType, 8>&` overload also exists
// in this file; with `_ValueType = int` both signatures name the same
// parameter type - confirm overload resolution picks the intended one.
template <class _ValueType>
80+
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<int, 8>
81+
__reverse_vector(__simd_vector<int, 8>& __cmp_res) {
82+
# if defined(_LIBCPP_BIG_ENDIAN)
83+
// NOTE(review): non-dependent condition - it may be evaluated at template
// definition time. The `__simd_vector<int, 4>` overload asserts `== 4` for the
// same `__native_vector_size<int>`, so both cannot hold at once - confirm.
static_assert(__native_vector_size<int> == 8, "The __native_vector_size has to be 8");
84+
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
85+
# endif
86+
return __cmp_res;
87+
}
88+
89+
// Overload of __reverse_vector for a 4-lane vector of `int`.
//
// When _LIBCPP_BIG_ENDIAN is defined, the lane order of `__cmp_res` is reversed
// (shuffle indices 3, 2, 1, 0) and written back into `__cmp_res`, so the
// caller's vector is modified through the reference as well as returned.
// Otherwise `__cmp_res` is returned unchanged.
//
// `_ValueType` is not used in the parameter type and must be given explicitly
// by the caller.
template <class _ValueType>
90+
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<int, 4>
91+
__reverse_vector(__simd_vector<int, 4>& __cmp_res) {
92+
# if defined(_LIBCPP_BIG_ENDIAN)
93+
// NOTE(review): non-dependent condition - it may be evaluated at template
// definition time. The `__simd_vector<int, 8>` overload asserts `== 8` for the
// same `__native_vector_size<int>`, so both cannot hold at once - confirm.
static_assert(__native_vector_size<int> == 4, "The __native_vector_size has to be 4");
94+
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 3, 2, 1, 0);
95+
# endif
96+
return __cmp_res;
97+
}
98+
99+
// Generic overload of __reverse_vector for an 8-lane vector of `_ValueType`.
//
// When _LIBCPP_BIG_ENDIAN is defined, the lane order of `__cmp_res` is reversed
// (shuffle indices 7..0) and written back into `__cmp_res`, so the caller's
// vector is modified through the reference as well as returned. Otherwise
// `__cmp_res` is returned unchanged.
//
// NOTE(review): this overload takes its argument by non-const reference while
// the 16- and 32-lane overloads in this file take it by value; callers assign
// the return value back to the same variable, so both forms appear to work,
// but the inconsistency may be unintentional - confirm.
template <class _ValueType>
100+
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 8>
101+
__reverse_vector(__simd_vector<_ValueType, 8>& __cmp_res) {
102+
# if defined(_LIBCPP_BIG_ENDIAN)
103+
// The condition depends on `_ValueType`, so it is checked per instantiation:
// the overload may only be used for element types whose native vector holds
// exactly 8 lanes.
static_assert(__native_vector_size<_ValueType> == 8, "The __native_vector_size has to be 8");
64104
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 7, 6, 5, 4, 3, 2, 1, 0);
65105
# endif
66106
return __cmp_res;
67107
}
68108

69-
template <class _Value_type>
70-
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 16>
71-
__reverse_vector(__simd_vector<_Value_type, 16> __cmp_res) {
109+
// Generic overload of __reverse_vector for a 16-lane vector of `_ValueType`.
//
// The argument is taken by value, so the caller's vector is never modified.
// When _LIBCPP_BIG_ENDIAN is defined, the local copy has its lane order
// reversed (shuffle indices 15..0) before being returned; otherwise the copy
// is returned unchanged.
template <class _ValueType>
110+
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 16>
111+
__reverse_vector(__simd_vector<_ValueType, 16> __cmp_res) {
72112
# if defined(_LIBCPP_BIG_ENDIAN)
73-
// NOTE(review): the next statement sits under a "73-" marker, i.e. it looks
// like the removed (pre-change) spelling using `_Value_type`; the "113+"
// statement after it appears to be its replacement - confirm against the real
// header.
static_assert(__native_vector_size<_Value_type> == 16, "The __native_vector_size has to be 16");
113+
// Checked per instantiation: only element types whose native vector holds
// exactly 16 lanes may use this overload.
static_assert(__native_vector_size<_ValueType> == 16, "The __native_vector_size has to be 16");
74114
__cmp_res = __builtin_shufflevector(__cmp_res, __cmp_res, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
75115
# endif
76116
return __cmp_res;
77117
}
78118

79-
template <class _Value_type>
80-
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 32>
81-
__reverse_vector(__simd_vector<_Value_type, 32> __cmp_res) {
119+
template <class _ValueType>
120+
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 32>
121+
__reverse_vector(__simd_vector<_ValueType, 32> __cmp_res) {
82122
# if defined(_LIBCPP_BIG_ENDIAN)
83-
static_assert(__native_vector_size<_Value_type> == 32, "The __native_vector_size has to be 32");
123+
static_assert(__native_vector_size<_ValueType> == 32, "The __native_vector_size has to be 32");
84124
__cmp_res = __builtin_shufflevector(
85125
__cmp_res,
86126
__cmp_res,
@@ -142,7 +182,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
142182

143183
for (size_t __i = 0; __i != __unroll_count; ++__i) {
144184
auto __cmp_res = __lhs[__i] == __rhs[__i];
145-
__cmp_res = __reverse_vector<__value_type>(__cmp_res);
185+
__cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
146186
if (!std::__all_of(__cmp_res)) {
147187
auto __offset = __i * __vec_size + std::__find_first_not_set(__cmp_res);
148188
return {__first1 + __offset, __first2 + __offset};
@@ -156,7 +196,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
156196
// check the remaining 0-3 vectors
157197
while (static_cast<size_t>(__last1 - __first1) >= __vec_size) {
158198
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
159-
__cmp_res = __reverse_vector<__value_type>(__cmp_res);
199+
__cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
160200
if (!std::__all_of(__cmp_res)) {
161201
auto __offset = std::__find_first_not_set(__cmp_res);
162202
return {__first1 + __offset, __first2 + __offset};
@@ -174,7 +214,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
174214
__first1 = __last1 - __vec_size;
175215
__first2 = __last2 - __vec_size;
176216
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
177-
__cmp_res = __reverse_vector<__value_type>(__cmp_res);
217+
__cmp_res = std::__reverse_vector<__value_type>(__cmp_res);
178218
auto __offset = std::__find_first_not_set(__cmp_res);
179219
return {__first1 + __offset, __first2 + __offset};
180220
} // else loop over the elements individually

0 commit comments

Comments
 (0)