@@ -56,6 +56,39 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
56
56
57
57
#if _LIBCPP_VECTORIZE_ALGORITHMS
58
58
59
+ template <class _Tp ,
60
+ __enable_if_t <is_integral<_Tp>::value, int > = 0 >
61
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 8 >
62
+ __reverse_vector (__simd_vector<_Tp, 8 >& __cmp_res) {
63
+ #if defined(_LIBCPP_BIG_ENDIAN)
64
+ static_assert (__native_vector_size<_Tp> == 8 , " The __native_vector_size has to be 8" );
65
+ __cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 );
66
+ #endif
67
+ return __cmp_res;
68
+ }
69
+
70
+ template <class _Tp ,
71
+ __enable_if_t <is_integral<_Tp>::value, int > = 0 >
72
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 16 >
73
+ __reverse_vector (__simd_vector<_Tp, 16 > __cmp_res) {
74
+ #if defined(_LIBCPP_BIG_ENDIAN)
75
+ static_assert (__native_vector_size<_Tp> == 16 , " The __native_vector_size has to be 16" );
76
+ __cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 15 , 14 , 13 , 12 , 11 , 10 , 9 , 8 , 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 );
77
+ #endif
78
+ return __cmp_res;
79
+ }
80
+
81
+ template <class _Tp ,
82
+ __enable_if_t <is_integral<_Tp>::value, int > = 0 >
83
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 32 >
84
+ __reverse_vector (__simd_vector<_Tp, 32 > __cmp_res) {
85
+ #if defined(_LIBCPP_BIG_ENDIAN)
86
+ static_assert (__native_vector_size<_Tp> == 32 , " The __native_vector_size has to be 32" );
87
+ __cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 31 , 30 , 29 , 28 , 27 , 26 , 25 , 24 , 23 , 22 , 21 , 20 , 19 , 18 , 17 , 16 , 15 , 14 , 13 , 12 , 11 , 10 , 9 , 8 , 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 );
88
+ #endif
89
+ return __cmp_res;
90
+ }
91
+
59
92
template <class _Iter >
60
93
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter>
61
94
__mismatch_vectorized (_Iter __first1, _Iter __last1, _Iter __first2) {
@@ -77,7 +110,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
77
110
}
78
111
79
112
for (size_t __i = 0 ; __i != __unroll_count; ++__i) {
80
- if (auto __cmp_res = __lhs[__i] == __rhs[__i]; !std::__all_of (__cmp_res)) {
113
+ auto __cmp_res = __lhs[__i] == __rhs[__i];
114
+ __cmp_res = __reverse_vector<_Tp>(__cmp_res);
115
+ if (!std::__all_of (__cmp_res)) {
81
116
auto __offset = __i * __vec_size + std::__find_first_not_set (__cmp_res);
82
117
return {__first1 + __offset, __first2 + __offset};
83
118
}
@@ -89,8 +124,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
89
124
90
125
// check the remaining 0-3 vectors
91
126
while (static_cast <size_t >(__last1 - __first1) >= __vec_size) {
92
- if (auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
93
- !std::__all_of (__cmp_res)) {
127
+ auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
128
+ __cmp_res = __reverse_vector<_Tp>(__cmp_res);
129
+ if (!std::__all_of (__cmp_res)) {
94
130
auto __offset = std::__find_first_not_set (__cmp_res);
95
131
return {__first1 + __offset, __first2 + __offset};
96
132
}
@@ -106,8 +142,9 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
106
142
if (static_cast <size_t >(__first1 - __orig_first1) >= __vec_size) {
107
143
__first1 = __last1 - __vec_size;
108
144
__first2 = __last2 - __vec_size;
109
- auto __offset =
110
- std::__find_first_not_set (std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2));
145
+ auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
146
+ __cmp_res = __reverse_vector<_Tp>(__cmp_res);
147
+ auto __offset = std::__find_first_not_set (__cmp_res);
111
148
return {__first1 + __offset, __first2 + __offset};
112
149
} // else loop over the elements individually
113
150
}
0 commit comments