@@ -56,31 +56,71 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
56
56
57
57
#if _LIBCPP_VECTORIZE_ALGORITHMS
58
58
59
- template <class _Value_type >
60
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 8 >
61
- __reverse_vector (__simd_vector<_Value_type, 8 >& __cmp_res) {
59
+ template <class _ValueType >
60
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long , 2 >
61
+ __reverse_vector (__simd_vector<long , 2 >& __cmp_res) {
62
62
# if defined(_LIBCPP_BIG_ENDIAN)
63
- static_assert (__native_vector_size<_Value_type> == 8 , " The __native_vector_size has to be 8" );
63
+ static_assert (__native_vector_size<long > == 2 , " The __native_vector_size has to be 2" );
64
+ __cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 1 , 0 );
65
+ # endif
66
+ return __cmp_res;
67
+ }
68
+
69
+ template <class _ValueType >
70
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<long , 4 >
71
+ __reverse_vector (__simd_vector<long , 4 >& __cmp_res) {
72
+ # if defined(_LIBCPP_BIG_ENDIAN)
73
+ static_assert (__native_vector_size<long > == 4 , " The __native_vector_size has to be 4" );
74
+ __cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 3 , 2 , 1 , 0 );
75
+ # endif
76
+ return __cmp_res;
77
+ }
78
+
79
+ template <class _ValueType >
80
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<int , 8 >
81
+ __reverse_vector (__simd_vector<int , 8 >& __cmp_res) {
82
+ # if defined(_LIBCPP_BIG_ENDIAN)
83
+ static_assert (__native_vector_size<int > == 8 , " The __native_vector_size has to be 8" );
84
+ __cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 );
85
+ # endif
86
+ return __cmp_res;
87
+ }
88
+
89
+ template <class _ValueType >
90
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<int , 4 >
91
+ __reverse_vector (__simd_vector<int , 4 >& __cmp_res) {
92
+ # if defined(_LIBCPP_BIG_ENDIAN)
93
+ static_assert (__native_vector_size<int > == 4 , " The __native_vector_size has to be 4" );
94
+ __cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 3 , 2 , 1 , 0 );
95
+ # endif
96
+ return __cmp_res;
97
+ }
98
+
99
+ template <class _ValueType >
100
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType, 8 >
101
+ __reverse_vector (__simd_vector<_ValueType, 8 >& __cmp_res) {
102
+ # if defined(_LIBCPP_BIG_ENDIAN)
103
+ static_assert (__native_vector_size<_ValueType> == 8 , " The __native_vector_size has to be 8" );
64
104
__cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 );
65
105
# endif
66
106
return __cmp_res;
67
107
}
68
108
69
- template <class _Value_type >
70
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type , 16 >
71
- __reverse_vector (__simd_vector<_Value_type , 16 > __cmp_res) {
109
+ template <class _ValueType >
110
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType , 16 >
111
+ __reverse_vector (__simd_vector<_ValueType , 16 > __cmp_res) {
72
112
# if defined(_LIBCPP_BIG_ENDIAN)
73
- static_assert (__native_vector_size<_Value_type > == 16 , " The __native_vector_size has to be 16" );
113
+ static_assert (__native_vector_size<_ValueType > == 16 , " The __native_vector_size has to be 16" );
74
114
__cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 15 , 14 , 13 , 12 , 11 , 10 , 9 , 8 , 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 );
75
115
# endif
76
116
return __cmp_res;
77
117
}
78
118
79
- template <class _Value_type >
80
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type , 32 >
81
- __reverse_vector (__simd_vector<_Value_type , 32 > __cmp_res) {
119
+ template <class _ValueType >
120
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_ValueType , 32 >
121
+ __reverse_vector (__simd_vector<_ValueType , 32 > __cmp_res) {
82
122
# if defined(_LIBCPP_BIG_ENDIAN)
83
- static_assert (__native_vector_size<_Value_type > == 32 , " The __native_vector_size has to be 32" );
123
+ static_assert (__native_vector_size<_ValueType > == 32 , " The __native_vector_size has to be 32" );
84
124
__cmp_res = __builtin_shufflevector (
85
125
__cmp_res,
86
126
__cmp_res,
@@ -142,7 +182,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
142
182
143
183
for (size_t __i = 0 ; __i != __unroll_count; ++__i) {
144
184
auto __cmp_res = __lhs[__i] == __rhs[__i];
145
- __cmp_res = __reverse_vector<__value_type>(__cmp_res);
185
+ __cmp_res = std:: __reverse_vector<__value_type>(__cmp_res);
146
186
if (!std::__all_of (__cmp_res)) {
147
187
auto __offset = __i * __vec_size + std::__find_first_not_set (__cmp_res);
148
188
return {__first1 + __offset, __first2 + __offset};
@@ -156,7 +196,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
156
196
// check the remaining 0-3 vectors
157
197
while (static_cast <size_t >(__last1 - __first1) >= __vec_size) {
158
198
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
159
- __cmp_res = __reverse_vector<__value_type>(__cmp_res);
199
+ __cmp_res = std:: __reverse_vector<__value_type>(__cmp_res);
160
200
if (!std::__all_of (__cmp_res)) {
161
201
auto __offset = std::__find_first_not_set (__cmp_res);
162
202
return {__first1 + __offset, __first2 + __offset};
@@ -174,7 +214,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
174
214
__first1 = __last1 - __vec_size;
175
215
__first2 = __last2 - __vec_size;
176
216
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
177
- __cmp_res = __reverse_vector<__value_type>(__cmp_res);
217
+ __cmp_res = std:: __reverse_vector<__value_type>(__cmp_res);
178
218
auto __offset = std::__find_first_not_set (__cmp_res);
179
219
return {__first1 + __offset, __first2 + __offset};
180
220
} // else loop over the elements individually
0 commit comments