@@ -56,36 +56,67 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro
56
56
57
57
#if _LIBCPP_VECTORIZE_ALGORITHMS
58
58
59
- template <class _Tp ,
60
- __enable_if_t <is_integral<_Tp>::value, int > = 0 >
61
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 8 >
62
- __reverse_vector (__simd_vector<_Tp, 8 >& __cmp_res) {
63
- #if defined(_LIBCPP_BIG_ENDIAN)
64
- static_assert (__native_vector_size<_Tp> == 8 , " The __native_vector_size has to be 8" );
59
+ template <class _Value_type >
60
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 8 >
61
+ __reverse_vector (__simd_vector<_Value_type, 8 >& __cmp_res) {
62
+ # if defined(_LIBCPP_BIG_ENDIAN)
63
+ static_assert (__native_vector_size<_Value_type> == 8 , " The __native_vector_size has to be 8" );
65
64
__cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 );
66
- #endif
65
+ # endif
67
66
return __cmp_res;
68
67
}
69
68
70
- template <class _Tp ,
71
- __enable_if_t <is_integral<_Tp>::value, int > = 0 >
72
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 16 >
73
- __reverse_vector (__simd_vector<_Tp, 16 > __cmp_res) {
74
- #if defined(_LIBCPP_BIG_ENDIAN)
75
- static_assert (__native_vector_size<_Tp> == 16 , " The __native_vector_size has to be 16" );
69
+ template <class _Value_type >
70
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 16 >
71
+ __reverse_vector (__simd_vector<_Value_type, 16 > __cmp_res) {
72
+ # if defined(_LIBCPP_BIG_ENDIAN)
73
+ static_assert (__native_vector_size<_Value_type> == 16 , " The __native_vector_size has to be 16" );
76
74
__cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 15 , 14 , 13 , 12 , 11 , 10 , 9 , 8 , 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 );
77
- #endif
75
+ # endif
78
76
return __cmp_res;
79
77
}
80
78
81
- template <class _Tp ,
82
- __enable_if_t <is_integral<_Tp>::value, int > = 0 >
83
- _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Tp, 32 >
84
- __reverse_vector (__simd_vector<_Tp, 32 > __cmp_res) {
85
- #if defined(_LIBCPP_BIG_ENDIAN)
86
- static_assert (__native_vector_size<_Tp> == 32 , " The __native_vector_size has to be 32" );
87
- __cmp_res = __builtin_shufflevector (__cmp_res, __cmp_res, 31 , 30 , 29 , 28 , 27 , 26 , 25 , 24 , 23 , 22 , 21 , 20 , 19 , 18 , 17 , 16 , 15 , 14 , 13 , 12 , 11 , 10 , 9 , 8 , 7 , 6 , 5 , 4 , 3 , 2 , 1 , 0 );
88
- #endif
79
+ template <class _Value_type >
80
+ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __simd_vector<_Value_type, 32 >
81
+ __reverse_vector (__simd_vector<_Value_type, 32 > __cmp_res) {
82
+ # if defined(_LIBCPP_BIG_ENDIAN)
83
+ static_assert (__native_vector_size<_Value_type> == 32 , " The __native_vector_size has to be 32" );
84
+ __cmp_res = __builtin_shufflevector (
85
+ __cmp_res,
86
+ __cmp_res,
87
+ 31 ,
88
+ 30 ,
89
+ 29 ,
90
+ 28 ,
91
+ 27 ,
92
+ 26 ,
93
+ 25 ,
94
+ 24 ,
95
+ 23 ,
96
+ 22 ,
97
+ 21 ,
98
+ 20 ,
99
+ 19 ,
100
+ 18 ,
101
+ 17 ,
102
+ 16 ,
103
+ 15 ,
104
+ 14 ,
105
+ 13 ,
106
+ 12 ,
107
+ 11 ,
108
+ 10 ,
109
+ 9 ,
110
+ 8 ,
111
+ 7 ,
112
+ 6 ,
113
+ 5 ,
114
+ 4 ,
115
+ 3 ,
116
+ 2 ,
117
+ 1 ,
118
+ 0 );
119
+ # endif
89
120
return __cmp_res;
90
121
}
91
122
@@ -111,7 +142,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
111
142
112
143
for (size_t __i = 0 ; __i != __unroll_count; ++__i) {
113
144
auto __cmp_res = __lhs[__i] == __rhs[__i];
114
- __cmp_res = __reverse_vector<_Tp >(__cmp_res);
145
+ __cmp_res = __reverse_vector<__value_type >(__cmp_res);
115
146
if (!std::__all_of (__cmp_res)) {
116
147
auto __offset = __i * __vec_size + std::__find_first_not_set (__cmp_res);
117
148
return {__first1 + __offset, __first2 + __offset};
@@ -125,7 +156,7 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
125
156
// check the remaining 0-3 vectors
126
157
while (static_cast <size_t >(__last1 - __first1) >= __vec_size) {
127
158
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
128
- __cmp_res = __reverse_vector<_Tp >(__cmp_res);
159
+ __cmp_res = __reverse_vector<__value_type >(__cmp_res);
129
160
if (!std::__all_of (__cmp_res)) {
130
161
auto __offset = std::__find_first_not_set (__cmp_res);
131
162
return {__first1 + __offset, __first2 + __offset};
@@ -143,8 +174,8 @@ __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) {
143
174
__first1 = __last1 - __vec_size;
144
175
__first2 = __last2 - __vec_size;
145
176
auto __cmp_res = std::__load_vector<__vec>(__first1) == std::__load_vector<__vec>(__first2);
146
- __cmp_res = __reverse_vector<_Tp >(__cmp_res);
147
- auto __offset = std::__find_first_not_set (__cmp_res);
177
+ __cmp_res = __reverse_vector<__value_type >(__cmp_res);
178
+ auto __offset = std::__find_first_not_set (__cmp_res);
148
179
return {__first1 + __offset, __first2 + __offset};
149
180
} // else loop over the elements individually
150
181
}
0 commit comments