-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Closed
Labels
Description
For PSADBW nodes, X86TargetLowering::computeKnownBitsForTargetNode currently just marks the upper 48 bits as zero and makes no attempt to determine which of the remaining bits are active.
// Two-element vector of unsigned bytes: the narrowed result type of the reproducer.
typedef uint8_t __v2qu __attribute__((__vector_size__(2)));
// Reproducer for the missed optimization: adds up the low bit of every byte in
// each 64-bit half of x and returns the two sums narrowed to bytes.
auto sum_of_bits(__m128i x) {
// Keep only bit 0 of each byte, so every byte is either 0 or 1.
x = _mm_and_si128(x, _mm_set1_epi8(1));
// Sum of absolute differences against zero, i.e. a byte sum per 64-bit half;
// with 0/1 inputs each of the two sums lies in the range 0-8.
x = _mm_sad_epu8(x, _mm_setzero_si128());
// Narrow each 64-bit element to a single byte.
return __builtin_convertvector(x, __v2qu);
}
https://godbolt.org/z/74bYbTMh8
sum_of_bits(long long vector[2]): # @sum_of_bits(long long vector[2])
pand .LCPI0_0(%rip), %xmm0 # v16i8 values: 0-1
pxor %xmm1, %xmm1 # zero %xmm1 (the second psadbw operand)
psadbw %xmm0, %xmm1 # v2i64 values: 0-8
pand .LCPI0_1(%rip), %xmm1 # unnecessary given the 0-8 range noted above
packuswb %xmm1, %xmm1
packuswb %xmm1, %xmm1
packuswb %xmm1, %xmm1
movd %xmm1, %eax
retq