Skip to content

[X86] bcmp with zero not vectorized #144861

Open
@nikic

Description

@nikic

A bcmp against an all-zero buffer is lowered to a chain of scalar 64-bit `or` instructions, while a bcmp against an all-ones buffer is vectorized and uses `vptest`.

https://llvm.godbolt.org/z/c15xY8nKv

; 64-byte constant with every byte zero; the comparison operand used by @test_zeroes.
@zeroes = private unnamed_addr constant [64 x i8] zeroinitializer, align 1
; 64-byte constant with every byte 0xFF; the comparison operand used by @test_ones.
@ones = private unnamed_addr constant [64 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF", align 1

; External bcmp: takes two pointers and an i64 byte count, returns i32.
declare i32 @bcmp(ptr, ptr, i64)

; Reproducer (all-zero case): returns true when bcmp over the 64 bytes at %x
; against @zeroes yields 0.
define zeroext i1 @test_zeroes(ptr %x) {
  ; Compare 64 bytes at %x with the all-zero constant.
  %bcmp = tail call i32 @bcmp(ptr %x, ptr @zeroes, i64 64)
  ; Turn "bcmp result is 0" into the i1 return value.
  %icmp = icmp eq i32 %bcmp, 0
  ret i1 %icmp
}

; Reproducer (all-ones case): returns true when bcmp over the 64 bytes at %x
; against @ones yields 0.
define zeroext i1 @test_ones(ptr %x) {
  ; Compare 64 bytes at %x with the all-0xFF constant.
  %bcmp = tail call i32 @bcmp(ptr %x, ptr @ones, i64 64)
  ; Turn "bcmp result is 0" into the i1 return value.
  %icmp = icmp eq i32 %bcmp, 0
  ret i1 %icmp
}
# Assembly emitted for @test_zeroes (Intel syntax): scalar path, no vector instructions.
# In:    rdi = address of the 64 bytes to test, read as eight qwords q0..q7 at +0..+56
# Out:   al  = 1 if the OR of all eight qwords is zero, otherwise 0
# Uses:  rax, rcx, rdx, rsi as scratch; flags are overwritten
test_zeroes:                            # @test_zeroes
        mov     rax, qword ptr [rdi + 24]       # rax = q3
        mov     rcx, qword ptr [rdi]            # rcx = q0
        mov     rdx, qword ptr [rdi + 8]        # rdx = q1
        mov     rsi, qword ptr [rdi + 16]       # rsi = q2
        or      rsi, qword ptr [rdi + 48]       # rsi = q2 | q6
        or      rcx, qword ptr [rdi + 32]       # rcx = q0 | q4
        or      rcx, rsi                        # rcx = q0 | q2 | q4 | q6
        or      rax, qword ptr [rdi + 56]       # rax = q3 | q7
        or      rdx, qword ptr [rdi + 40]       # rdx = q1 | q5
        or      rdx, rax                        # rdx = q1 | q3 | q5 | q7
        or      rdx, rcx                        # rdx = OR of all eight qwords; ZF set iff it is zero
        sete    al                              # al = ZF, i.e. 1 when every byte was zero
        ret
# Assembly emitted for @test_ones (Intel syntax): vectorized path over two 32-byte halves.
# In:    rdi = address of the 64 bytes to test
# Out:   al  = CF produced by vptest, i.e. 1 when both 32-byte halves are all-ones
# Uses:  ymm0, ymm1 as scratch; flags are overwritten
test_ones:                              # @test_ones
        vmovdqu ymm0, ymmword ptr [rdi]         # ymm0 = bytes 0..31 (unaligned load)
        vpand   ymm0, ymm0, ymmword ptr [rdi + 32]      # AND with bytes 32..63; result is all-ones only if both halves are
        vpcmpeqd        ymm1, ymm1, ymm1        # ymm1 = all-ones (every lane equals itself)
        vptest  ymm0, ymm1                      # CF = 1 iff (~ymm0 & ymm1) == 0, i.e. ymm0 is all-ones
        setb    al                              # al = CF
        vzeroupper                              # zero the upper YMM halves before returning
        ret

After the bcmp calls are expanded inline, the IR for the two functions looks like this (https://llvm.godbolt.org/z/Tba34zYod):

; 64-byte constant with every byte zero (not referenced by the expanded functions shown in this snippet).
@zeroes = private unnamed_addr constant [64 x i8] zeroinitializer, align 1
; 64-byte constant with every byte 0xFF (not referenced by the expanded functions shown in this snippet).
@ones = private unnamed_addr constant [64 x i8] c"\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF\FF", align 1

; Expanded form of @test_zeroes: the bcmp call has been replaced by two i256
; loads whose OR is tested against zero.
define zeroext i1 @test_zeroes(ptr %x) {
start: 
  ; Load bytes 0..31 and bytes 32..63 of %x as two i256 values.
  %0 = load i256, ptr %x, align 1
  %1 = getelementptr i8, ptr %x, i64 32
  %2 = load i256, ptr %1, align 1
  ; Any set bit in either half makes the OR non-zero.
  %3 = or i256 %0, %2
  %4 = icmp ne i256 %3, 0
  ; Widen the "differs" bit to i32 and compare it with 0, mirroring the
  ; `icmp eq i32 %bcmp, 0` of the unexpanded function.
  %5 = zext i1 %4 to i32
  %6 = icmp eq i32 %5, 0
  ret i1 %6
} 


; Expanded form of @test_ones: the bcmp call has been replaced by two i256
; loads, each XOR-ed with -1 (all-ones), whose OR is tested against zero.
define  zeroext i1 @test_ones(ptr %x) {
start:
  ; Bytes 0..31: XOR with -1 leaves zero only if the half is all-ones.
  %0 = load i256, ptr %x, align 1
  %1 = xor i256 %0, -1
  ; Bytes 32..63: same check for the second half.
  %2 = getelementptr i8, ptr %x, i64 32
  %3 = load i256, ptr %2, align 1
  %4 = xor i256 %3, -1
  ; Non-zero OR means at least one bit in either half was not set.
  %5 = or i256 %1, %4
  %6 = icmp ne i256 %5, 0
  ; Widen the "differs" bit to i32 and compare it with 0, mirroring the
  ; `icmp eq i32 %bcmp, 0` of the unexpanded function.
  %7 = zext i1 %6 to i32
  %8 = icmp eq i32 %7, 0
  ret i1 %8
}

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions