Skip to content

Commit 7c648b1

Browse files
committed
[X86] Allow speculative BSR/BSF instructions on targets with CMOV
1 parent 95820ca commit 7c648b1

13 files changed: 483 additions and 791 deletions

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3238,13 +3238,13 @@ bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
32383238

32393239
bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
32403240
// Speculate cttz only if we can use TZCNT or BSF+CMOV, or can promote to i32.
3241-
return Subtarget.hasBMI() ||
3241+
return Subtarget.hasBMI() || Subtarget.canUseCMOV() ||
32423242
(!Ty->isVectorTy() && Ty->getScalarSizeInBits() < 32);
32433243
}
32443244

32453245
bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
32463246
// Speculate ctlz only if we can directly use LZCNT or BSR+CMOV.
3247-
return Subtarget.hasLZCNT();
3247+
return Subtarget.hasLZCNT() || Subtarget.canUseCMOV();
32483248
}
32493249

32503250
bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {

llvm/test/CodeGen/X86/atomic-bit-test.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -582,7 +582,6 @@ define i32 @split_hoist_and(i32 %0) nounwind {
582582
; X64-NEXT: lock btsl $3, v32(%rip)
583583
; X64-NEXT: setb %al
584584
; X64-NEXT: shll $3, %eax
585-
; X64-NEXT: testl %edi, %edi
586585
; X64-NEXT: retq
587586
%2 = atomicrmw or ptr @v32, i32 8 monotonic, align 4
588587
%3 = tail call i32 @llvm.ctlz.i32(i32 %0, i1 false)

llvm/test/CodeGen/X86/bit_ceil.ll

Lines changed: 17 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -8,16 +8,12 @@
88
define i32 @bit_ceil_i32(i32 %x) {
99
; NOBMI-LABEL: bit_ceil_i32:
1010
; NOBMI: # %bb.0:
11-
; NOBMI-NEXT: movl %edi, %eax
12-
; NOBMI-NEXT: decl %eax
13-
; NOBMI-NEXT: je .LBB0_1
14-
; NOBMI-NEXT: # %bb.2: # %cond.false
15-
; NOBMI-NEXT: bsrl %eax, %ecx
11+
; NOBMI-NEXT: # kill: def $edi killed $edi def $rdi
12+
; NOBMI-NEXT: leal -1(%rdi), %eax
13+
; NOBMI-NEXT: bsrl %eax, %eax
14+
; NOBMI-NEXT: movl $63, %ecx
15+
; NOBMI-NEXT: cmovnel %eax, %ecx
1616
; NOBMI-NEXT: xorl $31, %ecx
17-
; NOBMI-NEXT: jmp .LBB0_3
18-
; NOBMI-NEXT: .LBB0_1:
19-
; NOBMI-NEXT: movl $32, %ecx
20-
; NOBMI-NEXT: .LBB0_3: # %cond.end
2117
; NOBMI-NEXT: negb %cl
2218
; NOBMI-NEXT: movl $1, %edx
2319
; NOBMI-NEXT: movl $1, %eax
@@ -51,15 +47,10 @@ define i32 @bit_ceil_i32(i32 %x) {
5147
define i32 @bit_ceil_i32_plus1(i32 noundef %x) {
5248
; NOBMI-LABEL: bit_ceil_i32_plus1:
5349
; NOBMI: # %bb.0: # %entry
54-
; NOBMI-NEXT: testl %edi, %edi
55-
; NOBMI-NEXT: je .LBB1_1
56-
; NOBMI-NEXT: # %bb.2: # %cond.false
57-
; NOBMI-NEXT: bsrl %edi, %ecx
50+
; NOBMI-NEXT: bsrl %edi, %eax
51+
; NOBMI-NEXT: movl $63, %ecx
52+
; NOBMI-NEXT: cmovnel %eax, %ecx
5853
; NOBMI-NEXT: xorl $31, %ecx
59-
; NOBMI-NEXT: jmp .LBB1_3
60-
; NOBMI-NEXT: .LBB1_1:
61-
; NOBMI-NEXT: movl $32, %ecx
62-
; NOBMI-NEXT: .LBB1_3: # %cond.end
6354
; NOBMI-NEXT: negb %cl
6455
; NOBMI-NEXT: movl $1, %edx
6556
; NOBMI-NEXT: movl $1, %eax
@@ -94,16 +85,11 @@ entry:
9485
define i64 @bit_ceil_i64(i64 %x) {
9586
; NOBMI-LABEL: bit_ceil_i64:
9687
; NOBMI: # %bb.0:
97-
; NOBMI-NEXT: movq %rdi, %rax
98-
; NOBMI-NEXT: decq %rax
99-
; NOBMI-NEXT: je .LBB2_1
100-
; NOBMI-NEXT: # %bb.2: # %cond.false
101-
; NOBMI-NEXT: bsrq %rax, %rcx
102-
; NOBMI-NEXT: xorq $63, %rcx
103-
; NOBMI-NEXT: jmp .LBB2_3
104-
; NOBMI-NEXT: .LBB2_1:
105-
; NOBMI-NEXT: movl $64, %ecx
106-
; NOBMI-NEXT: .LBB2_3: # %cond.end
88+
; NOBMI-NEXT: leaq -1(%rdi), %rax
89+
; NOBMI-NEXT: bsrq %rax, %rax
90+
; NOBMI-NEXT: movl $127, %ecx
91+
; NOBMI-NEXT: cmovneq %rax, %rcx
92+
; NOBMI-NEXT: xorl $63, %ecx
10793
; NOBMI-NEXT: negb %cl
10894
; NOBMI-NEXT: movl $1, %edx
10995
; NOBMI-NEXT: movl $1, %eax
@@ -136,15 +122,10 @@ define i64 @bit_ceil_i64(i64 %x) {
136122
define i64 @bit_ceil_i64_plus1(i64 noundef %x) {
137123
; NOBMI-LABEL: bit_ceil_i64_plus1:
138124
; NOBMI: # %bb.0: # %entry
139-
; NOBMI-NEXT: testq %rdi, %rdi
140-
; NOBMI-NEXT: je .LBB3_1
141-
; NOBMI-NEXT: # %bb.2: # %cond.false
142-
; NOBMI-NEXT: bsrq %rdi, %rcx
143-
; NOBMI-NEXT: xorq $63, %rcx
144-
; NOBMI-NEXT: jmp .LBB3_3
145-
; NOBMI-NEXT: .LBB3_1:
146-
; NOBMI-NEXT: movl $64, %ecx
147-
; NOBMI-NEXT: .LBB3_3: # %cond.end
125+
; NOBMI-NEXT: bsrq %rdi, %rax
126+
; NOBMI-NEXT: movl $127, %ecx
127+
; NOBMI-NEXT: cmovneq %rax, %rcx
128+
; NOBMI-NEXT: xorl $63, %ecx
148129
; NOBMI-NEXT: negb %cl
149130
; NOBMI-NEXT: movl $1, %edx
150131
; NOBMI-NEXT: movl $1, %eax

llvm/test/CodeGen/X86/combine-or.ll

Lines changed: 20 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -213,21 +213,18 @@ define i64 @PR89533(<64 x i8> %a0) {
213213
; SSE-NEXT: shll $16, %ecx
214214
; SSE-NEXT: orl %eax, %ecx
215215
; SSE-NEXT: pcmpeqb %xmm4, %xmm2
216-
; SSE-NEXT: pmovmskb %xmm2, %edx
217-
; SSE-NEXT: xorl $65535, %edx # imm = 0xFFFF
216+
; SSE-NEXT: pmovmskb %xmm2, %eax
217+
; SSE-NEXT: xorl $65535, %eax # imm = 0xFFFF
218218
; SSE-NEXT: pcmpeqb %xmm4, %xmm3
219-
; SSE-NEXT: pmovmskb %xmm3, %eax
220-
; SSE-NEXT: notl %eax
221-
; SSE-NEXT: shll $16, %eax
222-
; SSE-NEXT: orl %edx, %eax
223-
; SSE-NEXT: shlq $32, %rax
224-
; SSE-NEXT: orq %rcx, %rax
225-
; SSE-NEXT: je .LBB11_2
226-
; SSE-NEXT: # %bb.1: # %cond.false
227-
; SSE-NEXT: rep bsfq %rax, %rax
228-
; SSE-NEXT: retq
229-
; SSE-NEXT: .LBB11_2: # %cond.end
219+
; SSE-NEXT: pmovmskb %xmm3, %edx
220+
; SSE-NEXT: notl %edx
221+
; SSE-NEXT: shll $16, %edx
222+
; SSE-NEXT: orl %eax, %edx
223+
; SSE-NEXT: shlq $32, %rdx
224+
; SSE-NEXT: orq %rcx, %rdx
225+
; SSE-NEXT: bsfq %rdx, %rcx
230226
; SSE-NEXT: movl $64, %eax
227+
; SSE-NEXT: cmovneq %rcx, %rax
231228
; SSE-NEXT: retq
232229
;
233230
; AVX1-LABEL: PR89533:
@@ -243,23 +240,19 @@ define i64 @PR89533(<64 x i8> %a0) {
243240
; AVX1-NEXT: shll $16, %ecx
244241
; AVX1-NEXT: orl %eax, %ecx
245242
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm0
246-
; AVX1-NEXT: vpmovmskb %xmm0, %edx
247-
; AVX1-NEXT: xorl $65535, %edx # imm = 0xFFFF
243+
; AVX1-NEXT: vpmovmskb %xmm0, %eax
244+
; AVX1-NEXT: xorl $65535, %eax # imm = 0xFFFF
248245
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm0
249246
; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
250-
; AVX1-NEXT: vpmovmskb %xmm0, %eax
251-
; AVX1-NEXT: notl %eax
252-
; AVX1-NEXT: shll $16, %eax
253-
; AVX1-NEXT: orl %edx, %eax
254-
; AVX1-NEXT: shlq $32, %rax
255-
; AVX1-NEXT: orq %rcx, %rax
256-
; AVX1-NEXT: je .LBB11_2
257-
; AVX1-NEXT: # %bb.1: # %cond.false
258-
; AVX1-NEXT: rep bsfq %rax, %rax
259-
; AVX1-NEXT: vzeroupper
260-
; AVX1-NEXT: retq
261-
; AVX1-NEXT: .LBB11_2: # %cond.end
247+
; AVX1-NEXT: vpmovmskb %xmm0, %edx
248+
; AVX1-NEXT: notl %edx
249+
; AVX1-NEXT: shll $16, %edx
250+
; AVX1-NEXT: orl %eax, %edx
251+
; AVX1-NEXT: shlq $32, %rdx
252+
; AVX1-NEXT: orq %rcx, %rdx
253+
; AVX1-NEXT: bsfq %rdx, %rcx
262254
; AVX1-NEXT: movl $64, %eax
255+
; AVX1-NEXT: cmovneq %rcx, %rax
263256
; AVX1-NEXT: vzeroupper
264257
; AVX1-NEXT: retq
265258
;

llvm/test/CodeGen/X86/ctlo.ll

Lines changed: 90 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -13,36 +13,44 @@ declare i32 @llvm.ctlz.i32(i32, i1)
1313
declare i64 @llvm.ctlz.i64(i64, i1)
1414

1515
define i8 @ctlo_i8(i8 %x) {
16-
; X86-LABEL: ctlo_i8:
17-
; X86: # %bb.0:
18-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
19-
; X86-NEXT: xorb $-1, %al
20-
; X86-NEXT: je .LBB0_1
21-
; X86-NEXT: # %bb.2: # %cond.false
22-
; X86-NEXT: movzbl %al, %eax
23-
; X86-NEXT: bsrl %eax, %eax
24-
; X86-NEXT: xorl $7, %eax
25-
; X86-NEXT: # kill: def $al killed $al killed $eax
26-
; X86-NEXT: retl
27-
; X86-NEXT: .LBB0_1:
28-
; X86-NEXT: movb $8, %al
29-
; X86-NEXT: # kill: def $al killed $al killed $eax
30-
; X86-NEXT: retl
16+
; X86-NOCMOV-LABEL: ctlo_i8:
17+
; X86-NOCMOV: # %bb.0:
18+
; X86-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
19+
; X86-NOCMOV-NEXT: xorb $-1, %al
20+
; X86-NOCMOV-NEXT: je .LBB0_1
21+
; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
22+
; X86-NOCMOV-NEXT: movzbl %al, %eax
23+
; X86-NOCMOV-NEXT: bsrl %eax, %eax
24+
; X86-NOCMOV-NEXT: xorl $7, %eax
25+
; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
26+
; X86-NOCMOV-NEXT: retl
27+
; X86-NOCMOV-NEXT: .LBB0_1:
28+
; X86-NOCMOV-NEXT: movb $8, %al
29+
; X86-NOCMOV-NEXT: # kill: def $al killed $al killed $eax
30+
; X86-NOCMOV-NEXT: retl
31+
;
32+
; X86-CMOV-LABEL: ctlo_i8:
33+
; X86-CMOV: # %bb.0:
34+
; X86-CMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
35+
; X86-CMOV-NEXT: notb %al
36+
; X86-CMOV-NEXT: movzbl %al, %eax
37+
; X86-CMOV-NEXT: bsrl %eax, %ecx
38+
; X86-CMOV-NEXT: movl $15, %eax
39+
; X86-CMOV-NEXT: cmovnel %ecx, %eax
40+
; X86-CMOV-NEXT: xorl $7, %eax
41+
; X86-CMOV-NEXT: # kill: def $al killed $al killed $eax
42+
; X86-CMOV-NEXT: retl
3143
;
3244
; X64-LABEL: ctlo_i8:
3345
; X64: # %bb.0:
34-
; X64-NEXT: xorb $-1, %dil
35-
; X64-NEXT: je .LBB0_1
36-
; X64-NEXT: # %bb.2: # %cond.false
46+
; X64-NEXT: notb %dil
3747
; X64-NEXT: movzbl %dil, %eax
38-
; X64-NEXT: bsrl %eax, %eax
48+
; X64-NEXT: bsrl %eax, %ecx
49+
; X64-NEXT: movl $15, %eax
50+
; X64-NEXT: cmovnel %ecx, %eax
3951
; X64-NEXT: xorl $7, %eax
4052
; X64-NEXT: # kill: def $al killed $al killed $eax
4153
; X64-NEXT: retq
42-
; X64-NEXT: .LBB0_1:
43-
; X64-NEXT: movb $8, %al
44-
; X64-NEXT: # kill: def $al killed $al killed $eax
45-
; X64-NEXT: retq
4654
;
4755
; X86-CLZ-LABEL: ctlo_i8:
4856
; X86-CLZ: # %bb.0:
@@ -111,34 +119,41 @@ define i8 @ctlo_i8_undef(i8 %x) {
111119
}
112120

113121
define i16 @ctlo_i16(i16 %x) {
114-
; X86-LABEL: ctlo_i16:
115-
; X86: # %bb.0:
116-
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
117-
; X86-NEXT: xorw $-1, %ax
118-
; X86-NEXT: je .LBB2_1
119-
; X86-NEXT: # %bb.2: # %cond.false
120-
; X86-NEXT: bsrw %ax, %ax
121-
; X86-NEXT: xorl $15, %eax
122-
; X86-NEXT: # kill: def $ax killed $ax killed $eax
123-
; X86-NEXT: retl
124-
; X86-NEXT: .LBB2_1:
125-
; X86-NEXT: movw $16, %ax
126-
; X86-NEXT: # kill: def $ax killed $ax killed $eax
127-
; X86-NEXT: retl
122+
; X86-NOCMOV-LABEL: ctlo_i16:
123+
; X86-NOCMOV: # %bb.0:
124+
; X86-NOCMOV-NEXT: movzwl {{[0-9]+}}(%esp), %eax
125+
; X86-NOCMOV-NEXT: xorw $-1, %ax
126+
; X86-NOCMOV-NEXT: je .LBB2_1
127+
; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
128+
; X86-NOCMOV-NEXT: bsrw %ax, %ax
129+
; X86-NOCMOV-NEXT: xorl $15, %eax
130+
; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax
131+
; X86-NOCMOV-NEXT: retl
132+
; X86-NOCMOV-NEXT: .LBB2_1:
133+
; X86-NOCMOV-NEXT: movw $16, %ax
134+
; X86-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax
135+
; X86-NOCMOV-NEXT: retl
136+
;
137+
; X86-CMOV-LABEL: ctlo_i16:
138+
; X86-CMOV: # %bb.0:
139+
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
140+
; X86-CMOV-NEXT: notl %eax
141+
; X86-CMOV-NEXT: bsrw %ax, %cx
142+
; X86-CMOV-NEXT: movw $31, %ax
143+
; X86-CMOV-NEXT: cmovnew %cx, %ax
144+
; X86-CMOV-NEXT: xorl $15, %eax
145+
; X86-CMOV-NEXT: # kill: def $ax killed $ax killed $eax
146+
; X86-CMOV-NEXT: retl
128147
;
129148
; X64-LABEL: ctlo_i16:
130149
; X64: # %bb.0:
131-
; X64-NEXT: xorw $-1, %di
132-
; X64-NEXT: je .LBB2_1
133-
; X64-NEXT: # %bb.2: # %cond.false
134-
; X64-NEXT: bsrw %di, %ax
150+
; X64-NEXT: notl %edi
151+
; X64-NEXT: bsrw %di, %cx
152+
; X64-NEXT: movw $31, %ax
153+
; X64-NEXT: cmovnew %cx, %ax
135154
; X64-NEXT: xorl $15, %eax
136155
; X64-NEXT: # kill: def $ax killed $ax killed $eax
137156
; X64-NEXT: retq
138-
; X64-NEXT: .LBB2_1:
139-
; X64-NEXT: movw $16, %ax
140-
; X64-NEXT: # kill: def $ax killed $ax killed $eax
141-
; X64-NEXT: retq
142157
;
143158
; X86-CLZ-LABEL: ctlo_i16:
144159
; X86-CLZ: # %bb.0:
@@ -193,30 +208,37 @@ define i16 @ctlo_i16_undef(i16 %x) {
193208
}
194209

195210
define i32 @ctlo_i32(i32 %x) {
196-
; X86-LABEL: ctlo_i32:
197-
; X86: # %bb.0:
198-
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
199-
; X86-NEXT: xorl $-1, %eax
200-
; X86-NEXT: je .LBB4_1
201-
; X86-NEXT: # %bb.2: # %cond.false
202-
; X86-NEXT: bsrl %eax, %eax
203-
; X86-NEXT: xorl $31, %eax
204-
; X86-NEXT: retl
205-
; X86-NEXT: .LBB4_1:
206-
; X86-NEXT: movl $32, %eax
207-
; X86-NEXT: retl
211+
; X86-NOCMOV-LABEL: ctlo_i32:
212+
; X86-NOCMOV: # %bb.0:
213+
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
214+
; X86-NOCMOV-NEXT: xorl $-1, %eax
215+
; X86-NOCMOV-NEXT: je .LBB4_1
216+
; X86-NOCMOV-NEXT: # %bb.2: # %cond.false
217+
; X86-NOCMOV-NEXT: bsrl %eax, %eax
218+
; X86-NOCMOV-NEXT: xorl $31, %eax
219+
; X86-NOCMOV-NEXT: retl
220+
; X86-NOCMOV-NEXT: .LBB4_1:
221+
; X86-NOCMOV-NEXT: movl $32, %eax
222+
; X86-NOCMOV-NEXT: retl
223+
;
224+
; X86-CMOV-LABEL: ctlo_i32:
225+
; X86-CMOV: # %bb.0:
226+
; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
227+
; X86-CMOV-NEXT: notl %eax
228+
; X86-CMOV-NEXT: bsrl %eax, %ecx
229+
; X86-CMOV-NEXT: movl $63, %eax
230+
; X86-CMOV-NEXT: cmovnel %ecx, %eax
231+
; X86-CMOV-NEXT: xorl $31, %eax
232+
; X86-CMOV-NEXT: retl
208233
;
209234
; X64-LABEL: ctlo_i32:
210235
; X64: # %bb.0:
211-
; X64-NEXT: xorl $-1, %edi
212-
; X64-NEXT: je .LBB4_1
213-
; X64-NEXT: # %bb.2: # %cond.false
214-
; X64-NEXT: bsrl %edi, %eax
236+
; X64-NEXT: notl %edi
237+
; X64-NEXT: bsrl %edi, %ecx
238+
; X64-NEXT: movl $63, %eax
239+
; X64-NEXT: cmovnel %ecx, %eax
215240
; X64-NEXT: xorl $31, %eax
216241
; X64-NEXT: retq
217-
; X64-NEXT: .LBB4_1:
218-
; X64-NEXT: movl $32, %eax
219-
; X64-NEXT: retq
220242
;
221243
; X86-CLZ-LABEL: ctlo_i32:
222244
; X86-CLZ: # %bb.0:
@@ -314,15 +336,12 @@ define i64 @ctlo_i64(i64 %x) {
314336
;
315337
; X64-LABEL: ctlo_i64:
316338
; X64: # %bb.0:
317-
; X64-NEXT: xorq $-1, %rdi
318-
; X64-NEXT: je .LBB6_1
319-
; X64-NEXT: # %bb.2: # %cond.false
320-
; X64-NEXT: bsrq %rdi, %rax
339+
; X64-NEXT: notq %rdi
340+
; X64-NEXT: bsrq %rdi, %rcx
341+
; X64-NEXT: movl $127, %eax
342+
; X64-NEXT: cmovneq %rcx, %rax
321343
; X64-NEXT: xorq $63, %rax
322344
; X64-NEXT: retq
323-
; X64-NEXT: .LBB6_1:
324-
; X64-NEXT: movl $64, %eax
325-
; X64-NEXT: retq
326345
;
327346
; X86-CLZ-LABEL: ctlo_i64:
328347
; X86-CLZ: # %bb.0:

0 commit comments

Comments
 (0)