Skip to content

Commit d8d1ac1

Browse files
committed
[SelectionDAG] Add MaskedValueIsZero check to allow folding of zero-extended variables we know are safe to extend
Add ones for every high bit that will be cleared. This allows us to evaluate variables whose bits are known, to see whether they have no risk of overflow even though the shift amount is greater than the difference in bit width between the two types.
1 parent 710a43d commit d8d1ac1

File tree

2 files changed

+29
-26
lines changed

2 files changed

+29
-26
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13802,11 +13802,21 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
1380213802
if (N0.getOpcode() == ISD::SHL) {
1380313803
// If the original shl may be shifting out bits, do not perform this
1380413804
// transformation.
13805-
// TODO: Add MaskedValueIsZero check.
1380613805
unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
1380713806
ShVal.getOperand(0).getValueSizeInBits();
13808-
if (ShAmtC->getAPIntValue().ugt(KnownZeroBits))
13809-
return SDValue();
13807+
if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
13808+
// If the shift is too large, then see if we can deduce that the
13809+
// shift is safe anyway.
13810+
// Create a mask that has ones for the bits being shifted out.
13811+
llvm::APInt ShiftOutMask = llvm::APInt::getHighBitsSet(
13812+
ShVal.getValueSizeInBits(),
13813+
ShAmtC->getAPIntValue().getZExtValue());
13814+
13815+
// Check if the bits being shifted out are known to be zero.
13816+
if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask)) {
13817+
return SDValue();
13818+
}
13819+
}
1381013820
}
1381113821

1381213822
// Ensure that the shift amount is wide enough for the shifted value.

llvm/test/CodeGen/X86/dagcombine-shifts.ll

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -326,18 +326,15 @@ define i32 @shift_zext_shl(i8 zeroext %x) {
326326
; X86-LABEL: shift_zext_shl:
327327
; X86: # %bb.0:
328328
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
329-
; X86-NEXT: andb $64, %al
330-
; X86-NEXT: movzbl %al, %eax
329+
; X86-NEXT: andl $64, %eax
331330
; X86-NEXT: shll $9, %eax
332-
; X86-NEXT: movzwl %ax, %eax
333331
; X86-NEXT: retl
334332
;
335333
; X64-LABEL: shift_zext_shl:
336334
; X64: # %bb.0:
337-
; X64-NEXT: andb $64, %dil
338-
; X64-NEXT: movzbl %dil, %eax
335+
; X64-NEXT: movl %edi, %eax
336+
; X64-NEXT: andl $64, %eax
339337
; X64-NEXT: shll $9, %eax
340-
; X64-NEXT: movzwl %ax, %eax
341338
; X64-NEXT: retq
342339
%a = and i8 %x, 64
343340
%b = zext i8 %a to i16
@@ -369,31 +366,27 @@ define i32 @shift_zext_shl2(i8 zeroext %x) {
369366
define <4 x i32> @shift_zext_shl_vec(<4 x i8> %x) nounwind {
370367
; X86-LABEL: shift_zext_shl_vec:
371368
; X86: # %bb.0:
372-
; X86-NEXT: pushl %ebx
373369
; X86-NEXT: pushl %edi
374370
; X86-NEXT: pushl %esi
375371
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
372+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edi
376373
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
377374
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
378375
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
379-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
380-
; X86-NEXT: andb $64, %bl
381-
; X86-NEXT: movzbl %bl, %edi
382-
; X86-NEXT: shll $9, %edi
383-
; X86-NEXT: movzwl %di, %edi
384-
; X86-NEXT: andl $63, %ecx
385-
; X86-NEXT: shll $8, %ecx
386-
; X86-NEXT: andl $31, %edx
387-
; X86-NEXT: shll $7, %edx
388-
; X86-NEXT: andl $23, %esi
389-
; X86-NEXT: shll $6, %esi
390-
; X86-NEXT: movl %esi, 12(%eax)
391-
; X86-NEXT: movl %edx, 8(%eax)
392-
; X86-NEXT: movl %ecx, 4(%eax)
393-
; X86-NEXT: movl %edi, (%eax)
376+
; X86-NEXT: andl $64, %ecx
377+
; X86-NEXT: shll $9, %ecx
378+
; X86-NEXT: andl $63, %edx
379+
; X86-NEXT: shll $8, %edx
380+
; X86-NEXT: andl $31, %esi
381+
; X86-NEXT: shll $7, %esi
382+
; X86-NEXT: andl $23, %edi
383+
; X86-NEXT: shll $6, %edi
384+
; X86-NEXT: movl %edi, 12(%eax)
385+
; X86-NEXT: movl %esi, 8(%eax)
386+
; X86-NEXT: movl %edx, 4(%eax)
387+
; X86-NEXT: movl %ecx, (%eax)
394388
; X86-NEXT: popl %esi
395389
; X86-NEXT: popl %edi
396-
; X86-NEXT: popl %ebx
397390
; X86-NEXT: retl $4
398391
;
399392
; X64-LABEL: shift_zext_shl_vec:

0 commit comments

Comments
 (0)