Skip to content

Commit bcf9e89

Browse files
committed
[SelectionDAG] Add MaskedValueIsZero check to allow folding of zero extended variables we know are safe to extend
Build a mask with ones for every high bit that will be shifted out. This allows us to use the known bits of a variable to check that it has no risk of overflow, even when the shift amount is greater than the difference in bit width between the two types.
1 parent 9910388 commit bcf9e89

File tree

2 files changed

+34
-34
lines changed

2 files changed

+34
-34
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13802,11 +13802,21 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
1380213802
if (N0.getOpcode() == ISD::SHL) {
1380313803
// If the original shl may be shifting out bits, do not perform this
1380413804
// transformation.
13805-
// TODO: Add MaskedValueIsZero check.
1380613805
unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
1380713806
ShVal.getOperand(0).getValueSizeInBits();
13808-
if (ShAmtC->getAPIntValue().ugt(KnownZeroBits))
13809-
return SDValue();
13807+
if (ShAmtC->getAPIntValue().ugt(KnownZeroBits)) {
13808+
// If the shift is too large, then see if we can deduce that the
13809+
// shift is safe anyway.
13810+
// Create a mask that has ones for the bits being shifted out.
13811+
llvm::APInt ShiftOutMask = llvm::APInt::getHighBitsSet(
13812+
ShVal.getValueSizeInBits(),
13813+
ShAmtC->getAPIntValue().getZExtValue());
13814+
13815+
// Check if the bits being shifted out are known to be zero.
13816+
if (!DAG.MaskedValueIsZero(ShVal, ShiftOutMask)) {
13817+
return SDValue();
13818+
}
13819+
}
1381013820
}
1381113821

1381213822
// Ensure that the shift amount is wide enough for the shifted value.

llvm/test/CodeGen/X86/dagcombine-shifts.ll

Lines changed: 21 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -326,18 +326,15 @@ define i32 @shift_zext_shl(i8 zeroext %x) {
326326
; X86-LABEL: shift_zext_shl:
327327
; X86: # %bb.0:
328328
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
329-
; X86-NEXT: andb $64, %al
330-
; X86-NEXT: movzbl %al, %eax
329+
; X86-NEXT: andl $64, %eax
331330
; X86-NEXT: shll $9, %eax
332-
; X86-NEXT: movzwl %ax, %eax
333331
; X86-NEXT: retl
334332
;
335333
; X64-LABEL: shift_zext_shl:
336334
; X64: # %bb.0:
337-
; X64-NEXT: andb $64, %dil
338-
; X64-NEXT: movzbl %dil, %eax
335+
; X64-NEXT: movl %edi, %eax
336+
; X64-NEXT: andl $64, %eax
339337
; X64-NEXT: shll $9, %eax
340-
; X64-NEXT: movzwl %ax, %eax
341338
; X64-NEXT: retq
342339
%a = and i8 %x, 64
343340
%b = zext i8 %a to i16
@@ -369,39 +366,32 @@ define i32 @shift_zext_shl2(i8 zeroext %x) {
369366
define <4 x i32> @shift_zext_shl_vec(<4 x i8> %x) {
370367
; X86-LABEL: shift_zext_shl_vec:
371368
; X86: # %bb.0:
372-
; X86-NEXT: pushl %ebx
373-
; X86-NEXT: .cfi_def_cfa_offset 8
374369
; X86-NEXT: pushl %edi
375-
; X86-NEXT: .cfi_def_cfa_offset 12
370+
; X86-NEXT: .cfi_def_cfa_offset 8
376371
; X86-NEXT: pushl %esi
377-
; X86-NEXT: .cfi_def_cfa_offset 16
378-
; X86-NEXT: .cfi_offset %esi, -16
379-
; X86-NEXT: .cfi_offset %edi, -12
380-
; X86-NEXT: .cfi_offset %ebx, -8
372+
; X86-NEXT: .cfi_def_cfa_offset 12
373+
; X86-NEXT: .cfi_offset %esi, -12
374+
; X86-NEXT: .cfi_offset %edi, -8
381375
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
376+
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edi
382377
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %esi
383378
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx
384379
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
385-
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ebx
386-
; X86-NEXT: andb $64, %bl
387-
; X86-NEXT: movzbl %bl, %edi
388-
; X86-NEXT: shll $9, %edi
389-
; X86-NEXT: movzwl %di, %edi
390-
; X86-NEXT: andl $63, %ecx
391-
; X86-NEXT: shll $8, %ecx
392-
; X86-NEXT: andl $31, %edx
393-
; X86-NEXT: shll $7, %edx
394-
; X86-NEXT: andl $23, %esi
395-
; X86-NEXT: shll $6, %esi
396-
; X86-NEXT: movl %esi, 12(%eax)
397-
; X86-NEXT: movl %edx, 8(%eax)
398-
; X86-NEXT: movl %ecx, 4(%eax)
399-
; X86-NEXT: movl %edi, (%eax)
380+
; X86-NEXT: andl $64, %ecx
381+
; X86-NEXT: shll $9, %ecx
382+
; X86-NEXT: andl $63, %edx
383+
; X86-NEXT: shll $8, %edx
384+
; X86-NEXT: andl $31, %esi
385+
; X86-NEXT: shll $7, %esi
386+
; X86-NEXT: andl $23, %edi
387+
; X86-NEXT: shll $6, %edi
388+
; X86-NEXT: movl %edi, 12(%eax)
389+
; X86-NEXT: movl %esi, 8(%eax)
390+
; X86-NEXT: movl %edx, 4(%eax)
391+
; X86-NEXT: movl %ecx, (%eax)
400392
; X86-NEXT: popl %esi
401-
; X86-NEXT: .cfi_def_cfa_offset 12
402-
; X86-NEXT: popl %edi
403393
; X86-NEXT: .cfi_def_cfa_offset 8
404-
; X86-NEXT: popl %ebx
394+
; X86-NEXT: popl %edi
405395
; X86-NEXT: .cfi_def_cfa_offset 4
406396
; X86-NEXT: retl $4
407397
;

0 commit comments

Comments
 (0)