Skip to content

Commit af32e51

Browse files
committed
[X86] LowerRotate - manually expand rotate by splat constant patterns.
Fixes an issue identified in #63980: the undef rotate amounts (introduced during widening from v2i32 -> v4i32) were being constant folded to 0 when the shift amounts were created during expansion, losing the splatted shift amounts.
1 parent 8bba3f0 commit af32e51

File tree

3 files changed

+32
-36
lines changed

3 files changed

+32
-36
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

+12-2
Original file line numberDiff line numberDiff line change
@@ -32489,8 +32489,18 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
3248932489
}
3249032490

3249132491
// Rotate by a uniform constant - expand back to shifts.
32492-
if (IsCstSplat)
32493-
return SDValue();
32492+
// TODO: Can't use generic expansion as UNDEF amt elements can be converted
32493+
// to other values when folded to shift amounts, losing the splat.
32494+
if (IsCstSplat) {
32495+
uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
32496+
uint64_t ShlAmt = IsROTL ? RotAmt : (EltSizeInBits - RotAmt);
32497+
uint64_t SrlAmt = IsROTL ? (EltSizeInBits - RotAmt) : RotAmt;
32498+
SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, R,
32499+
DAG.getShiftAmountConstant(ShlAmt, VT, DL));
32500+
SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, R,
32501+
DAG.getShiftAmountConstant(SrlAmt, VT, DL));
32502+
return DAG.getNode(ISD::OR, DL, VT, Shl, Srl);
32503+
}
3249432504

3249532505
// Split 512-bit integers on non 512-bit BWI targets.
3249632506
if (VT.is512BitVector() && !Subtarget.useBWIRegs())

llvm/test/CodeGen/X86/vector-fshl-rot-sub128.ll

+10-17
Original file line numberDiff line numberDiff line change
@@ -394,34 +394,29 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
394394
; SSE2: # %bb.0:
395395
; SSE2-NEXT: movdqa %xmm0, %xmm1
396396
; SSE2-NEXT: psrld $28, %xmm1
397-
; SSE2-NEXT: movdqa %xmm0, %xmm2
398-
; SSE2-NEXT: pslld $4, %xmm2
399-
; SSE2-NEXT: por %xmm1, %xmm2
400-
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
397+
; SSE2-NEXT: pslld $4, %xmm0
398+
; SSE2-NEXT: por %xmm1, %xmm0
401399
; SSE2-NEXT: retq
402400
;
403401
; SSE41-LABEL: splatconstant_funnnel_v2i32:
404402
; SSE41: # %bb.0:
405403
; SSE41-NEXT: movdqa %xmm0, %xmm1
406404
; SSE41-NEXT: psrld $28, %xmm1
407-
; SSE41-NEXT: movdqa %xmm0, %xmm2
408-
; SSE41-NEXT: pslld $4, %xmm2
409-
; SSE41-NEXT: por %xmm1, %xmm2
410-
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
405+
; SSE41-NEXT: pslld $4, %xmm0
406+
; SSE41-NEXT: por %xmm1, %xmm0
411407
; SSE41-NEXT: retq
412408
;
413409
; AVX1-LABEL: splatconstant_funnnel_v2i32:
414410
; AVX1: # %bb.0:
415411
; AVX1-NEXT: vpsrld $28, %xmm0, %xmm1
416-
; AVX1-NEXT: vpslld $4, %xmm0, %xmm2
417-
; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
418-
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
412+
; AVX1-NEXT: vpslld $4, %xmm0, %xmm0
413+
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
419414
; AVX1-NEXT: retq
420415
;
421416
; AVX2-LABEL: splatconstant_funnnel_v2i32:
422417
; AVX2: # %bb.0:
423-
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
424-
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
418+
; AVX2-NEXT: vpsrld $28, %xmm0, %xmm1
419+
; AVX2-NEXT: vpslld $4, %xmm0, %xmm0
425420
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
426421
; AVX2-NEXT: retq
427422
;
@@ -473,10 +468,8 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
473468
; X86-SSE2: # %bb.0:
474469
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
475470
; X86-SSE2-NEXT: psrld $28, %xmm1
476-
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
477-
; X86-SSE2-NEXT: pslld $4, %xmm2
478-
; X86-SSE2-NEXT: por %xmm1, %xmm2
479-
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
471+
; X86-SSE2-NEXT: pslld $4, %xmm0
472+
; X86-SSE2-NEXT: por %xmm1, %xmm0
480473
; X86-SSE2-NEXT: retl
481474
%res = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
482475
ret <2 x i32> %res

llvm/test/CodeGen/X86/vector-fshr-rot-sub128.ll

+10-17
Original file line numberDiff line numberDiff line change
@@ -408,34 +408,29 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
408408
; SSE2: # %bb.0:
409409
; SSE2-NEXT: movdqa %xmm0, %xmm1
410410
; SSE2-NEXT: psrld $4, %xmm1
411-
; SSE2-NEXT: movdqa %xmm0, %xmm2
412-
; SSE2-NEXT: pslld $28, %xmm2
413-
; SSE2-NEXT: por %xmm1, %xmm2
414-
; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
411+
; SSE2-NEXT: pslld $28, %xmm0
412+
; SSE2-NEXT: por %xmm1, %xmm0
415413
; SSE2-NEXT: retq
416414
;
417415
; SSE41-LABEL: splatconstant_funnnel_v2i32:
418416
; SSE41: # %bb.0:
419417
; SSE41-NEXT: movdqa %xmm0, %xmm1
420418
; SSE41-NEXT: psrld $4, %xmm1
421-
; SSE41-NEXT: movdqa %xmm0, %xmm2
422-
; SSE41-NEXT: pslld $28, %xmm2
423-
; SSE41-NEXT: por %xmm1, %xmm2
424-
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5,6,7]
419+
; SSE41-NEXT: pslld $28, %xmm0
420+
; SSE41-NEXT: por %xmm1, %xmm0
425421
; SSE41-NEXT: retq
426422
;
427423
; AVX1-LABEL: splatconstant_funnnel_v2i32:
428424
; AVX1: # %bb.0:
429425
; AVX1-NEXT: vpsrld $4, %xmm0, %xmm1
430-
; AVX1-NEXT: vpslld $28, %xmm0, %xmm2
431-
; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1
432-
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
426+
; AVX1-NEXT: vpslld $28, %xmm0, %xmm0
427+
; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
433428
; AVX1-NEXT: retq
434429
;
435430
; AVX2-LABEL: splatconstant_funnnel_v2i32:
436431
; AVX2: # %bb.0:
437-
; AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
438-
; AVX2-NEXT: vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
432+
; AVX2-NEXT: vpsrld $4, %xmm0, %xmm1
433+
; AVX2-NEXT: vpslld $28, %xmm0, %xmm0
439434
; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
440435
; AVX2-NEXT: retq
441436
;
@@ -487,10 +482,8 @@ define <2 x i32> @splatconstant_funnnel_v2i32(<2 x i32> %x) nounwind {
487482
; X86-SSE2: # %bb.0:
488483
; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
489484
; X86-SSE2-NEXT: psrld $4, %xmm1
490-
; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
491-
; X86-SSE2-NEXT: pslld $28, %xmm2
492-
; X86-SSE2-NEXT: por %xmm1, %xmm2
493-
; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
485+
; X86-SSE2-NEXT: pslld $28, %xmm0
486+
; X86-SSE2-NEXT: por %xmm1, %xmm0
494487
; X86-SSE2-NEXT: retl
495488
%res = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %x, <2 x i32> <i32 4, i32 4>)
496489
ret <2 x i32> %res

0 commit comments

Comments
 (0)