Skip to content

Commit b3e3195

Browse files
committed
[AArch64] Use INDEX for constant Neon step vectors
When compiling for an SVE target, we can use the INDEX instruction to generate constant fixed-length step vectors. The logic for this already existed in `LowerBUILD_VECTOR`, but it was guarded by `!Subtarget->isNeonAvailable()` and so was only reached when NEON was unavailable. This patch refactors the guard so that the SVE lowering path is also taken for constant step vectors when NEON is available (as long as we can use SVE for them).
1 parent 4614b80 commit b3e3195

File tree

2 files changed

+11
-13
lines changed

2 files changed

+11
-13
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14512,7 +14512,9 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
1451214512
SelectionDAG &DAG) const {
1451314513
EVT VT = Op.getValueType();
1451414514

14515-
if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
14515+
bool OverrideNEON = !Subtarget->isNeonAvailable() ||
14516+
cast<BuildVectorSDNode>(Op)->isConstantSequence();
14517+
if (useSVEForFixedLengthVectorVT(VT, OverrideNEON))
1451614518
return LowerFixedLengthBuildVectorToSVE(Op, DAG);
1451714519

1451814520
// Try to build a simple constant vector.

llvm/test/CodeGen/AArch64/active_lane_mask.ll

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -430,10 +430,9 @@ define <2 x i1> @lane_mask_v2i1_i64(i64 %index, i64 %TC) {
430430
define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
431431
; CHECK-LABEL: lane_mask_v16i1_i8:
432432
; CHECK: // %bb.0:
433-
; CHECK-NEXT: adrp x8, .LCPI24_0
434-
; CHECK-NEXT: dup v0.16b, w0
435-
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0]
436-
; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b
433+
; CHECK-NEXT: index z0.b, #0, #1
434+
; CHECK-NEXT: dup v1.16b, w0
435+
; CHECK-NEXT: uqadd v0.16b, v1.16b, v0.16b
437436
; CHECK-NEXT: dup v1.16b, w1
438437
; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b
439438
; CHECK-NEXT: ret
@@ -444,10 +443,9 @@ define <16 x i1> @lane_mask_v16i1_i8(i8 %index, i8 %TC) {
444443
define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
445444
; CHECK-LABEL: lane_mask_v8i1_i8:
446445
; CHECK: // %bb.0:
447-
; CHECK-NEXT: dup v0.8b, w0
448-
; CHECK-NEXT: adrp x8, .LCPI25_0
449-
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI25_0]
450-
; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b
446+
; CHECK-NEXT: index z0.b, #0, #1
447+
; CHECK-NEXT: dup v1.8b, w0
448+
; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b
451449
; CHECK-NEXT: dup v1.8b, w1
452450
; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b
453451
; CHECK-NEXT: ret
@@ -459,9 +457,8 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
459457
; CHECK-LABEL: lane_mask_v4i1_i8:
460458
; CHECK: // %bb.0:
461459
; CHECK-NEXT: dup v0.4h, w0
462-
; CHECK-NEXT: adrp x8, .LCPI26_0
460+
; CHECK-NEXT: index z1.h, #0, #1
463461
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
464-
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI26_0]
465462
; CHECK-NEXT: dup v3.4h, w1
466463
; CHECK-NEXT: bic v0.4h, #255, lsl #8
467464
; CHECK-NEXT: bic v3.4h, #255, lsl #8
@@ -478,8 +475,7 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
478475
; CHECK: // %bb.0:
479476
; CHECK-NEXT: movi d0, #0x0000ff000000ff
480477
; CHECK-NEXT: dup v1.2s, w0
481-
; CHECK-NEXT: adrp x8, .LCPI27_0
482-
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI27_0]
478+
; CHECK-NEXT: index z2.s, #0, #1
483479
; CHECK-NEXT: dup v3.2s, w1
484480
; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
485481
; CHECK-NEXT: add v1.2s, v1.2s, v2.2s

0 commit comments

Comments
 (0)