diff --git a/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c b/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
index 1391a1b09fbd1..36c3c7f745a2b 100644
--- a/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
@@ -16,7 +16,7 @@ void func(int *restrict a, int *restrict b) {
// CHECK256-COUNT-8: str
// CHECK512-COUNT-4: str
// CHECK1024-COUNT-2: str
-// CHECK2048-COUNT-1: st1w
+// CHECK2048-COUNT-1: str
#pragma clang loop vectorize(enable)
  for (int i = 0; i < 64; ++i)
    a[i] += b[i];
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 795ac68e63087..6ae39401674f1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5725,8 +5725,8 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
                               int Pattern) {
-  if (VT == MVT::nxv1i1 && Pattern == AArch64SVEPredPattern::all)
-    return DAG.getConstant(1, DL, MVT::nxv1i1);
+  if (Pattern == AArch64SVEPredPattern::all)
+    return DAG.getConstant(1, DL, VT);
  return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
                     DAG.getTargetConstant(Pattern, DL, MVT::i32));
}
@@ -25030,7 +25030,7 @@ static SDValue foldCSELofLASTB(SDNode *Op, SelectionDAG &DAG) {
  if (AnyPred.getOpcode() == AArch64ISD::REINTERPRET_CAST)
    AnyPred = AnyPred.getOperand(0);
-  if (TruePred != AnyPred && TruePred.getOpcode() != AArch64ISD::PTRUE)
+  if (TruePred != AnyPred && !isAllActivePredicate(DAG, TruePred))
    return SDValue();
  SDValue LastB = Op->getOperand(0);
@@ -28568,7 +28568,7 @@ static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
  }
}
-// Return a PTRUE with active lanes corresponding to the extent of VT.
+// Return a predicate with active lanes corresponding to the extent of VT.
static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
                                                EVT VT) {
  assert(VT.isFixedLengthVector() &&
diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
index d5b9d17a98d55..c3322ca38f9e5 100644
--- a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
@@ -208,13 +208,8 @@ define <vscale x 2 x i32> @vec_scalable_subvec_fixed_idx_nonzero_large_i32(ptr %
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ptrue p1.d, vl8
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
-; CHECK-NEXT: str z0, [sp]
-; CHECK-NEXT: ld1w { z0.d }, p1/z, [x1]
-; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: ptrue p0.d, vl8
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x1]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
index 965af2a745afd..e10313773c73e 100644
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll
@@ -331,8 +331,7 @@ define void @extract_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec, ptr %p) nounwi
; CHECK-LABEL: extract_fixed_v4i64_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #32
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%retval = call <4 x i64> @llvm.vector.extract.v4i64.nxv2i64(<vscale x 2 x i64> %vec, i64 4)
store <4 x i64> %retval, ptr %p
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll
index eaa99239b09e8..ac4c387b70583 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-ld2-alloca.ll
@@ -18,7 +18,7 @@ define void @st1d_fixed(ptr %ptr) #0 {
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20]
; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload
-; CHECK-NEXT: st1d { z0.d }, p0, [x19]
+; CHECK-NEXT: str z0, [x19]
; CHECK-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #160
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
index 55f70b2ffc15b..00002dd3269a2 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
@@ -544,11 +544,10 @@ define void @extract_subvector_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
define void @extract_subvector_legalization_v8i32() vscale_range(2,2) #0 {
; CHECK-LABEL: extract_subvector_legalization_v8i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: adrp x8, .LCPI40_0
; CHECK-NEXT: add x8, x8, :lo12:.LCPI40_0
; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT: ldr z0, [x8]
; CHECK-NEXT: mov z1.d, z0.d
; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-convert.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-convert.ll
index 25876f0ef44af..da1aa4cffe13a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-convert.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-convert.ll
@@ -7,12 +7,12 @@ target triple = "aarch64-unknown-linux-gnu"
define void @fp_convert_combine_crash(ptr %a, ptr %b) #0 {
; CHECK-LABEL: fp_convert_combine_crash:
; CHECK: // %bb.0:
+; CHECK-NEXT: fmov z0.s, #8.00000000
+; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fmov z1.s, #8.00000000
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: fmul z0.s, z0.s, z1.s
+; CHECK-NEXT: fmul z0.s, z1.s, z0.s
; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
-; CHECK-NEXT: st1w { z0.s }, p0, [x1]
+; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%f = load <8 x float>, ptr %a
%mul.i = fmul <8 x float> %f, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests.ll
; RUN: llc -debug-only=isel < %s 2>&1 | FileCheck %s
; REQUIRES: asserts
@@ -9,16 +9,15 @@ target triple = "aarch64-unknown-linux-gnu"
; accessing fixed width objects.
define void @foo(ptr %a) #0 {
; CHECK-LABEL: foo:
-; CHECK: SelectionDAG has 15 nodes:
+; CHECK: SelectionDAG has 13 nodes:
; CHECK-NEXT: t0: ch,glue = EntryToken
-; CHECK-NEXT: t12: nxv2i1 = PTRUE_D TargetConstant:i32<31>
; CHECK-NEXT: t2: i64,ch = CopyFromReg t0, Register:i64 %0
-; CHECK-NEXT: t18: nxv2i64,ch = LD1D_IMM t12, t2, TargetConstant:i64<0>, t0
+; CHECK-NEXT: t21: nxv2i64,ch = LDR_ZXI<Mem:(volatile load (<vscale x 2 x s64>) from %ir.a, align 64)> t2, TargetConstant:i64<0>, t0
; CHECK-NEXT: t8: i64 = ADDXri TargetFrameIndex:i64<1>, TargetConstant:i32<0>, TargetConstant:i32<0>
; CHECK-NEXT: t6: i64 = ADDXri TargetFrameIndex:i64<0>, TargetConstant:i32<0>, TargetConstant:i32<0>
-; CHECK-NEXT: t17: ch = ST1D_IMM t18, t12, t6, TargetConstant:i64<0>, t18:1
-; CHECK-NEXT: t16: ch = ST1D_IMM t18, t12, t8, TargetConstant:i64<0>, t17
-; CHECK-NEXT: t10: ch = RET_ReallyLR t16
+; CHECK-NEXT: t22: ch = STR_ZXI<Mem:(volatile store (<vscale x 2 x s64>) into %ir.r0, align 64)> t21, t6, TargetConstant:i64<0>, t21:1
+; CHECK-NEXT: t23: ch = STR_ZXI<Mem:(volatile store (<vscale x 2 x s64>) into %ir.r1, align 64)> t21, t8, TargetConstant:i64<0>, t22
+; CHECK-NEXT: t10: ch = RET_ReallyLR t23
; CHECK-EMPTY:
entry:
%r0 = alloca <8 x i64>
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll
index d7b67d73a671e..7b82c0af329f0 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll
@@ -380,11 +380,10 @@ define void @v8i32(ptr %ldptr, ptr %stptr) {
;
; CHECK-256-LABEL: v8i32:
; CHECK-256: // %bb.0:
-; CHECK-256-NEXT: ptrue p0.s
-; CHECK-256-NEXT: ld1w { z0.s }, p0/z, [x0, #2, mul vl]
-; CHECK-256-NEXT: ld1w { z1.s }, p0/z, [x0, #1, mul vl]
-; CHECK-256-NEXT: st1w { z0.s }, p0, [x1, #2, mul vl]
-; CHECK-256-NEXT: st1w { z1.s }, p0, [x1, #1, mul vl]
+; CHECK-256-NEXT: ldr z0, [x0, #2, mul vl]
+; CHECK-256-NEXT: ldr z1, [x0, #1, mul vl]
+; CHECK-256-NEXT: str z0, [x1, #2, mul vl]
+; CHECK-256-NEXT: str z1, [x1, #1, mul vl]
; CHECK-256-NEXT: ret
;
; CHECK-512-LABEL: v8i32:
@@ -437,8 +436,7 @@ define void @v8i32_vscale(ptr %0) {
; CHECK-256-LABEL: v8i32_vscale:
; CHECK-256: // %bb.0:
; CHECK-256-NEXT: mov z0.s, #1 // =0x1
-; CHECK-256-NEXT: ptrue p0.s
-; CHECK-256-NEXT: st1w { z0.s }, p0, [x0, #2, mul vl]
+; CHECK-256-NEXT: str z0, [x0, #2, mul vl]
; CHECK-256-NEXT: ret
;
; CHECK-512-LABEL: v8i32_vscale:
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-optimize-ptrue.ll
index 1512f5488bda4..d5aad7670cf7a 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-optimize-ptrue.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-optimize-ptrue.ll
@@ -6,11 +6,10 @@ target triple = "aarch64-unknown-linux-gnu"
define void @add_v64i8(ptr %a, ptr %b) #0 {
; CHECK-LABEL: add_v64i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: add z0.b, z0.b, z1.b
-; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%op1 = load <64 x i8>, ptr %a
%op2 = load <64 x i8>, ptr %b
@@ -22,11 +21,10 @@ define void @add_v32i16(ptr %a, ptr %b, ptr %c) #0 {
; CHECK-LABEL: add_v32i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; 
CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ldr z1, [x1] ; CHECK-NEXT: add z0.h, z0.h, z1.h -; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i16>, ptr %a %op2 = load <32 x i16>, ptr %b @@ -38,10 +36,10 @@ define void @add_v32i16(ptr %a, ptr %b, ptr %c) #0 { define void @abs_v16i32(ptr %a) #0 { ; CHECK-LABEL: abs_v16i32: ; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: abs z0.s, p0/m, z0.s -; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a %res = call <16 x i32> @llvm.abs.v16i32(<16 x i32> %op1, i1 false) @@ -52,10 +50,10 @@ define void @abs_v16i32(ptr %a) #0 { define void @abs_v8i64(ptr %a) #0 { ; CHECK-LABEL: abs_v8i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ldr z0, [x0] ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: abs z0.d, p0/m, z0.d -; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i64>, ptr %a %res = call <8 x i64> @llvm.abs.v8i64(<8 x i64> %op1, i1 false) @@ -66,11 +64,10 @@ define void @abs_v8i64(ptr %a) #0 { define void @fadd_v32f16(ptr %a, ptr %b) #0 { ; CHECK-LABEL: fadd_v32f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ldr z1, [x1] ; CHECK-NEXT: fadd z0.h, z0.h, z1.h -; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x half>, ptr %a %op2 = load <32 x half>, ptr %b @@ -82,11 +79,10 @@ define void @fadd_v32f16(ptr %a, ptr %b) #0 { define void @fadd_v16f32(ptr %a, ptr %b) #0 { ; CHECK-LABEL: fadd_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ldr z1, [x1] ; CHECK-NEXT: fadd z0.s, z0.s, z1.s -; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x float>, ptr %a %op2 = load <16 x float>, ptr %b @@ -98,11 +94,10 @@ define void @fadd_v16f32(ptr %a, ptr %b) #0 { define void @fadd_v8f64(ptr %a, ptr %b) #0 { ; CHECK-LABEL: fadd_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ldr z1, [x1] ; CHECK-NEXT: fadd z0.d, z0.d, z1.d -; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %op1 = load <8 x double>, ptr %a %op2 = load <8 x double>, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll index 0d0b5cbc776c4..0cda4d94444e9 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll @@ -194,13 +194,12 @@ define void @test_revhv32i16(ptr %a) #0 { define void @test_rev_elts_fail(ptr %a) #1 { ; CHECK-LABEL: test_rev_elts_fail: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: add x8, x8, :lo12:.LCPI11_0 -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x8] +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ldr z1, [x8] ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d -; CHECK-NEXT: st1d { z0.d }, p0, [x0] +; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %tmp1 = load <4 x 
i64>, ptr %a
%tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
@@ -275,10 +274,9 @@ define void @test_revv8i32(ptr %a) #0 {
define void @test_revv32i8_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv32i8_vl256:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: rev z0.b, z0.b
-; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <32 x i8>, ptr %a
%tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -290,10 +288,9 @@ define void @test_revv16i16_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv16i16_vl256:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: rev z0.h, z0.h
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <16 x i16>, ptr %a
%tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -305,10 +302,9 @@ define void @test_revv8f32_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv8f32_vl256:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: rev z0.s, z0.s
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <8 x float>, ptr %a
%tmp2 = shufflevector <8 x float> %tmp1, <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
@@ -320,10 +316,9 @@ define void @test_revv4f64_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv4f64_vl256:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: rev z0.d, z0.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <4 x double>, ptr %a
%tmp2 = shufflevector <4 x double> %tmp1, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
@@ -335,10 +330,9 @@ define void @test_revv8i32v8i32(ptr %a, ptr %b) #1 {
; CHECK-LABEL: test_revv8i32v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
+; CHECK-NEXT: ldr z0, [x1]
; CHECK-NEXT: rev z0.s, z0.s
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <8 x i32>, ptr %a
%tmp2 = load <8 x i32>, ptr %b
@@ -351,13 +345,12 @@ define void @test_rev_fail(ptr %a) #1 {
; CHECK-LABEL: test_rev_fail:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: adrp x8, .LCPI20_0
; CHECK-NEXT: add x8, x8, :lo12:.LCPI20_0
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1h { z1.h }, p0/z, [x8]
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ldr z1, [x8]
; CHECK-NEXT: tbl z0.h, { z0.h }, z1.h
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <16 x i16>, ptr %a
%tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> poison, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
@@ -379,7 +372,6 @@ define void @test_revv8i16v8i16(ptr %a, ptr %b, ptr %c) #1 {
; CHECK-NEXT: ldr q0, [x1]
; CHECK-NEXT: ldr q5, [x0]
; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov h1, v0.h[4]
; CHECK-NEXT: mov h2, v0.h[5]
; CHECK-NEXT: mov h3, v0.h[6]
@@ -409,8 +401,8 @@
; CHECK-NEXT: str h0, [sp, #10]
; CHECK-NEXT: str h1, [sp, #8]
; CHECK-NEXT: str h2, [sp, #4]
-; CHECK-NEXT: ld1h { z0.h }, 
p0/z, [x8] -; CHECK-NEXT: st1h { z0.h }, p0, [x2] +; CHECK-NEXT: ldr z0, [x8] +; CHECK-NEXT: str z0, [x2] ; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll index 25143837285b0..24c5dccd5b420 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll @@ -7,11 +7,10 @@ target triple = "aarch64-unknown-linux-gnu" define void @zip1_v32i8(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: zip1_v32i8: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.b -; VBITS_EQ_256-NEXT: ld1b { z0.b }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ld1b { z1.b }, p0/z, [x1] +; VBITS_EQ_256-NEXT: ldr z0, [x0] +; VBITS_EQ_256-NEXT: ldr z1, [x1] ; VBITS_EQ_256-NEXT: zip1 z0.b, z0.b, z1.b -; VBITS_EQ_256-NEXT: st1b { z0.b }, p0, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: zip1_v32i8: @@ -32,30 +31,28 @@ define void @zip1_v32i8(ptr %a, ptr %b) #0 { define void @zip_v32i16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: zip_v32i16: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.h -; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, #1, mul vl] -; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, #1, mul vl] -; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1] +; VBITS_EQ_256-NEXT: ldr z0, [x0, #1, mul vl] +; VBITS_EQ_256-NEXT: ldr z1, [x0] +; VBITS_EQ_256-NEXT: ldr z2, [x1, #1, mul vl] +; VBITS_EQ_256-NEXT: ldr z3, [x1] ; VBITS_EQ_256-NEXT: zip1 z5.h, z0.h, z2.h ; VBITS_EQ_256-NEXT: zip2 z0.h, z0.h, z2.h ; VBITS_EQ_256-NEXT: zip1 z4.h, z1.h, z3.h ; VBITS_EQ_256-NEXT: zip2 z1.h, z1.h, z3.h ; VBITS_EQ_256-NEXT: add z2.h, z4.h, z5.h ; VBITS_EQ_256-NEXT: add z0.h, z1.h, z0.h -; VBITS_EQ_256-NEXT: st1h { z2.h }, p0, [x0] -; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, #1, mul vl] +; VBITS_EQ_256-NEXT: str z2, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0, #1, mul vl] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: zip_v32i16: ; VBITS_EQ_512: // %bb.0: -; VBITS_EQ_512-NEXT: ptrue p0.h -; VBITS_EQ_512-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_EQ_512-NEXT: ld1h { z1.h }, p0/z, [x1] +; VBITS_EQ_512-NEXT: ldr z0, [x0] +; VBITS_EQ_512-NEXT: ldr z1, [x1] ; VBITS_EQ_512-NEXT: zip1 z2.h, z0.h, z1.h ; VBITS_EQ_512-NEXT: zip2 z0.h, z0.h, z1.h ; VBITS_EQ_512-NEXT: add z0.h, z2.h, z0.h -; VBITS_EQ_512-NEXT: st1h { z0.h }, p0, [x0] +; VBITS_EQ_512-NEXT: str z0, [x0] ; VBITS_EQ_512-NEXT: ret %tmp1 = load <32 x i16>, ptr %a %tmp2 = load <32 x i16>, ptr %b @@ -69,11 +66,10 @@ define void @zip_v32i16(ptr %a, ptr %b) #0 { define void @zip1_v16i16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: zip1_v16i16: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.h -; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x1] +; VBITS_EQ_256-NEXT: ldr z0, [x0] +; VBITS_EQ_256-NEXT: ldr z1, [x1] ; VBITS_EQ_256-NEXT: zip1 z0.h, z0.h, z1.h -; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: zip1_v16i16: @@ -94,11 +90,10 @@ define void @zip1_v16i16(ptr %a, ptr %b) #0 { define void @zip1_v8i32(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: zip1_v8i32: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.s -; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ld1w { z1.s }, 
p0/z, [x1] +; VBITS_EQ_256-NEXT: ldr z0, [x0] +; VBITS_EQ_256-NEXT: ldr z1, [x1] ; VBITS_EQ_256-NEXT: zip1 z0.s, z0.s, z1.s -; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: zip1_v8i32: @@ -119,13 +114,12 @@ define void @zip1_v8i32(ptr %a, ptr %b) #0 { define void @zip_v4f64(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: zip_v4f64: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.d -; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x1] +; VBITS_EQ_256-NEXT: ldr z0, [x0] +; VBITS_EQ_256-NEXT: ldr z1, [x1] ; VBITS_EQ_256-NEXT: zip1 z2.d, z0.d, z1.d ; VBITS_EQ_256-NEXT: zip2 z0.d, z0.d, z1.d ; VBITS_EQ_256-NEXT: fadd z0.d, z2.d, z0.d -; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: zip_v4f64: @@ -186,10 +180,9 @@ define void @zip_v4i32(ptr %a, ptr %b) #0 { define void @zip1_v8i32_undef(ptr %a) #0 { ; VBITS_EQ_256-LABEL: zip1_v8i32_undef: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.s -; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0] +; VBITS_EQ_256-NEXT: ldr z0, [x0] ; VBITS_EQ_256-NEXT: zip1 z0.s, z0.s, z0.s -; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: zip1_v8i32_undef: @@ -208,13 +201,12 @@ define void @zip1_v8i32_undef(ptr %a) #0 { define void @trn_v32i8(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: trn_v32i8: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.b -; VBITS_EQ_256-NEXT: ld1b { z0.b }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ld1b { z1.b }, p0/z, [x1] +; VBITS_EQ_256-NEXT: ldr z0, [x0] +; VBITS_EQ_256-NEXT: ldr z1, [x1] ; VBITS_EQ_256-NEXT: trn1 z2.b, z0.b, z1.b ; VBITS_EQ_256-NEXT: trn2 z0.b, z0.b, z1.b ; VBITS_EQ_256-NEXT: add z0.b, z2.b, z0.b -; VBITS_EQ_256-NEXT: st1b { z0.b }, p0, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: trn_v32i8: @@ -239,30 +231,28 @@ define void @trn_v32i8(ptr %a, ptr %b) #0 { define void @trn_v32i16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: trn_v32i16: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.h -; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x1] -; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x0, #1, mul vl] -; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1, #1, mul vl] +; VBITS_EQ_256-NEXT: ldr z0, [x0] +; VBITS_EQ_256-NEXT: ldr z1, [x1] +; VBITS_EQ_256-NEXT: ldr z2, [x0, #1, mul vl] +; VBITS_EQ_256-NEXT: ldr z3, [x1, #1, mul vl] ; VBITS_EQ_256-NEXT: trn1 z4.h, z0.h, z1.h ; VBITS_EQ_256-NEXT: trn2 z0.h, z0.h, z1.h ; VBITS_EQ_256-NEXT: trn1 z1.h, z2.h, z3.h ; VBITS_EQ_256-NEXT: trn2 z2.h, z2.h, z3.h ; VBITS_EQ_256-NEXT: add z0.h, z4.h, z0.h ; VBITS_EQ_256-NEXT: add z1.h, z1.h, z2.h -; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0] -; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0, #1, mul vl] +; VBITS_EQ_256-NEXT: str z0, [x0] +; VBITS_EQ_256-NEXT: str z1, [x0, #1, mul vl] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: trn_v32i16: ; VBITS_EQ_512: // %bb.0: -; VBITS_EQ_512-NEXT: ptrue p0.h -; VBITS_EQ_512-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_EQ_512-NEXT: ld1h { z1.h }, p0/z, [x1] +; VBITS_EQ_512-NEXT: ldr z0, [x0] +; VBITS_EQ_512-NEXT: ldr z1, [x1] ; VBITS_EQ_512-NEXT: trn1 z2.h, z0.h, z1.h ; VBITS_EQ_512-NEXT: trn2 z0.h, z0.h, z1.h ; VBITS_EQ_512-NEXT: add z0.h, z2.h, z0.h -; VBITS_EQ_512-NEXT: st1h { z0.h }, p0, [x0] +; VBITS_EQ_512-NEXT: str z0, [x0] ; 
VBITS_EQ_512-NEXT: ret %tmp1 = load <32 x i16>, ptr %a %tmp2 = load <32 x i16>, ptr %b @@ -276,13 +266,12 @@ define void @trn_v32i16(ptr %a, ptr %b) #0 { define void @trn_v16i16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: trn_v16i16: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.h -; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x1] +; VBITS_EQ_256-NEXT: ldr z0, [x0] +; VBITS_EQ_256-NEXT: ldr z1, [x1] ; VBITS_EQ_256-NEXT: trn1 z2.h, z0.h, z1.h ; VBITS_EQ_256-NEXT: trn2 z0.h, z0.h, z1.h ; VBITS_EQ_256-NEXT: add z0.h, z2.h, z0.h -; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: trn_v16i16: @@ -307,13 +296,12 @@ define void @trn_v16i16(ptr %a, ptr %b) #0 { define void @trn_v8i32(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: trn_v8i32: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.s -; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x1] +; VBITS_EQ_256-NEXT: ldr z0, [x0] +; VBITS_EQ_256-NEXT: ldr z1, [x1] ; VBITS_EQ_256-NEXT: trn1 z2.s, z0.s, z1.s ; VBITS_EQ_256-NEXT: trn2 z0.s, z0.s, z1.s ; VBITS_EQ_256-NEXT: add z0.s, z2.s, z0.s -; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: trn_v8i32: @@ -338,13 +326,12 @@ define void @trn_v8i32(ptr %a, ptr %b) #0 { define void @trn_v4f64(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: trn_v4f64: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.d -; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x1] +; VBITS_EQ_256-NEXT: ldr z0, [x0] +; VBITS_EQ_256-NEXT: ldr z1, [x1] ; VBITS_EQ_256-NEXT: trn1 z2.d, z0.d, z1.d ; VBITS_EQ_256-NEXT: trn2 z0.d, z0.d, z1.d ; VBITS_EQ_256-NEXT: fadd z0.d, z2.d, z0.d -; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: trn_v4f64: @@ -389,12 +376,11 @@ define void @trn_v4f32(ptr %a, ptr %b) #0 { define void @trn_v8i32_undef(ptr %a) #0 { ; VBITS_EQ_256-LABEL: trn_v8i32_undef: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: ptrue p0.s -; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0] +; VBITS_EQ_256-NEXT: ldr z0, [x0] ; VBITS_EQ_256-NEXT: trn1 z1.s, z0.s, z0.s ; VBITS_EQ_256-NEXT: trn2 z0.s, z0.s, z0.s ; VBITS_EQ_256-NEXT: add z0.s, z1.s, z0.s -; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_EQ_256-NEXT: str z0, [x0] ; VBITS_EQ_256-NEXT: ret ; ; VBITS_EQ_512-LABEL: trn_v8i32_undef: @@ -419,11 +405,10 @@ define void @trn_v8i32_undef(ptr %a) #0 { define void @zip2_v32i8(ptr %a, ptr %b) #1 { ; CHECK-LABEL: zip2_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ldr z1, [x1] ; CHECK-NEXT: zip2 z0.b, z0.b, z1.b -; CHECK-NEXT: st1b { z0.b }, p0, [x0] +; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %tmp1 = load volatile <32 x i8>, ptr %a %tmp2 = load volatile <32 x i8>, ptr %b @@ -437,11 +422,10 @@ define void @zip2_v32i8(ptr %a, ptr %b) #1 { define void @zip2_v16i16(ptr %a, ptr %b) #1 { ; CHECK-LABEL: zip2_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ldr z1, [x1] ; CHECK-NEXT: zip2 z0.h, z0.h, z1.h -; CHECK-NEXT: st1h { z0.h }, p0, [x0] +; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %tmp1 = load volatile 
<16 x i16>, ptr %a
%tmp2 = load volatile <16 x i16>, ptr %b
@@ -455,11 +439,10 @@ define void @zip2_v8i32(ptr %a, ptr %b) #1 {
; CHECK-LABEL: zip2_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: zip2 z0.s, z0.s, z1.s
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load volatile <8 x i32>, ptr %a
%tmp2 = load volatile <8 x i32>, ptr %b
@@ -472,10 +455,9 @@ define void @zip2_v8i32_undef(ptr %a) #1 {
; CHECK-LABEL: zip2_v8i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: zip2 z0.s, z0.s, z0.s
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load volatile <8 x i32>, ptr %a
%tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> poison, <8 x i32> <i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
@@ -488,13 +470,12 @@ define void @uzp_v32i8(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
-; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: uzp1 z2.b, z0.b, z1.b
; CHECK-NEXT: uzp2 z0.b, z0.b, z1.b
; CHECK-NEXT: add z0.b, z2.b, z0.b
-; CHECK-NEXT: st1b { z0.b }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <32 x i8>, ptr %a
%tmp2 = load <32 x i8>, ptr %b
@@ -511,19 +492,18 @@ define void @uzp_v32i16(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v32i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
-; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1, #1, mul vl]
-; CHECK-NEXT: ld1h { z3.h }, p0/z, [x1]
+; CHECK-NEXT: ldr z0, [x0, #1, mul vl]
+; CHECK-NEXT: ldr z1, [x0]
+; CHECK-NEXT: ldr z2, [x1, #1, mul vl]
+; CHECK-NEXT: ldr z3, [x1]
; CHECK-NEXT: uzp1 z4.h, z1.h, z0.h
; CHECK-NEXT: uzp2 z0.h, z1.h, z0.h
; CHECK-NEXT: uzp1 z1.h, z3.h, z2.h
; CHECK-NEXT: uzp2 z2.h, z3.h, z2.h
; CHECK-NEXT: add z0.h, z4.h, z0.h
; CHECK-NEXT: add z1.h, z1.h, z2.h
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
-; CHECK-NEXT: st1h { z1.h }, p0, [x0, #1, mul vl]
+; CHECK-NEXT: str z0, [x0]
+; CHECK-NEXT: str z1, [x0, #1, mul vl]
; CHECK-NEXT: ret
%tmp1 = load <32 x i16>, ptr %a
%tmp2 = load <32 x i16>, ptr %b
@@ -539,13 +519,12 @@ define void @uzp_v16i16(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: uzp1 z2.h, z0.h, z1.h
; CHECK-NEXT: uzp2 z0.h, z0.h, z1.h
; CHECK-NEXT: add z0.h, z2.h, z0.h
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <16 x i16>, ptr %a
%tmp2 = load <16 x i16>, ptr %b
@@ -561,13 +540,12 @@ define void @uzp_v8f32(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: uzp1 z2.s, z0.s, z1.s
; CHECK-NEXT: uzp2 z0.s, z0.s, z1.s
; CHECK-NEXT: fadd z0.s, z2.s, z0.s
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <8 x float>, ptr %a
%tmp2 = load <8 x float>, ptr %b
@@ -583,13 +561,12 @@ define void @uzp_v4i64(ptr %a, ptr %b) #1 {
; CHECK-LABEL: uzp_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: ldr z0, [x0]
+; CHECK-NEXT: ldr z1, [x1]
; CHECK-NEXT: uzp1 z2.d, z0.d, z1.d
; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
; CHECK-NEXT: add z0.d, z2.d, z0.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <4 x i64>, ptr %a
%tmp2 = load <4 x i64>, ptr %b
@@ -624,12 +601,11 @@ define void @uzp_v8i16(ptr %a, ptr %b) #1 {
define void @uzp_v8i32_undef(ptr %a) #1 {
; CHECK-LABEL: uzp_v8i32_undef:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: uzp1 z1.s, z0.s, z0.s
; CHECK-NEXT: uzp2 z0.s, z0.s, z0.s
; CHECK-NEXT: add z0.s, z1.s, z0.s
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%tmp1 = load <8 x i32>, ptr %a
%tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 0, i32 2, i32 4, i32 6>
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
index 4d8855cd25772..23ae5f00b5a45 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll
@@ -49,8 +49,8 @@ define i1 @ptest_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
define i1 @ptest_v16i1_512bit_sve(ptr %a, ptr %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_v16i1_512bit_sve:
; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@@ -99,11 +99,11 @@ declare i1 @llvm.vector.reduce.or.i1.v16i1(<16 x i1>)
define i1 @ptest_and_v16i1_512bit_sve(ptr %a, ptr %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_and_v16i1_512bit_sve:
; CHECK: // %bb.0:
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
-; CHECK-NEXT: fcmne p0.s, p1/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: ldr z0, [x1]
+; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
index b24a9513b83e3..d916f26f9b26b 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
@@ -64,7 +64,7 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
; CHECK-NEXT: lsl z2.s, z2.s, #31
; CHECK-NEXT: asr z0.s, z0.s, #31
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
+; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: lsl z3.s, z3.s, #31
; CHECK-NEXT: asr z2.s, z2.s, #31
; CHECK-NEXT: and z0.s, z0.s, #0x1
@@ -72,19 +72,19 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: mov 
z1.s, p1/m, #0 // =0x0 ; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #2, mul vl] +; CHECK-NEXT: ldr z0, [x0, #2, mul vl] ; CHECK-NEXT: and z3.s, z3.s, #0x1 -; CHECK-NEXT: cmpne p4.s, p0/z, z2.s, #0 -; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: st1w { z1.s }, p0, [x0] +; CHECK-NEXT: str z1, [x0] ; CHECK-NEXT: cmpne p3.s, p0/z, z3.s, #0 -; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, #3, mul vl] +; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0 +; CHECK-NEXT: ldr z3, [x0, #3, mul vl] +; CHECK-NEXT: ldr z2, [x0, #1, mul vl] ; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z2.s, p4/m, #0 // =0x0 -; CHECK-NEXT: st1w { z0.s }, p0, [x0, #2, mul vl] ; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0 -; CHECK-NEXT: st1w { z2.s }, p0, [x0, #1, mul vl] -; CHECK-NEXT: st1w { z3.s }, p0, [x0, #3, mul vl] +; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0 +; CHECK-NEXT: str z0, [x0, #2, mul vl] +; CHECK-NEXT: str z3, [x0, #3, mul vl] +; CHECK-NEXT: str z2, [x0, #1, mul vl] ; CHECK-NEXT: .LBB1_2: // %exit ; CHECK-NEXT: ret %broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll index a0dd0408025a6..a69808d32ed73 100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll @@ -701,10 +701,9 @@ define void @splat_imm_v8f64(ptr %a) vscale_range(4,0) #0 { define void @load_splat_v8f32(ptr %a, ptr %b) vscale_range(2,2) #0 { ; CHECK-LABEL: load_splat_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ldr z0, [x0] ; CHECK-NEXT: mov z0.s, s0 -; CHECK-NEXT: st1w { z0.s }, p0, [x1] +; CHECK-NEXT: str z0, [x1] ; CHECK-NEXT: ret %v = load <8 x float>, ptr %a %splat = shufflevector <8 x float> %v, <8 x float> poison, <8 x i32> zeroinitializer @@ -715,10 +714,9 @@ define void @load_splat_v8f32(ptr %a, ptr %b) vscale_range(2,2) #0 { define void @load_splat_v4f64(ptr %a, ptr %b) vscale_range(2,2) #0 { ; CHECK-LABEL: load_splat_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] +; CHECK-NEXT: ldr z0, [x0] ; CHECK-NEXT: mov z0.d, d0 -; CHECK-NEXT: st1d { z0.d }, p0, [x1] +; CHECK-NEXT: str z0, [x1] ; CHECK-NEXT: ret %v = load <4 x double>, ptr %a %splat = shufflevector <4 x double> %v, <4 x double> poison, <4 x i32> zeroinitializer @@ -729,10 +727,9 @@ define void @load_splat_v4f64(ptr %a, ptr %b) vscale_range(2,2) #0 { define void @load_splat_v32i8(ptr %a, ptr %b) vscale_range(2,2) #0 { ; CHECK-LABEL: load_splat_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] +; CHECK-NEXT: ldr z0, [x0] ; CHECK-NEXT: mov z0.b, b0 -; CHECK-NEXT: st1b { z0.b }, p0, [x1] +; CHECK-NEXT: str z0, [x1] ; CHECK-NEXT: ret %v = load <32 x i8>, ptr %a %splat = shufflevector <32 x i8> %v, <32 x i8> poison, <32 x i32> zeroinitializer @@ -743,10 +740,9 @@ define void @load_splat_v32i8(ptr %a, ptr %b) vscale_range(2,2) #0 { define void @load_splat_v16i16(ptr %a, ptr %b) vscale_range(2,2) #0 { ; CHECK-LABEL: load_splat_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: ldr z0, [x0] ; CHECK-NEXT: mov z0.h, h0 -; CHECK-NEXT: st1h { z0.h }, p0, [x1] +; CHECK-NEXT: str z0, [x1] ; CHECK-NEXT: ret %v = load <16 x i16>, ptr %a %splat = shufflevector <16 x i16> %v, 
<16 x i16> poison, <16 x i32> zeroinitializer
@@ -757,10 +753,9 @@ define void @load_splat_v8i32(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: mov z0.s, s0
-; CHECK-NEXT: st1w { z0.s }, p0, [x1]
+; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%v = load <8 x i32>, ptr %a
%splat = shufflevector <8 x i32> %v, <8 x i32> poison, <8 x i32> zeroinitializer
@@ -771,10 +766,9 @@ define void @load_splat_v4i64(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: mov z0.d, d0
-; CHECK-NEXT: st1d { z0.d }, p0, [x1]
+; CHECK-NEXT: str z0, [x1]
; CHECK-NEXT: ret
%v = load <4 x i64>, ptr %a
%splat = shufflevector <4 x i64> %v, <4 x i64> poison, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
index 20659cde83ee0..3c8b09fe40985 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll
@@ -376,11 +376,10 @@ define <8 x i8> @negative_test_shuffle_index_size_op_both_maxhw(ptr %a, ptr %b)
define <8 x i8> @shuffle_index_size_op1_maxhw(ptr %a, ptr %b) "target-features"="+sve2" vscale_range(16,16) {
; CHECK-LABEL: shuffle_index_size_op1_maxhw:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: add x8, x8, :lo12:.LCPI6_0
; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8]
+; CHECK-NEXT: ldr z0, [x8]
; CHECK-NEXT: tbl z0.b, { z1.b }, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index 14948647c2f8d..dcf3317a98b99 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -374,11 +374,7 @@ define <vscale x 2 x i64> @insert_fixed_v4i64_nxv2i64(<vscale x 2 x i64> %vec, p
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
-; CHECK-NEXT: str z0, [sp]
-; CHECK-NEXT: st1d { z1.d }, p0, [sp]
-; CHECK-NEXT: ldr z0, [sp]
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
index 8b631199b0594..e5ab956d09e8a 100644
--- a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
@@ -41,16 +41,16 @@ define void @test_post_ld1_int_fixed(ptr %data, i64 %idx, ptr %addr, ptr %res_pt
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ldr z2, [x2]
; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: ptrue p2.d, vl1
-; CHECK-NEXT: ld1d { z2.d }, p0/z, [x2]
+; CHECK-NEXT: ptrue p1.d, vl1
; CHECK-NEXT: ldr x9, [x0, x1, lsl #3]
-; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: mov z0.d, p2/m, x8
-; CHECK-NEXT: mov z2.d, p1/m, x9
+; CHECK-NEXT: mov z0.d, p1/m, x8
+; CHECK-NEXT: mov z2.d, p0/m, x9
; CHECK-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x3]
+; CHECK-NEXT: str z0, [x3]
; CHECK-NEXT: ret
%A = load <4 x i64>, ptr %addr
%ld1 = load i64, ptr %data
@@ -70,15 +70,15 @@ define void @test_post_ld1_double_fixed(ptr %data, i64 %idx, ptr %addr, ptr %res
; CHECK-NEXT: index z0.d, #0, #1
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
-; CHECK-NEXT: ptrue p2.d, vl1
+; CHECK-NEXT: ptrue p1.d, vl1
; CHECK-NEXT: ldr d2, [x0, x1, lsl #3]
-; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x2]
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ldr z0, [x2]
; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: sel z1.d, p2, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, p1/m, d2
+; CHECK-NEXT: sel z1.d, p1, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, p0/m, d2
; CHECK-NEXT: fadd z0.d, z1.d, z0.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x3]
+; CHECK-NEXT: str z0, [x3]
; CHECK-NEXT: ret
%A = load <4 x double>, ptr %addr
%ld1 = load double, ptr %data
diff --git a/llvm/test/CodeGen/AArch64/sve-uunpklo-load-uzp1-store-combine.ll b/llvm/test/CodeGen/AArch64/sve-uunpklo-load-uzp1-store-combine.ll
index 8fa23ed2038ed..9ef8552376a73 100644
--- a/llvm/test/CodeGen/AArch64/sve-uunpklo-load-uzp1-store-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-uunpklo-load-uzp1-store-combine.ll
@@ -86,8 +86,7 @@ define <vscale x 2 x i64> @uunpklo_i32_invalid(ptr %b) #0 {
define <vscale x 2 x i64> @uunpklo_invalid_all(ptr %b) #0 {
; CHECK-LABEL: uunpklo_invalid_all:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ldr z0, [x0]
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: ret
%mask = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
@@ -183,8 +182,7 @@ define void @uzp1_invalid_all(<vscale x 2 x i64> %a, ptr %b) #0 {
; CHECK-LABEL: uzp1_invalid_all:
; CHECK: // %bb.0:
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: str z0, [x0]
; CHECK-NEXT: ret
%a.bc = bitcast <vscale x 2 x i64> %a to <vscale x 4 x i32>
%uzp = call <vscale x 4 x i32> @llvm.aarch64.sve.uzp1.nxv4i32(<vscale x 4 x i32> %a.bc, <vscale x 4 x i32> %a.bc)
diff --git a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
index 5d5aa4b1c0e92..16d26e442d306 100644
--- a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
@@ -62,15 +62,14 @@ attributes #1 = { 
"target-features"="+sve" vscale_range(1,1) } define void @func_vscale2_2(ptr %a, ptr %b) #2 { ; CHECK-LABEL: func_vscale2_2: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0, #1, mul vl] -; CHECK-NEXT: ld1w { z3.s }, p0/z, [x1, #1, mul vl] +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ldr z1, [x1] +; CHECK-NEXT: ldr z2, [x0, #1, mul vl] +; CHECK-NEXT: ldr z3, [x1, #1, mul vl] ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: add z1.s, z2.s, z3.s -; CHECK-NEXT: st1w { z0.s }, p0, [x0] -; CHECK-NEXT: st1w { z1.s }, p0, [x0, #1, mul vl] +; CHECK-NEXT: str z0, [x0] +; CHECK-NEXT: str z1, [x0, #1, mul vl] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a %op2 = load <16 x i32>, ptr %b @@ -107,11 +106,10 @@ attributes #3 = { "target-features"="+sve" vscale_range(2,4) } define void @func_vscale4_4(ptr %a, ptr %b) #4 { ; CHECK-LABEL: func_vscale4_4: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: ldr z0, [x0] +; CHECK-NEXT: ldr z1, [x1] ; CHECK-NEXT: add z0.s, z0.s, z1.s -; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: str z0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a %op2 = load <16 x i32>, ptr %b