Skip to content

Commit db0e376

Browse files
[AArch64] Fix failure with inline asm and svcount (#112537)
This fixes an issue where the compiler runs into an assertion failure for the following example:

    register svcount_t pred asm("pn8") = svptrue_c8();
    asm("ld1w { z0.s, z4.s, z8.s, z12.s }, %[pred]/z, [x0]\n"
        :
        : [pred] "Uph" (pred)
        : "memory", "cc");

Here the register constraint that ends up in the LLVM IR is "{pn8}", but the code in `TargetLowering::getRegForInlineAsmConstraint` that parses that string follows a path where it queries a suitable register class for this register (i.e. the PPRorPNR register class), for which it then chooses `nxv16i1` as a suitable type. These choices individually are correct, but the combined result isn't, because the type should be `aarch64svcount`. This then results in issues later on in SelectionDAGBuilder.cpp in CopyToReg, because the type of the actual value and the type computed from the constraint don't match. This PR pre-empts this issue by parsing the predicate register explicitly and returning the correct register class.
1 parent 9a8292f commit db0e376

File tree

2 files changed

+96
-0
lines changed

2 files changed

+96
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11867,6 +11867,36 @@ const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
1186711867

1186811868
// Identifies which predicate inline-asm constraint code was recognised in a
// constraint string; there is one enumerator per code (Uph, Upl, Upa).
enum class PredicateConstraint { Uph, Upl, Upa };
1186911869

11870+
// Returns a {Reg, RegisterClass} tuple if the constraint is
11871+
// a specific predicate register.
11872+
//
11873+
// For some constraint like "{pn3}" the default path in
11874+
// TargetLowering::getRegForInlineAsmConstraint() leads it to determine that a
11875+
// suitable register class for this register is "PPRorPNR", after which it
11876+
// determines that nxv16i1 is an appropriate type for the constraint, which is
11877+
// not what we want. The code here pre-empts this by matching the register
11878+
// explicitly.
11879+
static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
11880+
parsePredicateRegAsConstraint(StringRef Constraint) {
11881+
if (!Constraint.starts_with('{') || !Constraint.ends_with('}') ||
11882+
Constraint[1] != 'p')
11883+
return std::nullopt;
11884+
11885+
Constraint = Constraint.substr(2, Constraint.size() - 3);
11886+
bool IsPredicateAsCount = Constraint.starts_with("n");
11887+
if (IsPredicateAsCount)
11888+
Constraint = Constraint.drop_front(1);
11889+
11890+
unsigned V;
11891+
if (Constraint.getAsInteger(10, V) || V > 31)
11892+
return std::nullopt;
11893+
11894+
if (IsPredicateAsCount)
11895+
return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
11896+
else
11897+
return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
11898+
}
11899+
1187011900
static std::optional<PredicateConstraint>
1187111901
parsePredicateConstraint(StringRef Constraint) {
1187211902
return StringSwitch<std::optional<PredicateConstraint>>(Constraint)
@@ -12114,6 +12144,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
1211412144
break;
1211512145
}
1211612146
} else {
12147+
if (const auto P = parsePredicateRegAsConstraint(Constraint))
12148+
return *P;
1211712149
if (const auto PC = parsePredicateConstraint(Constraint))
1211812150
if (const auto *RegClass = getPredicateRegisterClass(*PC, VT))
1211912151
return std::make_pair(0U, RegClass);

llvm/test/CodeGen/AArch64/aarch64-sve-asm.ll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,3 +119,67 @@ define <vscale x 8 x half> @test_svfadd_f16_Uph_constraint(<vscale x 16 x i1> %P
119119
%1 = tail call <vscale x 8 x half> asm "fadd $0.h, $1/m, $2.h, $3.h", "=w,@3Uph,w,w"(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
120120
ret <vscale x 8 x half> %1
121121
}
122+
123+
; Inline asm using the explicit register constraint "{p0}" on a
; <vscale x 16 x i1> operand. The checks expect the ptrue result to be copied
; into $p0 and $p0 to be passed to the INLINEASM as a register use.
define void @explicit_p0(ptr %p) {
124+
; CHECK-LABEL: name: explicit_p0
125+
; CHECK: bb.0 (%ir-block.0):
126+
; CHECK-NEXT: liveins: $x0
127+
; CHECK-NEXT: {{ $}}
128+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
129+
; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
130+
; CHECK-NEXT: $p0 = COPY [[PTRUE_B]]
131+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
132+
; CHECK-NEXT: INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $p0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
133+
; CHECK-NEXT: RET_ReallyLR
134+
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.b8(i32 31)
135+
%2 = tail call i64 asm sideeffect "ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", "=r,{p0},0"(<vscale x 16 x i1> %1, ptr %p)
136+
ret void
137+
}
138+
139+
; Same as explicit_p0 but with the explicit register constraint "{p8}". The
; checks expect the ptrue result to be copied into $p8 and $p8 to be passed to
; the INLINEASM as a register use.
; NOTE(review): the "_invalid" suffix presumably means that p8 is not a valid
; governing predicate for the ld4w in the asm string, while the constraint
; itself is still honoured by the compiler - confirm.
define void @explicit_p8_invalid(ptr %p) {
140+
; CHECK-LABEL: name: explicit_p8_invalid
141+
; CHECK: bb.0 (%ir-block.0):
142+
; CHECK-NEXT: liveins: $x0
143+
; CHECK-NEXT: {{ $}}
144+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
145+
; CHECK-NEXT: [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
146+
; CHECK-NEXT: $p8 = COPY [[PTRUE_B]]
147+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
148+
; CHECK-NEXT: INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $p8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
149+
; CHECK-NEXT: RET_ReallyLR
150+
%1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.b8(i32 31)
151+
%2 = tail call i64 asm sideeffect "ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", "=r,{p8},0"(<vscale x 16 x i1> %1, ptr %p)
152+
ret void
153+
}
154+
155+
; Inline asm using the explicit register constraint "{pn8}" on a
; target("aarch64.svcount") operand. The checks expect the PTRUE_C_B result to
; be copied into $pn8 and $pn8 to be passed to the INLINEASM as a register use.
define void @explicit_pn8(ptr %p) {
156+
; CHECK-LABEL: name: explicit_pn8
157+
; CHECK: bb.0 (%ir-block.0):
158+
; CHECK-NEXT: liveins: $x0
159+
; CHECK-NEXT: {{ $}}
160+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
161+
; CHECK-NEXT: [[PTRUE_C_B:%[0-9]+]]:pnr_p8to15 = PTRUE_C_B implicit $vg
162+
; CHECK-NEXT: $pn8 = COPY [[PTRUE_C_B]]
163+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
164+
; CHECK-NEXT: INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
165+
; CHECK-NEXT: RET_ReallyLR
166+
%1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
167+
%2 = tail call i64 asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", "=r,{pn8},0"(target("aarch64.svcount") %1, ptr %p)
168+
ret void
169+
}
170+
171+
; Same as explicit_pn8 but with the explicit register constraint "{pn0}". The
; checks expect the PTRUE_C_B result to be copied into $pn0 and $pn0 to be
; passed to the INLINEASM as a register use.
; NOTE(review): the "_invalid" suffix presumably means that pn0 is not a valid
; predicate-as-counter operand for the ld1w in the asm string, while the
; constraint itself is still honoured by the compiler - confirm.
define void @explicit_pn0_invalid(ptr %p) {
172+
; CHECK-LABEL: name: explicit_pn0_invalid
173+
; CHECK: bb.0 (%ir-block.0):
174+
; CHECK-NEXT: liveins: $x0
175+
; CHECK-NEXT: {{ $}}
176+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
177+
; CHECK-NEXT: [[PTRUE_C_B:%[0-9]+]]:pnr_p8to15 = PTRUE_C_B implicit $vg
178+
; CHECK-NEXT: $pn0 = COPY [[PTRUE_C_B]]
179+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
180+
; CHECK-NEXT: INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, 3538954 /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
181+
; CHECK-NEXT: RET_ReallyLR
182+
%1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
183+
%2 = tail call i64 asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", "=r,{pn0},0"(target("aarch64.svcount") %1, ptr %p)
184+
ret void
185+
}

0 commit comments

Comments
 (0)