Skip to content

Commit dfef87f

Browse files
[fixup] Fix various issues with variadic arguments
* fix incorrect determination of whether an argument is named * fix a case where a small (<= 16 bytes) unnamed PST argument was passed directly * add tests accordingly
1 parent a16c9e1 commit dfef87f

File tree

2 files changed

+123
-16
lines changed

2 files changed

+123
-16
lines changed

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class AArch64ABIInfo : public ABIInfo {
4242
ABIArgInfo coerceIllegalVector(QualType Ty, unsigned &NSRN,
4343
unsigned &NPRN) const;
4444
ABIArgInfo coerceAndExpandPureScalableAggregate(
45-
QualType Ty, unsigned NVec, unsigned NPred,
45+
QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
4646
const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
4747
unsigned &NPRN) const;
4848
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
@@ -63,11 +63,15 @@ class AArch64ABIInfo : public ABIInfo {
6363
FI.getReturnInfo() =
6464
classifyReturnType(FI.getReturnType(), FI.isVariadic());
6565

66+
unsigned ArgNo = 0;
6667
unsigned NSRN = 0, NPRN = 0;
67-
for (auto &it : FI.arguments())
68-
it.info =
69-
classifyArgumentType(it.type, FI.isVariadic(), /* IsNamedArg */ true,
70-
FI.getCallingConvention(), NSRN, NPRN);
68+
for (auto &it : FI.arguments()) {
69+
const bool IsNamedArg =
70+
!FI.isVariadic() || ArgNo < FI.getRequiredArgs().getNumRequiredArgs();
71+
++ArgNo;
72+
it.info = classifyArgumentType(it.type, FI.isVariadic(), IsNamedArg,
73+
FI.getCallingConvention(), NSRN, NPRN);
74+
}
7175
}
7276

7377
RValue EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
@@ -322,10 +326,10 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty, unsigned &NSRN,
322326
}
323327

324328
ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
325-
QualType Ty, unsigned NVec, unsigned NPred,
329+
QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
326330
const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
327331
unsigned &NPRN) const {
328-
if (NSRN + NVec > 8 || NPRN + NPred > 4)
332+
if (!IsNamedArg || NSRN + NVec > 8 || NPRN + NPred > 4)
329333
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
330334
NSRN += NVec;
331335
NPRN += NPred;
@@ -431,13 +435,13 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
431435

432436
// In AAPCS named arguments of a Pure Scalable Type are passed expanded in
433437
// registers, or indirectly if there are not enough registers.
434-
if (Kind == AArch64ABIKind::AAPCS && IsNamedArg) {
438+
if (Kind == AArch64ABIKind::AAPCS) {
435439
unsigned NVec = 0, NPred = 0;
436440
SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
437441
if (passAsPureScalableType(Ty, NVec, NPred, UnpaddedCoerceToSeq) &&
438442
(NVec + NPred) > 0)
439443
return coerceAndExpandPureScalableAggregate(
440-
Ty, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
444+
Ty, IsNamedArg, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
441445
}
442446

443447
// Aggregates <= 16 bytes are passed directly in registers or on the stack.
@@ -522,7 +526,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
522526
if (passAsPureScalableType(RetTy, NVec, NPred, UnpaddedCoerceToSeq) &&
523527
(NVec + NPred) > 0)
524528
return coerceAndExpandPureScalableAggregate(
525-
RetTy, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
529+
RetTy, /* IsNamedArg */ true, NVec, NPred, UnpaddedCoerceToSeq, NSRN,
530+
NPRN);
526531
}
527532

528533
// Aggregates <= 16 bytes are returned directly in registers or on the stack.

clang/test/CodeGen/aarch64-pure-scalable-args.c

Lines changed: 108 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
// REQUIRES: aarch64-registered-target
66

77
#include <arm_sve.h>
8+
#include <stdarg.h>
89

910
typedef svfloat32_t fvec32 __attribute__((arm_sve_vector_bits(128)));
1011
typedef svfloat64_t fvec64 __attribute__((arm_sve_vector_bits(128)));
@@ -258,6 +259,7 @@ void test_small_pst(SmallPST *p, SmallAgg *s) {
258259
// CHECK-AAPCS: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, ptr noundef, double noundef)
259260
// CHECK-DARWIN: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, i128, double noundef)
260261

262+
261263
// Simple return, PST expanded to registers
262264
// p->a -> p0
263265
// p->x -> q0
@@ -289,7 +291,7 @@ BigPST test_return_big_pst(BigPST *p) {
289291
// CHECK-DARWIN: define void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 %agg.result, ptr nocapture noundef readonly %p)
290292

291293
// Variadic arguments are unnamed, PST passed indirectly.
292-
// (Passing SVE types to bvariadic fucntion currently unsupported by
294+
// (Passing SVE types to a variadic function currently unsupported by
293295
// the AArch64 backend)
294296
// p->a -> p0
295297
// p->x -> q0
@@ -302,16 +304,32 @@ void test_pass_variadic(PST *p, PST *q) {
302304
void pass_variadic_callee(PST, ...);
303305
pass_variadic_callee(*p, *q);
304306
}
305-
// CHECK-AAPCS: declare void @pass_variadic_callee(<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>, ...)
306-
// CHECK-DARWIN: declare void @pass_variadic_callee(ptr noundef, ...)
307+
// CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false)
308+
// CHECK-AAPCS: call void (<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>, ...) @pass_variadic_callee(<vscale x 16 x i1> %1, <vscale x 2 x double> %cast.scalable1, <vscale x 4 x float> %cast.scalable2, <vscale x 4 x float> %cast.scalable3, <vscale x 16 x i8> %cast.scalable4, <vscale x 16 x i1> %12, ptr noundef nonnull %byval-temp)
309+
310+
// CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %p, i64 96, i1 false)
311+
// CHECK-DARWIN: call void @llvm.lifetime.start.p0(i64 96, ptr nonnull %byval-temp1)
312+
// CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp1, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false)
313+
// CHECK-DARWIN: call void (ptr, ...) @pass_variadic_callee(ptr noundef nonnull %byval-temp, ptr noundef nonnull %byval-temp1)
314+
315+
316+
// Test passing a small PST, still passed indirectly, despite being <= 128 bits
317+
void test_small_pst_variadic(SmallPST *p) {
318+
void small_pst_variadic_callee(int, ...);
319+
small_pst_variadic_callee(0, *p);
320+
}
321+
// CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) %byval-temp, ptr noundef nonnull align 16 dereferenceable(16) %p, i64 16, i1 false)
322+
// CHECK-AAPCS: call void (i32, ...) @small_pst_variadic_callee(i32 noundef 0, ptr noundef nonnull %byval-temp)
307323

324+
// CHECK-DARWIN: %0 = load i128, ptr %p, align 16
325+
// CHECK-DARWIN: tail call void (i32, ...) @small_pst_variadic_callee(i32 noundef 0, i128 %0)
308326

309327
// Test handling of a PST argument when passed in registers, from the callee side.
310-
void argpass_callee_side(PST v) {
328+
void test_argpass_callee_side(PST v) {
311329
void use(PST *p);
312330
use(&v);
313331
}
314-
// CHECK-AAPCS: define dso_local void @argpass_callee_side(<vscale x 16 x i1> %0, <vscale x 2 x double> %.coerce1, <vscale x 4 x float> %.coerce3, <vscale x 4 x float> %.coerce5, <vscale x 16 x i8> %.coerce7, <vscale x 16 x i1> %1) local_unnamed_addr #0 {
332+
// CHECK-AAPCS: define dso_local void @test_argpass_callee_side(<vscale x 16 x i1> %0, <vscale x 2 x double> %.coerce1, <vscale x 4 x float> %.coerce3, <vscale x 4 x float> %.coerce5, <vscale x 16 x i8> %.coerce7, <vscale x 16 x i1> %1)
315333
// CHECK-AAPCS-NEXT: entry:
316334
// CHECK-AAPCS-NEXT: %v = alloca %struct.PST, align 16
317335
// CHECK-AAPCS-NEXT: %.coerce = bitcast <vscale x 16 x i1> %0 to <vscale x 2 x i8>
@@ -333,6 +351,90 @@ void argpass_callee_side(PST v) {
333351
// CHECK-AAPCS-NEXT: %.coerce9 = bitcast <vscale x 16 x i1> %1 to <vscale x 2 x i8>
334352
// CHECK-AAPCS-NEXT: %cast.fixed10 = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %.coerce9, i64 0)
335353
// CHECK-AAPCS-NEXT: store <2 x i8> %cast.fixed10, ptr %6, align 16
336-
// CHECK-AAPCS-NEXT: call void @use(ptr noundef nonnull %v) #8
354+
// CHECK-AAPCS-NEXT: call void @use(ptr noundef nonnull %v)
337355
// CHECK-AAPCS-NEXT: ret void
338356
// CHECK-AAPCS-NEXT: }
357+
358+
// Test va_arg operation
359+
#ifdef __cplusplus
360+
extern "C"
361+
#endif
362+
void test_va_arg(int n, ...) {
363+
va_list ap;
364+
va_start(ap, n);
365+
PST v = va_arg(ap, PST);
366+
va_end(ap);
367+
368+
void use1(bvec, fvec32);
369+
use1(v.a, v.y[1]);
370+
}
371+
// CHECK-AAPCS: define dso_local void @test_va_arg(i32 noundef %n, ...)
372+
// CHECK-AAPCS-NEXT: entry:
373+
// CHECK-AAPCS-NEXT: %ap = alloca %struct.__va_list, align 8
374+
// CHECK-AAPCS-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %ap)
375+
// CHECK-AAPCS-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap)
376+
// CHECK-AAPCS-NEXT: %gr_offs_p = getelementptr inbounds nuw i8, ptr %ap, i64 24
377+
// CHECK-AAPCS-NEXT: %gr_offs = load i32, ptr %gr_offs_p, align 8
378+
// CHECK-AAPCS-NEXT: %0 = icmp sgt i32 %gr_offs, -1
379+
// CHECK-AAPCS-NEXT: br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
380+
// CHECK-AAPCS-EMPTY:
381+
// CHECK-AAPCS-NEXT: vaarg.maybe_reg: ; preds = %entry
382+
383+
// Increment by 8, size of the pointer to the argument value, not size of the argument value itself.
384+
385+
// CHECK-AAPCS-NEXT: %new_reg_offs = add nsw i32 %gr_offs, 8
386+
// CHECK-AAPCS-NEXT: store i32 %new_reg_offs, ptr %gr_offs_p, align 8
387+
// CHECK-AAPCS-NEXT: %inreg = icmp ult i32 %gr_offs, -7
388+
// CHECK-AAPCS-NEXT: br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
389+
// CHECK-AAPCS-EMPTY:
390+
// CHECK-AAPCS-NEXT: vaarg.in_reg: ; preds = %vaarg.maybe_reg
391+
// CHECK-AAPCS-NEXT: %reg_top_p = getelementptr inbounds nuw i8, ptr %ap, i64 8
392+
// CHECK-AAPCS-NEXT: %reg_top = load ptr, ptr %reg_top_p, align 8
393+
// CHECK-AAPCS-NEXT: %1 = sext i32 %gr_offs to i64
394+
// CHECK-AAPCS-NEXT: %2 = getelementptr inbounds i8, ptr %reg_top, i64 %1
395+
// CHECK-AAPCS-NEXT: br label %vaarg.end
396+
// CHECK-AAPCS-EMPTY:
397+
// CHECK-AAPCS-NEXT: vaarg.on_stack: ; preds = %vaarg.maybe_reg, %entry
398+
// CHECK-AAPCS-NEXT: %stack = load ptr, ptr %ap, align 8
399+
// CHECK-AAPCS-NEXT: %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
400+
// CHECK-AAPCS-NEXT: store ptr %new_stack, ptr %ap, align 8
401+
// CHECK-AAPCS-NEXT: br label %vaarg.end
402+
// CHECK-AAPCS-EMPTY:
403+
// CHECK-AAPCS-NEXT: vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
404+
// CHECK-AAPCS-NEXT: %vaargs.addr = phi ptr [ %2, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
405+
406+
// Extra indirection, for a composite passed indirectly.
407+
// CHECK-AAPCS-NEXT: %vaarg.addr = load ptr, ptr %vaargs.addr, align 8
408+
409+
// CHECK-AAPCS-NEXT: %v.sroa.0.0.copyload = load <2 x i8>, ptr %vaarg.addr, align 16
410+
// CHECK-AAPCS-NEXT: %v.sroa.43.0.vaarg.addr.sroa_idx = getelementptr inbounds i8, ptr %vaarg.addr, i64 48
411+
// CHECK-AAPCS-NEXT: %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0.vaarg.addr.sroa_idx, align 16
412+
// CHECK-AAPCS-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap)
413+
// CHECK-AAPCS-NEXT: %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %v.sroa.0.0.copyload, i64 0)
414+
// CHECK-AAPCS-NEXT: %3 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
415+
// CHECK-AAPCS-NEXT: %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %v.sroa.43.0.copyload, i64 0)
416+
// CHECK-AAPCS-NEXT: call void @use1(<vscale x 16 x i1> noundef %3, <vscale x 4 x float> noundef %cast.scalable2)
417+
// CHECK-AAPCS-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %ap)
418+
// CHECK-AAPCS-NEXT: ret void
419+
// CHECK-AAPCS-NEXT: }
420+
421+
// CHECK-DARWIN: define void @test_va_arg(i32 noundef %n, ...)
422+
// CHECK-DARWIN-NEXT: entry:
423+
// CHECK-DARWIN-NEXT: %ap = alloca ptr, align 8
424+
// CHECK-DARWIN-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ap)
425+
// CHECK-DARWIN-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap)
426+
// CHECK-DARWIN-NEXT: %argp.cur = load ptr, ptr %ap, align 8
427+
// CHECK-DARWIN-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 8
428+
// CHECK-DARWIN-NEXT: store ptr %argp.next, ptr %ap, align 8
429+
// CHECK-DARWIN-NEXT: %0 = load ptr, ptr %argp.cur, align 8
430+
// CHECK-DARWIN-NEXT: %v.sroa.0.0.copyload = load <2 x i8>, ptr %0, align 16
431+
// CHECK-DARWIN-NEXT: %v.sroa.43.0..sroa_idx = getelementptr inbounds i8, ptr %0, i64 48
432+
// CHECK-DARWIN-NEXT: %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0..sroa_idx, align 16
433+
// CHECK-DARWIN-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap)
434+
// CHECK-DARWIN-NEXT: %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %v.sroa.0.0.copyload, i64 0)
435+
// CHECK-DARWIN-NEXT: %1 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
436+
// CHECK-DARWIN-NEXT: %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %v.sroa.43.0.copyload, i64 0)
437+
// CHECK-DARWIN-NEXT: call void @use1(<vscale x 16 x i1> noundef %1, <vscale x 4 x float> noundef %cast.scalable2)
438+
// CHECK-DARWIN-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ap)
439+
// CHECK-DARWIN-NEXT: ret void
440+
// CHECK-DARWIN-NEXT: }

0 commit comments

Comments
 (0)