Skip to content

Commit dfef87f

Browse files
[fixup] Fix various issues with variadic arguments
* fix incorrect determination of whether an argument is named * fix a case where a small (<= 16 bytes) unnamed PST argument was passed directly * add tests accordingly
1 parent a16c9e1 commit dfef87f

File tree

2 files changed

+123
-16
lines changed

2 files changed

+123
-16
lines changed

clang/lib/CodeGen/Targets/AArch64.cpp

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class AArch64ABIInfo : public ABIInfo {
4242
ABIArgInfo coerceIllegalVector(QualType Ty, unsigned &NSRN,
4343
unsigned &NPRN) const;
4444
ABIArgInfo coerceAndExpandPureScalableAggregate(
45-
QualType Ty, unsigned NVec, unsigned NPred,
45+
QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
4646
const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
4747
unsigned &NPRN) const;
4848
bool isHomogeneousAggregateBaseType(QualType Ty) const override;
@@ -63,11 +63,15 @@ class AArch64ABIInfo : public ABIInfo {
6363
FI.getReturnInfo() =
6464
classifyReturnType(FI.getReturnType(), FI.isVariadic());
6565

66+
unsigned ArgNo = 0;
6667
unsigned NSRN = 0, NPRN = 0;
67-
for (auto &it : FI.arguments())
68-
it.info =
69-
classifyArgumentType(it.type, FI.isVariadic(), /* IsNamedArg */ true,
70-
FI.getCallingConvention(), NSRN, NPRN);
68+
for (auto &it : FI.arguments()) {
69+
const bool IsNamedArg =
70+
!FI.isVariadic() || ArgNo < FI.getRequiredArgs().getNumRequiredArgs();
71+
++ArgNo;
72+
it.info = classifyArgumentType(it.type, FI.isVariadic(), IsNamedArg,
73+
FI.getCallingConvention(), NSRN, NPRN);
74+
}
7175
}
7276

7377
RValue EmitDarwinVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
@@ -322,10 +326,10 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty, unsigned &NSRN,
322326
}
323327

324328
ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
325-
QualType Ty, unsigned NVec, unsigned NPred,
329+
QualType Ty, bool IsNamedArg, unsigned NVec, unsigned NPred,
326330
const SmallVectorImpl<llvm::Type *> &UnpaddedCoerceToSeq, unsigned &NSRN,
327331
unsigned &NPRN) const {
328-
if (NSRN + NVec > 8 || NPRN + NPred > 4)
332+
if (!IsNamedArg || NSRN + NVec > 8 || NPRN + NPred > 4)
329333
return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
330334
NSRN += NVec;
331335
NPRN += NPred;
@@ -431,13 +435,13 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
431435

432436
// In AAPCS named arguments of a Pure Scalable Type are passed expanded in
433437
// registers, or indirectly if there are not enough registers.
434-
if (Kind == AArch64ABIKind::AAPCS && IsNamedArg) {
438+
if (Kind == AArch64ABIKind::AAPCS) {
435439
unsigned NVec = 0, NPred = 0;
436440
SmallVector<llvm::Type *> UnpaddedCoerceToSeq;
437441
if (passAsPureScalableType(Ty, NVec, NPred, UnpaddedCoerceToSeq) &&
438442
(NVec + NPred) > 0)
439443
return coerceAndExpandPureScalableAggregate(
440-
Ty, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
444+
Ty, IsNamedArg, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
441445
}
442446

443447
// Aggregates <= 16 bytes are passed directly in registers or on the stack.
@@ -522,7 +526,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
522526
if (passAsPureScalableType(RetTy, NVec, NPred, UnpaddedCoerceToSeq) &&
523527
(NVec + NPred) > 0)
524528
return coerceAndExpandPureScalableAggregate(
525-
RetTy, NVec, NPred, UnpaddedCoerceToSeq, NSRN, NPRN);
529+
RetTy, /* IsNamedArg */ true, NVec, NPred, UnpaddedCoerceToSeq, NSRN,
530+
NPRN);
526531
}
527532

528533
// Aggregates <= 16 bytes are returned directly in registers or on the stack.

clang/test/CodeGen/aarch64-pure-scalable-args.c

Lines changed: 108 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
// REQUIRES: aarch64-registered-target
66

77
#include <arm_sve.h>
8+
#include <stdarg.h>
89

910
typedef svfloat32_t fvec32 __attribute__((arm_sve_vector_bits(128)));
1011
typedef svfloat64_t fvec64 __attribute__((arm_sve_vector_bits(128)));
@@ -258,6 +259,7 @@ void test_small_pst(SmallPST *p, SmallAgg *s) {
258259
// CHECK-AAPCS: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, ptr noundef, double noundef)
259260
// CHECK-DARWIN: declare void @small_pst_callee([2 x i64], double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, double noundef, i128, double noundef)
260261

262+
261263
// Simple return, PST expanded to registers
262264
// p->a -> p0
263265
// p->x -> q0
@@ -289,7 +291,7 @@ BigPST test_return_big_pst(BigPST *p) {
289291
// CHECK-DARWIN: define void @test_return_big_pst(ptr dead_on_unwind noalias nocapture writable writeonly sret(%struct.BigPST) align 16 %agg.result, ptr nocapture noundef readonly %p)
290292

291293
// Variadic arguments are unnamed, PST passed indirectly.
292-
// (Passing SVE types to bvariadic fucntion currently unsupported by
294+
// (Passing SVE types to a variadic function currently unsupported by
293295
// the AArch64 backend)
294296
// p->a -> p0
295297
// p->x -> q0
@@ -302,16 +304,32 @@ void test_pass_variadic(PST *p, PST *q) {
302304
void pass_variadic_callee(PST, ...);
303305
pass_variadic_callee(*p, *q);
304306
}
305-
// CHECK-AAPCS: declare void @pass_variadic_callee(<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>, ...)
306-
// CHECK-DARWIN: declare void @pass_variadic_callee(ptr noundef, ...)
307+
// CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false)
308+
// CHECK-AAPCS: call void (<vscale x 16 x i1>, <vscale x 2 x double>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 16 x i8>, <vscale x 16 x i1>, ...) @pass_variadic_callee(<vscale x 16 x i1> %1, <vscale x 2 x double> %cast.scalable1, <vscale x 4 x float> %cast.scalable2, <vscale x 4 x float> %cast.scalable3, <vscale x 16 x i8> %cast.scalable4, <vscale x 16 x i1> %12, ptr noundef nonnull %byval-temp)
309+
310+
// CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp, ptr noundef nonnull align 16 dereferenceable(96) %p, i64 96, i1 false)
311+
// CHECK-DARWIN: call void @llvm.lifetime.start.p0(i64 96, ptr nonnull %byval-temp1)
312+
// CHECK-DARWIN: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(96) %byval-temp1, ptr noundef nonnull align 16 dereferenceable(96) %q, i64 96, i1 false)
313+
// CHECK-DARWIN: call void (ptr, ...) @pass_variadic_callee(ptr noundef nonnull %byval-temp, ptr noundef nonnull %byval-temp1)
314+
315+
316+
// Test passing a small PST, still passed indirectly, despite being <= 128 bits
317+
void test_small_pst_variadic(SmallPST *p) {
318+
void small_pst_variadic_callee(int, ...);
319+
small_pst_variadic_callee(0, *p);
320+
}
321+
// CHECK-AAPCS: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 16 dereferenceable(16) %byval-temp, ptr noundef nonnull align 16 dereferenceable(16) %p, i64 16, i1 false)
322+
// CHECK-AAPCS: call void (i32, ...) @small_pst_variadic_callee(i32 noundef 0, ptr noundef nonnull %byval-temp)
307323

324+
// CHECK-DARWIN: %0 = load i128, ptr %p, align 16
325+
// CHECK-DARWIN: tail call void (i32, ...) @small_pst_variadic_callee(i32 noundef 0, i128 %0)
308326

309327
// Test handling of a PST argument when passed in registers, from the callee side.
310-
void argpass_callee_side(PST v) {
328+
void test_argpass_callee_side(PST v) {
311329
void use(PST *p);
312330
use(&v);
313331
}
314-
// CHECK-AAPCS: define dso_local void @argpass_callee_side(<vscale x 16 x i1> %0, <vscale x 2 x double> %.coerce1, <vscale x 4 x float> %.coerce3, <vscale x 4 x float> %.coerce5, <vscale x 16 x i8> %.coerce7, <vscale x 16 x i1> %1) local_unnamed_addr #0 {
332+
// CHECK-AAPCS: define dso_local void @test_argpass_callee_side(<vscale x 16 x i1> %0, <vscale x 2 x double> %.coerce1, <vscale x 4 x float> %.coerce3, <vscale x 4 x float> %.coerce5, <vscale x 16 x i8> %.coerce7, <vscale x 16 x i1> %1)
315333
// CHECK-AAPCS-NEXT: entry:
316334
// CHECK-AAPCS-NEXT: %v = alloca %struct.PST, align 16
317335
// CHECK-AAPCS-NEXT: %.coerce = bitcast <vscale x 16 x i1> %0 to <vscale x 2 x i8>
@@ -333,6 +351,90 @@ void argpass_callee_side(PST v) {
333351
// CHECK-AAPCS-NEXT: %.coerce9 = bitcast <vscale x 16 x i1> %1 to <vscale x 2 x i8>
334352
// CHECK-AAPCS-NEXT: %cast.fixed10 = tail call <2 x i8> @llvm.vector.extract.v2i8.nxv2i8(<vscale x 2 x i8> %.coerce9, i64 0)
335353
// CHECK-AAPCS-NEXT: store <2 x i8> %cast.fixed10, ptr %6, align 16
336-
// CHECK-AAPCS-NEXT: call void @use(ptr noundef nonnull %v) #8
354+
// CHECK-AAPCS-NEXT: call void @use(ptr noundef nonnull %v)
337355
// CHECK-AAPCS-NEXT: ret void
338356
// CHECK-AAPCS-NEXT: }
357+
358+
// Test va_arg operation
359+
#ifdef __cplusplus
360+
extern "C"
361+
#endif
362+
void test_va_arg(int n, ...) {
363+
va_list ap;
364+
va_start(ap, n);
365+
PST v = va_arg(ap, PST);
366+
va_end(ap);
367+
368+
void use1(bvec, fvec32);
369+
use1(v.a, v.y[1]);
370+
}
371+
// CHECK-AAPCS: define dso_local void @test_va_arg(i32 noundef %n, ...)
372+
// CHECK-AAPCS-NEXT: entry:
373+
// CHECK-AAPCS-NEXT: %ap = alloca %struct.__va_list, align 8
374+
// CHECK-AAPCS-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %ap)
375+
// CHECK-AAPCS-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap)
376+
// CHECK-AAPCS-NEXT: %gr_offs_p = getelementptr inbounds nuw i8, ptr %ap, i64 24
377+
// CHECK-AAPCS-NEXT: %gr_offs = load i32, ptr %gr_offs_p, align 8
378+
// CHECK-AAPCS-NEXT: %0 = icmp sgt i32 %gr_offs, -1
379+
// CHECK-AAPCS-NEXT: br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
380+
// CHECK-AAPCS-EMPTY:
381+
// CHECK-AAPCS-NEXT: vaarg.maybe_reg: ; preds = %entry
382+
383+
// Increment by 8, size of the pointer to the argument value, not size of the argument value itself.
384+
385+
// CHECK-AAPCS-NEXT: %new_reg_offs = add nsw i32 %gr_offs, 8
386+
// CHECK-AAPCS-NEXT: store i32 %new_reg_offs, ptr %gr_offs_p, align 8
387+
// CHECK-AAPCS-NEXT: %inreg = icmp ult i32 %gr_offs, -7
388+
// CHECK-AAPCS-NEXT: br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
389+
// CHECK-AAPCS-EMPTY:
390+
// CHECK-AAPCS-NEXT: vaarg.in_reg: ; preds = %vaarg.maybe_reg
391+
// CHECK-AAPCS-NEXT: %reg_top_p = getelementptr inbounds nuw i8, ptr %ap, i64 8
392+
// CHECK-AAPCS-NEXT: %reg_top = load ptr, ptr %reg_top_p, align 8
393+
// CHECK-AAPCS-NEXT: %1 = sext i32 %gr_offs to i64
394+
// CHECK-AAPCS-NEXT: %2 = getelementptr inbounds i8, ptr %reg_top, i64 %1
395+
// CHECK-AAPCS-NEXT: br label %vaarg.end
396+
// CHECK-AAPCS-EMPTY:
397+
// CHECK-AAPCS-NEXT: vaarg.on_stack: ; preds = %vaarg.maybe_reg, %entry
398+
// CHECK-AAPCS-NEXT: %stack = load ptr, ptr %ap, align 8
399+
// CHECK-AAPCS-NEXT: %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
400+
// CHECK-AAPCS-NEXT: store ptr %new_stack, ptr %ap, align 8
401+
// CHECK-AAPCS-NEXT: br label %vaarg.end
402+
// CHECK-AAPCS-EMPTY:
403+
// CHECK-AAPCS-NEXT: vaarg.end: ; preds = %vaarg.on_stack, %vaarg.in_reg
404+
// CHECK-AAPCS-NEXT: %vaargs.addr = phi ptr [ %2, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
405+
406+
// Extra indirection, for a composite passed indirectly.
407+
// CHECK-AAPCS-NEXT: %vaarg.addr = load ptr, ptr %vaargs.addr, align 8
408+
409+
// CHECK-AAPCS-NEXT: %v.sroa.0.0.copyload = load <2 x i8>, ptr %vaarg.addr, align 16
410+
// CHECK-AAPCS-NEXT: %v.sroa.43.0.vaarg.addr.sroa_idx = getelementptr inbounds i8, ptr %vaarg.addr, i64 48
411+
// CHECK-AAPCS-NEXT: %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0.vaarg.addr.sroa_idx, align 16
412+
// CHECK-AAPCS-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap)
413+
// CHECK-AAPCS-NEXT: %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %v.sroa.0.0.copyload, i64 0)
414+
// CHECK-AAPCS-NEXT: %3 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
415+
// CHECK-AAPCS-NEXT: %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %v.sroa.43.0.copyload, i64 0)
416+
// CHECK-AAPCS-NEXT: call void @use1(<vscale x 16 x i1> noundef %3, <vscale x 4 x float> noundef %cast.scalable2)
417+
// CHECK-AAPCS-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %ap)
418+
// CHECK-AAPCS-NEXT: ret void
419+
// CHECK-AAPCS-NEXT: }
420+
421+
// CHECK-DARWIN: define void @test_va_arg(i32 noundef %n, ...)
422+
// CHECK-DARWIN-NEXT: entry:
423+
// CHECK-DARWIN-NEXT: %ap = alloca ptr, align 8
424+
// CHECK-DARWIN-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull %ap)
425+
// CHECK-DARWIN-NEXT: call void @llvm.va_start.p0(ptr nonnull %ap)
426+
// CHECK-DARWIN-NEXT: %argp.cur = load ptr, ptr %ap, align 8
427+
// CHECK-DARWIN-NEXT: %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 8
428+
// CHECK-DARWIN-NEXT: store ptr %argp.next, ptr %ap, align 8
429+
// CHECK-DARWIN-NEXT: %0 = load ptr, ptr %argp.cur, align 8
430+
// CHECK-DARWIN-NEXT: %v.sroa.0.0.copyload = load <2 x i8>, ptr %0, align 16
431+
// CHECK-DARWIN-NEXT: %v.sroa.43.0..sroa_idx = getelementptr inbounds i8, ptr %0, i64 48
432+
// CHECK-DARWIN-NEXT: %v.sroa.43.0.copyload = load <4 x float>, ptr %v.sroa.43.0..sroa_idx, align 16
433+
// CHECK-DARWIN-NEXT: call void @llvm.va_end.p0(ptr nonnull %ap)
434+
// CHECK-DARWIN-NEXT: %cast.scalable = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %v.sroa.0.0.copyload, i64 0)
435+
// CHECK-DARWIN-NEXT: %1 = bitcast <vscale x 2 x i8> %cast.scalable to <vscale x 16 x i1>
436+
// CHECK-DARWIN-NEXT: %cast.scalable2 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %v.sroa.43.0.copyload, i64 0)
437+
// CHECK-DARWIN-NEXT: call void @use1(<vscale x 16 x i1> noundef %1, <vscale x 4 x float> noundef %cast.scalable2)
438+
// CHECK-DARWIN-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ap)
439+
// CHECK-DARWIN-NEXT: ret void
440+
// CHECK-DARWIN-NEXT: }

0 commit comments

Comments
 (0)