From dc20255b4fecbfb546e7f6bf9f325bbc1187239d Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Wed, 22 Jan 2025 17:14:45 -0800 Subject: [PATCH] [CIR] Add initial support for array cookies This patch adds the minimal support for array cookies needed to enable ClangIR generation for an array new expression that requires cookies but does not require an explicit initializer. --- clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp | 2 +- clang/lib/CIR/CodeGen/CIRGenCXXABI.h | 20 ++++++ clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp | 64 +++++++++++++++++-- clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp | 64 +++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenModule.cpp | 5 +- clang/lib/CIR/CodeGen/CIRGenTypeCache.h | 20 +++--- clang/test/CIR/CodeGen/new.cpp | 54 ++++++++++++++++ clang/test/CIR/Lowering/new.cpp | 41 +++++++++++- 8 files changed, 253 insertions(+), 17 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp index 82b253ba4100..7f31b80f93cd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.cpp @@ -89,7 +89,7 @@ bool CIRGenCXXABI::isZeroInitializable(const MemberPointerType *MPT) { CharUnits CIRGenCXXABI::getArrayCookieSize(const CXXNewExpr *E) { if (!requiresArrayCookie(E)) return CharUnits::Zero(); - llvm_unreachable("NYI"); + return getArrayCookieSizeImpl(E->getAllocatedType()); } bool CIRGenCXXABI::requiresArrayCookie(const CXXNewExpr *E) { diff --git a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h index ce217a6a6cd0..b82b744852f7 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCXXABI.h +++ b/clang/lib/CIR/CodeGen/CIRGenCXXABI.h @@ -362,6 +362,26 @@ class CIRGenCXXABI { /// /// \param E - the new-expression being allocated. virtual CharUnits getArrayCookieSize(const CXXNewExpr *E); + + /// Initialize the array cookie for the given allocation. + /// + /// \param NewPtr - a char* which is the presumed-non-null + /// return value of the allocation function + /// \param NumElements - the computed number of elements, + /// potentially collapsed from the multidimensional array case; + /// always a size_t + /// \param ElementType - the base element allocated type, + /// i.e. the allocated type after stripping all array types + virtual Address initializeArrayCookie(CIRGenFunction &CGF, Address NewPtr, + mlir::Value NumElements, + const CXXNewExpr *E, + QualType ElementType) = 0; + +protected: + /// Returns the extra size required in order to store the array + /// cookie for the given type. Assumes that an array cookie is + /// required. + virtual CharUnits getArrayCookieSizeImpl(QualType ElementType) = 0; }; /// Creates and Itanium-family ABI diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 90a55ff1a87f..4bc4866e03c7 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -623,6 +623,7 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &CGF, const CXXNewExpr *e, mlir::cast(constNumElements).getValue(); unsigned numElementsWidth = count.getBitWidth(); + bool hasAnyOverflow = false; // The equivalent code in CodeGen/CGExprCXX.cpp handles these cases as // overflow, but they should never happen. The size argument is implicitly @@ -653,10 +654,22 @@ static mlir::Value emitCXXNewAllocSize(CIRGenFunction &CGF, const CXXNewExpr *e, // Add in the cookie, and check whether it's overflowed. if (cookieSize != 0) { - llvm_unreachable("NYI"); + // Save the current size without a cookie. This shouldn't be + // used if there was overflow. + sizeWithoutCookie = CGF.getBuilder().getConstInt( + Loc, allocationSize.zextOrTrunc(sizeWidth)); + + allocationSize = allocationSize.uadd_ov(cookieSize, overflow); + hasAnyOverflow |= overflow; } - size = CGF.getBuilder().getConstInt(Loc, allocationSize); + // On overflow, produce a -1 so operator new will fail. + if (hasAnyOverflow) { + size = + CGF.getBuilder().getConstInt(Loc, llvm::APInt::getAllOnes(sizeWidth)); + } else { + size = CGF.getBuilder().getConstInt(Loc, allocationSize); + } } else { // TODO: Handle the variable size case llvm_unreachable("NYI"); @@ -858,6 +871,46 @@ void CIRGenFunction::emitNewArrayInitializer( if (!E->hasInitializer()) return; + Address CurPtr = BeginPtr; + + unsigned InitListElements = 0; + + const Expr *Init = E->getInitializer(); + CleanupDeactivationScope deactivation(*this); + + const InitListExpr *ILE = dyn_cast(Init); + if (ILE) { + llvm_unreachable("NYI"); + } + + // If all elements have already been initialized, skip any further + // initialization. + auto ConstOp = dyn_cast(NumElements.getDefiningOp()); + if (ConstOp) { + auto ConstIntAttr = mlir::dyn_cast(ConstOp.getValue()); + // Just skip out if the constant count is zero. + if (ConstIntAttr && ConstIntAttr.getUInt() <= InitListElements) + return; + } + + assert(Init && "have trailing elements to initialize but no initializer"); + + // If this is a constructor call, try to optimize it out, and failing that + // emit a single loop to initialize all remaining elements. + if (const CXXConstructExpr *CCE = dyn_cast(Init)) { + CXXConstructorDecl *Ctor = CCE->getConstructor(); + if (Ctor->isTrivial()) { + // If new expression did not specify value-initialization, then there + // is no initialization. + if (!CCE->requiresZeroInitialization() || Ctor->getParent()->isEmpty()) + return; + + llvm_unreachable("NYI"); + } + + llvm_unreachable("NYI"); + } + llvm_unreachable("NYI"); } @@ -1088,7 +1141,8 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *E) { ++ParamsToSkip; if (allocSize != allocSizeWithoutCookie) { - llvm_unreachable("NYI"); + CharUnits cookieAlign = getSizeAlign(); // FIXME: Ask the ABI. + allocAlign = std::max(allocAlign, cookieAlign); } // The allocation alignment may be passed as the second argument. @@ -1186,7 +1240,9 @@ mlir::Value CIRGenFunction::emitCXXNewExpr(const CXXNewExpr *E) { assert((allocSize == allocSizeWithoutCookie) == CalculateCookiePadding(*this, E).isZero()); if (allocSize != allocSizeWithoutCookie) { - llvm_unreachable("NYI"); + assert(E->isArray()); + allocation = CGM.getCXXABI().initializeArrayCookie( + *this, allocation, numElements, E, allocType); } mlir::Type elementTy; diff --git a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp index 495491799b3f..88861b7c544c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenItaniumCXXABI.cpp @@ -324,6 +324,13 @@ class CIRGenItaniumCXXABI : public CIRGenCXXABI { cir::MethodAttr buildVirtualMethodAttr(cir::MethodType MethodTy, const CXXMethodDecl *MD) override; + Address initializeArrayCookie(CIRGenFunction &CGF, Address NewPtr, + mlir::Value NumElements, const CXXNewExpr *E, + QualType ElementType) override; + +protected: + CharUnits getArrayCookieSizeImpl(QualType ElementType) override; + /**************************** RTTI Uniqueness ******************************/ protected: /// Returns true if the ABI requires RTTI type_info objects to be unique @@ -2637,3 +2644,60 @@ CIRGenItaniumCXXABI::buildVirtualMethodAttr(cir::MethodType MethodTy, bool CIRGenItaniumCXXABI::isZeroInitializable(const MemberPointerType *MPT) { return MPT->isMemberFunctionPointer(); } + +/************************** Array allocation cookies **************************/ + +CharUnits CIRGenItaniumCXXABI::getArrayCookieSizeImpl(QualType ElementType) { + // The array cookie is a size_t; pad that up to the element alignment. + // The cookie is actually right-justified in that space. + return std::max( + CharUnits::fromQuantity(CGM.SizeSizeInBytes), + CGM.getASTContext().getPreferredTypeAlignInChars(ElementType)); +} + +Address CIRGenItaniumCXXABI::initializeArrayCookie(CIRGenFunction &CGF, + Address NewPtr, + mlir::Value NumElements, + const CXXNewExpr *E, + QualType ElementType) { + assert(requiresArrayCookie(E)); + + // TODO: Get the address space when sanitizer support is implemented + + ASTContext &Ctx = getContext(); + CharUnits SizeSize = CGF.getSizeSize(); + mlir::Location Loc = CGF.getLoc(E->getSourceRange()); + + // The size of the cookie. + CharUnits CookieSize = + std::max(SizeSize, Ctx.getPreferredTypeAlignInChars(ElementType)); + assert(CookieSize == getArrayCookieSizeImpl(ElementType)); + + // Compute an offset to the cookie. + Address CookiePtr = NewPtr; + CharUnits CookieOffset = CookieSize - SizeSize; + if (!CookieOffset.isZero()) { + auto CastOp = CGF.getBuilder().createPtrBitcast( + CookiePtr.getPointer(), CGF.getBuilder().getUIntNTy(8)); + auto OffsetOp = CGF.getBuilder().getSignedInt( + Loc, CookieOffset.getQuantity(), /*width=*/32); + auto DataPtr = CGF.getBuilder().createPtrStride(Loc, CastOp, OffsetOp); + CookiePtr = Address(DataPtr, NewPtr.getType(), NewPtr.getAlignment()); + } + + // Write the number of elements into the appropriate slot. + Address NumElementsPtr = CookiePtr.withElementType(CGF.SizeTy); + CGF.getBuilder().createStore(Loc, NumElements, NumElementsPtr); + + if (CGF.SanOpts.has(SanitizerKind::Address)) + llvm_unreachable("NYI"); + + // Finally, compute a pointer to the actual data buffer by skipping + // over the cookie completely. + int64_t Offset = (CookieSize.getQuantity()); + auto CastOp = CGF.getBuilder().createPtrBitcast( + NewPtr.getPointer(), CGF.getBuilder().getUIntNTy(8)); + auto OffsetOp = CGF.getBuilder().getSignedInt(Loc, Offset, /*width=*/32); + auto DataPtr = CGF.getBuilder().createPtrStride(Loc, CastOp, OffsetOp); + return Address(DataPtr, NewPtr.getType(), NewPtr.getAlignment()); +} \ No newline at end of file diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 6c3fad1f021b..a427a297fd0d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -145,7 +145,10 @@ CIRGenModule::CIRGenModule(mlir::MLIRContext &mlirContext, .toCharUnitsFromBits( astContext.getTargetInfo().getPointerAlign(LangAS::Default)) .getQuantity(); - // TODO: SizeSizeInBytes + SizeSizeInBytes = + astContext + .toCharUnitsFromBits(astContext.getTargetInfo().getMaxPointerWidth()) + .getQuantity(); // TODO: IntAlignInBytes UCharTy = cir::IntType::get(&getMLIRContext(), astContext.getTargetInfo().getCharWidth(), diff --git a/clang/lib/CIR/CodeGen/CIRGenTypeCache.h b/clang/lib/CIR/CodeGen/CIRGenTypeCache.h index e625efb40dc0..4e29ad5dca46 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypeCache.h +++ b/clang/lib/CIR/CodeGen/CIRGenTypeCache.h @@ -105,19 +105,19 @@ struct CIRGenTypeCache { }; /// The size and alignment of size_t. - // union { - // unsigned char SizeSizeInBytes; // sizeof(size_t) - // unsigned char SizeAlignInBytes; - // }; + union { + unsigned char SizeSizeInBytes; // sizeof(size_t) + unsigned char SizeAlignInBytes; + }; cir::AddressSpaceAttr CIRAllocaAddressSpace; - // clang::CharUnits getSizeSize() const { - // return clang::CharUnits::fromQuantity(SizeSizeInBytes); - // } - // clang::CharUnits getSizeAlign() const { - // return clang::CharUnits::fromQuantity(SizeAlignInBytes); - // } + clang::CharUnits getSizeSize() const { + return clang::CharUnits::fromQuantity(SizeSizeInBytes); + } + clang::CharUnits getSizeAlign() const { + return clang::CharUnits::fromQuantity(SizeAlignInBytes); + } clang::CharUnits getPointerSize() const { return clang::CharUnits::fromQuantity(PointerSizeInBytes); } diff --git a/clang/test/CIR/CodeGen/new.cpp b/clang/test/CIR/CodeGen/new.cpp index 9447aff3b10a..fae2c429dd0a 100644 --- a/clang/test/CIR/CodeGen/new.cpp +++ b/clang/test/CIR/CodeGen/new.cpp @@ -90,3 +90,57 @@ void t_new_multidim_constant_size() { // CHECK: %5 = cir.cast(bitcast, %0 : !cir.ptr x 3>>>), !cir.ptr> // CHECK: cir.store %4, %5 : !cir.ptr, !cir.ptr> // CHECK: } + +class C { + public: + ~C(); +}; + +void t_constant_size_nontrivial() { + auto p = new C[3]; +} + +// CHECK: cir.func @_Z26t_constant_size_nontrivialv() +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["p", init] {alignment = 8 : i64} +// CHECK: %[[#NUM_ELEMENTS:]] = cir.const #cir.int<3> : !u64i +// CHECK: %[[#SIZE_WITHOUT_COOKIE:]] = cir.const #cir.int<3> : !u64i +// CHECK: %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<11> : !u64i +// CHECK: %4 = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr +// CHECK: %5 = cir.cast(bitcast, %4 : !cir.ptr), !cir.ptr +// CHECK: cir.store %[[#NUM_ELEMENTS]], %5 : !u64i, !cir.ptr +// CHECK: %6 = cir.cast(bitcast, %4 : !cir.ptr), !cir.ptr +// CHECK: %[[#COOKIE_SIZE:]] = cir.const #cir.int<8> : !s32i +// CHECK: %8 = cir.ptr_stride(%6 : !cir.ptr, %[[#COOKIE_SIZE]] : !s32i), !cir.ptr +// CHECK: %9 = cir.cast(bitcast, %8 : !cir.ptr), !cir.ptr +// CHECK: cir.store %9, %0 : !cir.ptr, !cir.ptr> +// CHECK: cir.return +// CHECK: } + +class D { + public: + int x; + ~D(); +}; + +void t_constant_size_nontrivial2() { + auto p = new D[3]; +} + +// In this test SIZE_WITHOUT_COOKIE isn't used, but it would be if there were +// an initializer. + +// CHECK: cir.func @_Z27t_constant_size_nontrivial2v() +// CHECK: %0 = cir.alloca !cir.ptr, !cir.ptr>, ["p", init] {alignment = 8 : i64} +// CHECK: %[[#NUM_ELEMENTS:]] = cir.const #cir.int<3> : !u64i +// CHECK: %[[#SIZE_WITHOUT_COOKIE:]] = cir.const #cir.int<12> : !u64i +// CHECK: %[[#ALLOCATION_SIZE:]] = cir.const #cir.int<20> : !u64i +// CHECK: %4 = cir.call @_Znam(%[[#ALLOCATION_SIZE]]) : (!u64i) -> !cir.ptr +// CHECK: %5 = cir.cast(bitcast, %4 : !cir.ptr), !cir.ptr +// CHECK: cir.store %[[#NUM_ELEMENTS]], %5 : !u64i, !cir.ptr +// CHECK: %6 = cir.cast(bitcast, %4 : !cir.ptr), !cir.ptr +// CHECK: %[[#COOKIE_SIZE:]] = cir.const #cir.int<8> : !s32i +// CHECK: %8 = cir.ptr_stride(%6 : !cir.ptr, %[[#COOKIE_SIZE]] : !s32i), !cir.ptr +// CHECK: %9 = cir.cast(bitcast, %8 : !cir.ptr), !cir.ptr +// CHECK: cir.store %9, %0 : !cir.ptr, !cir.ptr> +// CHECK: cir.return +// CHECK: } \ No newline at end of file diff --git a/clang/test/CIR/Lowering/new.cpp b/clang/test/CIR/Lowering/new.cpp index 9760854fce20..9c276f53dad3 100644 --- a/clang/test/CIR/Lowering/new.cpp +++ b/clang/test/CIR/Lowering/new.cpp @@ -17,4 +17,43 @@ void t_new_multidim_constant_size() { // LLVM: @_Z28t_new_multidim_constant_sizev() // LLVM: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8 // LLVM: %[[ADDR:.*]] = call ptr @_Znam(i64 192) -// LLVM: store ptr %[[ADDR]], ptr %[[ALLOCA]], align 8 \ No newline at end of file +// LLVM: store ptr %[[ADDR]], ptr %[[ALLOCA]], align 8 + +class C { + public: + ~C(); +}; + +void t_constant_size_nontrivial() { + auto p = new C[3]; +} + +// Note: The below differs from the IR emitted by clang without -fclangir in +// several respects. (1) The alloca here has an extra "i64 1" +// (2) The operator new call is missing "noalias noundef nonnull" on +// the call and "noundef" on the argument, (3) The getelementptr is +// missing "inbounds" + +// LLVM: @_Z26t_constant_size_nontrivialv() +// LLVM: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8 +// LLVM: %[[COOKIE_PTR:.*]] = call ptr @_Znam(i64 11) +// LLVM: store i64 3, ptr %[[COOKIE_PTR]], align 8 +// LLVM: %[[ALLOCATED_PTR:.*]] = getelementptr i8, ptr %[[COOKIE_PTR]], i64 8 +// LLVM: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8 + +class D { + public: + int x; + ~D(); +}; + +void t_constant_size_nontrivial2() { + auto p = new D[3]; +} + +// LLVM: @_Z27t_constant_size_nontrivial2v() +// LLVM: %[[ALLOCA:.*]] = alloca ptr, i64 1, align 8 +// LLVM: %[[COOKIE_PTR:.*]] = call ptr @_Znam(i64 20) +// LLVM: store i64 3, ptr %[[COOKIE_PTR]], align 8 +// LLVM: %[[ALLOCATED_PTR:.*]] = getelementptr i8, ptr %[[COOKIE_PTR]], i64 8 +// LLVM: store ptr %[[ALLOCATED_PTR]], ptr %[[ALLOCA]], align 8