-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[flang] use DataLayout instead of GEP to compute element size #140235
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-codegen @llvm/pr-subscribers-flang-fir-hlfir Author: None (jeanPerier) ChangesNow that the datalayout is part of codegen, use that to generate type size constants in codegen instead of generating GEP. This will be needed to be able to fold initializers of derived type arrays with descriptor components into ArrayAttr to speed-up compilation times which I will do in a different patch. Patch is 24.06 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140235.diff 6 Files Affected:
diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
index 53d16323beddf..7b1c14e4dfdc9 100644
--- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
+++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h
@@ -173,6 +173,10 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern {
this->getTypeConverter());
}
+ const mlir::DataLayout &getDataLayout() const {
+ return lowerTy().getDataLayout();
+ }
+
void attachTBAATag(mlir::LLVM::AliasAnalysisOpInterface op,
mlir::Type baseFIRType, mlir::Type accessFIRType,
mlir::LLVM::GEPOp gep) const {
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index e534cfa5591c6..ad9119ba4a031 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -1043,22 +1043,12 @@ static mlir::SymbolRefAttr getMalloc(fir::AllocMemOp op,
static mlir::Value
computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType,
mlir::Type idxTy,
- mlir::ConversionPatternRewriter &rewriter) {
- // Note that we cannot use something like
- // mlir::LLVM::getPrimitiveTypeSizeInBits() for the element type here. For
- // example, it returns 10 bytes for mlir::Float80Type for targets where it
- // occupies 16 bytes. Proper solution is probably to use
- // mlir::DataLayout::getTypeABIAlignment(), but DataLayout is not being set
- // yet (see llvm-project#57230). For the time being use the '(intptr_t)((type
- // *)0 + 1)' trick for all types. The generated instructions are optimized
- // into constant by the first pass of InstCombine, so it should not be a
- // performance issue.
- auto llvmPtrTy = ::getLlvmPtrType(llvmObjectType.getContext());
- auto nullPtr = rewriter.create<mlir::LLVM::ZeroOp>(loc, llvmPtrTy);
- auto gep = rewriter.create<mlir::LLVM::GEPOp>(
- loc, llvmPtrTy, llvmObjectType, nullPtr,
- llvm::ArrayRef<mlir::LLVM::GEPArg>{1});
- return rewriter.create<mlir::LLVM::PtrToIntOp>(loc, idxTy, gep);
+ mlir::ConversionPatternRewriter &rewriter,
+ const mlir::DataLayout &dataLayout) {
+ llvm::TypeSize size = dataLayout.getTypeSize(llvmObjectType);
+ unsigned short alignment = dataLayout.getTypeABIAlignment(llvmObjectType);
+ std::int64_t distance = llvm::alignTo(size, alignment);
+ return genConstantIndex(loc, idxTy, rewriter, distance);
}
/// Return value of the stride in bytes between adjacent elements
@@ -1066,10 +1056,10 @@ computeElementDistance(mlir::Location loc, mlir::Type llvmObjectType,
/// \p idxTy integer type.
static mlir::Value
genTypeStrideInBytes(mlir::Location loc, mlir::Type idxTy,
- mlir::ConversionPatternRewriter &rewriter,
- mlir::Type llTy) {
+ mlir::ConversionPatternRewriter &rewriter, mlir::Type llTy,
+ const mlir::DataLayout &dataLayout) {
// Create a pointer type and use computeElementDistance().
- return computeElementDistance(loc, llTy, idxTy, rewriter);
+ return computeElementDistance(loc, llTy, idxTy, rewriter, dataLayout);
}
namespace {
@@ -1111,7 +1101,7 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
mlir::Value genTypeSizeInBytes(mlir::Location loc, mlir::Type idxTy,
mlir::ConversionPatternRewriter &rewriter,
mlir::Type llTy) const {
- return computeElementDistance(loc, llTy, idxTy, rewriter);
+ return computeElementDistance(loc, llTy, idxTy, rewriter, getDataLayout());
}
};
} // namespace
@@ -1323,8 +1313,8 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
fir::CharacterType charTy,
mlir::ValueRange lenParams) const {
auto i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
- mlir::Value size =
- genTypeStrideInBytes(loc, i64Ty, rewriter, this->convertType(charTy));
+ mlir::Value size = genTypeStrideInBytes(
+ loc, i64Ty, rewriter, this->convertType(charTy), this->getDataLayout());
if (charTy.hasConstantLen())
return size; // Length accounted for in the genTypeStrideInBytes GEP.
// Otherwise, multiply the single character size by the length.
@@ -1338,6 +1328,7 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
std::tuple<mlir::Value, mlir::Value> getSizeAndTypeCode(
mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
mlir::Type boxEleTy, mlir::ValueRange lenParams = {}) const {
+ const mlir::DataLayout &dataLayout = this->getDataLayout();
auto i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
if (auto eleTy = fir::dyn_cast_ptrEleTy(boxEleTy))
boxEleTy = eleTy;
@@ -1354,18 +1345,19 @@ struct EmboxCommonConversion : public fir::FIROpConversion<OP> {
mlir::dyn_cast<fir::LogicalType>(boxEleTy) || fir::isa_real(boxEleTy) ||
fir::isa_complex(boxEleTy))
return {genTypeStrideInBytes(loc, i64Ty, rewriter,
- this->convertType(boxEleTy)),
+ this->convertType(boxEleTy), dataLayout),
typeCodeVal};
if (auto charTy = mlir::dyn_cast<fir::CharacterType>(boxEleTy))
return {getCharacterByteSize(loc, rewriter, charTy, lenParams),
typeCodeVal};
if (fir::isa_ref_type(boxEleTy)) {
auto ptrTy = ::getLlvmPtrType(rewriter.getContext());
- return {genTypeStrideInBytes(loc, i64Ty, rewriter, ptrTy), typeCodeVal};
+ return {genTypeStrideInBytes(loc, i64Ty, rewriter, ptrTy, dataLayout),
+ typeCodeVal};
}
if (mlir::isa<fir::RecordType>(boxEleTy))
return {genTypeStrideInBytes(loc, i64Ty, rewriter,
- this->convertType(boxEleTy)),
+ this->convertType(boxEleTy), dataLayout),
typeCodeVal};
fir::emitFatalError(loc, "unhandled type in fir.box code generation");
}
@@ -1909,8 +1901,8 @@ struct XEmboxOpConversion : public EmboxCommonConversion<fir::cg::XEmboxOp> {
if (hasSubcomp) {
// We have a subcomponent. The step value needs to be the number of
// bytes per element (which is a derived type).
- prevDimByteStride =
- genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(seqEleTy));
+ prevDimByteStride = genTypeStrideInBytes(
+ loc, i64Ty, rewriter, convertType(seqEleTy), getDataLayout());
} else if (hasSubstr) {
// We have a substring. The step value needs to be the number of bytes
// per CHARACTER element.
@@ -3604,8 +3596,8 @@ struct CopyOpConversion : public fir::FIROpConversion<fir::CopyOp> {
mlir::Value llvmDestination = adaptor.getDestination();
mlir::Type i64Ty = mlir::IntegerType::get(rewriter.getContext(), 64);
mlir::Type copyTy = fir::unwrapRefType(copy.getSource().getType());
- mlir::Value copySize =
- genTypeStrideInBytes(loc, i64Ty, rewriter, convertType(copyTy));
+ mlir::Value copySize = genTypeStrideInBytes(
+ loc, i64Ty, rewriter, convertType(copyTy), getDataLayout());
mlir::LLVM::AliasAnalysisOpInterface newOp;
if (copy.getNoOverlap())
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index 2960528fb6c24..6d8a8bb606b90 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -216,9 +216,7 @@ func.func @test_alloc_and_freemem_one() {
}
// CHECK-LABEL: llvm.func @test_alloc_and_freemem_one() {
-// CHECK-NEXT: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK-NEXT: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-// CHECK-NEXT: %[[N:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+// CHECK: %[[N:.*]] = llvm.mlir.constant(4 : i64) : i64
// CHECK-NEXT: llvm.call @malloc(%[[N]])
// CHECK: llvm.call @free(%{{.*}})
// CHECK-NEXT: llvm.return
@@ -235,10 +233,8 @@ func.func @test_alloc_and_freemem_several() {
}
// CHECK-LABEL: llvm.func @test_alloc_and_freemem_several() {
-// CHECK: [[NULL:%.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: [[PTR:%.*]] = llvm.getelementptr [[NULL]][{{.*}}] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<100 x f32>
-// CHECK: [[N:%.*]] = llvm.ptrtoint [[PTR]] : !llvm.ptr to i64
-// CHECK: [[MALLOC:%.*]] = llvm.call @malloc([[N]])
+// CHECK: %[[N:.*]] = llvm.mlir.constant(400 : i64) : i64
+// CHECK: [[MALLOC:%.*]] = llvm.call @malloc(%[[N]])
// CHECK: llvm.call @free([[MALLOC]])
// CHECK: llvm.return
@@ -251,9 +247,7 @@ func.func @test_with_shape(%ncols: index, %nrows: index) {
// CHECK-LABEL: llvm.func @test_with_shape
// CHECK-SAME: %[[NCOLS:.*]]: i64, %[[NROWS:.*]]: i64
-// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-// CHECK: %[[FOUR:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+// CHECK: %[[FOUR:.*]] = llvm.mlir.constant(4 : i64) : i64
// CHECK: %[[DIM1_SIZE:.*]] = llvm.mul %[[FOUR]], %[[NCOLS]] : i64
// CHECK: %[[TOTAL_SIZE:.*]] = llvm.mul %[[DIM1_SIZE]], %[[NROWS]] : i64
// CHECK: %[[MEM:.*]] = llvm.call @malloc(%[[TOTAL_SIZE]])
@@ -269,9 +263,7 @@ func.func @test_string_with_shape(%len: index, %nelems: index) {
// CHECK-LABEL: llvm.func @test_string_with_shape
// CHECK-SAME: %[[LEN:.*]]: i64, %[[NELEMS:.*]]: i64)
-// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-// CHECK: %[[ONE:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+// CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i64) : i64
// CHECK: %[[LEN_SIZE:.*]] = llvm.mul %[[ONE]], %[[LEN]] : i64
// CHECK: %[[TOTAL_SIZE:.*]] = llvm.mul %[[LEN_SIZE]], %[[NELEMS]] : i64
// CHECK: %[[MEM:.*]] = llvm.call @malloc(%[[TOTAL_SIZE]])
@@ -1654,9 +1646,7 @@ func.func @embox0(%arg0: !fir.ref<!fir.array<100xi32>>) {
// AMDGPU: %[[AA:.*]] = llvm.alloca %[[C1]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5>
// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
// CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(9 : i32) : i32
-// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-// CHECK: %[[I64_ELEM_SIZE:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+// CHECK: %[[I64_ELEM_SIZE:.*]] = llvm.mlir.constant(4 : i64) : i64
// CHECK: %[[DESC:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})>
// CHECK: %[[DESC0:.*]] = llvm.insertvalue %[[I64_ELEM_SIZE]], %[[DESC]][1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}})>
// CHECK: %[[CFI_VERSION:.*]] = llvm.mlir.constant(20240719 : i32) : i32
@@ -1879,9 +1869,7 @@ func.func @xembox0(%arg0: !fir.ref<!fir.array<?xi32>>) {
// AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
// CHECK: %[[TYPE:.*]] = llvm.mlir.constant(9 : i32) : i32
-// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.mlir.constant(4 : i64) : i64
// CHECK: %[[BOX0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>
// CHECK: %[[BOX1:.*]] = llvm.insertvalue %[[ELEM_LEN_I64]], %[[BOX0]][1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>
// CHECK: %[[VERSION:.*]] = llvm.mlir.constant(20240719 : i32) : i32
@@ -1933,9 +1921,7 @@ func.func @xembox0_i32(%arg0: !fir.ref<!fir.array<?xi32>>) {
// CHECK: %[[C0_I32:.*]] = llvm.mlir.constant(0 : i32) : i32
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
// CHECK: %[[TYPE:.*]] = llvm.mlir.constant(9 : i32) : i32
-// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.mlir.constant(4 : i64) : i64
// CHECK: %[[BOX0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>
// CHECK: %[[BOX1:.*]] = llvm.insertvalue %[[ELEM_LEN_I64]], %[[BOX0]][1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>
// CHECK: %[[VERSION:.*]] = llvm.mlir.constant(20240719 : i32) : i32
@@ -1988,9 +1974,7 @@ func.func @xembox1(%arg0: !fir.ref<!fir.array<?x!fir.char<1, 10>>>) {
// CHECK-LABEL: llvm.func @xembox1(%{{.*}}: !llvm.ptr) {
// CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
-// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.mlir.constant(10 : i64) : i64
// CHECK: %{{.*}} = llvm.insertvalue %[[ELEM_LEN_I64]], %{{.*}}[1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>
// CHECK: %[[PREV_PTROFF:.*]] = llvm.mul %[[ELEM_LEN_I64]], %[[C0]] : i64
@@ -2042,9 +2026,7 @@ func.func private @_QPxb(!fir.box<!fir.array<?x?xf64>>)
// AMDGPU: %[[AR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr"} : (i64) -> !llvm.ptr<5>
// AMDGPU: %[[ARR:.*]] = llvm.addrspacecast %[[AR]] : !llvm.ptr<5> to !llvm.ptr
// CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(28 : i32) : i32
-// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.mlir.constant(8 : i64) : i64
// CHECK: %[[BOX0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>
// CHECK: %[[BOX1:.*]] = llvm.insertvalue %[[ELEM_LEN_I64]], %[[BOX0]][1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<2 x array<3 x i64>>)>
// CHECK: %[[VERSION:.*]] = llvm.mlir.constant(20240719 : i32) : i32
@@ -2126,9 +2108,7 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box<!fir.array<?xi32>>)
// CHECK: %[[C10:.*]] = llvm.mlir.constant(10 : i64) : i64
// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64
// CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(9 : i32) : i32
-// CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+// CHECK: %[[ELEM_LEN_I64:.*]] = llvm.mlir.constant(4 : i64) : i64
// CHECK: %[[BOX0:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>
// CHECK: %[[BOX1:.*]] = llvm.insertvalue %[[ELEM_LEN_I64]], %[[BOX0]][1] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>
// CHECK: %[[VERSION:.*]] = llvm.mlir.constant(20240719 : i32) : i32
@@ -2146,9 +2126,7 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box<!fir.array<?xi32>>)
// CHECK: %[[BOX6:.*]] = llvm.insertvalue %[[F18ADDENDUM_I8]], %[[BOX5]][6] : !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)>
// CHECK: %[[ZERO:.*]] = llvm.mlir.constant(0 : i64) : i64
// CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i64) : i64
-// CHECK: %[[ELE_TYPE:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[GEP_DTYPE_SIZE:.*]] = llvm.getelementptr %[[ELE_TYPE]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFtest_dt_sliceTt", (i32, i32)>
-// CHECK: %[[PTRTOINT_DTYPE_SIZE:.*]] = llvm.ptrtoint %[[GEP_DTYPE_SIZE]] : !llvm.ptr to i64
+// CHECK: %[[PTRTOINT_DTYPE_SIZE:.*]] = llvm.mlir.constant(8 : i64) : i64
// CHECK: %[[ADJUSTED_OFFSET:.*]] = llvm.sub %[[C1]], %[[ONE]] : i64
// CHECK: %[[EXT_SUB:.*]] = llvm.sub %[[C10]], %[[C1]] : i64
// CHECK: %[[EXT_ADD:.*]] = llvm.add %[[EXT_SUB]], %[[C2]] : i64
@@ -2429,9 +2407,7 @@ func.func @test_rebox_1(%arg0: !fir.box<!fir.array<?x?xf32>>) {
//CHECK: %[[SIX:.*]] = llvm.mlir.constant(6 : index) : i64
//CHECK: %[[EIGHTY:.*]] = llvm.mlir.constant(80 : index) : i64
//CHECK: %[[FLOAT_TYPE:.*]] = llvm.mlir.constant(27 : i32) : i32
-//CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-//CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-//CHECK: %[[ELEM_SIZE_I64:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+//CHECK: %[[ELEM_SIZE_I64:.*]] = llvm.mlir.constant(4 : i64) : i64
//CHECK: %[[EXTRA_GEP:.*]] = llvm.getelementptr %[[ARG0]][0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>
//CHECK: %[[EXTRA:.*]] = llvm.load %[[EXTRA_GEP]] : !llvm.ptr -> i8
//CHECK: %[[RBOX:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>
@@ -2504,9 +2480,7 @@ func.func @foo(%arg0: !fir.box<!fir.array<?x!fir.type<t{i:i32,c:!fir.char<1,10>}
//CHECK: %[[COMPONENT_OFFSET_1:.*]] = llvm.mlir.constant(1 : i64) : i64
//CHECK: %[[ELEM_COUNT:.*]] = llvm.mlir.constant(7 : i64) : i64
//CHECK: %[[TYPE_CHAR:.*]] = llvm.mlir.constant(40 : i32) : i32
-//CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr
-//CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1]
-//CHECK: %[[CHAR_SIZE:.*]] = llvm.ptrtoint %[[GEP]] : !llvm.ptr to i64
+//CHECK: %[[CHAR_SIZE:.*]] = llvm.mlir.constant(1 : i64) : i64
//CHECK: %[[ELEM_SIZE:.*]] = llvm.mul %[[CHAR_SIZE]], %[[ELEM_COUNT]]
//CHECK: %[[EXTRA_GEP:.*]] = llvm.getelementptr %[[ARG0]][0, 6] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>, ptr, array<1 x i64>)>
//CHECK: %[[EXTRA:.*]] = llvm.load %[[EXTRA_GEP]] : !llvm.ptr -> i8
diff --git a/flang/test/Fir/copy-codegen.fir b/flang/test/Fir/copy-codegen.fir
index eef1885c6a49c..7b0620ca2d312 100644
--- a/flang/test/Fir/copy-codegen.fir
+++ b/flang/test/Fir/copy-codegen.fir
@@ -12,10 +12,8 @@ func.func @test_copy_1(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
// CHECK-LABEL: llvm.func @test_copy_1(
// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
-// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_2]][1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"sometype", (array<9 x i32>)>
-// CHECK: %[[VAL_4:.*]] = llvm.ptrtoint %[[VAL_3]] : !llvm.ptr to i64
-// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_4]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
+// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(36 : i64) : i64
+// CHECK: "llvm.intr.memcpy"(%[[VAL_1]], %[[VAL_0]], %[[VAL_2]]) <{isVolatile = false}> : (!llvm.ptr, !llvm.ptr, i64) -> ()
// CHECK: llvm.return
// CHECK: }
@@ -26,10 +24,8 @@ func.func @test_copy_2(%arg0: !fir.ref<!t>, %arg1: !fir.ref<!t>) {
// CHECK-LABEL: llvm.func @test_copy_2(
// CHECK-SAME: %[[VAL_0:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr,
// CHECK-SAME: %[[VAL_1:[0-9]+|[a-zA-Z$._-][a-zA-Z0-9$._-]*]]: !llvm.ptr) {
-// CHECK: %[[VAL_2:.*]] = llvm.mlir.zero : !llvm.ptr
-// CHECK: %[[VAL_3:....
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks great!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
…140268) This patch relies on #140235 and #139724 to speed-up compilations of files with derived type array global with initial value. Currently, such derived type global init was lowered to an llvm.mlir.insertvalue chain in the LLVM IR dialect because there was no way to represent such value via attributes. This chain was later folded in LLVM dialect to LLVM IR using LLVM IR (not dialect) folding. This insert chain generation and folding is very expensive for big arrays. For instance, this patch brings down the compilation of FM_lib fmsave.f95 from 50s to 0.5s.
Now that the datalayout is part of codegen, use that to generate type size constants in codegen instead of generating GEP.
This will be needed to be able to fold initializers of derived type arrays with descriptor components into ArrayAttr to speed-up compilation times which I will do in a different patch.