From 25c5feaebb681fc8c91ce81348d02cd579f32495 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Wed, 30 Apr 2025 11:20:51 -0700 Subject: [PATCH] [flang][cuda] Use a reference for asyncObject --- .../flang-rt/runtime/allocator-registry.h | 4 +-- .../include/flang-rt/runtime/descriptor.h | 6 ++--- .../flang-rt/runtime/reduction-templates.h | 2 +- flang-rt/lib/cuda/allocator.cpp | 16 ++++++------ flang-rt/lib/cuda/descriptor.cpp | 2 +- flang-rt/lib/runtime/allocatable.cpp | 12 ++++----- flang-rt/lib/runtime/array-constructor.cpp | 4 +-- flang-rt/lib/runtime/assign.cpp | 4 +-- flang-rt/lib/runtime/character.cpp | 20 +++++++------- flang-rt/lib/runtime/copy.cpp | 4 +-- flang-rt/lib/runtime/derived.cpp | 6 ++--- flang-rt/lib/runtime/descriptor.cpp | 4 +-- flang-rt/lib/runtime/extrema.cpp | 4 +-- flang-rt/lib/runtime/findloc.cpp | 2 +- flang-rt/lib/runtime/matmul-transpose.cpp | 2 +- flang-rt/lib/runtime/matmul.cpp | 2 +- flang-rt/lib/runtime/misc-intrinsic.cpp | 2 +- flang-rt/lib/runtime/pointer.cpp | 2 +- flang-rt/lib/runtime/temporary-stack.cpp | 2 +- flang-rt/lib/runtime/tools.cpp | 2 +- flang-rt/lib/runtime/transformational.cpp | 4 +-- flang-rt/unittests/Evaluate/reshape.cpp | 2 +- flang-rt/unittests/Runtime/Allocatable.cpp | 4 +-- .../unittests/Runtime/CUDA/Allocatable.cpp | 3 ++- .../unittests/Runtime/CUDA/AllocatorCUF.cpp | 4 +-- flang-rt/unittests/Runtime/CUDA/Memory.cpp | 4 +-- flang-rt/unittests/Runtime/CharacterTest.cpp | 2 +- flang-rt/unittests/Runtime/CommandTest.cpp | 8 +++--- flang-rt/unittests/Runtime/TemporaryStack.cpp | 4 +-- flang-rt/unittests/Runtime/tools.h | 2 +- .../flang/Optimizer/Dialect/CUF/CUFOps.td | 11 ++++---- .../include/flang/Runtime/CUDA/allocatable.h | 8 +++--- flang/include/flang/Runtime/CUDA/pointer.h | 8 +++--- flang/include/flang/Runtime/allocatable.h | 7 ++--- flang/lib/Lower/Allocatable.cpp | 2 +- .../Optimizer/Builder/Runtime/Allocatable.cpp | 7 +++-- flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp | 22 ++++++++-------- .../Optimizer/Transforms/CUFOpConversion.cpp | 10 +++---- flang/test/Fir/CUDA/cuda-allocate.fir | 18 ++++++------- flang/test/Fir/cuf-invalid.fir | 5 ++-- flang/test/Fir/cuf.mlir | 7 +++-- flang/test/HLFIR/elemental-codegen.fir | 6 ++--- flang/test/Lower/CUDA/cuda-allocatable.cuf | 9 +++---- .../acc-declare-unwrap-defaultbounds.f90 | 4 +-- flang/test/Lower/OpenACC/acc-declare.f90 | 4 +-- flang/test/Lower/allocatable-polymorphic.f90 | 26 +++++++++---------- flang/test/Lower/allocatable-runtime.f90 | 4 +-- flang/test/Lower/allocate-mold.f90 | 4 +-- flang/test/Lower/polymorphic.f90 | 2 +- flang/test/Transforms/lower-repack-arrays.fir | 8 +++--- 50 files changed, 153 insertions(+), 158 deletions(-) diff --git a/flang-rt/include/flang-rt/runtime/allocator-registry.h b/flang-rt/include/flang-rt/runtime/allocator-registry.h index 33e8e2c7d7850..f0ba77a360736 100644 --- a/flang-rt/include/flang-rt/runtime/allocator-registry.h +++ b/flang-rt/include/flang-rt/runtime/allocator-registry.h @@ -19,7 +19,7 @@ namespace Fortran::runtime { -using AllocFct = void *(*)(std::size_t, std::int64_t); +using AllocFct = void *(*)(std::size_t, std::int64_t *); using FreeFct = void (*)(void *); typedef struct Allocator_t { @@ -28,7 +28,7 @@ typedef struct Allocator_t { } Allocator_t; static RT_API_ATTRS void *MallocWrapper( - std::size_t size, [[maybe_unused]] std::int64_t) { + std::size_t size, [[maybe_unused]] std::int64_t *) { return std::malloc(size); } #ifdef RT_DEVICE_COMPILATION diff --git a/flang-rt/include/flang-rt/runtime/descriptor.h b/flang-rt/include/flang-rt/runtime/descriptor.h index 9907e7866e7bf..19cfeeb1e9dd1 100644 --- a/flang-rt/include/flang-rt/runtime/descriptor.h +++ b/flang-rt/include/flang-rt/runtime/descriptor.h @@ -29,8 +29,8 @@ #include #include -/// Value used for asyncId when no specific stream is specified. -static constexpr std::int64_t kNoAsyncId = -1; +/// Value used for asyncObject when no specific stream is specified. +static constexpr void *kNoAsyncObject = nullptr; namespace Fortran::runtime { @@ -372,7 +372,7 @@ class Descriptor { // before calling. It (re)computes the byte strides after // allocation. Does not allocate automatic components or // perform default component initialization. - RT_API_ATTRS int Allocate(std::int64_t asyncId); + RT_API_ATTRS int Allocate(std::int64_t *asyncObject); RT_API_ATTRS void SetByteStrides(); // Deallocates storage; does not call FINAL subroutines or diff --git a/flang-rt/include/flang-rt/runtime/reduction-templates.h b/flang-rt/include/flang-rt/runtime/reduction-templates.h index 77f77a592a476..18412708b02c5 100644 --- a/flang-rt/include/flang-rt/runtime/reduction-templates.h +++ b/flang-rt/include/flang-rt/runtime/reduction-templates.h @@ -347,7 +347,7 @@ inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x, // as the element size of the source. result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr, CFI_attribute_allocatable); - if (int stat{result.Allocate(kNoAsyncId)}) { + if (int stat{result.Allocate(kNoAsyncObject)}) { terminator.Crash( "%s: could not allocate memory for result; STAT=%d", intrinsic, stat); } diff --git a/flang-rt/lib/cuda/allocator.cpp b/flang-rt/lib/cuda/allocator.cpp index 51119ab251168..3a92c5b21c9af 100644 --- a/flang-rt/lib/cuda/allocator.cpp +++ b/flang-rt/lib/cuda/allocator.cpp @@ -136,7 +136,7 @@ void RTDEF(CUFRegisterAllocator)() { } void *CUFAllocPinned( - std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) { + std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) { void *p; CUDA_REPORT_IF_ERROR(cudaMallocHost((void **)&p, sizeInBytes)); return p; @@ -144,18 +144,18 @@ void *CUFAllocPinned( void CUFFreePinned(void *p) { CUDA_REPORT_IF_ERROR(cudaFreeHost(p)); } -void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t asyncId) { +void *CUFAllocDevice(std::size_t sizeInBytes, std::int64_t *asyncObject) { void *p; if (Fortran::runtime::executionEnvironment.cudaDeviceIsManaged) { CUDA_REPORT_IF_ERROR( cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal)); } else { - if (asyncId == kNoAsyncId) { + if (asyncObject == kNoAsyncObject) { CUDA_REPORT_IF_ERROR(cudaMalloc(&p, sizeInBytes)); } else { CUDA_REPORT_IF_ERROR( - cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)asyncId)); - insertAllocation(p, sizeInBytes, asyncId); + cudaMallocAsync(&p, sizeInBytes, (cudaStream_t)*asyncObject)); + insertAllocation(p, sizeInBytes, (cudaStream_t)*asyncObject); } } return p; @@ -174,7 +174,7 @@ void CUFFreeDevice(void *p) { } void *CUFAllocManaged( - std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) { + std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) { void *p; CUDA_REPORT_IF_ERROR( cudaMallocManaged((void **)&p, sizeInBytes, cudaMemAttachGlobal)); @@ -184,9 +184,9 @@ void *CUFAllocManaged( void CUFFreeManaged(void *p) { CUDA_REPORT_IF_ERROR(cudaFree(p)); } void *CUFAllocUnified( - std::size_t sizeInBytes, [[maybe_unused]] std::int64_t asyncId) { + std::size_t sizeInBytes, [[maybe_unused]] std::int64_t *asyncObject) { // Call alloc managed for the time being. - return CUFAllocManaged(sizeInBytes, asyncId); + return CUFAllocManaged(sizeInBytes, asyncObject); } void CUFFreeUnified(void *p) { diff --git a/flang-rt/lib/cuda/descriptor.cpp b/flang-rt/lib/cuda/descriptor.cpp index 175e8c0ef8438..7b768f91af29d 100644 --- a/flang-rt/lib/cuda/descriptor.cpp +++ b/flang-rt/lib/cuda/descriptor.cpp @@ -21,7 +21,7 @@ RT_EXT_API_GROUP_BEGIN Descriptor *RTDEF(CUFAllocDescriptor)( std::size_t sizeInBytes, const char *sourceFile, int sourceLine) { return reinterpret_cast( - CUFAllocManaged(sizeInBytes, /*asyncId*/ -1)); + CUFAllocManaged(sizeInBytes, /*asyncObject=*/nullptr)); } void RTDEF(CUFFreeDescriptor)( diff --git a/flang-rt/lib/runtime/allocatable.cpp b/flang-rt/lib/runtime/allocatable.cpp index 6acce34eb9a9e..ef18da6ea0786 100644 --- a/flang-rt/lib/runtime/allocatable.cpp +++ b/flang-rt/lib/runtime/allocatable.cpp @@ -133,17 +133,17 @@ void RTDEF(AllocatableApplyMold)( } } -int RTDEF(AllocatableAllocate)(Descriptor &descriptor, std::int64_t asyncId, - bool hasStat, const Descriptor *errMsg, const char *sourceFile, - int sourceLine) { +int RTDEF(AllocatableAllocate)(Descriptor &descriptor, + std::int64_t *asyncObject, bool hasStat, const Descriptor *errMsg, + const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (!descriptor.IsAllocatable()) { return ReturnError(terminator, StatInvalidDescriptor, errMsg, hasStat); } else if (descriptor.IsAllocated()) { return ReturnError(terminator, StatBaseNotNull, errMsg, hasStat); } else { - int stat{ - ReturnError(terminator, descriptor.Allocate(asyncId), errMsg, hasStat)}; + int stat{ReturnError( + terminator, descriptor.Allocate(asyncObject), errMsg, hasStat)}; if (stat == StatOk) { if (const DescriptorAddendum * addendum{descriptor.Addendum()}) { if (const auto *derived{addendum->derivedType()}) { @@ -162,7 +162,7 @@ int RTDEF(AllocatableAllocateSource)(Descriptor &alloc, const Descriptor &source, bool hasStat, const Descriptor *errMsg, const char *sourceFile, int sourceLine) { int stat{RTNAME(AllocatableAllocate)( - alloc, /*asyncId=*/-1, hasStat, errMsg, sourceFile, sourceLine)}; + alloc, /*asyncObject=*/nullptr, hasStat, errMsg, sourceFile, sourceLine)}; if (stat == StatOk) { Terminator terminator{sourceFile, sourceLine}; DoFromSourceAssign(alloc, source, terminator); diff --git a/flang-rt/lib/runtime/array-constructor.cpp b/flang-rt/lib/runtime/array-constructor.cpp index 67b3b5e1e0f50..858fac7bf2b39 100644 --- a/flang-rt/lib/runtime/array-constructor.cpp +++ b/flang-rt/lib/runtime/array-constructor.cpp @@ -50,7 +50,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded( initialAllocationSize(fromElements, to.ElementBytes())}; to.GetDimension(0).SetBounds(1, allocationSize); RTNAME(AllocatableAllocate) - (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, + (to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile, vector.sourceLine); to.GetDimension(0).SetBounds(1, fromElements); vector.actualAllocationSize = allocationSize; @@ -59,7 +59,7 @@ static RT_API_ATTRS void AllocateOrReallocateVectorIfNeeded( // first value: there should be no reallocation. RUNTIME_CHECK(terminator, previousToElements >= fromElements); RTNAME(AllocatableAllocate) - (to, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, + (to, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, vector.sourceFile, vector.sourceLine); vector.actualAllocationSize = previousToElements; } diff --git a/flang-rt/lib/runtime/assign.cpp b/flang-rt/lib/runtime/assign.cpp index 4a813cd489022..8a4fa36c91479 100644 --- a/flang-rt/lib/runtime/assign.cpp +++ b/flang-rt/lib/runtime/assign.cpp @@ -99,7 +99,7 @@ static RT_API_ATTRS int AllocateAssignmentLHS( toDim.SetByteStride(stride); stride *= toDim.Extent(); } - int result{ReturnError(terminator, to.Allocate(kNoAsyncId))}; + int result{ReturnError(terminator, to.Allocate(kNoAsyncObject))}; if (result == StatOk && derived && !derived->noInitializationNeeded()) { result = ReturnError(terminator, Initialize(to, *derived, terminator)); } @@ -277,7 +277,7 @@ RT_API_ATTRS void Assign(Descriptor &to, const Descriptor &from, // entity, otherwise, the Deallocate() below will not // free the descriptor memory. newFrom.raw().attribute = CFI_attribute_allocatable; - auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncId))}; + auto stat{ReturnError(terminator, newFrom.Allocate(kNoAsyncObject))}; if (stat == StatOk) { if (HasDynamicComponent(from)) { // If 'from' has allocatable/automatic component, we cannot diff --git a/flang-rt/lib/runtime/character.cpp b/flang-rt/lib/runtime/character.cpp index d1152ee1caefb..f140d202e118e 100644 --- a/flang-rt/lib/runtime/character.cpp +++ b/flang-rt/lib/runtime/character.cpp @@ -118,7 +118,7 @@ static RT_API_ATTRS void Compare(Descriptor &result, const Descriptor &x, for (int j{0}; j < rank; ++j) { result.GetDimension(j).SetBounds(1, ub[j]); } - if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) { + if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) { terminator.Crash("Compare: could not allocate storage for result"); } std::size_t xChars{x.ElementBytes() >> shift}; @@ -173,7 +173,7 @@ static RT_API_ATTRS void AdjustLRHelper(Descriptor &result, for (int j{0}; j < rank; ++j) { result.GetDimension(j).SetBounds(1, ub[j]); } - if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) { + if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) { terminator.Crash("ADJUSTL/R: could not allocate storage for result"); } for (SubscriptValue resultAt{0}; elements-- > 0; @@ -227,7 +227,7 @@ static RT_API_ATTRS void LenTrim(Descriptor &result, const Descriptor &string, for (int j{0}; j < rank; ++j) { result.GetDimension(j).SetBounds(1, ub[j]); } - if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) { + if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) { terminator.Crash("LEN_TRIM: could not allocate storage for result"); } std::size_t stringElementChars{string.ElementBytes() >> shift}; @@ -427,7 +427,7 @@ static RT_API_ATTRS void GeneralCharFunc(Descriptor &result, for (int j{0}; j < rank; ++j) { result.GetDimension(j).SetBounds(1, ub[j]); } - if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) { + if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) { terminator.Crash("SCAN/VERIFY: could not allocate storage for result"); } std::size_t stringElementChars{string.ElementBytes() >> shift}; @@ -530,7 +530,8 @@ static RT_API_ATTRS void MaxMinHelper(Descriptor &accumulator, for (int j{0}; j < rank; ++j) { accumulator.GetDimension(j).SetBounds(1, ub[j]); } - RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS); + RUNTIME_CHECK( + terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS); } for (CHAR *result{accumulator.OffsetElement()}; elements-- > 0; accumData += accumChars, result += chars, x.IncrementSubscripts(xAt)) { @@ -606,7 +607,7 @@ void RTDEF(CharacterConcatenate)(Descriptor &accumulator, for (int j{0}; j < rank; ++j) { accumulator.GetDimension(j).SetBounds(1, ub[j]); } - if (accumulator.Allocate(kNoAsyncId) != CFI_SUCCESS) { + if (accumulator.Allocate(kNoAsyncObject) != CFI_SUCCESS) { terminator.Crash( "CharacterConcatenate: could not allocate storage for result"); } @@ -629,7 +630,8 @@ void RTDEF(CharacterConcatenateScalar1)( accumulator.set_base_addr(nullptr); std::size_t oldLen{accumulator.ElementBytes()}; accumulator.raw().elem_len += chars; - RUNTIME_CHECK(terminator, accumulator.Allocate(kNoAsyncId) == CFI_SUCCESS); + RUNTIME_CHECK( + terminator, accumulator.Allocate(kNoAsyncObject) == CFI_SUCCESS); std::memcpy(accumulator.OffsetElement(oldLen), from, chars); FreeMemory(old); } @@ -831,7 +833,7 @@ void RTDEF(Repeat)(Descriptor &result, const Descriptor &string, std::size_t origBytes{string.ElementBytes()}; result.Establish(string.type(), origBytes * ncopies, nullptr, 0, nullptr, CFI_attribute_allocatable); - if (result.Allocate(kNoAsyncId) != CFI_SUCCESS) { + if (result.Allocate(kNoAsyncObject) != CFI_SUCCESS) { terminator.Crash("REPEAT could not allocate storage for result"); } const char *from{string.OffsetElement()}; @@ -865,7 +867,7 @@ void RTDEF(Trim)(Descriptor &result, const Descriptor &string, } result.Establish(string.type(), resultBytes, nullptr, 0, nullptr, CFI_attribute_allocatable); - RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncId) == CFI_SUCCESS); + RUNTIME_CHECK(terminator, result.Allocate(kNoAsyncObject) == CFI_SUCCESS); std::memcpy(result.OffsetElement(), string.OffsetElement(), resultBytes); } diff --git a/flang-rt/lib/runtime/copy.cpp b/flang-rt/lib/runtime/copy.cpp index 3a0f98cf8d376..f990f46e0be66 100644 --- a/flang-rt/lib/runtime/copy.cpp +++ b/flang-rt/lib/runtime/copy.cpp @@ -171,8 +171,8 @@ RT_API_ATTRS void CopyElement(const Descriptor &to, const SubscriptValue toAt[], *reinterpret_cast(toPtr + component->offset())}; if (toDesc.raw().base_addr != nullptr) { toDesc.set_base_addr(nullptr); - RUNTIME_CHECK( - terminator, toDesc.Allocate(/*asyncId=*/-1) == CFI_SUCCESS); + RUNTIME_CHECK(terminator, + toDesc.Allocate(/*asyncObject=*/nullptr) == CFI_SUCCESS); const Descriptor &fromDesc{*reinterpret_cast( fromPtr + component->offset())}; copyStack.emplace(toDesc, fromDesc); diff --git a/flang-rt/lib/runtime/derived.cpp b/flang-rt/lib/runtime/derived.cpp index c46ea806a430a..35037036f63e7 100644 --- a/flang-rt/lib/runtime/derived.cpp +++ b/flang-rt/lib/runtime/derived.cpp @@ -52,7 +52,7 @@ RT_API_ATTRS int Initialize(const Descriptor &instance, allocDesc.raw().attribute = CFI_attribute_allocatable; if (comp.genre() == typeInfo::Component::Genre::Automatic) { stat = ReturnError( - terminator, allocDesc.Allocate(kNoAsyncId), errMsg, hasStat); + terminator, allocDesc.Allocate(kNoAsyncObject), errMsg, hasStat); if (stat == StatOk) { if (const DescriptorAddendum * addendum{allocDesc.Addendum()}) { if (const auto *derived{addendum->derivedType()}) { @@ -153,7 +153,7 @@ RT_API_ATTRS int InitializeClone(const Descriptor &clone, if (origDesc.IsAllocated()) { cloneDesc.ApplyMold(origDesc, origDesc.rank()); stat = ReturnError( - terminator, cloneDesc.Allocate(kNoAsyncId), errMsg, hasStat); + terminator, cloneDesc.Allocate(kNoAsyncObject), errMsg, hasStat); if (stat == StatOk) { if (const DescriptorAddendum * addendum{cloneDesc.Addendum()}) { if (const typeInfo::DerivedType * @@ -260,7 +260,7 @@ static RT_API_ATTRS void CallFinalSubroutine(const Descriptor &descriptor, copy.raw().attribute = CFI_attribute_allocatable; Terminator stubTerminator{"CallFinalProcedure() in Fortran runtime", 0}; RUNTIME_CHECK(terminator ? *terminator : stubTerminator, - copy.Allocate(kNoAsyncId) == CFI_SUCCESS); + copy.Allocate(kNoAsyncObject) == CFI_SUCCESS); ShallowCopyDiscontiguousToContiguous(copy, descriptor); argDescriptor = © } diff --git a/flang-rt/lib/runtime/descriptor.cpp b/flang-rt/lib/runtime/descriptor.cpp index 3debf53bb5290..67336d01380e0 100644 --- a/flang-rt/lib/runtime/descriptor.cpp +++ b/flang-rt/lib/runtime/descriptor.cpp @@ -158,7 +158,7 @@ RT_API_ATTRS static inline int MapAllocIdx(const Descriptor &desc) { #endif } -RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) { +RT_API_ATTRS int Descriptor::Allocate(std::int64_t *asyncObject) { std::size_t elementBytes{ElementBytes()}; if (static_cast(elementBytes) < 0) { // F'2023 7.4.4.2 p5: "If the character length parameter value evaluates @@ -170,7 +170,7 @@ RT_API_ATTRS int Descriptor::Allocate(std::int64_t asyncId) { // Zero size allocation is possible in Fortran and the resulting // descriptor must be allocated/associated. Since std::malloc(0) // result is implementation defined, always allocate at least one byte. - void *p{alloc(byteSize ? byteSize : 1, asyncId)}; + void *p{alloc(byteSize ? byteSize : 1, asyncObject)}; if (!p) { return CFI_ERROR_MEM_ALLOCATION; } diff --git a/flang-rt/lib/runtime/extrema.cpp b/flang-rt/lib/runtime/extrema.cpp index 4c7f8e8b99e8f..03e574a8fbff1 100644 --- a/flang-rt/lib/runtime/extrema.cpp +++ b/flang-rt/lib/runtime/extrema.cpp @@ -152,7 +152,7 @@ inline RT_API_ATTRS void CharacterMaxOrMinLoc(const char *intrinsic, CFI_attribute_allocatable); result.GetDimension(0).SetBounds(1, extent[0]); Terminator terminator{source, line}; - if (int stat{result.Allocate(kNoAsyncId)}) { + if (int stat{result.Allocate(kNoAsyncObject)}) { terminator.Crash( "%s: could not allocate memory for result; STAT=%d", intrinsic, stat); } @@ -181,7 +181,7 @@ inline RT_API_ATTRS void TotalNumericMaxOrMinLoc(const char *intrinsic, CFI_attribute_allocatable); result.GetDimension(0).SetBounds(1, extent[0]); Terminator terminator{source, line}; - if (int stat{result.Allocate(kNoAsyncId)}) { + if (int stat{result.Allocate(kNoAsyncObject)}) { terminator.Crash( "%s: could not allocate memory for result; STAT=%d", intrinsic, stat); } diff --git a/flang-rt/lib/runtime/findloc.cpp b/flang-rt/lib/runtime/findloc.cpp index e3e98953b0cfc..5485f4b97bd2f 100644 --- a/flang-rt/lib/runtime/findloc.cpp +++ b/flang-rt/lib/runtime/findloc.cpp @@ -220,7 +220,7 @@ void RTDEF(Findloc)(Descriptor &result, const Descriptor &x, CFI_attribute_allocatable); result.GetDimension(0).SetBounds(1, extent[0]); Terminator terminator{source, line}; - if (int stat{result.Allocate(kNoAsyncId)}) { + if (int stat{result.Allocate(kNoAsyncObject)}) { terminator.Crash( "FINDLOC: could not allocate memory for result; STAT=%d", stat); } diff --git a/flang-rt/lib/runtime/matmul-transpose.cpp b/flang-rt/lib/runtime/matmul-transpose.cpp index 17987fb73d943..c9e21502b629e 100644 --- a/flang-rt/lib/runtime/matmul-transpose.cpp +++ b/flang-rt/lib/runtime/matmul-transpose.cpp @@ -183,7 +183,7 @@ inline static RT_API_ATTRS void DoMatmulTranspose( for (int j{0}; j < resRank; ++j) { result.GetDimension(j).SetBounds(1, extent[j]); } - if (int stat{result.Allocate(kNoAsyncId)}) { + if (int stat{result.Allocate(kNoAsyncObject)}) { terminator.Crash( "MATMUL-TRANSPOSE: could not allocate memory for result; STAT=%d", stat); diff --git a/flang-rt/lib/runtime/matmul.cpp b/flang-rt/lib/runtime/matmul.cpp index 0ff92cecbbcb8..5acb345725212 100644 --- a/flang-rt/lib/runtime/matmul.cpp +++ b/flang-rt/lib/runtime/matmul.cpp @@ -255,7 +255,7 @@ static inline RT_API_ATTRS void DoMatmul( for (int j{0}; j < resRank; ++j) { result.GetDimension(j).SetBounds(1, extent[j]); } - if (int stat{result.Allocate(kNoAsyncId)}) { + if (int stat{result.Allocate(kNoAsyncObject)}) { terminator.Crash( "MATMUL: could not allocate memory for result; STAT=%d", stat); } diff --git a/flang-rt/lib/runtime/misc-intrinsic.cpp b/flang-rt/lib/runtime/misc-intrinsic.cpp index 2fde859869ef0..a8797f48fa667 100644 --- a/flang-rt/lib/runtime/misc-intrinsic.cpp +++ b/flang-rt/lib/runtime/misc-intrinsic.cpp @@ -30,7 +30,7 @@ static RT_API_ATTRS void TransferImpl(Descriptor &result, if (const DescriptorAddendum * addendum{mold.Addendum()}) { *result.Addendum() = *addendum; } - if (int stat{result.Allocate(kNoAsyncId)}) { + if (int stat{result.Allocate(kNoAsyncObject)}) { Terminator{sourceFile, line}.Crash( "TRANSFER: could not allocate memory for result; STAT=%d", stat); } diff --git a/flang-rt/lib/runtime/pointer.cpp b/flang-rt/lib/runtime/pointer.cpp index fd2427f4124b5..7331f7bbc3a75 100644 --- a/flang-rt/lib/runtime/pointer.cpp +++ b/flang-rt/lib/runtime/pointer.cpp @@ -129,7 +129,7 @@ RT_API_ATTRS void *AllocateValidatedPointerPayload( byteSize = ((byteSize + align - 1) / align) * align; std::size_t total{byteSize + sizeof(std::uintptr_t)}; AllocFct alloc{allocatorRegistry.GetAllocator(allocatorIdx)}; - void *p{alloc(total, /*asyncId=*/-1)}; + void *p{alloc(total, /*asyncObject=*/nullptr)}; if (p && allocatorIdx == 0) { // Fill the footer word with the XOR of the ones' complement of // the base address, which is a value that would be highly unlikely diff --git a/flang-rt/lib/runtime/temporary-stack.cpp b/flang-rt/lib/runtime/temporary-stack.cpp index 3a952b1fdbcca..3f6fd8ee15a80 100644 --- a/flang-rt/lib/runtime/temporary-stack.cpp +++ b/flang-rt/lib/runtime/temporary-stack.cpp @@ -148,7 +148,7 @@ void DescriptorStorage::push(const Descriptor &source) { if constexpr (COPY_VALUES) { // copy the data pointed to by the box box.set_base_addr(nullptr); - box.Allocate(kNoAsyncId); + box.Allocate(kNoAsyncObject); RTNAME(AssignTemporary) (box, source, terminator_.sourceFileName(), terminator_.sourceLine()); } diff --git a/flang-rt/lib/runtime/tools.cpp b/flang-rt/lib/runtime/tools.cpp index 5d6e35faca70a..1f965b0b151ce 100644 --- a/flang-rt/lib/runtime/tools.cpp +++ b/flang-rt/lib/runtime/tools.cpp @@ -261,7 +261,7 @@ RT_API_ATTRS void CreatePartialReductionResult(Descriptor &result, for (int j{0}; j + 1 < xRank; ++j) { result.GetDimension(j).SetBounds(1, resultExtent[j]); } - if (int stat{result.Allocate(kNoAsyncId)}) { + if (int stat{result.Allocate(kNoAsyncObject)}) { terminator.Crash( "%s: could not allocate memory for result; STAT=%d", intrinsic, stat); } diff --git a/flang-rt/lib/runtime/transformational.cpp b/flang-rt/lib/runtime/transformational.cpp index a7d5a48530ee9..3df314a4e966b 100644 --- a/flang-rt/lib/runtime/transformational.cpp +++ b/flang-rt/lib/runtime/transformational.cpp @@ -132,7 +132,7 @@ static inline RT_API_ATTRS std::size_t AllocateResult(Descriptor &result, for (int j{0}; j < rank; ++j) { result.GetDimension(j).SetBounds(1, extent[j]); } - if (int stat{result.Allocate(kNoAsyncId)}) { + if (int stat{result.Allocate(kNoAsyncObject)}) { terminator.Crash( "%s: Could not allocate memory for result (stat=%d)", function, stat); } @@ -157,7 +157,7 @@ static inline RT_API_ATTRS std::size_t AllocateBesselResult(Descriptor &result, for (int j{0}; j < rank; ++j) { result.GetDimension(j).SetBounds(1, extent[j]); } - if (int stat{result.Allocate(kNoAsyncId)}) { + if (int stat{result.Allocate(kNoAsyncObject)}) { terminator.Crash( "%s: Could not allocate memory for result (stat=%d)", function, stat); } diff --git a/flang-rt/unittests/Evaluate/reshape.cpp b/flang-rt/unittests/Evaluate/reshape.cpp index 67a0be124e8e0..f84de443965d1 100644 --- a/flang-rt/unittests/Evaluate/reshape.cpp +++ b/flang-rt/unittests/Evaluate/reshape.cpp @@ -26,7 +26,7 @@ int main() { for (int j{0}; j < 3; ++j) { source->GetDimension(j).SetBounds(1, sourceExtent[j]); } - TEST(source->Allocate(kNoAsyncId) == CFI_SUCCESS); + TEST(source->Allocate(kNoAsyncObject) == CFI_SUCCESS); TEST(source->IsAllocated()); MATCH(2, source->GetDimension(0).Extent()); MATCH(3, source->GetDimension(1).Extent()); diff --git a/flang-rt/unittests/Runtime/Allocatable.cpp b/flang-rt/unittests/Runtime/Allocatable.cpp index a6fcdd0d1423c..b394312e5bc5a 100644 --- a/flang-rt/unittests/Runtime/Allocatable.cpp +++ b/flang-rt/unittests/Runtime/Allocatable.cpp @@ -26,7 +26,7 @@ TEST(AllocatableTest, MoveAlloc) { auto b{createAllocatable(TypeCategory::Integer, 4)}; // ALLOCATE(a(20)) a->GetDimension(0).SetBounds(1, 20); - a->Allocate(kNoAsyncId); + a->Allocate(kNoAsyncObject); EXPECT_TRUE(a->IsAllocated()); EXPECT_FALSE(b->IsAllocated()); @@ -46,7 +46,7 @@ TEST(AllocatableTest, MoveAlloc) { // move_alloc with errMsg auto errMsg{Descriptor::Create( sizeof(char), 64, nullptr, 0, nullptr, CFI_attribute_allocatable)}; - errMsg->Allocate(kNoAsyncId); + errMsg->Allocate(kNoAsyncObject); RTNAME(MoveAlloc)(*b, *a, nullptr, false, errMsg.get(), __FILE__, __LINE__); EXPECT_FALSE(a->IsAllocated()); EXPECT_TRUE(b->IsAllocated()); diff --git a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp index 89649aa95ad93..202119cf2ea20 100644 --- a/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp +++ b/flang-rt/unittests/Runtime/CUDA/Allocatable.cpp @@ -42,7 +42,8 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocatable) { CUDA_REPORT_IF_ERROR(cudaMalloc(&device_desc, a->SizeInBytes())); RTNAME(AllocatableAllocate) - (*a, kNoAsyncId, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); + (*a, kNoAsyncObject, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, + __LINE__); EXPECT_TRUE(a->IsAllocated()); RTNAME(CUFDescriptorSync)(device_desc, a.get(), __FILE__, __LINE__); cudaDeviceSynchronize(); diff --git a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp index 2f1dc64dc8c5a..f1f931e87a86e 100644 --- a/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp +++ b/flang-rt/unittests/Runtime/CUDA/AllocatorCUF.cpp @@ -35,7 +35,7 @@ TEST(AllocatableCUFTest, SimpleDeviceAllocate) { EXPECT_FALSE(a->HasAddendum()); RTNAME(AllocatableSetBounds)(*a, 0, 1, 10); RTNAME(AllocatableAllocate) - (*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, + (*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); EXPECT_TRUE(a->IsAllocated()); RTNAME(AllocatableDeallocate) @@ -54,7 +54,7 @@ TEST(AllocatableCUFTest, SimplePinnedAllocate) { EXPECT_FALSE(a->HasAddendum()); RTNAME(AllocatableSetBounds)(*a, 0, 1, 10); RTNAME(AllocatableAllocate) - (*a, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, + (*a, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, __LINE__); EXPECT_TRUE(a->IsAllocated()); RTNAME(AllocatableDeallocate) diff --git a/flang-rt/unittests/Runtime/CUDA/Memory.cpp b/flang-rt/unittests/Runtime/CUDA/Memory.cpp index b3612073657ab..7915baca6c203 100644 --- a/flang-rt/unittests/Runtime/CUDA/Memory.cpp +++ b/flang-rt/unittests/Runtime/CUDA/Memory.cpp @@ -50,8 +50,8 @@ TEST(MemoryCUFTest, CUFDataTransferDescDesc) { EXPECT_EQ((int)kDeviceAllocatorPos, dev->GetAllocIdx()); RTNAME(AllocatableSetBounds)(*dev, 0, 1, 10); RTNAME(AllocatableAllocate) - (*dev, /*asyncId=*/-1, /*hasStat=*/false, /*errMsg=*/nullptr, __FILE__, - __LINE__); + (*dev, /*asyncObject=*/nullptr, /*hasStat=*/false, /*errMsg=*/nullptr, + __FILE__, __LINE__); EXPECT_TRUE(dev->IsAllocated()); // Create temp array to transfer to device. diff --git a/flang-rt/unittests/Runtime/CharacterTest.cpp b/flang-rt/unittests/Runtime/CharacterTest.cpp index 0f28e883671bc..2c7af27b9da77 100644 --- a/flang-rt/unittests/Runtime/CharacterTest.cpp +++ b/flang-rt/unittests/Runtime/CharacterTest.cpp @@ -35,7 +35,7 @@ OwningPtr CreateDescriptor(const std::vector &shape, for (int j{0}; j < rank; ++j) { descriptor->GetDimension(j).SetBounds(2, shape[j] + 1); } - if (descriptor->Allocate(kNoAsyncId) != 0) { + if (descriptor->Allocate(kNoAsyncObject) != 0) { return nullptr; } diff --git a/flang-rt/unittests/Runtime/CommandTest.cpp b/flang-rt/unittests/Runtime/CommandTest.cpp index 9d0da4ce8dd4e..6919a98105b8a 100644 --- a/flang-rt/unittests/Runtime/CommandTest.cpp +++ b/flang-rt/unittests/Runtime/CommandTest.cpp @@ -26,7 +26,7 @@ template static OwningPtr CreateEmptyCharDescriptor() { OwningPtr descriptor{Descriptor::Create( sizeof(char), n, nullptr, 0, nullptr, CFI_attribute_allocatable)}; - if (descriptor->Allocate(kNoAsyncId) != 0) { + if (descriptor->Allocate(kNoAsyncObject) != 0) { return nullptr; } return descriptor; @@ -36,7 +36,7 @@ static OwningPtr CharDescriptor(const char *value) { std::size_t n{std::strlen(value)}; OwningPtr descriptor{Descriptor::Create( sizeof(char), n, nullptr, 0, nullptr, CFI_attribute_allocatable)}; - if (descriptor->Allocate(kNoAsyncId) != 0) { + if (descriptor->Allocate(kNoAsyncObject) != 0) { return nullptr; } std::memcpy(descriptor->OffsetElement(), value, n); @@ -47,7 +47,7 @@ template static OwningPtr EmptyIntDescriptor() { OwningPtr descriptor{Descriptor::Create(TypeCategory::Integer, kind, nullptr, 0, nullptr, CFI_attribute_allocatable)}; - if (descriptor->Allocate(kNoAsyncId) != 0) { + if (descriptor->Allocate(kNoAsyncObject) != 0) { return nullptr; } return descriptor; @@ -57,7 +57,7 @@ template static OwningPtr IntDescriptor(const int &value) { OwningPtr descriptor{Descriptor::Create(TypeCategory::Integer, kind, nullptr, 0, nullptr, CFI_attribute_allocatable)}; - if (descriptor->Allocate(kNoAsyncId) != 0) { + if (descriptor->Allocate(kNoAsyncObject) != 0) { return nullptr; } std::memcpy(descriptor->OffsetElement(), &value, sizeof(int)); diff --git a/flang-rt/unittests/Runtime/TemporaryStack.cpp b/flang-rt/unittests/Runtime/TemporaryStack.cpp index 3291794f22fc1..65725840459ab 100644 --- a/flang-rt/unittests/Runtime/TemporaryStack.cpp +++ b/flang-rt/unittests/Runtime/TemporaryStack.cpp @@ -59,7 +59,7 @@ TEST(TemporaryStack, ValueStackBasic) { Descriptor &outputDesc2{testDescriptorStorage[2].descriptor()}; inputDesc.Establish(code, elementBytes, descriptorPtr, rank, extent); - inputDesc.Allocate(kNoAsyncId); + inputDesc.Allocate(kNoAsyncObject); ASSERT_EQ(inputDesc.IsAllocated(), true); uint32_t *inputData = static_cast(inputDesc.raw().base_addr); for (std::size_t i = 0; i < inputDesc.Elements(); ++i) { @@ -123,7 +123,7 @@ TEST(TemporaryStack, ValueStackMultiSize) { boxDims.extent = extent[dim]; boxDims.sm = elementBytes; } - desc->Allocate(kNoAsyncId); + desc->Allocate(kNoAsyncObject); // fill the array with some data to test for (uint32_t i = 0; i < desc->Elements(); ++i) { diff --git a/flang-rt/unittests/Runtime/tools.h b/flang-rt/unittests/Runtime/tools.h index a1eba45647a80..4ada862df110b 100644 --- a/flang-rt/unittests/Runtime/tools.h +++ b/flang-rt/unittests/Runtime/tools.h @@ -42,7 +42,7 @@ static OwningPtr MakeArray(const std::vector &shape, for (int j{0}; j < rank; ++j) { result->GetDimension(j).SetBounds(1, shape[j]); } - int stat{result->Allocate(kNoAsyncId)}; + int stat{result->Allocate(kNoAsyncObject)}; EXPECT_EQ(stat, 0) << stat; EXPECT_LE(data.size(), result->Elements()); char *p{result->OffsetElement()}; diff --git a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td index 46cc59cda1612..e38738230ffbc 100644 --- a/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td +++ b/flang/include/flang/Optimizer/Dialect/CUF/CUFOps.td @@ -95,12 +95,11 @@ def cuf_AllocateOp : cuf_Op<"allocate", [AttrSizedOperandSegments, }]; let arguments = (ins Arg:$box, - Arg, "", [MemWrite]>:$errmsg, - Optional:$stream, - Arg, "", [MemWrite]>:$pinned, - Arg, "", [MemRead]>:$source, - cuf_DataAttributeAttr:$data_attr, - UnitAttr:$hasStat); + Arg, "", [MemWrite]>:$errmsg, + Optional:$stream, + Arg, "", [MemWrite]>:$pinned, + Arg, "", [MemRead]>:$source, + cuf_DataAttributeAttr:$data_attr, UnitAttr:$hasStat); let results = (outs AnyIntegerType:$stat); diff --git a/flang/include/flang/Runtime/CUDA/allocatable.h b/flang/include/flang/Runtime/CUDA/allocatable.h index 822f2d4a2b297..6c97afa9e10e8 100644 --- a/flang/include/flang/Runtime/CUDA/allocatable.h +++ b/flang/include/flang/Runtime/CUDA/allocatable.h @@ -17,14 +17,14 @@ namespace Fortran::runtime::cuda { extern "C" { /// Perform allocation of the descriptor. -int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t stream = -1, +int RTDECL(CUFAllocatableAllocate)(Descriptor &, int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. -int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t stream = -1, +int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); @@ -32,14 +32,14 @@ int RTDECL(CUFAllocatableAllocateSync)(Descriptor &, int64_t stream = -1, /// Perform allocation of the descriptor without synchronization. Assign data /// from source. int RTDEF(CUFAllocatableAllocateSource)(Descriptor &alloc, - const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr, + const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. Assign data from source. int RTDEF(CUFAllocatableAllocateSourceSync)(Descriptor &alloc, - const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr, + const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); diff --git a/flang/include/flang/Runtime/CUDA/pointer.h b/flang/include/flang/Runtime/CUDA/pointer.h index 7fbd8f8e061f2..bdfc3268e0814 100644 --- a/flang/include/flang/Runtime/CUDA/pointer.h +++ b/flang/include/flang/Runtime/CUDA/pointer.h @@ -17,14 +17,14 @@ namespace Fortran::runtime::cuda { extern "C" { /// Perform allocation of the descriptor. -int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t stream = -1, +int RTDECL(CUFPointerAllocate)(Descriptor &, int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. -int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t stream = -1, +int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); @@ -32,14 +32,14 @@ int RTDECL(CUFPointerAllocateSync)(Descriptor &, int64_t stream = -1, /// Perform allocation of the descriptor without synchronization. Assign data /// from source. int RTDEF(CUFPointerAllocateSource)(Descriptor &pointer, - const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr, + const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); /// Perform allocation of the descriptor with synchronization of it when /// necessary. Assign data from source. int RTDEF(CUFPointerAllocateSourceSync)(Descriptor &pointer, - const Descriptor &source, int64_t stream = -1, bool *pinned = nullptr, + const Descriptor &source, int64_t *stream = nullptr, bool *pinned = nullptr, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); diff --git a/flang/include/flang/Runtime/allocatable.h b/flang/include/flang/Runtime/allocatable.h index 6895f8af5e2a8..863c07494e7c3 100644 --- a/flang/include/flang/Runtime/allocatable.h +++ b/flang/include/flang/Runtime/allocatable.h @@ -94,9 +94,10 @@ int RTDECL(AllocatableCheckLengthParameter)(Descriptor &, // Successfully allocated memory is initialized if the allocatable has a // derived type, and is always initialized by AllocatableAllocateSource(). // Performs all necessary coarray synchronization and validation actions. -int RTDECL(AllocatableAllocate)(Descriptor &, std::int64_t asyncId = -1, - bool hasStat = false, const Descriptor *errMsg = nullptr, - const char *sourceFile = nullptr, int sourceLine = 0); +int RTDECL(AllocatableAllocate)(Descriptor &, + std::int64_t *asyncObject = nullptr, bool hasStat = false, + const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, + int sourceLine = 0); int RTDECL(AllocatableAllocateSource)(Descriptor &, const Descriptor &source, bool hasStat = false, const Descriptor *errMsg = nullptr, const char *sourceFile = nullptr, int sourceLine = 0); diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp index 8d0444a6e5bd4..af8169c8e7f7b 100644 --- a/flang/lib/Lower/Allocatable.cpp +++ b/flang/lib/Lower/Allocatable.cpp @@ -773,7 +773,7 @@ class AllocateStmtHelper { mlir::Value errmsg = errMsgExpr ? errorManager.errMsgAddr : nullptr; mlir::Value stream = streamExpr - ? fir::getBase(converter.genExprValue(loc, *streamExpr, stmtCtx)) + ? fir::getBase(converter.genExprAddr(loc, *streamExpr, stmtCtx)) : nullptr; mlir::Value pinned = pinnedExpr diff --git a/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp b/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp index 28452d3b486da..cd5f1f6d098c3 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Allocatable.cpp @@ -76,8 +76,7 @@ void fir::runtime::genAllocatableAllocate(fir::FirOpBuilder &builder, mlir::func::FuncOp func{ fir::runtime::getRuntimeFunc(loc, builder)}; mlir::FunctionType fTy{func.getFunctionType()}; - mlir::Value asyncId = - builder.createIntegerConstant(loc, builder.getI64Type(), -1); + mlir::Value asyncObject = builder.createNullConstant(loc); mlir::Value sourceFile{fir::factory::locationToFilename(builder, loc)}; mlir::Value sourceLine{ fir::factory::locationToLineNo(builder, loc, fTy.getInput(5))}; @@ -88,7 +87,7 @@ void fir::runtime::genAllocatableAllocate(fir::FirOpBuilder &builder, errMsg = builder.create(loc, boxNoneTy).getResult(); } llvm::SmallVector args{ - fir::runtime::createArguments(builder, loc, fTy, desc, asyncId, hasStat, - errMsg, sourceFile, sourceLine)}; + fir::runtime::createArguments(builder, loc, fTy, desc, asyncObject, + hasStat, errMsg, sourceFile, sourceLine)}; builder.create(loc, func, args); } diff --git a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp index 24033bc15b8eb..687007d957225 100644 --- a/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp +++ b/flang/lib/Optimizer/Dialect/CUF/CUFOps.cpp @@ -76,6 +76,16 @@ llvm::LogicalResult cuf::FreeOp::verify() { return checkCudaAttr(*this); } // AllocateOp //===----------------------------------------------------------------------===// +template +static llvm::LogicalResult checkStreamType(OpTy op) { + if (!op.getStream()) + return mlir::success(); + if (auto refTy = mlir::dyn_cast(op.getStream().getType())) + if (!refTy.getEleTy().isInteger(64)) + return op.emitOpError("stream is expected to be an i64 reference"); + return mlir::success(); +} + llvm::LogicalResult cuf::AllocateOp::verify() { if (getPinned() && getStream()) return emitOpError("pinned and stream cannot appears at the same time"); @@ -92,7 +102,7 @@ llvm::LogicalResult cuf::AllocateOp::verify() { "expect errmsg to be a reference to/or a box type value"); if (getErrmsg() && !getHasStat()) return emitOpError("expect stat attribute when errmsg is provided"); - return mlir::success(); + return checkStreamType(*this); } //===----------------------------------------------------------------------===// @@ -143,16 +153,6 @@ llvm::LogicalResult cuf::DeallocateOp::verify() { // KernelLaunchOp //===----------------------------------------------------------------------===// -template -static llvm::LogicalResult checkStreamType(OpTy op) { - if (!op.getStream()) - return mlir::success(); - if (auto refTy = mlir::dyn_cast(op.getStream().getType())) - if (!refTy.getEleTy().isInteger(64)) - return op.emitOpError("stream is expected to be an i64 reference"); - return mlir::success(); -} - llvm::LogicalResult cuf::KernelLaunchOp::verify() { return checkStreamType(*this); } diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index e70ceb3a67d98..3a3eab9e8e37b 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -128,17 +128,15 @@ static mlir::LogicalResult convertOpToCall(OpTy op, mlir::IntegerType::get(op.getContext(), 1))); if (op.getSource()) { mlir::Value stream = - op.getStream() - ? op.getStream() - : builder.createIntegerConstant(loc, fTy.getInput(2), -1); + op.getStream() ? op.getStream() + : builder.createNullConstant(loc, fTy.getInput(2)); args = fir::runtime::createArguments( builder, loc, fTy, op.getBox(), op.getSource(), stream, pinned, hasStat, errmsg, sourceFile, sourceLine); } else { mlir::Value stream = - op.getStream() - ? op.getStream() - : builder.createIntegerConstant(loc, fTy.getInput(1), -1); + op.getStream() ? op.getStream() + : builder.createNullConstant(loc, fTy.getInput(1)); args = fir::runtime::createArguments(builder, loc, fTy, op.getBox(), stream, pinned, hasStat, errmsg, sourceFile, sourceLine); diff --git a/flang/test/Fir/CUDA/cuda-allocate.fir b/flang/test/Fir/CUDA/cuda-allocate.fir index 095ad92d5deb5..ea7890c9aac52 100644 --- a/flang/test/Fir/CUDA/cuda-allocate.fir +++ b/flang/test/Fir/CUDA/cuda-allocate.fir @@ -19,7 +19,7 @@ func.func @_QPsub1() { // CHECK: %[[DESC:.*]] = fir.convert %[[DESC_RT_CALL]] : (!fir.ref>) -> !fir.ref>>> // CHECK: %[[DECL_DESC:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DECL_DESC]]#1 : (!fir.ref>>>) -> !fir.ref> // CHECK: %{{.*}} = fir.call @_FortranAAllocatableDeallocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -47,7 +47,7 @@ func.func @_QPsub3() { // CHECK: %[[A:.*]]:2 = hlfir.declare %[[A_ADDR]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMmod1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[A_BOX:.*]] = fir.convert %[[A]]#1 : (!fir.ref>>>) -> !fir.ref> // CHECK: fir.call @_FortranACUFAllocatableDeallocate(%[[A_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -87,7 +87,7 @@ func.func @_QPsub5() { } // CHECK-LABEL: func.func @_QPsub5() -// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocate({{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: fir.call @_FortranAAllocatableDeallocate({{.*}}) : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 @@ -118,7 +118,7 @@ func.func @_QQsub6() attributes {fir.bindc_name = "test"} { // CHECK: %[[B:.*]]:2 = hlfir.declare %[[B_ADDR]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QMdataEb"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) // CHECK: _FortranAAllocatableSetBounds // CHECK: %[[B_BOX:.*]] = fir.convert %[[B]]#1 : (!fir.ref>>>) -> !fir.ref> -// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocateSync(%[[B_BOX]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 func.func @_QPallocate_source() { @@ -142,7 +142,7 @@ func.func @_QPallocate_source() { // CHECK: %[[SOURCE:.*]] = fir.load %[[DECL_HOST]] : !fir.ref>>> // CHECK: %[[DEV_CONV:.*]] = fir.convert %[[DECL_DEV]] : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[SOURCE_CONV:.*]] = fir.convert %[[SOURCE]] : (!fir.box>>) -> !fir.box -// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %{{.*}} = fir.call @_FortranACUFAllocatableAllocateSource(%[[DEV_CONV]], %[[SOURCE_CONV]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.box, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 fir.global @_QMmod1Ea_d {data_attr = #cuf.cuda} : !fir.box>> { @@ -170,16 +170,14 @@ func.func @_QQallocate_stream() { %1 = fir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFEa"} : (!fir.ref>>>) -> !fir.ref>>> %2 = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"} %3 = fir.declare %2 {uniq_name = "_QFEstream1"} : (!fir.ref) -> !fir.ref - %4 = fir.load %3 : !fir.ref - %5 = cuf.allocate %1 : !fir.ref>>> stream(%4 : i64) {data_attr = #cuf.cuda} -> i32 + %5 = cuf.allocate %1 : !fir.ref>>> stream(%3 : !fir.ref) {data_attr = #cuf.cuda} -> i32 return } // CHECK-LABEL: func.func @_QQallocate_stream() // CHECK: %[[STREAM_ALLOCA:.*]] = fir.alloca i64 {bindc_name = "stream1", uniq_name = "_QFEstream1"} // CHECK: %[[STREAM:.*]] = fir.declare %[[STREAM_ALLOCA]] {uniq_name = "_QFEstream1"} : (!fir.ref) -> !fir.ref -// CHECK: %[[STREAM_LOAD:.*]] = fir.load %[[STREAM]] : !fir.ref -// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM_LOAD]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 func.func @_QPp_alloc() { @@ -268,6 +266,6 @@ func.func @_QQpinned() attributes {fir.bindc_name = "testasync"} { // CHECK: %[[PINNED:.*]] = fir.alloca !fir.logical<4> {bindc_name = "pinnedflag", uniq_name = "_QFEpinnedflag"} // CHECK: %[[DECL_PINNED:.*]] = fir.declare %[[PINNED]] {uniq_name = "_QFEpinnedflag"} : (!fir.ref>) -> !fir.ref> // CHECK: %[[CONV_PINNED:.*]] = fir.convert %[[DECL_PINNED]] : (!fir.ref>) -> !fir.ref -// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %{{.*}}, %[[CONV_PINNED]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, i64, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: fir.call @_FortranACUFAllocatableAllocate(%{{.*}}, %{{.*}}, %[[CONV_PINNED]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref>, !fir.ref, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 } // end of module diff --git a/flang/test/Fir/cuf-invalid.fir b/flang/test/Fir/cuf-invalid.fir index a3b9be3ee8223..dceb8f6fde236 100644 --- a/flang/test/Fir/cuf-invalid.fir +++ b/flang/test/Fir/cuf-invalid.fir @@ -2,13 +2,12 @@ func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} - %1 = fir.alloca i32 + %s = fir.alloca i64 %pinned = fir.alloca i1 %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> - %s = fir.load %1 : !fir.ref // expected-error@+1{{'cuf.allocate' op pinned and stream cannot appears at the same time}} - %13 = cuf.allocate %11 : !fir.ref> stream(%s : i32) pinned(%pinned : !fir.ref) {data_attr = #cuf.cuda} -> i32 + %13 = cuf.allocate %11 : !fir.ref> stream(%s : !fir.ref) pinned(%pinned : !fir.ref) {data_attr = #cuf.cuda} -> i32 return } diff --git a/flang/test/Fir/cuf.mlir b/flang/test/Fir/cuf.mlir index d38b26a4548ed..f80a70eca34a3 100644 --- a/flang/test/Fir/cuf.mlir +++ b/flang/test/Fir/cuf.mlir @@ -18,15 +18,14 @@ func.func @_QPsub1() { func.func @_QPsub1() { %0 = fir.alloca !fir.box>> {bindc_name = "a", uniq_name = "_QFsub1Ea"} - %1 = fir.alloca i32 + %1 = fir.alloca i64 %4:2 = hlfir.declare %0 {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub1Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %11 = fir.convert %4#1 : (!fir.ref>>>) -> !fir.ref> - %s = fir.load %1 : !fir.ref - %13 = cuf.allocate %11 : !fir.ref> stream(%s : i32) {data_attr = #cuf.cuda} -> i32 + %13 = cuf.allocate %11 : !fir.ref> stream(%1 : !fir.ref) {data_attr = #cuf.cuda} -> i32 return } -// CHECK: cuf.allocate %{{.*}} : !fir.ref> stream(%{{.*}} : i32) {data_attr = #cuf.cuda} -> i32 +// CHECK: cuf.allocate %{{.*}} : !fir.ref> stream(%{{.*}} : !fir.ref) {data_attr = #cuf.cuda} -> i32 // ----- diff --git a/flang/test/HLFIR/elemental-codegen.fir b/flang/test/HLFIR/elemental-codegen.fir index a715479f16115..67af4261470f7 100644 --- a/flang/test/HLFIR/elemental-codegen.fir +++ b/flang/test/HLFIR/elemental-codegen.fir @@ -191,7 +191,7 @@ func.func @test_polymorphic(%arg0: !fir.class> {fir.bindc_ // CHECK: %[[VAL_35:.*]] = fir.absent !fir.box // CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_4]] : (!fir.ref>>>>) -> !fir.ref> // CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_31]] : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %{{.*}}, %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %[[VAL_38:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_36]], %{{.*}}, %[[VAL_34]], %[[VAL_35]], %[[VAL_37]], %[[VAL_33]]) : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[VAL_12:.*]] = arith.constant true // CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref>>>> // CHECK: %[[VAL_40:.*]] = arith.constant 1 : index @@ -275,7 +275,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class> {fir.b // CHECK: %[[VAL_36:.*]] = fir.absent !fir.box // CHECK: %[[VAL_37:.*]] = fir.convert %[[VAL_5]] : (!fir.ref>>>>) -> !fir.ref> // CHECK: %[[VAL_38:.*]] = fir.convert %[[VAL_32]] : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %{{.*}}, %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %[[VAL_39:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_37]], %{{.*}}, %[[VAL_35]], %[[VAL_36]], %[[VAL_38]], %[[VAL_34]]) : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[VAL_13:.*]] = arith.constant true // CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref>>>> // CHECK: %[[VAL_41:.*]] = arith.constant 1 : index @@ -328,7 +328,7 @@ func.func @test_polymorphic_expr(%arg0: !fir.class> {fir.b // CHECK: %[[VAL_85:.*]] = fir.absent !fir.box // CHECK: %[[VAL_86:.*]] = fir.convert %[[VAL_4]] : (!fir.ref>>>>) -> !fir.ref> // CHECK: %[[VAL_87:.*]] = fir.convert %[[VAL_81]] : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %{{.*}}, %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %[[VAL_88:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_86]], %{{.*}}, %[[VAL_84]], %[[VAL_85]], %[[VAL_87]], %[[VAL_83]]) : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[VAL_62:.*]] = arith.constant true // CHECK: %[[VAL_89:.*]] = fir.load %[[VAL_63]]#0 : !fir.ref>>>> // CHECK: %[[VAL_90:.*]] = arith.constant 1 : index diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf index a570f636b8db1..cec10dda839e9 100644 --- a/flang/test/Lower/CUDA/cuda-allocatable.cuf +++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf @@ -90,7 +90,7 @@ end subroutine subroutine sub4() real, allocatable, device :: a(:) - integer :: istream + integer(8) :: istream allocate(a(10), stream=istream) end subroutine @@ -98,11 +98,10 @@ end subroutine ! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box>> {bindc_name = "a", data_attr = #cuf.cuda, uniq_name = "_QFsub4Ea"} -> !fir.ref>>> ! CHECK: fir.embox {{.*}} {allocator_idx = 2 : i32} ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFsub4Ea"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) -! CHECK: %[[ISTREAM:.*]] = fir.alloca i32 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"} -! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[ISTREAM:.*]] = fir.alloca i64 {bindc_name = "istream", uniq_name = "_QFsub4Eistream"} +! CHECK: %[[ISTREAM_DECL:.*]]:2 = hlfir.declare %[[ISTREAM]] {uniq_name = "_QFsub4Eistream"} : (!fir.ref) -> (!fir.ref, !fir.ref) ! CHECK: fir.call @_FortranAAllocatableSetBounds -! CHECK: %[[STREAM:.*]] = fir.load %[[ISTREAM_DECL]]#0 : !fir.ref -! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#0 : !fir.ref>>> stream(%[[STREAM]] : i32) {data_attr = #cuf.cuda} -> i32 +! CHECK: %{{.*}} = cuf.allocate %[[BOX_DECL]]#0 : !fir.ref>>> stream(%[[ISTREAM_DECL]]#0 : !fir.ref) {data_attr = #cuf.cuda} -> i32 ! CHECK: fir.if %{{.*}} { ! CHECK: %{{.*}} = cuf.deallocate %[[BOX_DECL]]#0 : !fir.ref>>> {data_attr = #cuf.cuda} -> i32 ! CHECK: } diff --git a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 index 5bb1ae3797346..6869af863644d 100644 --- a/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 +++ b/flang/test/Lower/OpenACC/acc-declare-unwrap-defaultbounds.f90 @@ -473,6 +473,6 @@ subroutine init() end module ! CHECK-LABEL: func.func @_QMacc_declare_post_action_statPinit() -! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: fir.if -! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 diff --git a/flang/test/Lower/OpenACC/acc-declare.f90 b/flang/test/Lower/OpenACC/acc-declare.f90 index 889cdef51f4ce..4d95ffa10edaf 100644 --- a/flang/test/Lower/OpenACC/acc-declare.f90 +++ b/flang/test/Lower/OpenACC/acc-declare.f90 @@ -434,6 +434,6 @@ subroutine init() end module ! CHECK-LABEL: func.func @_QMacc_declare_post_action_statPinit() -! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: fir.if -! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: fir.call @_FortranAAllocatableAllocate({{.*}}) fastmath {acc.declare_action = #acc.declare_action} : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index dd8671daeaf8e..cbd7876203424 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -267,7 +267,7 @@ subroutine test_allocatable() ! CHECK: %[[C0:.*]] = arith.constant 0 : i32 ! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[P_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref>, !fir.ref, i32, i32) -> () ! CHECK: %[[P_CAST:.*]] = fir.convert %[[P_DECL]]#0 : (!fir.ref>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[P_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[TYPE_DESC_P1:.*]] = fir.type_desc !fir.type<_QMpolyTp1{a:i32,b:i32}> ! CHECK: %[[C1_CAST:.*]] = fir.convert %[[C1_DECL]]#0 : (!fir.ref>>>) -> !fir.ref> @@ -276,7 +276,7 @@ subroutine test_allocatable() ! CHECK: %[[C0:.*]] = arith.constant 0 : i32 ! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[C1_CAST]], %[[TYPE_DESC_P1_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref>, !fir.ref, i32, i32) -> () ! CHECK: %[[C1_CAST:.*]] = fir.convert %[[C1_DECL]]#0 : (!fir.ref>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C1_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C1_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[TYPE_DESC_P2:.*]] = fir.type_desc !fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}> ! CHECK: %[[C2_CAST:.*]] = fir.convert %[[C2_DECL]]#0 : (!fir.ref>>>) -> !fir.ref> @@ -285,7 +285,7 @@ subroutine test_allocatable() ! CHECK: %[[C0:.*]] = arith.constant 0 : i32 ! CHECK: fir.call @_FortranAAllocatableInitDerivedForAllocate(%[[C2_CAST]], %[[TYPE_DESC_P2_CAST]], %[[RANK]], %[[C0]]) {{.*}}: (!fir.ref>, !fir.ref, i32, i32) -> () ! CHECK: %[[C2_CAST:.*]] = fir.convert %[[C2_DECL]]#0 : (!fir.ref>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C2_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C2_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[TYPE_DESC_P1:.*]] = fir.type_desc !fir.type<_QMpolyTp1{a:i32,b:i32}> ! CHECK: %[[C3_CAST:.*]] = fir.convert %[[C3_DECL]]#0 : (!fir.ref>>>>) -> !fir.ref> @@ -300,7 +300,7 @@ subroutine test_allocatable() ! CHECK: %[[C10_I64:.*]] = fir.convert %[[C10]] : (i32) -> i64 ! CHECK: fir.call @_FortranAAllocatableSetBounds(%[[C3_CAST]], %[[C0]], %[[C1_I64]], %[[C10_I64]]) {{.*}}: (!fir.ref>, i32, i64, i64) -> () ! CHECK: %[[C3_CAST:.*]] = fir.convert %[[C3_DECL]]#0 : (!fir.ref>>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C3_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C3_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[TYPE_DESC_P2:.*]] = fir.type_desc !fir.type<_QMpolyTp2{p1:!fir.type<_QMpolyTp1{a:i32,b:i32}>,c:i32}> ! CHECK: %[[C4_CAST:.*]] = fir.convert %[[C4_DECL]]#0 : (!fir.ref>>>>) -> !fir.ref> @@ -316,7 +316,7 @@ subroutine test_allocatable() ! CHECK: %[[C20_I64:.*]] = fir.convert %[[C20]] : (i32) -> i64 ! CHECK: fir.call @_FortranAAllocatableSetBounds(%[[C4_CAST]], %[[C0]], %[[C1_I64]], %[[C20_I64]]) {{.*}}: (!fir.ref>, i32, i64, i64) -> () ! CHECK: %[[C4_CAST:.*]] = fir.convert %[[C4_DECL]]#0 : (!fir.ref>>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C4_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[C4_CAST]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}}: (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[C1_LOAD1:.*]] = fir.load %[[C1_DECL]]#0 : !fir.ref>>> ! CHECK: fir.dispatch "proc1"(%[[C1_LOAD1]] : !fir.class>>) @@ -390,7 +390,7 @@ subroutine test_unlimited_polymorphic_with_intrinsic_type_spec() ! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32 ! CHECK: fir.call @_FortranAAllocatableInitIntrinsicForAllocate(%[[BOX_NONE]], %[[CAT]], %[[KIND]], %[[RANK]], %[[CORANK]]) {{.*}} : (!fir.ref>, i32, i32, i32, i32) -> () ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[P_DECL]]#0 : (!fir.ref>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[PTR_DECL]]#0 : (!fir.ref>>) -> !fir.ref> ! CHECK: %[[CAT:.*]] = arith.constant 2 : i32 @@ -573,7 +573,7 @@ subroutine test_allocatable_up_character() ! CHECK: %[[CORANK:.*]] = arith.constant 0 : i32 ! CHECK: fir.call @_FortranAAllocatableInitCharacterForAllocate(%[[A_NONE]], %[[LEN]], %[[KIND]], %[[RANK]], %[[CORANK]]) {{.*}} : (!fir.ref>, i64, i32, i32, i32) -> () ! CHECK: %[[A_NONE:.*]] = fir.convert %[[A_DECL]]#0 : (!fir.ref>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 end module @@ -592,17 +592,17 @@ program test_alloc ! LLVM-LABEL: define void @_QMpolyPtest_allocatable() ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 0, i32 0) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 0, i32 0) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 0, i32 0) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp1, i32 1, i32 0) ! LLVM: call void @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 10) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %{{.*}}, ptr @_QMpolyEXdtXp2, i32 1, i32 0) ! LLVM: call void @_FortranAAllocatableSetBounds(ptr %{{.*}}, i32 0, i64 1, i64 20) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %{{.*}}, ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM-COUNT-2: call void %{{[0-9]*}}() ! LLVM: call void @llvm.memcpy.p0.p0.i32 @@ -683,5 +683,5 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } { ptr null, i64 8, i32 20240719, i8 0, i8 42, i8 2, i8 1, ptr @_QMpolyEXdtXp1, [1 x i64] zeroinitializer }, ptr %[[ALLOCA1:[0-9]*]] ! LLVM: call void @llvm.memcpy.p0.p0.i32(ptr %[[ALLOCA2:[0-9]+]], ptr %[[ALLOCA1]], i32 40, i1 false) ! LLVM: call void @_FortranAAllocatableInitDerivedForAllocate(ptr %[[ALLOCA2]], ptr @_QMpolyEXdtXp1, i32 0, i32 0) -! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], i64 {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) +! LLVM: %{{.*}} = call i32 @_FortranAAllocatableAllocate(ptr %[[ALLOCA2]], ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) ! LLVM: %{{.*}} = call i32 @_FortranAAllocatableDeallocatePolymorphic(ptr %[[ALLOCA2]], ptr {{.*}}, i1 false, ptr null, ptr @_QQclX{{.*}}, i32 {{.*}}) diff --git a/flang/test/Lower/allocatable-runtime.f90 b/flang/test/Lower/allocatable-runtime.f90 index 37272c90656cc..c63252c68974e 100644 --- a/flang/test/Lower/allocatable-runtime.f90 +++ b/flang/test/Lower/allocatable-runtime.f90 @@ -31,7 +31,7 @@ subroutine foo() ! CHECK: fir.call @{{.*}}AllocatableSetBounds(%[[xBoxCast2]], %c0{{.*}}, %[[xlbCast]], %[[xubCast]]) {{.*}}: (!fir.ref>, i32, i64, i64) -> () ! CHECK-DAG: %[[xBoxCast3:.*]] = fir.convert %[[xBoxAddr]] : (!fir.ref>>>) -> !fir.ref> ! CHECK-DAG: %[[sourceFile:.*]] = fir.convert %{{.*}} -> !fir.ref - ! CHECK: fir.call @{{.*}}AllocatableAllocate(%[[xBoxCast3]], %{{.*}}, %false{{.*}}, %[[errMsg]], %[[sourceFile]], %{{.*}}) {{.*}}: (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 + ! CHECK: fir.call @{{.*}}AllocatableAllocate(%[[xBoxCast3]], %{{.*}}, %false{{.*}}, %[[errMsg]], %[[sourceFile]], %{{.*}}) {{.*}}: (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 ! Simply check that we are emitting the right numebr of set bound for y and z. Otherwise, this is just like x. ! CHECK: fir.convert %[[yBoxAddr]] : (!fir.ref>>>) -> !fir.ref> @@ -180,4 +180,4 @@ subroutine mold_allocation() ! CHECK: %[[M_BOX_NONE:.*]] = fir.convert %[[EMBOX_M]] : (!fir.box>) -> !fir.box ! CHECK: fir.call @_FortranAAllocatableApplyMold(%[[A_BOX_NONE]], %[[M_BOX_NONE]], %[[RANK]]) {{.*}} : (!fir.ref>, !fir.box, i32) -> () ! CHECK: %[[A_BOX_NONE:.*]] = fir.convert %[[A]] : (!fir.ref>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 diff --git a/flang/test/Lower/allocate-mold.f90 b/flang/test/Lower/allocate-mold.f90 index c7985b11397ce..9427c8b08786f 100644 --- a/flang/test/Lower/allocate-mold.f90 +++ b/flang/test/Lower/allocate-mold.f90 @@ -16,7 +16,7 @@ subroutine scalar_mold_allocation() ! CHECK: %[[A_REF_BOX_NONE1:.*]] = fir.convert %[[A]] : (!fir.ref>>) -> !fir.ref> ! CHECK: fir.call @_FortranAAllocatableApplyMold(%[[A_REF_BOX_NONE1]], %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, !fir.box, i32) -> () ! CHECK: %[[A_REF_BOX_NONE2:.*]] = fir.convert %[[A]] : (!fir.ref>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_REF_BOX_NONE2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[A_REF_BOX_NONE2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 subroutine array_scalar_mold_allocation() real, allocatable :: a(:) @@ -40,4 +40,4 @@ end subroutine array_scalar_mold_allocation ! CHECK: %[[REF_BOX_A1:.*]] = fir.convert %1 : (!fir.ref>>>) -> !fir.ref> ! CHECK: fir.call @_FortranAAllocatableSetBounds(%[[REF_BOX_A1]], {{.*}},{{.*}}, {{.*}}) fastmath : (!fir.ref>, i32, i64, i64) -> () ! CHECK: %[[REF_BOX_A2:.*]] = fir.convert %[[A]] : (!fir.ref>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[REF_BOX_A2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[REF_BOX_A2]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90 index 485861a838ff6..b7be5f685d9e3 100644 --- a/flang/test/Lower/polymorphic.f90 +++ b/flang/test/Lower/polymorphic.f90 @@ -1149,7 +1149,7 @@ program test ! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "test"} { ! CHECK: %[[ADDR_O:.*]] = fir.address_of(@_QFEo) : !fir.ref}>>>> ! CHECK: %[[BOX_NONE:.*]] = fir.convert %[[ADDR_O]] : (!fir.ref}>>>>) -> !fir.ref> -! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %{{.*}} = fir.call @_FortranAAllocatableAllocate(%[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) {{.*}} : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 ! CHECK: %[[O:.*]] = fir.load %[[ADDR_O]] : !fir.ref}>>>> ! CHECK: %[[COORD_INNER:.*]] = fir.coordinate_of %[[O]], inner : (!fir.box}>>>) -> !fir.ref> ! CHECK: %{{.*}} = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered iter_args(%arg1 = %{{.*}}) -> (!fir.array<5x!fir.logical<4>>) { diff --git a/flang/test/Transforms/lower-repack-arrays.fir b/flang/test/Transforms/lower-repack-arrays.fir index bbae7ba5b0e0b..0b323b1bb0697 100644 --- a/flang/test/Transforms/lower-repack-arrays.fir +++ b/flang/test/Transforms/lower-repack-arrays.fir @@ -840,7 +840,7 @@ func.func @_QPtest6(%arg0: !fir.class>> {fir.bi // CHECK: %[[VAL_34:.*]] = fir.absent !fir.box // CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref>>>>) -> !fir.ref> // CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref>>>> // CHECK: %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}} // CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class>>>) -> !fir.box @@ -928,7 +928,7 @@ func.func @_QPtest6_stack(%arg0: !fir.class>> { // CHECK: %[[VAL_34:.*]] = fir.absent !fir.box // CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref>>>>) -> !fir.ref> // CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref>>>> // CHECK: %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}} // CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class>>>) -> !fir.box @@ -1015,7 +1015,7 @@ func.func @_QPtest7(%arg0: !fir.class> {fir.bindc_name = "x // CHECK: %[[VAL_34:.*]] = fir.absent !fir.box // CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref>>> // CHECK: %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}} // CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class>>) -> !fir.box @@ -1103,7 +1103,7 @@ func.func @_QPtest7_stack(%arg0: !fir.class> {fir.bindc_nam // CHECK: %[[VAL_34:.*]] = fir.absent !fir.box // CHECK: %[[VAL_35:.*]] = fir.convert %[[VAL_7]] : (!fir.ref>>>) -> !fir.ref> // CHECK: %[[VAL_36:.*]] = fir.convert %[[VAL_33]] : (!fir.ref>) -> !fir.ref -// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref>, i64, i1, !fir.box, !fir.ref, i32) -> i32 +// CHECK: %[[VAL_37:.*]] = fir.call @_FortranAAllocatableAllocate(%[[VAL_35]], %{{.*}}, %[[VAL_6]], %[[VAL_34]], %[[VAL_36]], %[[VAL_2]]) : (!fir.ref>, !fir.ref, i1, !fir.box, !fir.ref, i32) -> i32 // CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_22]] : !fir.ref>>> // CHECK: %[[VAL_39:.*]] = fir.address_of(@{{_QQcl.*}} // CHECK: %[[VAL_40:.*]] = fir.convert %[[VAL_38]] : (!fir.class>>) -> !fir.box