-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[Flang][OpenMP] Enable no-loop kernels #155818
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
efdd979
564410d
8efb5e0
91e5e58
b34a653
6176c95
d2e88db
b32ac37
e8a23ce
52ebd55
78165d7
31f87cd
a15711d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2591,13 +2591,34 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, | |
| } | ||
|
|
||
| builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); | ||
|
|
||
| // Check if we can generate no-loop kernel | ||
| bool noLoopMode = false; | ||
| omp::TargetOp targetOp = wsloopOp->getParentOfType<mlir::omp::TargetOp>(); | ||
| if (targetOp) { | ||
| Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp(); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Should we check here that the captured op is the loop wrapped by this `omp.wsloop`? Otherwise, a nested worksharing loop such as the inner one below would also be affected:
```fortran
!$omp target teams distribute parallel do
do i=1,10
  !$omp do
  do j=1,i
  end do
end do
```
I'm not actually sure what the expected behavior of this is, but I imagine that no-loop would just refer to the outer loop, as it's the one for which the trip count can be evaluated on the host.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi Sergio,
Yes. There is assumption in
There are 2 issues:
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thank you for the clarifications, Dominik.
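What I mean is adding something like this:
```c++
Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp();
if (*loopOp == targetCapturedOp) {
  omp::TargetRegionFlags kernelFlags = targetOp.getKernelExecFlags(targetCapturedOp);
  ...
}
```
We need that because, if not, `noLoopMode` would be set to true for every `omp.wsloop` nested inside a no-loop SPMD target region, even if that loop is not the top-level SPMD one.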
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in commit: d2e88db |
||
| // We need this check because, without it, noLoopMode would be set to true | ||
| // for every omp.wsloop nested inside a no-loop SPMD target region, even if | ||
| // that loop is not the top-level SPMD one. | ||
| if (loopOp == targetCapturedOp) { | ||
| omp::TargetRegionFlags kernelFlags = | ||
| targetOp.getKernelExecFlags(targetCapturedOp); | ||
| if (omp::bitEnumContainsAll(kernelFlags, | ||
| omp::TargetRegionFlags::spmd | | ||
| omp::TargetRegionFlags::no_loop) && | ||
| !omp::bitEnumContainsAny(kernelFlags, | ||
| omp::TargetRegionFlags::generic)) | ||
| noLoopMode = true; | ||
| } | ||
| } | ||
|
|
||
| llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = | ||
| ompBuilder->applyWorkshareLoop( | ||
| ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, | ||
| convertToScheduleKind(schedule), chunk, isSimd, | ||
| scheduleMod == omp::ScheduleModifier::monotonic, | ||
| scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered, | ||
| workshareLoopType); | ||
| workshareLoopType, noLoopMode); | ||
|
|
||
| if (failed(handleError(wsloopIP, opInst))) | ||
| return failure(); | ||
|
|
@@ -5425,6 +5446,12 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, | |
| ? llvm::omp::OMP_TGT_EXEC_MODE_GENERIC_SPMD | ||
| : llvm::omp::OMP_TGT_EXEC_MODE_GENERIC | ||
| : llvm::omp::OMP_TGT_EXEC_MODE_SPMD; | ||
| if (omp::bitEnumContainsAll(kernelFlags, | ||
| omp::TargetRegionFlags::spmd | | ||
| omp::TargetRegionFlags::no_loop) && | ||
| !omp::bitEnumContainsAny(kernelFlags, omp::TargetRegionFlags::generic)) | ||
| attrs.ExecFlags = llvm::omp::OMP_TGT_EXEC_MODE_SPMD_NO_LOOP; | ||
|
|
||
| attrs.MinTeams = minTeamsVal; | ||
| attrs.MaxTeams.front() = maxTeamsVal; | ||
| attrs.MinThreads = 1; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
| ! REQUIRES: flang | ||
|
|
||
| ! RUN: %libomptarget-compile-fortran-generic -O3 -fopenmp-assume-threads-oversubscription -fopenmp-assume-teams-oversubscription | ||
| ! RUN: env LIBOMPTARGET_INFO=16 OMP_NUM_TEAMS=16 OMP_TEAMS_THREAD_LIMIT=16 %libomptarget-run-generic 2>&1 | %fcheck-generic | ||
| function check_errors(array) result (errors) | ||
| integer, intent(in) :: array(1024) | ||
| integer :: errors | ||
| integer :: i | ||
| errors = 0 | ||
| do i = 1, 1024 | ||
| if ( array( i) .ne. (i) ) then | ||
| errors = errors + 1 | ||
| end if | ||
| end do | ||
| end function | ||
|
|
||
| program main | ||
| use omp_lib | ||
| implicit none | ||
| integer :: i,j,red | ||
| integer :: array(1024), errors = 0 | ||
| array = 1 | ||
|
|
||
| ! No-loop kernel | ||
| !$omp target teams distribute parallel do | ||
| do i = 1, 1024 | ||
| array(i) = i | ||
| end do | ||
| errors = errors + check_errors(array) | ||
|
|
||
| ! SPMD kernel (num_teams clause blocks promotion to no-loop) | ||
| array = 1 | ||
| !$omp target teams distribute parallel do num_teams(3) | ||
| do i = 1, 1024 | ||
| array(i) = i | ||
| end do | ||
|
|
||
| errors = errors + check_errors(array) | ||
|
|
||
| ! No-loop kernel | ||
| array = 1 | ||
| !$omp target teams distribute parallel do num_threads(64) | ||
| do i = 1, 1024 | ||
| array(i) = i | ||
| end do | ||
|
|
||
| errors = errors + check_errors(array) | ||
|
|
||
| ! SPMD kernel | ||
| array = 1 | ||
| !$omp target parallel do | ||
| do i = 1, 1024 | ||
| array(i) = i | ||
| end do | ||
|
|
||
| errors = errors + check_errors(array) | ||
|
|
||
| ! Generic kernel | ||
| array = 1 | ||
| !$omp target teams distribute | ||
| do i = 1, 1024 | ||
| array(i) = i | ||
| end do | ||
|
|
||
| errors = errors + check_errors(array) | ||
|
|
||
| ! SPMD kernel (reduction clause blocks promotion to no-loop) | ||
| array = 1 | ||
| red =0 | ||
| !$omp target teams distribute parallel do reduction(+:red) | ||
| do i = 1, 1024 | ||
| red = red + array(i) | ||
| end do | ||
|
|
||
| if (red .ne. 1024) then | ||
| errors = errors + 1 | ||
| end if | ||
|
|
||
| print *,"number of errors: ", errors | ||
|
|
||
| end program main | ||
|
|
||
| ! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}} SPMD-No-Loop mode | ||
| ! CHECK: info: #Args: 3 Teams x Thrds: 64x 16 | ||
| ! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}} SPMD mode | ||
| ! CHECK: info: #Args: 3 Teams x Thrds: 3x 16 {{.*}} | ||
| ! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}} SPMD-No-Loop mode | ||
| ! CHECK: info: #Args: 3 Teams x Thrds: 64x 16 {{.*}} | ||
| ! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}} SPMD mode | ||
| ! CHECK: info: #Args: 3 Teams x Thrds: 1x 16 | ||
| ! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}} Generic mode | ||
| ! CHECK: info: #Args: 3 Teams x Thrds: 16x 16 {{.*}} | ||
| ! CHECK: "PluginInterface" device {{[0-9]+}} info: Launching kernel {{.*}} SPMD mode | ||
| ! CHECK: info: #Args: 4 Teams x Thrds: 16x 16 {{.*}} | ||
| ! CHECK: number of errors: 0 | ||
|
|
Uh oh!
There was an error while loading. Please reload this page.