-
Notifications
You must be signed in to change notification settings - Fork 15.5k
Honor pragmas with -ffp-contract=fast, deprecate fast-honor-pragmas #105746
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -398,20 +398,8 @@ static bool initTargetOptions(const CompilerInstance &CI, | |
| .Default(llvm::FloatABI::Default); | ||
|
|
||
| // Set FP fusion mode. | ||
| switch (LangOpts.getDefaultFPContractMode()) { | ||
| case LangOptions::FPM_Off: | ||
| // Preserve any contraction performed by the front-end. (Strict performs | ||
| // splitting of the muladd intrinsic in the backend.) | ||
| Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; | ||
| break; | ||
| case LangOptions::FPM_On: | ||
| case LangOptions::FPM_FastHonorPragmas: | ||
| Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; | ||
| break; | ||
| case LangOptions::FPM_Fast: | ||
| Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; | ||
| break; | ||
| } | ||
| // All allowed fusion is indicated in the IR. | ||
| Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is redundant, I think; Standard is the default anyway. |
||
|
|
||
| Options.BinutilsVersion = | ||
| llvm::TargetMachine::parseBinutilsVersion(CodeGenOpts.BinutilsVersion); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3000,6 +3000,13 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, | |
| StringRef Val = A->getValue(); | ||
| if (Val == "fast" || Val == "on" || Val == "off" || | ||
| Val == "fast-honor-pragmas") { | ||
| // fast-honor-pragmas is deprecated -- replace it with fast | ||
| if (Val == "fast-honor-pragmas") { | ||
| D.Diag(diag::warn_drv_deprecated_arg) | ||
| << A->getAsString(Args) << /*hasReplacement=*/true | ||
| << "-ffp-contract=fast"; | ||
| Val = "fast"; | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure that actually diagnosing this is a good idea. We have to support it forever; let's just silently consider an alias.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we document it as deprecated (or just remove it from the documentation) but silently accept it? I was copying this code from another option where an option was deprecated. I certainly don't see a problem with leaving it permanently. |
||
| if (Val != FPContract && LastFpContractOverrideOption != "") { | ||
| D.Diag(clang::diag::warn_drv_overriding_option) | ||
| << LastFpContractOverrideOption | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,6 +1,6 @@ | ||
| // REQUIRES: x86-registered-target, nvptx-registered-target, amdgpu-registered-target | ||
|
|
||
| // By default CUDA uses -ffp-contract=fast, HIP uses -ffp-contract=fast-honor-pragmas. | ||
| // By default CUDA and HIP use -ffp-contract=fast. | ||
| // we should fuse multiply/add into fma instruction. | ||
| // In IR, fmul/fadd instructions with contract flag are emitted. | ||
| // In backend | ||
|
|
@@ -68,35 +68,35 @@ | |
| // RUN: -O3 -target-cpu gfx906 -o - -x ir %t.ll \ | ||
| // RUN: | FileCheck -check-prefixes=COMMON,AMD-OPT-FASTSTD %s | ||
|
|
||
| // Explicit -ffp-contract=fast-honor-pragmas | ||
| // Explicit -ffp-contract=fast (was fast-honor-pragmas) | ||
| // In IR, fmul/fadd instructions with contract flag are emitted. | ||
| // In backend | ||
| // nvptx/amdgcn - assumes standard fp fuse option, which only | ||
| // fuses mult/add insts with contract flag or | ||
| // llvm.fmuladd intrinsics. | ||
|
|
||
| // RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ | ||
| // RUN: -ffp-contract=fast-honor-pragmas -disable-llvm-passes -o - %s \ | ||
| // RUN: -ffp-contract=fast -disable-llvm-passes -o - %s \ | ||
| // RUN: | FileCheck -check-prefixes=COMMON,NV-ON %s | ||
| // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -S \ | ||
| // RUN: -target-cpu gfx906 -disable-llvm-passes -o - -x hip %s \ | ||
| // RUN: -ffp-contract=fast-honor-pragmas \ | ||
| // RUN: -ffp-contract=fast \ | ||
| // RUN: | FileCheck -check-prefixes=COMMON,AMD-ON %s | ||
| // RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ | ||
| // RUN: -O3 -o - %s \ | ||
| // RUN: -ffp-contract=fast-honor-pragmas \ | ||
| // RUN: -ffp-contract=fast \ | ||
| // RUN: | FileCheck -check-prefixes=COMMON,NV-OPT-FASTSTD %s | ||
| // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -S \ | ||
| // RUN: -O3 -target-cpu gfx906 -o - -x hip %s \ | ||
| // RUN: -ffp-contract=fast-honor-pragmas \ | ||
| // RUN: -ffp-contract=fast \ | ||
| // RUN: | FileCheck -check-prefixes=COMMON,AMD-OPT-FASTSTD %s | ||
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You've verified that we aren't testing for any actual IR-gen differences in this mode?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At a glance, I thought it was mostly duplicating the run lines above, but now I see that there are some additional checks. This test probably needs more attention to see if the checks can be reorganized. |
||
| // Check separate compile/backend steps corresponding to -save-temps. | ||
| // When input is IR, -ffp-contract has no effect. Backend uses default | ||
| // default FP fuse option. | ||
|
|
||
| // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ | ||
| // RUN: -ffp-contract=fast-honor-pragmas \ | ||
| // RUN: -ffp-contract=fast \ | ||
| // RUN: -O3 -disable-llvm-passes -target-cpu gfx906 -o %t.ll -x hip %s | ||
| // RUN: cat %t.ll | FileCheck -check-prefixes=COMMON,AMD-OPT-FAST-IR %s | ||
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -S \ | ||
|
|
@@ -254,19 +254,16 @@ __host__ __device__ float func2(float a, float b, float c) { | |
|
|
||
| // Test multiply/add in the different statements, which is forced | ||
| // to be compiled with fp contract on. fmul/fadd without contract | ||
| // flags are emitted in IR. In nvptx, they are emitted as FMA in | ||
| // fp-contract is fast but not on, as nvptx backend uses the same | ||
| // fp fuse option as front end, whereas fast fp fuse option in | ||
| // backend fuses fadd/fmul disregarding contract flag. In amdgcn | ||
| // they are not fused as amdgcn always use standard fp fusion | ||
| // option which respects contract flag. | ||
| __host__ __device__ float func3(float a, float b, float c) { | ||
| // flags are emitted in IR. The operations should not be fused | ||
| // because the mul and add occurs in different statements. | ||
| __host__ __device__ float func3(float a, float b, float c) { | ||
| #pragma clang fp contract(on) | ||
| float t = b * c; | ||
| return t + a; | ||
| } | ||
| // COMMON-LABEL: _Z5func3fff | ||
| // NV-OPT-FAST: fma.rn.f32 | ||
| // NV-OPT-FAST: mul.rn.f32 | ||
| // NV-OPT-FAST: add.rn.f32 | ||
| // NV-OPT-FAST-NEXT: st.param.b32 | ||
| // NV-OPT-FASTSTD: mul.rn.f32 | ||
| // NV-OPT-FASTSTD: add.rn.f32 | ||
|
|
@@ -285,7 +282,8 @@ __host__ __device__ float func2(float a, float b, float c) { | |
| // AMD-OPT-OFF-IR: fmul float | ||
| // AMD-OPT-OFF-IR: fadd float | ||
|
|
||
| // AMD-OPT-FAST: v_fmac_f32_e32 | ||
| // AMD-OPT-FAST: v_mul_f32_e32 | ||
| // AMD-OPT-FAST-NEXT: v_add_f32_e32 | ||
| // AMD-OPT-FAST-NEXT: s_setpc_b64 | ||
| // AMD-OPT-FASTSTD: v_mul_f32_e32 | ||
| // AMD-OPT-FASTSTD-NEXT: v_add_f32_e32 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,7 +6,6 @@ | |
| // before the drive options that are checked below the run lines. | ||
| // WARN_FM_OFF: warning: overriding '-ffast-math' option with '-ffp-contract=off' | ||
| // WARN_FM_ON: warning: overriding '-ffast-math' option with '-ffp-contract=on' | ||
| // WARN_FM_FHP: warning: overriding '-ffast-math' option with '-ffp-contract=fast-honor-pragmas' | ||
| // WARN_UM_OFF: warning: overriding '-funsafe-math-optimizations' option with '-ffp-contract=off' | ||
| // WARN_UM_ON: warning: overriding '-funsafe-math-optimizations' option with '-ffp-contract=on' | ||
|
|
||
|
|
@@ -30,8 +29,10 @@ | |
| // RUN: | FileCheck --check-prefix=CHECK-FPC-FAST %s | ||
|
|
||
| // RUN: %clang -### -ffast-math -ffp-contract=fast-honor-pragmas -c %s 2>&1 \ | ||
| // RUN: | FileCheck --check-prefixes=CHECK-FPC-FAST-HONOR,WARN_FM_FHP %s | ||
| // CHECK-FPC-FAST-HONOR: "-ffp-contract=fast-honor-pragmas" | ||
| // RUN: | FileCheck --check-prefixes=CHECK-FPC-FAST-HONOR,WARN_FHP_DEPRECATED %s | ||
| // WARN_FHP_DEPRECATED: clang: warning: argument '-ffp-contract=fast-honor-pragmas' is deprecated, use '-ffp-contract=fast' instead [-Wdeprecated] | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We generally don't check the string before "warning" (can be customized) |
||
| // CHECK-FPC-FAST-HONOR: "-ffp-contract=fast" | ||
| // CHECK-FPC-FAST-HONOR-NOT: "-honor-pragmas" | ||
|
|
||
| // RUN: %clang -### -Werror -ffp-contract=fast -ffast-math -c %s 2>&1 \ | ||
| // RUN: | FileCheck --check-prefix=CHECK-FPC-FAST %s | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comment below needs to be updated.