diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index a3db3e5d356b3..16731bf8037c2 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -5733,9 +5733,7 @@ statements in C). The pragma can also be used with ``off`` which turns FP contraction off for a section of the code. This can be useful when fast contraction is otherwise -enabled for the translation unit with the ``-ffp-contract=fast-honor-pragmas`` flag. -Note that ``-ffp-contract=fast`` will override pragmas to fuse multiply and -addition across statements regardless of any controlling pragmas. +enabled for the translation unit with the ``-ffp-contract=fast`` flag. ``#pragma clang fp exceptions`` specifies floating point exception behavior. It may take one of the values: ``ignore``, ``maytrap`` or ``strict``. Meaning of diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7459127670cc3..2dcd66343ff7c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -341,6 +341,10 @@ Modified Compiler Flags - The `-gkey-instructions` compiler flag is now enabled by default when DWARF is emitted for plain C/C++ and optimizations are enabled. (#GH149509) - The `-fconstexpr-steps` compiler flag now accepts value `0` to opt out of this limit. (#GH160440) +- The ``-ffp-contract`` option now honors pragmas by default when the ``fast`` + argument is used. The ``fast-honor-pragmas`` option is now deprecated and acts + as an alias for ``fast``. 
+ Removed Compiler Flags ------------------------- diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index d267eec9425b3..c3aa7914ddf0f 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1557,7 +1557,7 @@ describes the various floating point semantic modes and the corresponding option "ffp-exception-behavior", "{ignore, strict, maytrap}", "fenv_access", "{off, on}", "(none)" "frounding-math", "{dynamic, tonearest, downward, upward, towardzero}" - "ffp-contract", "{on, off, fast, fast-honor-pragmas}" + "ffp-contract", "{on, off, fast}" "fdenormal-fp-math", "{IEEE, PreserveSign, PositiveZero}" "fdenormal-fp-math-fp32", "{IEEE, PreserveSign, PositiveZero}" "fmath-errno", "{on, off}" @@ -1764,13 +1764,12 @@ for more details. Valid values are: - * ``fast``: enable fusion across statements disregarding pragmas, breaking - compliance with the C and C++ standards (default for CUDA). + * ``fast``: enable fusion across statements unless dictated by pragmas, + breaking compliance with the C and C++ standards (default for CUDA). * ``on``: enable C and C++ standard compliant fusion in the same statement unless dictated by pragmas (default for languages other than CUDA/HIP) * ``off``: disable fusion - * ``fast-honor-pragmas``: fuse across statements unless dictated by pragmas - (default for HIP) + * ``fast-honor-pragmas``: deprecated, aliases fast .. option:: -f[no-]honor-infinities diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 8aa89d8c8c807..3eb53b18adf7e 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -217,11 +217,8 @@ class LangOptionsBase { // Enable the floating point pragma FPM_On, - // Aggressively fuse FP ops (E.g. FMA) disregarding pragmas. - FPM_Fast, - // Aggressively fuse FP ops and honor pragmas. - FPM_FastHonorPragmas + FPM_Fast }; /// Possible floating point exception behavior. 
@@ -816,12 +813,7 @@ class FPOptions { } explicit FPOptions(const LangOptions &LO) { Value = 0; - // The language fp contract option FPM_FastHonorPragmas has the same effect - // as FPM_Fast in frontend. For simplicity, use FPM_Fast uniformly in - // frontend. auto LangOptContractMode = LO.getDefaultFPContractMode(); - if (LangOptContractMode == LangOptions::FPM_FastHonorPragmas) - LangOptContractMode = LangOptions::FPM_Fast; setFPContractMode(LangOptContractMode); setRoundingMath(LO.RoundingMath); setConstRoundingMode(LangOptions::RoundingMode::Dynamic); diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 2f7434d8afe11..4c6291d5eabf4 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -2831,11 +2831,11 @@ def fno_trapping_math : Flag<["-"], "fno-trapping-math">, Group; def ffp_contract : Joined<["-"], "ffp-contract=">, Group, Visibility<[ClangOption, CC1Option, FC1Option, FlangOption]>, DocBrief<"Form fused FP ops (e.g. FMAs):" - " fast (fuses across statements disregarding pragmas)" + " fast (fuses across statements unless dictated by pragmas)" " | on (only fuses in the same statement unless dictated by pragmas)" " | off (never fuses)" - " | fast-honor-pragmas (fuses across statements unless dictated by pragmas)." - " Default is 'fast' for CUDA, 'fast-honor-pragmas' for HIP, and 'on' otherwise.">, + " | fast-honor-pragmas (deprecated, aliases fast)." + " Default is 'fast' for CUDA or HIP, and 'on' otherwise.">, HelpText<"Form fused FP ops (e.g. 
FMAs)">, Values<"fast,on,off,fast-honor-pragmas">; diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index 19b557603d135..1040c865614ad 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -192,18 +192,8 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang, Opts.HIP = Lang == Language::HIP; Opts.CUDA = Lang == Language::CUDA || Opts.HIP; - if (Opts.HIP) { - // HIP toolchain does not support 'Fast' FPOpFusion in backends since it - // fuses multiplication/addition instructions without contract flag from - // device library functions in LLVM bitcode, which causes accuracy loss in - // certain math functions, e.g. tan(-1e20) becomes -0.933 instead of 0.8446. - // For device library functions in bitcode to work, 'Strict' or 'Standard' - // FPOpFusion options in backends is needed. Therefore 'fast-honor-pragmas' - // FP contract option is used to allow fuse across statements in frontend - // whereas respecting contract flag in backend. - Opts.setDefaultFPContractMode(LangOptions::FPM_FastHonorPragmas); - } else if (Opts.CUDA) { - if (T.isSPIRV()) { + if (Opts.HIP || Opts.CUDA) { + if (Opts.CUDA && T.isSPIRV()) { // Emit OpenCL version metadata in LLVM IR when targeting SPIR-V. Opts.OpenCLVersion = 200; } diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 6f63e6470270e..777b3e579d7cc 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -398,20 +398,8 @@ static bool initTargetOptions(const CompilerInstance &CI, .Default(llvm::FloatABI::Default); // Set FP fusion mode. - switch (LangOpts.getDefaultFPContractMode()) { - case LangOptions::FPM_Off: - // Preserve any contraction performed by the front-end. (Strict performs - // splitting of the muladd intrinsic in the backend.) 
- Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; - break; - case LangOptions::FPM_On: - case LangOptions::FPM_FastHonorPragmas: - Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; - break; - case LangOptions::FPM_Fast: - Options.AllowFPOpFusion = llvm::FPOpFusion::Fast; - break; - } + // All allowed fusion is indicated in the IR. + Options.AllowFPOpFusion = llvm::FPOpFusion::Standard; Options.BinutilsVersion = llvm::TargetMachine::parseBinutilsVersion(CodeGenOpts.BinutilsVersion); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index c5d40c9825fab..8a39ea411a102 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3000,6 +3000,13 @@ static void RenderFloatingPointOptions(const ToolChain &TC, const Driver &D, StringRef Val = A->getValue(); if (Val == "fast" || Val == "on" || Val == "off" || Val == "fast-honor-pragmas") { + // fast-honor-pragmas is deprecated -- replace it with fast + if (Val == "fast-honor-pragmas") { + D.Diag(diag::warn_drv_deprecated_arg) + << A->getAsString(Args) << /*hasReplacement=*/true + << "-ffp-contract=fast"; + Val = "fast"; + } if (Val != FPContract && LastFpContractOverrideOption != "") { D.Diag(clang::diag::warn_drv_overriding_option) << LastFpContractOverrideOption diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 54b302e829e1f..81fc47295e6d1 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3898,8 +3898,6 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Consumer, OPT_ffp_contract, "on"); else if (Opts.DefaultFPContractMode == LangOptions::FPM_Off) GenerateArg(Consumer, OPT_ffp_contract, "off"); - else if (Opts.DefaultFPContractMode == LangOptions::FPM_FastHonorPragmas) - GenerateArg(Consumer, OPT_ffp_contract, "fast-honor-pragmas"); for (StringRef Sanitizer : 
serializeSanitizerKinds(Opts.Sanitize)) GenerateArg(Consumer, OPT_fsanitize_EQ, Sanitizer); @@ -4389,8 +4387,8 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, Opts.setDefaultFPContractMode(LangOptions::FPM_On); else if (Val == "off") Opts.setDefaultFPContractMode(LangOptions::FPM_Off); - else if (Val == "fast-honor-pragmas") - Opts.setDefaultFPContractMode(LangOptions::FPM_FastHonorPragmas); + else if (Val == "fast-honor-pragmas") // Deprecated + Opts.setDefaultFPContractMode(LangOptions::FPM_Fast); else Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index 8411a3da8322d..3c1c000b544dc 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -1386,7 +1386,6 @@ void Sema::ActOnPragmaFPContract(SourceLocation Loc, NewFPFeatures.setAllowFPContractWithinStatement(); break; case LangOptions::FPM_Fast: - case LangOptions::FPM_FastHonorPragmas: NewFPFeatures.setAllowFPContractAcrossStatement(); break; case LangOptions::FPM_Off: diff --git a/clang/test/CodeGen/ffp-contract-fast-honor-pramga-option.cpp b/clang/test/CodeGen/ffp-contract-fast-honor-pramga-option.cpp index fef4da1edf1fc..c3d8909c33bd6 100644 --- a/clang/test/CodeGen/ffp-contract-fast-honor-pramga-option.cpp +++ b/clang/test/CodeGen/ffp-contract-fast-honor-pramga-option.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -O3 -ffp-contract=fast-honor-pragmas -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -O3 -ffp-contract=fast -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s float fp_contract_1(float a, float b, float c) { // CHECK-LABEL: fp_contract_1fff( diff --git a/clang/test/CodeGen/ffp-contract-fhp-pragma-override.cpp b/clang/test/CodeGen/ffp-contract-fhp-pragma-override.cpp index ff35c9204c79c..fe4cf21861f00 100644 --- a/clang/test/CodeGen/ffp-contract-fhp-pragma-override.cpp +++ 
b/clang/test/CodeGen/ffp-contract-fhp-pragma-override.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -O3 -ffp-contract=fast-honor-pragmas -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -O3 -ffp-contract=fast -triple %itanium_abi_triple -emit-llvm -o - %s | FileCheck %s float fp_contract_on_1(float a, float b, float c) { // CHECK-LABEL: fp_contract_on_1fff( diff --git a/clang/test/CodeGen/fp-function-attrs.cpp b/clang/test/CodeGen/fp-function-attrs.cpp index 3775bd5452d78..2e62875bc9f34 100644 --- a/clang/test/CodeGen/fp-function-attrs.cpp +++ b/clang/test/CodeGen/fp-function-attrs.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple x86_64-linux-gnu -ffast-math -ffp-contract=fast-honor-pragmas -emit-llvm -o - %s | FileCheck %s float test_default(float a, float b, float c) { float tmp = a; @@ -53,5 +52,18 @@ float test_contract_on_pragma(float a, float b, float c) { // CHECK: fmul fast float {{%.+}}, {{%.+}} // CHECK: fadd reassoc nnan ninf nsz arcp afn float {{%.+}}, {{%.+}} +float test_contract_off_pragma(float a, float b, float c) { + float tmp = a * b; + { + #pragma clang fp contract(off) + tmp += c; + } + return tmp; +} + +// CHECK: define{{.*}} float @_Z24test_contract_off_pragmafff(float noundef nofpclass(nan inf) %a, float noundef nofpclass(nan inf) %b, float noundef nofpclass(nan inf) %c) +// CHECK: fmul fast float {{%.+}}, {{%.+}} +// CHECK: fadd reassoc nnan ninf nsz arcp afn float {{%.+}}, {{%.+}} + // CHECK: attributes [[FAST_ATTRS]] = { {{.*}}"no-infs-fp-math"="true" {{.*}}"no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true"{{.*}} } // CHECK: attributes [[PRECISE_ATTRS]] = { {{.*}}"no-infs-fp-math"="false" {{.*}}"no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false"{{.*}} } diff --git a/clang/test/CodeGenCUDA/fp-contract.cu b/clang/test/CodeGenCUDA/fp-contract.cu index d6c796a817cbf..dd7e619a262c1 100644 --- 
a/clang/test/CodeGenCUDA/fp-contract.cu +++ b/clang/test/CodeGenCUDA/fp-contract.cu @@ -1,6 +1,6 @@ // REQUIRES: x86-registered-target, nvptx-registered-target, amdgpu-registered-target -// By default CUDA uses -ffp-contract=fast, HIP uses -ffp-contract=fast-honor-pragmas. +// By default CUDA and HIP use -ffp-contract=fast. // we should fuse multiply/add into fma instruction. // In IR, fmul/fadd instructions with contract flag are emitted. // In backend @@ -68,7 +68,7 @@ // RUN: -O3 -target-cpu gfx906 -o - -x ir %t.ll \ // RUN: | FileCheck -check-prefixes=COMMON,AMD-OPT-FASTSTD %s -// Explicit -ffp-contract=fast-honor-pragmas +// Explicit -ffp-contract=fast (was fast-honor-pragmas) // In IR, fmul/fadd instructions with contract flag are emitted. // In backend // nvptx/amdgcn - assumes standard fp fuse option, which only @@ -76,19 +76,19 @@ // llvm.fmuladd intrinsics. // RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ -// RUN: -ffp-contract=fast-honor-pragmas -disable-llvm-passes -o - %s \ +// RUN: -ffp-contract=fast -disable-llvm-passes -o - %s \ // RUN: | FileCheck -check-prefixes=COMMON,NV-ON %s // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -S \ // RUN: -target-cpu gfx906 -disable-llvm-passes -o - -x hip %s \ -// RUN: -ffp-contract=fast-honor-pragmas \ +// RUN: -ffp-contract=fast \ // RUN: | FileCheck -check-prefixes=COMMON,AMD-ON %s // RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -S \ // RUN: -O3 -o - %s \ -// RUN: -ffp-contract=fast-honor-pragmas \ +// RUN: -ffp-contract=fast \ // RUN: | FileCheck -check-prefixes=COMMON,NV-OPT-FASTSTD %s // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -S \ // RUN: -O3 -target-cpu gfx906 -o - -x hip %s \ -// RUN: -ffp-contract=fast-honor-pragmas \ +// RUN: -ffp-contract=fast \ // RUN: | FileCheck -check-prefixes=COMMON,AMD-OPT-FASTSTD %s // Check separate compile/backend steps corresponding to -save-temps. @@ -96,7 +96,7 @@ // default FP fuse option. 
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \ -// RUN: -ffp-contract=fast-honor-pragmas \ +// RUN: -ffp-contract=fast \ // RUN: -O3 -disable-llvm-passes -target-cpu gfx906 -o %t.ll -x hip %s // RUN: cat %t.ll | FileCheck -check-prefixes=COMMON,AMD-OPT-FAST-IR %s // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -S \ @@ -254,19 +254,16 @@ __host__ __device__ float func2(float a, float b, float c) { // Test multiply/add in the different statements, which is forced // to be compiled with fp contract on. fmul/fadd without contract -// flags are emitted in IR. In nvptx, they are emitted as FMA in -// fp-contract is fast but not on, as nvptx backend uses the same -// fp fuse option as front end, whereas fast fp fuse option in -// backend fuses fadd/fmul disregarding contract flag. In amdgcn -// they are not fused as amdgcn always use standard fp fusion -// option which respects contract flag. - __host__ __device__ float func3(float a, float b, float c) { +// flags are emitted in IR. The operations should not be fused +// because the mul and add occur in different statements. 
+__host__ __device__ float func3(float a, float b, float c) { #pragma clang fp contract(on) float t = b * c; return t + a; } // COMMON-LABEL: _Z5func3fff -// NV-OPT-FAST: fma.rn.f32 +// NV-OPT-FAST: mul.rn.f32 +// NV-OPT-FAST: add.rn.f32 // NV-OPT-FAST-NEXT: st.param.b32 // NV-OPT-FASTSTD: mul.rn.f32 // NV-OPT-FASTSTD: add.rn.f32 @@ -285,7 +282,8 @@ __host__ __device__ float func2(float a, float b, float c) { // AMD-OPT-OFF-IR: fmul float // AMD-OPT-OFF-IR: fadd float -// AMD-OPT-FAST: v_fmac_f32_e32 +// AMD-OPT-FAST: v_mul_f32_e32 +// AMD-OPT-FAST-NEXT: v_add_f32_e32 // AMD-OPT-FAST-NEXT: s_setpc_b64 // AMD-OPT-FASTSTD: v_mul_f32_e32 // AMD-OPT-FASTSTD-NEXT: v_add_f32_e32 diff --git a/clang/test/Driver/fp-contract.c b/clang/test/Driver/fp-contract.c index cab63683ee813..06241caa2fbd5 100644 --- a/clang/test/Driver/fp-contract.c +++ b/clang/test/Driver/fp-contract.c @@ -6,7 +6,6 @@ // before the drive options that are checked below the run lines. // WARN_FM_OFF: warning: overriding '-ffast-math' option with '-ffp-contract=off' // WARN_FM_ON: warning: overriding '-ffast-math' option with '-ffp-contract=on' -// WARN_FM_FHP: warning: overriding '-ffast-math' option with '-ffp-contract=fast-honor-pragmas' // WARN_UM_OFF: warning: overriding '-funsafe-math-optimizations' option with '-ffp-contract=off' // WARN_UM_ON: warning: overriding '-funsafe-math-optimizations' option with '-ffp-contract=on' @@ -30,8 +29,10 @@ // RUN: | FileCheck --check-prefix=CHECK-FPC-FAST %s // RUN: %clang -### -ffast-math -ffp-contract=fast-honor-pragmas -c %s 2>&1 \ -// RUN: | FileCheck --check-prefixes=CHECK-FPC-FAST-HONOR,WARN_FM_FHP %s -// CHECK-FPC-FAST-HONOR: "-ffp-contract=fast-honor-pragmas" +// RUN: | FileCheck --check-prefixes=CHECK-FPC-FAST-HONOR,WARN_FHP_DEPRECATED %s +// WARN_FHP_DEPRECATED: clang: warning: argument '-ffp-contract=fast-honor-pragmas' is deprecated, use '-ffp-contract=fast' instead [-Wdeprecated] +// CHECK-FPC-FAST-HONOR: "-ffp-contract=fast" +// 
CHECK-FPC-FAST-HONOR-NOT: "-honor-pragmas" // RUN: %clang -### -Werror -ffp-contract=fast -ffast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-FPC-FAST %s