diff --git a/.github/workflows/libc-fullbuild-tests.yml b/.github/workflows/libc-fullbuild-tests.yml index 58e15ce29546e..2c88da653aae4 100644 --- a/.github/workflows/libc-fullbuild-tests.yml +++ b/.github/workflows/libc-fullbuild-tests.yml @@ -11,12 +11,19 @@ on: jobs: build: - runs-on: ubuntu-24.04 + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: include: - - c_compiler: clang + - os: ubuntu-24.04 + ccache-variant: sccache + c_compiler: clang + cpp_compiler: clang++ + # TODO: remove ccache logic when https://github.com/hendrikmuhs/ccache-action/issues/279 is resolved. + - os: ubuntu-24.04-arm + ccache-variant: ccache + c_compiler: clang cpp_compiler: clang++ # TODO: add back gcc build when it is fixed # - c_compiler: gcc @@ -35,7 +42,7 @@ jobs: with: max-size: 1G key: libc_fullbuild_${{ matrix.c_compiler }} - variant: sccache + variant: ${{ matrix.ccache-variant }} # Notice: # - MPFR is required by some of the mathlib tests. @@ -62,8 +69,8 @@ jobs: -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} -DCMAKE_C_COMPILER=${{ matrix.c_compiler }} -DCMAKE_BUILD_TYPE=MinSizeRel - -DCMAKE_C_COMPILER_LAUNCHER=sccache - -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + -DCMAKE_C_COMPILER_LAUNCHER=${{ matrix.ccache-variant }} + -DCMAKE_CXX_COMPILER_LAUNCHER=${{ matrix.ccache-variant }} -DCMAKE_INSTALL_PREFIX=${{ steps.strings.outputs.build-install-dir }} -DLLVM_ENABLE_RUNTIMES="libc;compiler-rt" -DLLVM_LIBC_FULL_BUILD=ON diff --git a/.github/workflows/libc-overlay-tests.yml b/.github/workflows/libc-overlay-tests.yml index 8b59d76aed4a8..0a0916084b18c 100644 --- a/.github/workflows/libc-overlay-tests.yml +++ b/.github/workflows/libc-overlay-tests.yml @@ -19,14 +19,28 @@ jobs: include: # TODO: add linux gcc when it is fixed - os: ubuntu-24.04 + ccache-variant: sccache + compiler: + c_compiler: clang + cpp_compiler: clang++ + # TODO: remove ccache logic when https://github.com/hendrikmuhs/ccache-action/issues/279 is resolved. 
+ - os: ubuntu-24.04-arm + ccache-variant: ccache compiler: c_compiler: clang cpp_compiler: clang++ - os: windows-2022 + ccache-variant: sccache + compiler: + c_compiler: clang-cl + cpp_compiler: clang-cl + - os: windows-2025 + ccache-variant: sccache compiler: c_compiler: clang-cl cpp_compiler: clang-cl - os: macos-14 + ccache-variant: sccache compiler: c_compiler: clang cpp_compiler: clang++ @@ -46,7 +60,7 @@ jobs: with: max-size: 1G key: libc_overlay_build_${{ matrix.os }}_${{ matrix.compiler.c_compiler }} - variant: sccache + variant: ${{ matrix.ccache-variant }} # MPFR is required by some of the mathlib tests. - name: Prepare dependencies (Ubuntu) @@ -82,8 +96,8 @@ jobs: -DCMAKE_CXX_COMPILER=${{ matrix.compiler.cpp_compiler }} -DCMAKE_C_COMPILER=${{ matrix.compiler.c_compiler }} -DCMAKE_BUILD_TYPE=MinSizeRel - -DCMAKE_C_COMPILER_LAUNCHER=sccache - -DCMAKE_CXX_COMPILER_LAUNCHER=sccache + -DCMAKE_C_COMPILER_LAUNCHER=${{ matrix.ccache-variant }} + -DCMAKE_CXX_COMPILER_LAUNCHER=${{ matrix.ccache-variant }} -DCMAKE_POLICY_DEFAULT_CMP0141=NEW -DCMAKE_MSVC_DEBUG_INFORMATION_FORMAT=Embedded -DLLVM_ENABLE_RUNTIMES=libc diff --git a/clang-tools-extra/clang-tidy/ClangTidyProfiling.cpp b/clang-tools-extra/clang-tidy/ClangTidyProfiling.cpp index 07ab34a07cd31..89867ec30f51f 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyProfiling.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyProfiling.cpp @@ -36,23 +36,25 @@ ClangTidyProfiling::StorageParams::StorageParams(llvm::StringRef ProfilePrefix, .str(); } -void ClangTidyProfiling::printUserFriendlyTable(llvm::raw_ostream &OS) { - TG->print(OS); +void ClangTidyProfiling::printUserFriendlyTable(llvm::raw_ostream &OS, + llvm::TimerGroup &TG) { + TG.print(OS); OS.flush(); } -void ClangTidyProfiling::printAsJSON(llvm::raw_ostream &OS) { +void ClangTidyProfiling::printAsJSON(llvm::raw_ostream &OS, + llvm::TimerGroup &TG) { OS << "{\n"; OS << R"("file": ")" << Storage->SourceFilename << "\",\n"; OS << R"("timestamp": ")" << 
Storage->Timestamp << "\",\n"; OS << "\"profile\": {\n"; - TG->printJSONValues(OS, ""); + TG.printJSONValues(OS, ""); OS << "\n}\n"; OS << "}\n"; OS.flush(); } -void ClangTidyProfiling::storeProfileData() { +void ClangTidyProfiling::storeProfileData(llvm::TimerGroup &TG) { assert(Storage && "We should have a filename."); llvm::SmallString<256> OutputDirectory(Storage->StoreFilename); @@ -71,19 +73,18 @@ void ClangTidyProfiling::storeProfileData() { return; } - printAsJSON(OS); + printAsJSON(OS, TG); } ClangTidyProfiling::ClangTidyProfiling(std::optional Storage) : Storage(std::move(Storage)) {} ClangTidyProfiling::~ClangTidyProfiling() { - TG.emplace("clang-tidy", "clang-tidy checks profiling", Records); - + llvm::TimerGroup TG{"clang-tidy", "clang-tidy checks profiling", Records}; if (!Storage) - printUserFriendlyTable(llvm::errs()); + printUserFriendlyTable(llvm::errs(), TG); else - storeProfileData(); + storeProfileData(TG); } } // namespace clang::tidy diff --git a/clang-tools-extra/clang-tidy/ClangTidyProfiling.h b/clang-tools-extra/clang-tidy/ClangTidyProfiling.h index b6f7d66343fa4..76deede1716f4 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyProfiling.h +++ b/clang-tools-extra/clang-tidy/ClangTidyProfiling.h @@ -34,14 +34,11 @@ class ClangTidyProfiling { }; private: - std::optional TG; - std::optional Storage; - void printUserFriendlyTable(llvm::raw_ostream &OS); - void printAsJSON(llvm::raw_ostream &OS); - - void storeProfileData(); + void printUserFriendlyTable(llvm::raw_ostream &OS, llvm::TimerGroup &TG); + void printAsJSON(llvm::raw_ostream &OS, llvm::TimerGroup &TG); + void storeProfileData(llvm::TimerGroup &TG); public: llvm::StringMap Records; diff --git a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp index 4448e9ccba80d..5d74907aa9fab 100644 --- a/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UnusedUsingDeclsCheck.cpp 
@@ -51,6 +51,10 @@ UnusedUsingDeclsCheck::UnusedUsingDeclsCheck(StringRef Name, HeaderFileExtensions(Context->getHeaderFileExtensions()) {} void UnusedUsingDeclsCheck::registerMatchers(MatchFinder *Finder) { + // We don't emit warnings on unused-using-decls from headers, so bail out if + // the main file is a header. + if (utils::isFileExtension(getCurrentMainFile(), HeaderFileExtensions)) + return; Finder->addMatcher(usingDecl(isExpansionInMainFile()).bind("using"), this); auto DeclMatcher = hasDeclaration(namedDecl().bind("used")); Finder->addMatcher(loc(templateSpecializationType(DeclMatcher)), this); @@ -83,12 +87,6 @@ void UnusedUsingDeclsCheck::registerMatchers(MatchFinder *Finder) { void UnusedUsingDeclsCheck::check(const MatchFinder::MatchResult &Result) { if (Result.Context->getDiagnostics().hasUncompilableErrorOccurred()) return; - // We don't emit warnings on unused-using-decls from headers, so bail out if - // the main file is a header. - if (auto MainFile = Result.SourceManager->getFileEntryRefForID( - Result.SourceManager->getMainFileID()); - utils::isFileExtension(MainFile->getName(), HeaderFileExtensions)) - return; if (const auto *Using = Result.Nodes.getNodeAs("using")) { // Ignores using-declarations defined in macros. diff --git a/clang-tools-extra/modularize/CoverageChecker.cpp b/clang-tools-extra/modularize/CoverageChecker.cpp index b536ee00497c0..fe6711398ab7d 100644 --- a/clang-tools-extra/modularize/CoverageChecker.cpp +++ b/clang-tools-extra/modularize/CoverageChecker.cpp @@ -278,15 +278,15 @@ CoverageChecker::collectUmbrellaHeaderHeaders(StringRef UmbrellaHeaderName) { sys::fs::current_path(PathBuf); // Create the compilation database. - std::unique_ptr Compilations; - Compilations.reset(new FixedCompilationDatabase(Twine(PathBuf), CommandLine)); + FixedCompilationDatabase Compilations(Twine(PathBuf), CommandLine); std::vector HeaderPath; HeaderPath.push_back(std::string(UmbrellaHeaderName)); // Create the tool and run the compilation. 
- ClangTool Tool(*Compilations, HeaderPath); - int HadErrors = Tool.run(new CoverageCheckerFrontendActionFactory(*this)); + ClangTool Tool(Compilations, HeaderPath); + CoverageCheckerFrontendActionFactory ActionFactory(*this); + int HadErrors = Tool.run(&ActionFactory); // If we had errors, exit early. return !HadErrors; diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a03f42ab910ed..f110b8cf76507 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -294,9 +294,6 @@ C++ Language Changes C++2c Feature Support ^^^^^^^^^^^^^^^^^^^^^ -- Add ``__builtin_is_implicit_lifetime`` intrinsic, which supports - `P2647R1 A trait for implicit lifetime types `_ - - Add ``__builtin_is_virtual_base_of`` intrinsic, which supports `P2985R0 A type trait for detecting virtual base classes `_ @@ -318,6 +315,9 @@ C++23 Feature Support - ``__cpp_explicit_this_parameter`` is now defined. (#GH82780) +- Add ``__builtin_is_implicit_lifetime`` intrinsic, which supports + `P2674R1 A trait for implicit lifetime types `_ + - Add support for `P2280R4 Using unknown pointers and references in constant expressions `_. (#GH63139) C++20 Feature Support @@ -325,6 +325,10 @@ C++20 Feature Support - Implemented module level lookup for C++20 modules. (#GH90154) +C++17 Feature Support +^^^^^^^^^^^^^^^^^^^^^ +- The implementation of the relaxed template template argument matching rules is + more complete and reliable, and should provide more accurate diagnostics. Resolutions to C++ Defect Reports ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -351,7 +355,8 @@ Resolutions to C++ Defect Reports (`CWG2351: void{} `_). - Clang now has improved resolution to CWG2398, allowing class templates to have - default arguments deduced when partial ordering. + default arguments deduced when partial ordering, and better backwards compatibility + in overload resolution. - Clang now allows comparing unequal object pointers that have been cast to ``void *`` in constant expressions. 
These comparisons always worked in non-constant expressions. @@ -636,6 +641,10 @@ Improvements to Clang's diagnostics - Clang now diagnoses when the result of a [[nodiscard]] function is discarded after being cast in C. Fixes #GH104391. +- Clang now properly explains the reason a template template argument failed to + match a template template parameter, in terms of the C++17 relaxed matching rules + instead of the old ones. + - Don't emit duplicated dangling diagnostics. (#GH93386). - Improved diagnostic when trying to befriend a concept. (#GH45182). @@ -807,6 +816,8 @@ Improvements to Clang's diagnostics - Clang now emits a ``-Wignored-qualifiers`` diagnostic when a base class includes cv-qualifiers (#GH55474). +- Clang now diagnoses the use of attribute names reserved by the C++ standard (#GH92196). + Improvements to Clang's time-trace ---------------------------------- @@ -885,6 +896,8 @@ Bug Fixes to C++ Support - Correctly check constraints of explicit instantiations of member functions. (#GH46029) - When performing partial ordering of function templates, clang now checks that the deduction was consistent. Fixes (#GH18291). +- Fixes to several issues in partial ordering of template template parameters, which + were documented in the test suite. - Fixed an assertion failure about a constraint of a friend function template references to a value with greater template depth than the friend function template. (#GH98258) - Clang now rebuilds the template parameters of out-of-line declarations and specializations in the context @@ -975,6 +988,8 @@ Bug Fixes to C++ Support - Fixed a nested lambda substitution issue for constraint evaluation. (#GH123441) - Fixed various false diagnostics related to the use of immediate functions. (#GH123472) - Fix immediate escalation not propagating through inherited constructors. 
(#GH112677) +- Fixed assertions or false compiler diagnostics in the case of C++ modules for + lambda functions or inline friend functions defined inside templates (#GH122493). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index 11c6454772173..4af5a8fd1852c 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -61,13 +61,18 @@ class AttributeCommonInfo { }; enum Kind { #define PARSED_ATTR(NAME) AT_##NAME, -#include "clang/Sema/AttrParsedAttrList.inc" +#include "clang/Basic/AttrParsedAttrList.inc" #undef PARSED_ATTR NoSemaHandlerAttribute, IgnoredAttribute, UnknownAttribute, }; enum class Scope { NONE, CLANG, GNU, MSVC, OMP, HLSL, GSL, RISCV }; + enum class AttrArgsInfo { + None, + Optional, + Required, + }; private: const IdentifierInfo *AttrName = nullptr; @@ -241,6 +246,8 @@ class AttributeCommonInfo { static Kind getParsedKind(const IdentifierInfo *Name, const IdentifierInfo *Scope, Syntax SyntaxUsed); + static AttrArgsInfo getCXX11AttrArgsInfo(const IdentifierInfo *Name); + private: /// Get an index into the attribute spelling list /// defined in Attr.td. 
This index is used by an attribute diff --git a/clang/include/clang/Basic/Attributes.h b/clang/include/clang/Basic/Attributes.h index 61666a6f4d9ac..99bb668fe32d0 100644 --- a/clang/include/clang/Basic/Attributes.h +++ b/clang/include/clang/Basic/Attributes.h @@ -23,6 +23,11 @@ int hasAttribute(AttributeCommonInfo::Syntax Syntax, const IdentifierInfo *Scope, const IdentifierInfo *Attr, const TargetInfo &Target, const LangOptions &LangOpts); +int hasAttribute(AttributeCommonInfo::Syntax Syntax, + const IdentifierInfo *Scope, const IdentifierInfo *Attr, + const TargetInfo &Target, const LangOptions &LangOpts, + bool CheckPlugins); + } // end namespace clang #endif // LLVM_CLANG_BASIC_ATTRIBUTES_H diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 0c9173f9bfcce..572ac7235be02 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -5191,51 +5191,51 @@ let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] i } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in { - def vcvtne2ph2bf8_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; + def vcvt2ph2bf8_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in { - def vcvtne2ph2bf8_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; + def vcvt2ph2bf8_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { - def vcvtne2ph2bf8_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">; + def vcvt2ph2bf8_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, 
RequiredVectorWidth<128>] in { - def vcvtne2ph2bf8s_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; + def vcvt2ph2bf8s_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in { - def vcvtne2ph2bf8s_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; + def vcvt2ph2bf8s_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { - def vcvtne2ph2bf8s_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">; + def vcvt2ph2bf8s_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in { - def vcvtne2ph2hf8_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; + def vcvt2ph2hf8_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in { - def vcvtne2ph2hf8_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; + def vcvt2ph2hf8_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { - def vcvtne2ph2hf8_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">; + def vcvt2ph2hf8_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in { - def vcvtne2ph2hf8s_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; + def vcvt2ph2hf8s_128 : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<8, _Float16>)">; 
} let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in { - def vcvtne2ph2hf8s_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; + def vcvt2ph2hf8s_256 : X86Builtin<"_Vector<32, char>(_Vector<16, _Float16>, _Vector<16, _Float16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { - def vcvtne2ph2hf8s_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">; + def vcvt2ph2hf8s_512 : X86Builtin<"_Vector<64, char>(_Vector<32, _Float16>, _Vector<32, _Float16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in { @@ -5251,51 +5251,51 @@ let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] i } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in { - def vcvtneph2bf8_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">; + def vcvtph2bf8_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in { - def vcvtneph2bf8_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">; + def vcvtph2bf8_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { - def vcvtneph2bf8_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">; + def vcvtph2bf8_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in { - def vcvtneph2bf8s_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">; + def vcvtph2bf8s_128_mask : 
X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in { - def vcvtneph2bf8s_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">; + def vcvtph2bf8s_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { - def vcvtneph2bf8s_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">; + def vcvtph2bf8s_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in { - def vcvtneph2hf8_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">; + def vcvtph2hf8_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in { - def vcvtneph2hf8_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">; + def vcvtph2hf8_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { - def vcvtneph2hf8_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">; + def vcvtph2hf8_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in { - def vcvtneph2hf8s_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, _Vector<16, char>, unsigned char)">; + def vcvtph2hf8s_128_mask : X86Builtin<"_Vector<16, char>(_Vector<8, _Float16>, 
_Vector<16, char>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<256>] in { - def vcvtneph2hf8s_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">; + def vcvtph2hf8s_256_mask : X86Builtin<"_Vector<16, char>(_Vector<16, _Float16>, _Vector<16, char>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, RequiredVectorWidth<512>] in { - def vcvtneph2hf8s_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">; + def vcvtph2hf8s_512_mask : X86Builtin<"_Vector<32, char>(_Vector<32, _Float16>, _Vector<32, char>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] in { @@ -5304,201 +5304,196 @@ let Features = "avx10.2-256", Attributes = [NoThrow, RequiredVectorWidth<128>] i } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vaddnepbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; + def vaddbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vaddnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; + def vaddbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vaddnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; + def vaddbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vdivnepbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; + def vdivbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, 
__bf16>, _Vector<8, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vdivnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; + def vdivbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vdivnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; + def vdivbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vmaxpbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; + def vmaxbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vmaxpbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; + def vmaxbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vmaxpbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; + def vmaxbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vminpbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; + def vminbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vminpbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; + def vminbf16256 : X86Builtin<"_Vector<16, 
__bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vminpbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; + def vminbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vmulnepbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; + def vmulbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vmulnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; + def vmulbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vmulnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; + def vmulbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vsubnepbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; + def vsubbf16128 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vsubnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; + def vsubbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vsubnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; + def vsubbf16512 : 
X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>)">; } -let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vcomsbf16eq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">; - def vcomsbf16lt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">; - def vcomsbf16neq : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">; - def vcomsbf16ge : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">; - def vcomsbf16gt : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">; - def vcomsbf16le : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">; -} +let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in + foreach Cmp = ["eq", "lt", "le", "gt", "ge", "neq"] in + def vcomisbf16#Cmp : X86Builtin<"int(_Vector<8, __bf16>, _Vector<8, __bf16>)">; let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vcmppbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, _Vector<32, __bf16>, _Constant int, unsigned int)">; + def vcmpbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, _Vector<32, __bf16>, _Constant int, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vcmppbf16256_mask : X86Builtin<"unsigned short(_Vector<16, __bf16>, _Vector<16, __bf16>, _Constant int, unsigned short)">; + def vcmpbf16256_mask : X86Builtin<"unsigned short(_Vector<16, __bf16>, _Vector<16, __bf16>, _Constant int, unsigned short)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vcmppbf16128_mask : X86Builtin<"unsigned char(_Vector<8, __bf16>, _Vector<8, __bf16>, _Constant int, unsigned char)">; - def vfpclasspbf16128_mask : X86Builtin<"unsigned char(_Vector<8, __bf16>, _Constant int, unsigned char)">; + def vcmpbf16128_mask : X86Builtin<"unsigned char(_Vector<8, __bf16>, _Vector<8, __bf16>, _Constant 
int, unsigned char)">; + def vfpclassbf16128_mask : X86Builtin<"unsigned char(_Vector<8, __bf16>, _Constant int, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vfpclasspbf16256_mask : X86Builtin<"unsigned short(_Vector<16, __bf16>, _Constant int, unsigned short)">; + def vfpclassbf16256_mask : X86Builtin<"unsigned short(_Vector<16, __bf16>, _Constant int, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vfpclasspbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, _Constant int, unsigned int)">; + def vfpclassbf16512_mask : X86Builtin<"unsigned int(_Vector<32, __bf16>, _Constant int, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vscalefpbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">; + def vscalefbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vscalefpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">; + def vscalefbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vscalefpbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">; + def vscalefbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vrcppbf16128_mask : 
X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">; + def vrcpbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vrcppbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">; + def vrcpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vrcppbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">; + def vrcpbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vgetexppbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">; + def vgetexpbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vgetexppbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">; + def vgetexpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vgetexppbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">; + def vgetexpbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vrsqrtpbf16128_mask : X86Builtin<"_Vector<8, 
__bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">; + def vrsqrtbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Vector<8, __bf16>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vrsqrtpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">; + def vrsqrtbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Vector<16, __bf16>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vrsqrtpbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">; + def vrsqrtbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vreducenepbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Constant int, _Vector<8, __bf16>, unsigned char)">; + def vreducebf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Constant int, _Vector<8, __bf16>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vreducenepbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Constant int, _Vector<16, __bf16>, unsigned short)">; + def vreducebf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Constant int, _Vector<16, __bf16>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vreducenepbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">; + def vreducebf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in { - def vrndscalenepbf16_128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Constant int, _Vector<8, __bf16>, unsigned char)">; + def vrndscalebf16_128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Constant int, _Vector<8, __bf16>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vrndscalenepbf16_256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Constant int, _Vector<16, __bf16>, unsigned short)">; + def vrndscalebf16_256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Constant int, _Vector<16, __bf16>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vrndscalenepbf16_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">; + def vrndscalebf16_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vgetmantpbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Constant int, _Vector<8, __bf16>, unsigned char)">; + def vgetmantbf16128_mask : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>, _Constant int, _Vector<8, __bf16>, unsigned char)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vgetmantpbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Constant int, _Vector<16, __bf16>, unsigned short)">; + def vgetmantbf16256_mask : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>, _Constant int, _Vector<16, __bf16>, unsigned short)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vgetmantpbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">; + 
def vgetmantbf16512_mask : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Constant int, _Vector<32, __bf16>, unsigned int)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vsqrtnepbf16 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>)">; + def vsqrtbf16 : X86Builtin<"_Vector<8, __bf16>(_Vector<8, __bf16>)">; } let Features = "avx10.2-256", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vsqrtnepbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>)">; + def vsqrtbf16256 : X86Builtin<"_Vector<16, __bf16>(_Vector<16, __bf16>)">; } let Features = "avx10.2-512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vsqrtnepbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">; + def vsqrtbf16512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>)">; def vfmaddnepbh512 : X86Builtin<"_Vector<32, __bf16>(_Vector<32, __bf16>, _Vector<32, __bf16>, _Vector<32, __bf16>)">; } diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt index 56c27bacdb20b..4103d2753abc5 100644 --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -36,6 +36,11 @@ clang_tablegen(AttrList.inc -gen-clang-attr-list SOURCE Attr.td TARGET ClangAttrList) +clang_tablegen(AttrParsedAttrList.inc -gen-clang-attr-parsed-attr-list + -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ + SOURCE Attr.td + TARGET ClangAttrParsedAttrList) + clang_tablegen(AttrSubMatchRulesList.inc -gen-clang-attr-subject-match-rule-list -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ SOURCE Attr.td @@ -53,6 +58,12 @@ clang_tablegen(AttrHasAttributeImpl.inc -gen-clang-attr-has-attribute-impl TARGET ClangAttrHasAttributeImpl ) +clang_tablegen(CXX11AttributeInfo.inc -gen-cxx11-attribute-info + -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ + SOURCE Attr.td + TARGET CXX11AttributeInfo + ) + clang_tablegen(Builtins.inc -gen-clang-builtins SOURCE Builtins.td TARGET 
ClangBuiltins) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 612f7e330ba51..288786b8ce939 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -452,6 +452,10 @@ def warn_drv_deprecated_arg_ofast : Warning< "argument '-Ofast' is deprecated; use '-O3 -ffast-math' for the same behavior," " or '-O3' to enable only conforming optimizations">, InGroup; +def warn_drv_deprecated_arg_ofast_for_flang : Warning< + "argument '-Ofast' is deprecated; use '-O3 -ffast-math -fstack-arrays' for the same behavior," + " or '-O3 -fstack-arrays' to enable only conforming optimizations">, + InGroup; def warn_drv_deprecated_custom : Warning< "argument '%0' is deprecated, %1">, InGroup; def warn_drv_assuming_mfloat_abi_is : Warning< diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index b0ad76026fdb3..209792f851b6a 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -760,6 +760,7 @@ def AmbiguousMacro : DiagGroup<"ambiguous-macro">; def KeywordAsMacro : DiagGroup<"keyword-macro">; def ReservedIdAsMacro : DiagGroup<"reserved-macro-identifier">; def ReservedIdAsMacroAlias : DiagGroup<"reserved-id-macro", [ReservedIdAsMacro]>; +def ReservedAttributeIdentifier : DiagGroup<"reserved-attribute-identifier">; def RestrictExpansionMacro : DiagGroup<"restrict-expansion">; def FinalMacro : DiagGroup<"final-macro">; @@ -935,7 +936,8 @@ def SignedEnumBitfield : DiagGroup<"signed-enum-bitfield">; def ReservedModuleIdentifier : DiagGroup<"reserved-module-identifier">; def ReservedIdentifier : DiagGroup<"reserved-identifier", - [ReservedIdAsMacro, ReservedModuleIdentifier, UserDefinedLiterals]>; + [ReservedIdAsMacro, ReservedModuleIdentifier, + UserDefinedLiterals, ReservedAttributeIdentifier]>; // Unreachable code warning groups. 
// diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 959376b084721..4bcef23ccce16 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -407,6 +407,9 @@ def warn_pp_macro_hides_keyword : Extension< def warn_pp_macro_is_reserved_id : Warning< "macro name is a reserved identifier">, DefaultIgnore, InGroup; +def warn_pp_macro_is_reserved_attribute_id : Warning< + "%0 is a reserved attribute identifier">, DefaultIgnore, + InGroup; def warn_pp_objc_macro_redef_ignored : Warning< "ignoring redefinition of Objective-C qualifier macro">, InGroup>; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 36b693c6a304e..774e5484cfa0e 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5323,6 +5323,13 @@ def note_template_arg_refers_here_func : Note< def err_template_arg_template_params_mismatch : Error< "template template argument has different template parameters than its " "corresponding template template parameter">; +def note_template_arg_template_params_mismatch : Note< + "template template argument has different template parameters than its " + "corresponding template template parameter">; +def err_non_deduced_mismatch : Error< + "could not match %diff{$ against $|types}0,1">; +def err_inconsistent_deduction : Error< + "conflicting deduction %diff{$ against $|types}0,1 for parameter">; def err_template_arg_not_integral_or_enumeral : Error< "non-type template argument of type %0 must have an integral or enumeration" " type">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index df705104d9ea3..c5b7fcb7c7f09 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -933,10 +933,12 @@ def O : Joined<["-"], "O">, Group, def O_flag : 
Flag<["-"], "O">, Visibility<[ClangOption, CC1Option, FC1Option]>, Alias, AliasArgs<["1"]>; def Ofast : Joined<["-"], "Ofast">, Group, - Visibility<[ClangOption, CC1Option, FlangOption]>, - HelpTextForVariants<[ClangOption, CC1Option], - "Deprecated; use '-O3 -ffast-math' for the same behavior," - " or '-O3' to enable only conforming optimizations">; + Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, + HelpTextForVariants<[FlangOption, FC1Option], + "Deprecated; use '-O3 -ffast-math -fstack-arrays' for the same behavior," + " or '-O3 -fstack-arrays' to enable only conforming optimizations">, + HelpText<"Deprecated; use '-O3 -ffast-math' for the same behavior," + " or '-O3' to enable only conforming optimizations">; def P : Flag<["-"], "P">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, Group, diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 3d223c345ea15..8ddc5b56eedbd 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2271,6 +2271,11 @@ class Preprocessor { } } + /// Determine whether the next preprocessor token to be + /// lexed is a '('. If so, consume the token and return true, if not, this + /// method should have no observable side-effect on the lexed tokens. + bool isNextPPTokenLParen(); + private: /// Identifiers used for SEH handling in Borland. These are only /// allowed in particular circumstances @@ -2648,11 +2653,6 @@ class Preprocessor { void removeCachedMacroExpandedTokensOfLastLexer(); - /// Determine whether the next preprocessor token to be - /// lexed is a '('. If so, consume the token and return true, if not, this - /// method should have no observable side-effect on the lexed tokens. - bool isNextPPTokenLParen(); - /// After reading "MACRO(", this method is invoked to read all of the formal /// arguments specified for the macro invocation. Returns null on error. 
MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, diff --git a/clang/include/clang/Sema/CMakeLists.txt b/clang/include/clang/Sema/CMakeLists.txt index 0b0e31ece3195..9077e22c2307c 100644 --- a/clang/include/clang/Sema/CMakeLists.txt +++ b/clang/include/clang/Sema/CMakeLists.txt @@ -3,11 +3,6 @@ clang_tablegen(AttrTemplateInstantiate.inc -gen-clang-attr-template-instantiate SOURCE ../Basic/Attr.td TARGET ClangAttrTemplateInstantiate) -clang_tablegen(AttrParsedAttrList.inc -gen-clang-attr-parsed-attr-list - -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ - SOURCE ../Basic/Attr.td - TARGET ClangAttrParsedAttrList) - clang_tablegen(AttrParsedAttrKinds.inc -gen-clang-attr-parsed-attr-kinds -I ${CMAKE_CURRENT_SOURCE_DIR}/../../ SOURCE ../Basic/Attr.td diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h index 176a2a8d2a35e..c7f2422b542dd 100644 --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -930,6 +930,11 @@ class Sema; LLVM_PREFERRED_TYPE(bool) unsigned TookAddressOfOverload : 1; + /// Have we matched any packs on the parameter side, versus any non-packs on + /// the argument side, in a context where the opposite matching is also + /// allowed? + bool HasMatchedPackOnParmToNonPackOnArg : 1; + /// True if the candidate was found using ADL. 
LLVM_PREFERRED_TYPE(CallExpr::ADLCallKind) unsigned IsADLCandidate : 1; @@ -1006,6 +1011,7 @@ class Sema; OverloadCandidate() : IsSurrogate(false), IgnoreObjectArgument(false), TookAddressOfOverload(false), + HasMatchedPackOnParmToNonPackOnArg(false), IsADLCandidate(llvm::to_underlying(CallExpr::NotADL)), RewriteKind(CRK_None) {} }; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 9a9998b114e0f..4d6e02fe2956e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10169,7 +10169,8 @@ class Sema final : public SemaBase { ADLCallKind IsADLCandidate = ADLCallKind::NotADL, ConversionSequenceList EarlyConversions = {}, OverloadCandidateParamOrder PO = {}, - bool AggregateCandidateDeduction = false); + bool AggregateCandidateDeduction = false, + bool HasMatchedPackOnParmToNonPackOnArg = false); /// Add all of the function declarations in the given function set to /// the overload candidate set. @@ -10204,7 +10205,8 @@ class Sema final : public SemaBase { bool SuppressUserConversions = false, bool PartialOverloading = false, ConversionSequenceList EarlyConversions = {}, - OverloadCandidateParamOrder PO = {}); + OverloadCandidateParamOrder PO = {}, + bool HasMatchedPackOnParmToNonPackOnArg = false); /// Add a C++ member function template as a candidate to the candidate /// set, using template argument deduction to produce an appropriate member @@ -10250,7 +10252,8 @@ class Sema final : public SemaBase { CXXConversionDecl *Conversion, DeclAccessPair FoundDecl, CXXRecordDecl *ActingContext, Expr *From, QualType ToType, OverloadCandidateSet &CandidateSet, bool AllowObjCConversionOnExplicit, - bool AllowExplicit, bool AllowResultConversion = true); + bool AllowExplicit, bool AllowResultConversion = true, + bool HasMatchedPackOnParmToNonPackOnArg = false); /// Adds a conversion function template specialization /// candidate to the overload set, using template argument deduction @@ -11678,7 +11681,8 @@ class Sema 
final : public SemaBase { SourceLocation RAngleLoc, unsigned ArgumentPackIndex, SmallVectorImpl &SugaredConverted, SmallVectorImpl &CanonicalConverted, - CheckTemplateArgumentKind CTAK); + CheckTemplateArgumentKind CTAK, bool PartialOrdering, + bool *MatchedPackOnParmToNonPackOnArg); /// Check that the given template arguments can be provided to /// the given template, converting the arguments along the way. @@ -11725,7 +11729,8 @@ class Sema final : public SemaBase { SmallVectorImpl &SugaredConverted, SmallVectorImpl &CanonicalConverted, bool UpdateArgsWithConversions = true, - bool *ConstraintsNotSatisfied = nullptr, bool PartialOrderingTTP = false); + bool *ConstraintsNotSatisfied = nullptr, bool PartialOrderingTTP = false, + bool *MatchedPackOnParmToNonPackOnArg = nullptr); bool CheckTemplateTypeArgument( TemplateTypeParmDecl *Param, TemplateArgumentLoc &Arg, @@ -11759,7 +11764,9 @@ class Sema final : public SemaBase { /// It returns true if an error occurred, and false otherwise. bool CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param, TemplateParameterList *Params, - TemplateArgumentLoc &Arg, bool IsDeduced); + TemplateArgumentLoc &Arg, + bool PartialOrdering, + bool *MatchedPackOnParmToNonPackOnArg); void NoteTemplateLocation(const NamedDecl &Decl, std::optional ParamRange = {}); @@ -12270,8 +12277,8 @@ class Sema final : public SemaBase { SmallVectorImpl &Deduced, unsigned NumExplicitlySpecified, FunctionDecl *&Specialization, sema::TemplateDeductionInfo &Info, - SmallVectorImpl const *OriginalCallArgs = nullptr, - bool PartialOverloading = false, + SmallVectorImpl const *OriginalCallArgs, + bool PartialOverloading, bool PartialOrdering, llvm::function_ref CheckNonDependent = [] { return false; }); /// Perform template argument deduction from a function call @@ -12305,7 +12312,8 @@ class Sema final : public SemaBase { TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef Args, FunctionDecl *&Specialization, sema::TemplateDeductionInfo &Info, 
bool PartialOverloading, bool AggregateDeductionCandidate, - QualType ObjectType, Expr::Classification ObjectClassification, + bool PartialOrdering, QualType ObjectType, + Expr::Classification ObjectClassification, llvm::function_ref)> CheckNonDependent); /// Deduce template arguments when taking the address of a function @@ -12458,8 +12466,9 @@ class Sema final : public SemaBase { sema::TemplateDeductionInfo &Info); bool isTemplateTemplateParameterAtLeastAsSpecializedAs( - TemplateParameterList *PParam, TemplateDecl *AArg, - const DefaultArguments &DefaultArgs, SourceLocation Loc, bool IsDeduced); + TemplateParameterList *PParam, TemplateDecl *PArg, TemplateDecl *AArg, + const DefaultArguments &DefaultArgs, SourceLocation ArgLoc, + bool PartialOrdering, bool *MatchedPackOnParmToNonPackOnArg); /// Mark which template parameters are used in a given expression. /// @@ -12768,6 +12777,9 @@ class Sema final : public SemaBase { /// We are instantiating a type alias template declaration. TypeAliasTemplateInstantiation, + + /// We are performing partial ordering for template template parameters. + PartialOrderingTTP, } Kind; /// Was the enclosing context a non-instantiation SFINAE context? @@ -12989,6 +13001,12 @@ class Sema final : public SemaBase { TemplateDecl *Entity, BuildingDeductionGuidesTag, SourceRange InstantiationRange = SourceRange()); + struct PartialOrderingTTP {}; + /// \brief Note that we are partial ordering template template parameters. + InstantiatingTemplate(Sema &SemaRef, SourceLocation ArgLoc, + PartialOrderingTTP, TemplateDecl *PArg, + SourceRange InstantiationRange = SourceRange()); + /// Note that we have finished instantiating this template. 
void Clear(); @@ -13450,7 +13468,8 @@ class Sema final : public SemaBase { bool InstantiateClassTemplateSpecialization( SourceLocation PointOfInstantiation, ClassTemplateSpecializationDecl *ClassTemplateSpec, - TemplateSpecializationKind TSK, bool Complain = true); + TemplateSpecializationKind TSK, bool Complain = true, + bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false); /// Instantiates the definitions of all of the member /// of the given class, which is an instantiation of a class template diff --git a/clang/include/clang/Sema/SemaInternal.h b/clang/include/clang/Sema/SemaInternal.h index 27cda71989726..95874077050a9 100644 --- a/clang/include/clang/Sema/SemaInternal.h +++ b/clang/include/clang/Sema/SemaInternal.h @@ -72,7 +72,7 @@ inline std::pair getDepthAndIndex(const NamedDecl *ND) { /// Retrieve the depth and index of an unexpanded parameter pack. inline std::pair getDepthAndIndex(UnexpandedParameterPack UPP) { - if (const auto *TTP = UPP.first.dyn_cast()) + if (const auto *TTP = dyn_cast(UPP.first)) return std::make_pair(TTP->getDepth(), TTP->getIndex()); return getDepthAndIndex(cast(UPP.first)); diff --git a/clang/include/clang/Sema/TemplateDeduction.h b/clang/include/clang/Sema/TemplateDeduction.h index 28b014fd84e4b..9c12eef5c42a0 100644 --- a/clang/include/clang/Sema/TemplateDeduction.h +++ b/clang/include/clang/Sema/TemplateDeduction.h @@ -51,6 +51,11 @@ class TemplateDeductionInfo { /// Have we suppressed an error during deduction? bool HasSFINAEDiagnostic = false; + /// Have we matched any packs on the parameter side, versus any non-packs on + /// the argument side, in a context where the opposite matching is also + /// allowed? + bool MatchedPackOnParmToNonPackOnArg = false; + /// The template parameter depth for which we're performing deduction. 
unsigned DeducedDepth; @@ -87,6 +92,14 @@ class TemplateDeductionInfo { return DeducedDepth; } + bool hasMatchedPackOnParmToNonPackOnArg() const { + return MatchedPackOnParmToNonPackOnArg; + } + + void setMatchedPackOnParmToNonPackOnArg() { + MatchedPackOnParmToNonPackOnArg = true; + } + /// Get the number of explicitly-specified arguments. unsigned getNumExplicitArgs() const { return ExplicitArgs; diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 7530015c9dacf..47301419c76c6 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -546,11 +546,18 @@ class ASTReader /// Mapping from main decl ID to the related decls IDs. /// - /// These related decls have to be loaded right after the main decl. - /// It is required to have canonical declaration for related decls from the - /// same module as the enclosing main decl. Without this, due to lazy - /// deserialization, canonical declarations for the main decl and related can - /// be selected from different modules. + /// The key is the main decl ID, and the value is a vector of related decls + /// that must be loaded immediately after the main decl. This is necessary + /// to ensure that the definition for related decls comes from the same module + /// as the enclosing main decl. Without this, due to lazy deserialization, + /// the definition for the main decl and related decls may come from different + /// modules. It is used for the following cases: + /// - Lambda inside a template function definition: The main declaration is + /// the enclosing function, and the related declarations are the lambda + /// declarations. + /// - Friend function defined inside a template CXXRecord declaration: The + /// main declaration is the enclosing record, and the related declarations + /// are the friend functions. 
llvm::DenseMap> RelatedDeclsMap; struct PendingUpdateRecord { diff --git a/clang/lib/AST/ByteCode/Descriptor.cpp b/clang/lib/AST/ByteCode/Descriptor.cpp index 437b9f1bab2d6..1c16c2022dd02 100644 --- a/clang/lib/AST/ByteCode/Descriptor.cpp +++ b/clang/lib/AST/ByteCode/Descriptor.cpp @@ -428,17 +428,17 @@ QualType Descriptor::getElemQualType() const { } SourceLocation Descriptor::getLocation() const { - if (auto *D = Source.dyn_cast()) + if (auto *D = dyn_cast(Source)) return D->getLocation(); - if (auto *E = Source.dyn_cast()) + if (auto *E = dyn_cast(Source)) return E->getExprLoc(); llvm_unreachable("Invalid descriptor type"); } SourceInfo Descriptor::getLoc() const { - if (const auto *D = Source.dyn_cast()) + if (const auto *D = dyn_cast(Source)) return SourceInfo(D); - if (const auto *E = Source.dyn_cast()) + if (const auto *E = dyn_cast(Source)) return SourceInfo(E); llvm_unreachable("Invalid descriptor type"); } diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index c765ebf5d618e..40fe7147a18a3 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -873,13 +873,17 @@ bool CheckNewDeleteForms(InterpState &S, CodePtr OpPC, bool CheckDeleteSource(InterpState &S, CodePtr OpPC, const Expr *Source, const Pointer &Ptr) { - // The two sources we currently allow are new expressions and - // __builtin_operator_new calls. + // Regular new type(...) call. if (isa_and_nonnull(Source)) return true; - if (const CallExpr *CE = dyn_cast_if_present(Source); + // operator new. + if (const auto *CE = dyn_cast_if_present(Source); CE && CE->getBuiltinCallee() == Builtin::BI__builtin_operator_new) return true; + // std::allocator.allocate() call + if (const auto *MCE = dyn_cast_if_present(Source); + MCE && MCE->getMethodDecl()->getIdentifier()->isStr("allocate")) + return true; // Whatever this is, we didn't heap allocate it. 
const SourceInfo &Loc = S.Current->getSource(OpPC); @@ -1489,7 +1493,8 @@ bool CheckNewTypeMismatch(InterpState &S, CodePtr OpPC, const Expr *E, const auto *NewExpr = cast(E); QualType StorageType = Ptr.getType(); - if (isa_and_nonnull(Ptr.getFieldDesc()->asExpr()) && + if ((isa_and_nonnull(Ptr.getFieldDesc()->asExpr()) || + isa_and_nonnull(Ptr.getFieldDesc()->asExpr())) && StorageType->isPointerType()) { // FIXME: Are there other cases where this is a problem? StorageType = StorageType->getPointeeType(); diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0d52083b06946..e657dbd2f9c73 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1584,6 +1584,7 @@ static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC, // Walk up the call stack to find the appropriate caller and get the // element type from it. QualType ElemType; + const CallExpr *NewCall = nullptr; for (const InterpFrame *F = Frame; F; F = F->Caller) { const Function *Func = F->getFunction(); @@ -1606,6 +1607,7 @@ static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC, if (CTSD->isInStdNamespace() && ClassII && ClassII->isStr("allocator") && TAL.size() >= 1 && TAL[0].getKind() == TemplateArgument::Type) { ElemType = TAL[0].getAsType(); + NewCall = cast(F->Caller->getExpr(F->getRetPC())); break; } } @@ -1616,6 +1618,7 @@ static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC, : diag::note_constexpr_new); return false; } + assert(NewCall); if (ElemType->isIncompleteType() || ElemType->isFunctionType()) { S.FFDiag(Call, diag::note_constexpr_new_not_complete_object_type) @@ -1654,7 +1657,7 @@ static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC, if (ElemT) { if (NumElems.ule(1)) { const Descriptor *Desc = - S.P.createDescriptor(Call, *ElemT, Descriptor::InlineDescMD, + S.P.createDescriptor(NewCall, *ElemT, Descriptor::InlineDescMD, 
/*IsConst=*/false, /*IsTemporary=*/false, /*IsMutable=*/false); Block *B = Allocator.allocate(Desc, S.getContext().getEvalID(), @@ -1667,7 +1670,7 @@ static bool interp__builtin_operator_new(InterpState &S, CodePtr OpPC, assert(NumElems.ugt(1)); Block *B = - Allocator.allocate(Call, *ElemT, NumElems.getZExtValue(), + Allocator.allocate(NewCall, *ElemT, NumElems.getZExtValue(), S.Ctx.getEvalID(), DynamicAllocator::Form::Operator); assert(B); S.Stk.push(B); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 734311e5d8b9a..3b5ab839c6cf7 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -1136,6 +1136,7 @@ namespace { struct StdAllocatorCaller { unsigned FrameIndex; QualType ElemType; + const Expr *Call; explicit operator bool() const { return FrameIndex != 0; }; }; @@ -1159,7 +1160,7 @@ namespace { if (CTSD->isInStdNamespace() && ClassII && ClassII->isStr("allocator") && TAL.size() >= 1 && TAL[0].getKind() == TemplateArgument::Type) - return {Call->Index, TAL[0].getAsType()}; + return {Call->Index, TAL[0].getAsType(), Call->CallExpr}; } return {}; @@ -7113,7 +7114,7 @@ static bool HandleOperatorNewCall(EvalInfo &Info, const CallExpr *E, QualType AllocType = Info.Ctx.getConstantArrayType( ElemType, Size, nullptr, ArraySizeModifier::Normal, 0); - APValue *Val = Info.createHeapAlloc(E, AllocType, Result); + APValue *Val = Info.createHeapAlloc(Caller.Call, AllocType, Result); *Val = APValue(APValue::UninitArray(), 0, Size.getZExtValue()); Result.addArray(Info, E, cast(AllocType)); return true; diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp index ddbe2136a671f..36ef1fc8c79db 100644 --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -1537,9 +1537,9 @@ void JSONNodeDumper::VisitExprWithCleanups(const ExprWithCleanups *EWC) { if (EWC->getNumObjects()) { JOS.attributeArray("cleanups", [this, EWC] { for (const ExprWithCleanups::CleanupObject &CO : 
EWC->getObjects()) - if (auto *BD = CO.dyn_cast()) { + if (auto *BD = dyn_cast(CO)) { JOS.value(createBareDeclRef(BD)); - } else if (auto *CLE = CO.dyn_cast()) { + } else if (auto *CLE = dyn_cast(CO)) { llvm::json::Object Obj; Obj["id"] = createPointerRepresentation(CLE); Obj["kind"] = CLE->getStmtClassName(); diff --git a/clang/lib/Basic/Attributes.cpp b/clang/lib/Basic/Attributes.cpp index fa26cc584b724..2035d4c0a5768 100644 --- a/clang/lib/Basic/Attributes.cpp +++ b/clang/lib/Basic/Attributes.cpp @@ -33,7 +33,8 @@ static int hasAttributeImpl(AttributeCommonInfo::Syntax Syntax, StringRef Name, int clang::hasAttribute(AttributeCommonInfo::Syntax Syntax, const IdentifierInfo *Scope, const IdentifierInfo *Attr, - const TargetInfo &Target, const LangOptions &LangOpts) { + const TargetInfo &Target, const LangOptions &LangOpts, + bool CheckPlugins) { StringRef Name = Attr->getName(); // Normalize the attribute name, __foo__ becomes foo. if (Name.size() >= 4 && Name.starts_with("__") && Name.ends_with("__")) @@ -61,14 +62,23 @@ int clang::hasAttribute(AttributeCommonInfo::Syntax Syntax, if (res) return res; - // Check if any plugin provides this attribute. - for (auto &Ptr : getAttributePluginInstances()) - if (Ptr->hasSpelling(Syntax, Name)) - return 1; + if (CheckPlugins) { + // Check if any plugin provides this attribute. 
+ for (auto &Ptr : getAttributePluginInstances()) + if (Ptr->hasSpelling(Syntax, Name)) + return 1; + } return 0; } +int clang::hasAttribute(AttributeCommonInfo::Syntax Syntax, + const IdentifierInfo *Scope, const IdentifierInfo *Attr, + const TargetInfo &Target, const LangOptions &LangOpts) { + return hasAttribute(Syntax, Scope, Attr, Target, LangOpts, + /*CheckPlugins=*/true); +} + const char *attr::getSubjectMatchRuleSpelling(attr::SubjectMatchRule Rule) { switch (Rule) { #define ATTR_MATCH_RULE(NAME, SPELLING, IsAbstract) \ @@ -151,6 +161,17 @@ AttributeCommonInfo::getParsedKind(const IdentifierInfo *Name, return ::getAttrKind(normalizeName(Name, ScopeName, SyntaxUsed), SyntaxUsed); } +AttributeCommonInfo::AttrArgsInfo +AttributeCommonInfo::getCXX11AttrArgsInfo(const IdentifierInfo *Name) { + StringRef AttrName = + normalizeAttrName(Name, /*NormalizedScopeName*/ "", Syntax::AS_CXX11); +#define CXX11_ATTR_ARGS_INFO + return llvm::StringSwitch(AttrName) +#include "clang/Basic/CXX11AttributeInfo.inc" + .Default(AttributeCommonInfo::AttrArgsInfo::None); +#undef CXX11_ATTR_ARGS_INFO +} + std::string AttributeCommonInfo::getNormalizedFullName() const { return static_cast( normalizeName(getAttrName(), getScopeName(), getSyntax())); diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp index db23b0c228338..8167d7603b0e1 100644 --- a/clang/lib/Basic/Targets/RISCV.cpp +++ b/clang/lib/Basic/Targets/RISCV.cpp @@ -102,7 +102,7 @@ bool RISCVTargetInfo::validateAsmConstraint( return true; case 'c': // A RVC register - GPR or FPR - if (Name[1] == 'r' || Name[1] == 'f') { + if (Name[1] == 'r' || Name[1] == 'R' || Name[1] == 'f') { Info.setAllowsRegister(); Name += 1; return true; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f1515347fb816..26bccccdc5e36 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -16556,9 +16556,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned 
BuiltinID, case X86::BI__builtin_ia32_sqrtph256: case X86::BI__builtin_ia32_sqrtph: case X86::BI__builtin_ia32_sqrtph512: - case X86::BI__builtin_ia32_vsqrtnepbf16256: - case X86::BI__builtin_ia32_vsqrtnepbf16: - case X86::BI__builtin_ia32_vsqrtnepbf16512: + case X86::BI__builtin_ia32_vsqrtbf16256: + case X86::BI__builtin_ia32_vsqrtbf16: + case X86::BI__builtin_ia32_vsqrtbf16512: case X86::BI__builtin_ia32_sqrtps512: case X86::BI__builtin_ia32_sqrtpd512: { if (Ops.size() == 2) { @@ -16778,9 +16778,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_fpclassps128_mask: case X86::BI__builtin_ia32_fpclassps256_mask: case X86::BI__builtin_ia32_fpclassps512_mask: - case X86::BI__builtin_ia32_vfpclasspbf16128_mask: - case X86::BI__builtin_ia32_vfpclasspbf16256_mask: - case X86::BI__builtin_ia32_vfpclasspbf16512_mask: + case X86::BI__builtin_ia32_vfpclassbf16128_mask: + case X86::BI__builtin_ia32_vfpclassbf16256_mask: + case X86::BI__builtin_ia32_vfpclassbf16512_mask: case X86::BI__builtin_ia32_fpclassph128_mask: case X86::BI__builtin_ia32_fpclassph256_mask: case X86::BI__builtin_ia32_fpclassph512_mask: @@ -16795,14 +16795,14 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, Intrinsic::ID ID; switch (BuiltinID) { default: llvm_unreachable("Unsupported intrinsic!"); - case X86::BI__builtin_ia32_vfpclasspbf16128_mask: - ID = Intrinsic::x86_avx10_fpclass_nepbf16_128; + case X86::BI__builtin_ia32_vfpclassbf16128_mask: + ID = Intrinsic::x86_avx10_fpclass_bf16_128; break; - case X86::BI__builtin_ia32_vfpclasspbf16256_mask: - ID = Intrinsic::x86_avx10_fpclass_nepbf16_256; + case X86::BI__builtin_ia32_vfpclassbf16256_mask: + ID = Intrinsic::x86_avx10_fpclass_bf16_256; break; - case X86::BI__builtin_ia32_vfpclasspbf16512_mask: - ID = Intrinsic::x86_avx10_fpclass_nepbf16_512; + case X86::BI__builtin_ia32_vfpclassbf16512_mask: + ID = Intrinsic::x86_avx10_fpclass_bf16_512; break; case X86::BI__builtin_ia32_fpclassph128_mask: 
ID = Intrinsic::x86_avx512fp16_fpclass_ph_128; @@ -16962,9 +16962,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, case X86::BI__builtin_ia32_vcmppd256_round_mask: case X86::BI__builtin_ia32_vcmpps256_round_mask: case X86::BI__builtin_ia32_vcmpph256_round_mask: - case X86::BI__builtin_ia32_vcmppbf16512_mask: - case X86::BI__builtin_ia32_vcmppbf16256_mask: - case X86::BI__builtin_ia32_vcmppbf16128_mask: + case X86::BI__builtin_ia32_vcmpbf16512_mask: + case X86::BI__builtin_ia32_vcmpbf16256_mask: + case X86::BI__builtin_ia32_vcmpbf16128_mask: IsMaskFCmp = true; [[fallthrough]]; case X86::BI__builtin_ia32_cmpps: diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 0c09ff96f9d6b..9abf2e8c9190d 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -626,7 +626,7 @@ struct CallCoroDelete final : public EHScopeStack::Cleanup { // Get back to the block we were originally and move coro.free there. auto *InsertPt = SaveInsertBlock->getTerminator(); - CoroFree->moveBefore(InsertPt); + CoroFree->moveBefore(InsertPt->getIterator()); CGF.Builder.SetInsertPoint(InsertPt); // Add if (auto *mem = coro.free) Deallocate; diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index e7dd5fb01ebed..5dc1686e7914c 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -1858,7 +1858,7 @@ Address CodeGenFunction::recoverAddrOfEscapedLocal(CodeGenFunction &ParentCGF, "expected alloca or localrecover in parent LocalDeclMap"); RecoverCall = cast(ParentRecover->clone()); RecoverCall->setArgOperand(1, ParentFP); - RecoverCall->insertBefore(AllocaInsertPt); + RecoverCall->insertBefore(AllocaInsertPt->getIterator()); } // Bitcast the variable, rename it, and insert it in the local decl map. 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index ddcb04d53661d..cafaaa364cb76 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1332,7 +1332,7 @@ void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, CGF.Builder.GetInsertBlock()); } else { Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); - Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); + Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator()); } } @@ -4079,7 +4079,7 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, Size = llvm::ConstantInt::get(CGF.SizeTy, 0); } LValue Base; - if (unsigned *P = Pos.dyn_cast()) { + if (unsigned *P = dyn_cast(Pos)) { Base = CGF.MakeAddrLValue( CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); } else { @@ -4109,7 +4109,7 @@ static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, CGF.EmitStoreOfScalar( llvm::ConstantInt::get(LLVMFlagsTy, static_cast(DepKind)), FlagsLVal); - if (unsigned *P = Pos.dyn_cast()) { + if (unsigned *P = dyn_cast(Pos)) { ++(*P); } else { LValue &PosLVal = *cast(Pos); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 33f08cf28feca..518113e20cb06 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6141,9 +6141,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fno-direct-access-external-data"); } - if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) { - CmdArgs.push_back("-fno-plt"); - } + if (Triple.isOSBinFormatELF() && (Triple.isAArch64() || Triple.isX86())) + Args.addOptOutFlag(CmdArgs, options::OPT_fplt, options::OPT_fno_plt); // -fhosted is default. 
// TODO: Audit uses of KernelOrKext and see where it'd be more appropriate to diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index d4099216c81ba..0922a97ed7c19 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -639,6 +639,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back( Args.MakeArgString("--plugin-opt=-mattr=" + llvm::join(Features, ","))); + // Enable ctor / dtor lowering for the direct / freestanding NVPTX target. + CmdArgs.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"}); + // Add paths for the default clang library path. SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(TC.getDriver().Dir); @@ -783,7 +786,7 @@ void NVPTXToolChain::addClangTargetOptions( // If we are compiling with a standalone NVPTX toolchain we want to try to // mimic a standard environment as much as possible. So we enable lowering // ctor / dtor functions to global symbols that can be registered. 
- if (Freestanding) + if (Freestanding && !getDriver().isUsingLTO()) CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"}); } diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 9c1fd28a3a8a2..45d05ed3e2485 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -937,6 +937,7 @@ void Flang::ConstructJob(Compilation &C, const JobAction &JA, D.Diag(diag::warn_O4_is_O3); } else if (A->getOption().matches(options::OPT_Ofast)) { CmdArgs.push_back("-O3"); + D.Diag(diag::warn_drv_deprecated_arg_ofast_for_flang); } else { A->render(Args, CmdArgs); } diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index 10f9a4f338f8f..eebe3becada65 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -347,6 +347,9 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, // Bans incompatible options for Wasm EH / SjLj. We don't allow using // different modes for EH and SjLj. auto BanIncompatibleOptionsForWasmEHSjLj = [&](StringRef CurOption) { + static bool HasRun = false; + if (HasRun) + return; if (DriverArgs.hasFlag(options::OPT_mno_exception_handing, options::OPT_mexception_handing, false)) getDriver().Diag(diag::err_drv_argument_not_allowed_with) @@ -370,10 +373,14 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, << CurOption << Option; } } + HasRun = true; }; // Enable necessary features for Wasm EH / SjLj in the backend. auto EnableFeaturesForWasmEHSjLj = [&]() { + static bool HasRun = false; + if (HasRun) + return; CC1Args.push_back("-target-feature"); CC1Args.push_back("+exception-handling"); // The standardized Wasm EH spec requires multivalue and reference-types. 
@@ -383,6 +390,7 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, CC1Args.push_back("+reference-types"); // Backend needs '-exception-model=wasm' to use Wasm EH instructions CC1Args.push_back("-exception-model=wasm"); + HasRun = true; }; if (DriverArgs.getLastArg(options::OPT_fwasm_exceptions)) { diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index bf5ee281c4311..f36cf7b638e0d 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -5488,8 +5488,8 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, } if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) return ShouldAddSpacesInAngles(); - if (Left.is(tok::r_paren) && Right.is(TT_PointerOrReference) && - Right.isOneOf(tok::amp, tok::ampamp)) { + if (Left.is(tok::r_paren) && Left.isNot(TT_TypeDeclarationParen) && + Right.is(TT_PointerOrReference) && Right.isOneOf(tok::amp, tok::ampamp)) { return true; } // Space before TT_StructuredBindingLSquare. 
diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index 30dfa5481d070..1ea4a2e9e88cf 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -459,6 +459,8 @@ class DefaultTemplateInstCallback : public TemplateInstantiationCallback { return "BuildingDeductionGuides"; case CodeSynthesisContext::TypeAliasTemplateInstantiation: return "TypeAliasTemplateInstantiation"; + case CodeSynthesisContext::PartialOrderingTTP: + return "PartialOrderingTTP"; } return ""; } diff --git a/clang/lib/Headers/avx10_2_512bf16intrin.h b/clang/lib/Headers/avx10_2_512bf16intrin.h index 392b7ae770c5b..ce43ecbcfe047 100644 --- a/clang/lib/Headers/avx10_2_512bf16intrin.h +++ b/clang/lib/Headers/avx10_2_512bf16intrin.h @@ -62,17 +62,17 @@ static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh( (bf3), (bf2), (bf1)) static __inline__ __m512 __DEFAULT_FN_ATTRS512 -_mm512_castpbf16_ps(__m512bh __a) { +_mm512_castbf16_ps(__m512bh __a) { return (__m512)__a; } static __inline__ __m512d __DEFAULT_FN_ATTRS512 -_mm512_castpbf16_pd(__m512bh __a) { +_mm512_castbf16_pd(__m512bh __a) { return (__m512d)__a; } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_castpbf16_si512(__m512bh __a) { +_mm512_castbf16_si512(__m512bh __a) { return (__m512i)__a; } @@ -91,39 +91,39 @@ _mm512_castsi512_pbh(__m512i __a) { } static __inline__ __m128bh __DEFAULT_FN_ATTRS512 -_mm512_castpbf16512_pbh128(__m512bh __a) { +_mm512_castbf16512_pbh128(__m512bh __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline__ __m256bh __DEFAULT_FN_ATTRS512 -_mm512_castpbf16512_pbh256(__m512bh __a) { +_mm512_castbf16512_pbh256(__m512bh __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_castpbf16128_pbh512(__m128bh __a) { +_mm512_castbf16128_pbh512(__m128bh __a) { return __builtin_shufflevector(__a, __a, 
0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_castpbf16256_pbh512(__m256bh __a) { +_mm512_castbf16256_pbh512(__m256bh __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_zextpbf16128_pbh512(__m128bh __a) { +_mm512_zextbf16128_pbh512(__m128bh __a) { return __builtin_shufflevector( __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_zextpbf16256_pbh512(__m256bh __a) { +_mm512_zextbf16256_pbh512(__m256bh __a) { return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, @@ -178,81 +178,81 @@ _mm512_permutexvar_pbh(__m512i __A, __m512bh __B) { return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_addne_pbh(__m512bh __A, __m512bh __B) { +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_add_pbh(__m512bh __A, + __m512bh __B) { return (__m512bh)((__v32bf)__A + (__v32bf)__B); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_mask_addne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { +_mm512_mask_add_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { return (__m512bh)__builtin_ia32_selectpbf_512( - (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B), (__v32bf)__W); + (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B), (__v32bf)__W); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_maskz_addne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { +_mm512_maskz_add_pbh(__mmask32 __U, 
__m512bh __A, __m512bh __B) { return (__m512bh)__builtin_ia32_selectpbf_512( - (__mmask32)__U, (__v32bf)_mm512_addne_pbh(__A, __B), + (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B), (__v32bf)_mm512_setzero_pbh()); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_subne_pbh(__m512bh __A, __m512bh __B) { +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sub_pbh(__m512bh __A, + __m512bh __B) { return (__m512bh)((__v32bf)__A - (__v32bf)__B); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_mask_subne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { +_mm512_mask_sub_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { return (__m512bh)__builtin_ia32_selectpbf_512( - (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B), (__v32bf)__W); + (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B), (__v32bf)__W); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_maskz_subne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { +_mm512_maskz_sub_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { return (__m512bh)__builtin_ia32_selectpbf_512( - (__mmask32)__U, (__v32bf)_mm512_subne_pbh(__A, __B), + (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B), (__v32bf)_mm512_setzero_pbh()); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_mulne_pbh(__m512bh __A, __m512bh __B) { +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mul_pbh(__m512bh __A, + __m512bh __B) { return (__m512bh)((__v32bf)__A * (__v32bf)__B); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_mask_mulne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { +_mm512_mask_mul_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { return (__m512bh)__builtin_ia32_selectpbf_512( - (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B), (__v32bf)__W); + (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B), (__v32bf)__W); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_maskz_mulne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { 
+_mm512_maskz_mul_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { return (__m512bh)__builtin_ia32_selectpbf_512( - (__mmask32)__U, (__v32bf)_mm512_mulne_pbh(__A, __B), + (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B), (__v32bf)_mm512_setzero_pbh()); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_divne_pbh(__m512bh __A, __m512bh __B) { +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_div_pbh(__m512bh __A, + __m512bh __B) { return (__m512bh)((__v32bf)__A / (__v32bf)__B); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_mask_divne_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { +_mm512_mask_div_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { return (__m512bh)__builtin_ia32_selectpbf_512( - (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B), (__v32bf)__W); + (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B), (__v32bf)__W); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_maskz_divne_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { +_mm512_maskz_div_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { return (__m512bh)__builtin_ia32_selectpbf_512( - (__mmask32)__U, (__v32bf)_mm512_divne_pbh(__A, __B), + (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B), (__v32bf)_mm512_setzero_pbh()); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A, __m512bh __B) { - return (__m512bh)__builtin_ia32_vmaxpbf16512((__v32bf)__A, (__v32bf)__B); + return (__m512bh)__builtin_ia32_vmaxbf16512((__v32bf)__A, (__v32bf)__B); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -270,7 +270,7 @@ _mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A, __m512bh __B) { - return (__m512bh)__builtin_ia32_vminpbf16512((__v32bf)__A, (__v32bf)__B); + return (__m512bh)__builtin_ia32_vminbf16512((__v32bf)__A, (__v32bf)__B); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -287,143 +287,143 @@ 
_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { } #define _mm512_cmp_pbh_mask(__A, __B, __P) \ - ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A), \ - (__v32bf)(__m512bh)(__B), \ - (int)(__P), (__mmask32) - 1)) + ((__mmask32)__builtin_ia32_vcmpbf16512_mask((__v32bf)(__m512bh)(__A), \ + (__v32bf)(__m512bh)(__B), \ + (int)(__P), (__mmask32) - 1)) #define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P) \ - ((__mmask32)__builtin_ia32_vcmppbf16512_mask((__v32bf)(__m512bh)(__A), \ - (__v32bf)(__m512bh)(__B), \ - (int)(__P), (__mmask32)(__U))) + ((__mmask32)__builtin_ia32_vcmpbf16512_mask((__v32bf)(__m512bh)(__A), \ + (__v32bf)(__m512bh)(__B), \ + (int)(__P), (__mmask32)(__U))) #define _mm512_mask_fpclass_pbh_mask(__U, __A, imm) \ - ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \ + ((__mmask32)__builtin_ia32_vfpclassbf16512_mask( \ (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)(__U))) #define _mm512_fpclass_pbh_mask(__A, imm) \ - ((__mmask32)__builtin_ia32_vfpclasspbf16512_mask( \ + ((__mmask32)__builtin_ia32_vfpclassbf16512_mask( \ (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32) - 1)) static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_scalef_pbh(__m512bh __A, __m512bh __B) { - return (__m512bh)__builtin_ia32_vscalefpbf16512_mask( + return (__m512bh)__builtin_ia32_vscalefbf16512_mask( (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh( __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { - return (__m512bh)__builtin_ia32_vscalefpbf16512_mask( + return (__m512bh)__builtin_ia32_vscalefbf16512_mask( (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { - return (__m512bh)__builtin_ia32_vscalefpbf16512_mask( + return (__m512bh)__builtin_ia32_vscalefbf16512_mask( (__v32bf)__A, (__v32bf)__B, 
(__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) { - return (__m512bh)__builtin_ia32_vrcppbf16512_mask( + return (__m512bh)__builtin_ia32_vrcpbf16512_mask( (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { - return (__m512bh)__builtin_ia32_vrcppbf16512_mask((__v32bf)__A, (__v32bf)__W, - (__mmask32)__U); + return (__m512bh)__builtin_ia32_vrcpbf16512_mask((__v32bf)__A, (__v32bf)__W, + (__mmask32)__U); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) { - return (__m512bh)__builtin_ia32_vrcppbf16512_mask( + return (__m512bh)__builtin_ia32_vrcpbf16512_mask( (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_getexp_pbh(__m512bh __A) { - return (__m512bh)__builtin_ia32_vgetexppbf16512_mask( + return (__m512bh)__builtin_ia32_vgetexpbf16512_mask( (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { - return (__m512bh)__builtin_ia32_vgetexppbf16512_mask( + return (__m512bh)__builtin_ia32_vgetexpbf16512_mask( (__v32bf)__A, (__v32bf)__W, (__mmask32)__U); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) { - return (__m512bh)__builtin_ia32_vgetexppbf16512_mask( + return (__m512bh)__builtin_ia32_vgetexpbf16512_mask( (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rsqrt_pbh(__m512bh __A) { - return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask( + return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask( (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); } static __inline__ __m512bh 
__DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { - return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask( - (__v32bf)__A, (__v32bf)__W, (__mmask32)__U); + return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask((__v32bf)__A, (__v32bf)__W, + (__mmask32)__U); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) { - return (__m512bh)__builtin_ia32_vrsqrtpbf16512_mask( + return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask( (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); } -#define _mm512_reducene_pbh(__A, imm) \ - ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \ +#define _mm512_reduce_pbh(__A, imm) \ + ((__m512bh)__builtin_ia32_vreducebf16512_mask( \ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \ (__mmask32) - 1)) -#define _mm512_mask_reducene_pbh(__W, __U, __A, imm) \ - ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \ +#define _mm512_mask_reduce_pbh(__W, __U, __A, imm) \ + ((__m512bh)__builtin_ia32_vreducebf16512_mask( \ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \ (__mmask32)(__U))) -#define _mm512_maskz_reducene_pbh(__U, __A, imm) \ - ((__m512bh)__builtin_ia32_vreducenepbf16512_mask( \ +#define _mm512_maskz_reduce_pbh(__U, __A, imm) \ + ((__m512bh)__builtin_ia32_vreducebf16512_mask( \ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ (__mmask32)(__U))) -#define _mm512_roundscalene_pbh(__A, imm) \ - ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \ +#define _mm512_roundscale_pbh(__A, imm) \ + ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ (__mmask32) - 1)) -#define _mm512_mask_roundscalene_pbh(__W, __U, __A, imm) \ - ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \ +#define _mm512_mask_roundscale_pbh(__W, __U, __A, imm) \ + ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \ (__v32bf)(__m512bh)(__A), (int)(imm), 
(__v32bf)(__m512bh)(__W), \ (__mmask32)(__U))) -#define _mm512_maskz_roundscalene_pbh(__U, __A, imm) \ - ((__m512bh)__builtin_ia32_vrndscalenepbf16_mask( \ +#define _mm512_maskz_roundscale_pbh(__U, __A, imm) \ + ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \ (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ (__mmask32)(__U))) #define _mm512_getmant_pbh(__A, __B, __C) \ - ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \ + ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \ (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ (__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1)) #define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C) \ - ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \ + ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \ (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ (__v32bf)(__m512bh)(__W), (__mmask32)(__U))) #define _mm512_maskz_getmant_pbh(__U, __A, __B, __C) \ - ((__m512bh)__builtin_ia32_vgetmantpbf16512_mask( \ + ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \ (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U))) static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) { - return (__m512bh)__builtin_ia32_vsqrtnepbf16512((__v32bf)__A); + return (__m512bh)__builtin_ia32_vsqrtbf16512((__v32bf)__A); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 @@ -440,122 +440,118 @@ _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) { } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_fmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { +_mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B, (__v32bf)__C); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddne_pbh( - __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_fmadd_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, 
__m512bh __C) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), - (__v32bf)__A); + _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddne_pbh( +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pbh( __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), - (__v32bf)__C); + _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddne_pbh( +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh( __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)_mm512_setzero_pbh()); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_fmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { +_mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B, -(__v32bf)__C); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubne_pbh( - __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_fmsub_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), - (__v32bf)__A); + _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubne_pbh( +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 
_mm512_mask3_fmsub_pbh( __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), - (__v32bf)__C); + _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubne_pbh( +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh( __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)_mm512_setzero_pbh()); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_fnmaddne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { +_mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B, (__v32bf)__C); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmaddne_pbh( +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh( __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmaddne_pbh( +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pbh( __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmaddne_pbh( +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh( 
__mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fnmaddne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)_mm512_setzero_pbh()); } static __inline__ __m512bh __DEFAULT_FN_ATTRS512 -_mm512_fnmsubne_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { +_mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B, -(__v32bf)__C); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsubne_pbh( +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh( __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsubne_pbh( +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pbh( __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C); } -static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsubne_pbh( +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh( __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { return (__m512bh)__builtin_ia32_selectpbf_512( (__mmask32)__U, - _mm512_fnmsubne_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)_mm512_setzero_pbh()); } diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h index 60a5b1ef4548d..0b5fca5cda522 100644 --- 
a/clang/lib/Headers/avx10_2_512convertintrin.h +++ b/clang/lib/Headers/avx10_2_512convertintrin.h @@ -58,263 +58,258 @@ _mm512_maskz_cvtx2ps_ph(__mmask32 __U, __m512 __A, __m512 __B) { (__mmask32)(U), (const int)(R))) static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtbiasph_pbf8(__m512i __A, __m512h __B) { +_mm512_cvtbiasph_bf8(__m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), (__mmask32)-1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_pbf8( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_bf8( __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtbiasph_pbf8(__mmask32 __U, __m512i __A, __m512h __B) { +_mm512_maskz_cvtbiasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtbiassph_pbf8(__m512i __A, __m512h __B) { +_mm512_cvtbiassph_bf8(__m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), (__mmask32)-1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_pbf8( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_bf8( __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtbiassph_pbf8(__mmask32 __U, __m512i __A, __m512h __B) { +_mm512_maskz_cvtbiassph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { 
return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtbiasph_phf8(__m512i __A, __m512h __B) { +_mm512_cvtbiasph_hf8(__m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), (__mmask32)-1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_phf8( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_hf8( __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtbiasph_phf8(__mmask32 __U, __m512i __A, __m512h __B) { +_mm512_maskz_cvtbiasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtbiassph_phf8(__m512i __A, __m512h __B) { +_mm512_cvtbiassph_hf8(__m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), (__mmask32)-1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_phf8( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_hf8( __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtbiassph_phf8(__mmask32 __U, __m512i __A, __m512h __B) { +_mm512_maskz_cvtbiassph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( (__v64qi)__A, 
(__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtne2ph_pbf8(__m512h __A, __m512h __B) { - return (__m512i)__builtin_ia32_vcvtne2ph2bf8_512((__v32hf)(__A), - (__v32hf)(__B)); +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvt2ph_bf8(__m512h __A, + __m512h __B) { + return (__m512i)__builtin_ia32_vcvt2ph2bf8_512((__v32hf)(__A), + (__v32hf)(__B)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ph_pbf8( - __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvt2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvtne2ph_pbf8(__A, __B), (__v64qi)__W); + (__mmask64)__U, (__v64qi)_mm512_cvt2ph_bf8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtne2ph_pbf8(__mmask64 __U, __m512h __A, __m512h __B) { +_mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvtne2ph_pbf8(__A, __B), + (__mmask64)__U, (__v64qi)_mm512_cvt2ph_bf8(__A, __B), (__v64qi)(__m512i)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtnes2ph_pbf8(__m512h __A, __m512h __B) { - return (__m512i)__builtin_ia32_vcvtne2ph2bf8s_512((__v32hf)(__A), - (__v32hf)(__B)); +_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A), + (__v32hf)(__B)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtnes2ph_pbf8( - __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvtnes2ph_pbf8(__A, __B), (__v64qi)__W); + 
(__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtnes2ph_pbf8(__mmask64 __U, __m512h __A, __m512h __B) { +_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvtnes2ph_pbf8(__A, __B), + (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)(__m512i)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtne2ph_phf8(__m512h __A, __m512h __B) { - return (__m512i)__builtin_ia32_vcvtne2ph2hf8_512((__v32hf)(__A), - (__v32hf)(__B)); +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvt2ph_hf8(__m512h __A, + __m512h __B) { + return (__m512i)__builtin_ia32_vcvt2ph2hf8_512((__v32hf)(__A), + (__v32hf)(__B)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ph_phf8( - __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvt2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvtne2ph_phf8(__A, __B), (__v64qi)__W); + (__mmask64)__U, (__v64qi)_mm512_cvt2ph_hf8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtne2ph_phf8(__mmask64 __U, __m512h __A, __m512h __B) { +_mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvtne2ph_phf8(__A, __B), + (__mmask64)__U, (__v64qi)_mm512_cvt2ph_hf8(__A, __B), (__v64qi)(__m512i)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_cvtnes2ph_phf8(__m512h __A, __m512h __B) { - return (__m512i)__builtin_ia32_vcvtne2ph2hf8s_512((__v32hf)(__A), - (__v32hf)(__B)); +_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A), + (__v32hf)(__B)); 
} -static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtnes2ph_phf8( - __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvtnes2ph_phf8(__A, __B), (__v64qi)__W); + (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtnes2ph_phf8(__mmask64 __U, __m512h __A, __m512h __B) { +_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { return (__m512i)__builtin_ia32_selectb_512( - (__mmask64)__U, (__v64qi)_mm512_cvtnes2ph_phf8(__A, __B), + (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)(__m512i)_mm512_setzero_si512()); } -static __inline__ __m512h __DEFAULT_FN_ATTRS512 -_mm512_cvtnehf8_ph(__m256i __A) { +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvthf8(__m256i __A) { return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask( (__v32qi)__A, (__v32hf)(__m512h)_mm512_undefined_ph(), (__mmask32)-1); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtnehf8_ph(__m512h __W, __mmask32 __U, __m256i __A) { +_mm512_mask_cvthf8(__m512h __W, __mmask32 __U, __m256i __A) { return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask( (__v32qi)__A, (__v32hf)(__m512h)__W, (__mmask32)__U); } static __inline__ __m512h __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtnehf8_ph(__mmask32 __U, __m256i __A) { +_mm512_maskz_cvthf8(__mmask32 __U, __m256i __A) { return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask( (__v32qi)__A, (__v32hf)(__m512h)_mm512_setzero_ph(), (__mmask32)__U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtneph_pbf8(__m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtph_bf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8_512_mask( (__v32hf)__A, 
(__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtneph_pbf8(__m256i __W, __mmask32 __U, __m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask( +_mm512_mask_cvtph_bf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8_512_mask( (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtneph_pbf8(__mmask32 __U, __m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask( +_mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtnesph_pbf8(__m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_bf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtnesph_pbf8(__m256i __W, __mmask32 __U, __m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask( +_mm512_mask_cvtsph_bf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtnesph_pbf8(__mmask32 __U, __m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask( +_mm512_maskz_cvtsph_bf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtneph_phf8(__m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask( +static __inline__ __m256i 
__DEFAULT_FN_ATTRS512 _mm512_cvtph_hf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtneph_phf8(__m256i __W, __mmask32 __U, __m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask( +_mm512_mask_cvtph_hf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8_512_mask( (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtneph_phf8(__mmask32 __U, __m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask( +_mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_cvtnesph_phf8(__m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask( +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_hf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtnesph_phf8(__m256i __W, __mmask32 __U, __m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask( +_mm512_mask_cvtsph_hf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); } static __inline__ __m256i __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtnesph_phf8(__mmask32 __U, __m512h __A) { - return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask( +_mm512_maskz_cvtsph_hf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); } -static __inline __m512h __DEFAULT_FN_ATTRS512 
_mm512_cvtpbf8_ph(__m256i __A) { +static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtbf8_ph(__m256i __A) { return _mm512_castsi512_ph(_mm512_slli_epi16(_mm512_cvtepi8_epi16(__A), 8)); } static __inline __m512h __DEFAULT_FN_ATTRS512 -_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask32 __U, __m256i __A) { +_mm512_mask_cvtbf8_ph(__m512h __S, __mmask32 __U, __m256i __A) { return _mm512_castsi512_ph( _mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8)); } static __inline __m512h __DEFAULT_FN_ATTRS512 -_mm512_maskz_cvtpbf8_ph(__mmask32 __U, __m256i __A) { +_mm512_maskz_cvtbf8_ph(__mmask32 __U, __m256i __A) { return _mm512_castsi512_ph( _mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8)); } diff --git a/clang/lib/Headers/avx10_2bf16intrin.h b/clang/lib/Headers/avx10_2bf16intrin.h index 0a427b9b7418b..199cc13ff7a1c 100644 --- a/clang/lib/Headers/avx10_2bf16intrin.h +++ b/clang/lib/Headers/avx10_2bf16intrin.h @@ -35,31 +35,31 @@ static __inline __m128bh __DEFAULT_FN_ATTRS128 _mm_setzero_pbh(void) { return __builtin_bit_cast(__m128bh, _mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castpbf16_ps(__m128bh __a) { +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castbf16_ps(__m128bh __a) { return (__m128)__a; } static __inline__ __m256 __DEFAULT_FN_ATTRS256 -_mm256_castpbf16_ps(__m256bh __a) { +_mm256_castbf16_ps(__m256bh __a) { return (__m256)__a; } static __inline__ __m256d __DEFAULT_FN_ATTRS256 -_mm256_castpbf16_pd(__m256bh __a) { +_mm256_castbf16_pd(__m256bh __a) { return (__m256d)__a; } -static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castpbf16_pd(__m128bh __a) { +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castbf16_pd(__m128bh __a) { return (__m128d)__a; } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_castpbf16_si128(__m128bh __a) { +_mm_castbf16_si128(__m128bh __a) { return (__m128i)__a; } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_castpbf16_si256(__m256bh __a) { 
+_mm256_castbf16_si256(__m256bh __a) { return (__m256i)__a; } @@ -100,18 +100,18 @@ _mm256_castsi256_pbh(__m256i __a) { } static __inline__ __m128bh __DEFAULT_FN_ATTRS256 -_mm256_castpbf16256_pbh128(__m256bh __a) { +_mm256_castbf16256_pbh128(__m256bh __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_castpbf16128_pbh256(__m128bh __a) { +_mm256_castbf16128_pbh256(__m128bh __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_zextpbf16128_pbh256(__m128bh __a) { +_mm256_zextbf16128_pbh256(__m128bh __a) { return __builtin_shufflevector(__a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } @@ -309,153 +309,149 @@ _mm256_permutexvar_pbh(__m256i __A, __m256bh __B) { return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_addne_pbh(__m256bh __A, __m256bh __B) { +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_add_pbh(__m256bh __A, + __m256bh __B) { return (__m256bh)((__v16bf)__A + (__v16bf)__B); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_mask_addne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { +_mm256_mask_add_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh)__builtin_ia32_selectpbf_256( - (__mmask16)__U, (__v16bf)_mm256_addne_pbh(__A, __B), (__v16bf)__W); + (__mmask16)__U, (__v16bf)_mm256_add_pbh(__A, __B), (__v16bf)__W); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_maskz_addne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { +_mm256_maskz_add_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh)__builtin_ia32_selectpbf_256( - (__mmask16)__U, (__v16bf)_mm256_addne_pbh(__A, __B), + (__mmask16)__U, (__v16bf)_mm256_add_pbh(__A, __B), (__v16bf)_mm256_setzero_pbh()); } -static 
__inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_addne_pbh(__m128bh __A, - __m128bh __B) { +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_add_pbh(__m128bh __A, + __m128bh __B) { return (__m128bh)((__v8bf)__A + (__v8bf)__B); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask_addne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { +_mm_mask_add_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, (__v8bf)_mm_addne_pbh(__A, __B), (__v8bf)__W); + (__mmask8)__U, (__v8bf)_mm_add_pbh(__A, __B), (__v8bf)__W); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_maskz_addne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { - return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, - (__v8bf)_mm_addne_pbh(__A, __B), - (__v8bf)_mm_setzero_pbh()); +_mm_maskz_add_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_add_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_subne_pbh(__m256bh __A, __m256bh __B) { +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sub_pbh(__m256bh __A, + __m256bh __B) { return (__m256bh)((__v16bf)__A - (__v16bf)__B); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_mask_subne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { +_mm256_mask_sub_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh)__builtin_ia32_selectpbf_256( - (__mmask16)__U, (__v16bf)_mm256_subne_pbh(__A, __B), (__v16bf)__W); + (__mmask16)__U, (__v16bf)_mm256_sub_pbh(__A, __B), (__v16bf)__W); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_maskz_subne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { +_mm256_maskz_sub_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh)__builtin_ia32_selectpbf_256( - (__mmask16)__U, (__v16bf)_mm256_subne_pbh(__A, __B), + (__mmask16)__U, 
(__v16bf)_mm256_sub_pbh(__A, __B), (__v16bf)_mm256_setzero_pbh()); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_subne_pbh(__m128bh __A, - __m128bh __B) { +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sub_pbh(__m128bh __A, + __m128bh __B) { return (__m128bh)((__v8bf)__A - (__v8bf)__B); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask_subne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { +_mm_mask_sub_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, (__v8bf)_mm_subne_pbh(__A, __B), (__v8bf)__W); + (__mmask8)__U, (__v8bf)_mm_sub_pbh(__A, __B), (__v8bf)__W); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_maskz_subne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { - return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, - (__v8bf)_mm_subne_pbh(__A, __B), - (__v8bf)_mm_setzero_pbh()); +_mm_maskz_sub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_sub_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_mulne_pbh(__m256bh __A, __m256bh __B) { +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mul_pbh(__m256bh __A, + __m256bh __B) { return (__m256bh)((__v16bf)__A * (__v16bf)__B); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_mask_mulne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { +_mm256_mask_mul_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh)__builtin_ia32_selectpbf_256( - (__mmask16)__U, (__v16bf)_mm256_mulne_pbh(__A, __B), (__v16bf)__W); + (__mmask16)__U, (__v16bf)_mm256_mul_pbh(__A, __B), (__v16bf)__W); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_maskz_mulne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { +_mm256_maskz_mul_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh)__builtin_ia32_selectpbf_256( - 
(__mmask16)__U, (__v16bf)_mm256_mulne_pbh(__A, __B), + (__mmask16)__U, (__v16bf)_mm256_mul_pbh(__A, __B), (__v16bf)_mm256_setzero_pbh()); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mulne_pbh(__m128bh __A, - __m128bh __B) { +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mul_pbh(__m128bh __A, + __m128bh __B) { return (__m128bh)((__v8bf)__A * (__v8bf)__B); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask_mulne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { +_mm_mask_mul_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, (__v8bf)_mm_mulne_pbh(__A, __B), (__v8bf)__W); + (__mmask8)__U, (__v8bf)_mm_mul_pbh(__A, __B), (__v8bf)__W); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_maskz_mulne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { - return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, - (__v8bf)_mm_mulne_pbh(__A, __B), - (__v8bf)_mm_setzero_pbh()); +_mm_maskz_mul_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_mul_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_divne_pbh(__m256bh __A, __m256bh __B) { +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_div_pbh(__m256bh __A, + __m256bh __B) { return (__m256bh)((__v16bf)__A / (__v16bf)__B); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_mask_divne_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { +_mm256_mask_div_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { return (__m256bh)__builtin_ia32_selectpbf_256( - (__mmask16)__U, (__v16bf)_mm256_divne_pbh(__A, __B), (__v16bf)__W); + (__mmask16)__U, (__v16bf)_mm256_div_pbh(__A, __B), (__v16bf)__W); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_maskz_divne_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { +_mm256_maskz_div_pbh(__mmask16 __U, __m256bh __A, 
__m256bh __B) { return (__m256bh)__builtin_ia32_selectpbf_256( - (__mmask16)__U, (__v16bf)_mm256_divne_pbh(__A, __B), + (__mmask16)__U, (__v16bf)_mm256_div_pbh(__A, __B), (__v16bf)_mm256_setzero_pbh()); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_divne_pbh(__m128bh __A, - __m128bh __B) { +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_div_pbh(__m128bh __A, + __m128bh __B) { return (__m128bh)((__v8bf)__A / (__v8bf)__B); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask_divne_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { +_mm_mask_div_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, (__v8bf)_mm_divne_pbh(__A, __B), (__v8bf)__W); + (__mmask8)__U, (__v8bf)_mm_div_pbh(__A, __B), (__v8bf)__W); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_maskz_divne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { - return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, - (__v8bf)_mm_divne_pbh(__A, __B), - (__v8bf)_mm_setzero_pbh()); +_mm_maskz_div_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_div_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_max_pbh(__m256bh __A, __m256bh __B) { - return (__m256bh)__builtin_ia32_vmaxpbf16256((__v16bf)__A, (__v16bf)__B); + return (__m256bh)__builtin_ia32_vmaxbf16256((__v16bf)__A, (__v16bf)__B); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -473,7 +469,7 @@ _mm256_maskz_max_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_max_pbh(__m128bh __A, __m128bh __B) { - return (__m128bh)__builtin_ia32_vmaxpbf16128((__v8bf)__A, (__v8bf)__B); + return (__m128bh)__builtin_ia32_vmaxbf16128((__v8bf)__A, (__v8bf)__B); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -490,7 +486,7 @@ _mm_maskz_max_pbh(__mmask8 __U, __m128bh __A, __m128bh 
__B) { static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_min_pbh(__m256bh __A, __m256bh __B) { - return (__m256bh)__builtin_ia32_vminpbf16256((__v16bf)__A, (__v16bf)__B); + return (__m256bh)__builtin_ia32_vminbf16256((__v16bf)__A, (__v16bf)__B); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -508,7 +504,7 @@ _mm256_maskz_min_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_min_pbh(__m128bh __A, __m128bh __B) { - return (__m128bh)__builtin_ia32_vminpbf16128((__v8bf)__A, (__v8bf)__B); + return (__m128bh)__builtin_ia32_vminbf16128((__v8bf)__A, (__v8bf)__B); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -523,306 +519,306 @@ _mm_maskz_min_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { (__mmask8)__U, (__v8bf)_mm_min_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comeqsbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomsbf16eq((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomisbf16eq((__v8bf)A, (__v8bf)B); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comltsbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomsbf16lt((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomisbf16lt((__v8bf)A, (__v8bf)B); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comlesbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomsbf16le((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomisbf16le((__v8bf)A, (__v8bf)B); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comgtsbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomsbf16gt((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sbh(__m128bh A, + __m128bh B) { + return 
__builtin_ia32_vcomisbf16gt((__v8bf)A, (__v8bf)B); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comgesbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomsbf16ge((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomisbf16ge((__v8bf)A, (__v8bf)B); } -static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comneqsbh(__m128bh A, - __m128bh B) { - return __builtin_ia32_vcomsbf16neq((__v8bf)A, (__v8bf)B); +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomisbf16neq((__v8bf)A, (__v8bf)B); } #define _mm256_cmp_pbh_mask(__A, __B, __P) \ - ((__mmask16)__builtin_ia32_vcmppbf16256_mask((__v16bf)(__m256bh)(__A), \ - (__v16bf)(__m256bh)(__B), \ - (int)(__P), (__mmask16) - 1)) + ((__mmask16)__builtin_ia32_vcmpbf16256_mask((__v16bf)(__m256bh)(__A), \ + (__v16bf)(__m256bh)(__B), \ + (int)(__P), (__mmask16) - 1)) #define _mm256_mask_cmp_pbh_mask(__U, __A, __B, __P) \ - ((__mmask16)__builtin_ia32_vcmppbf16256_mask((__v16bf)(__m256bh)(__A), \ - (__v16bf)(__m256bh)(__B), \ - (int)(__P), (__mmask16)(__U))) + ((__mmask16)__builtin_ia32_vcmpbf16256_mask((__v16bf)(__m256bh)(__A), \ + (__v16bf)(__m256bh)(__B), \ + (int)(__P), (__mmask16)(__U))) #define _mm_cmp_pbh_mask(__A, __B, __P) \ - ((__mmask8)__builtin_ia32_vcmppbf16128_mask((__v8bf)(__m128bh)(__A), \ - (__v8bf)(__m128bh)(__B), \ - (int)(__P), (__mmask8) - 1)) + ((__mmask8)__builtin_ia32_vcmpbf16128_mask((__v8bf)(__m128bh)(__A), \ + (__v8bf)(__m128bh)(__B), \ + (int)(__P), (__mmask8) - 1)) #define _mm_mask_cmp_pbh_mask(__U, __A, __B, __P) \ - ((__mmask8)__builtin_ia32_vcmppbf16128_mask((__v8bf)(__m128bh)(__A), \ - (__v8bf)(__m128bh)(__B), \ - (int)(__P), (__mmask8)(__U))) + ((__mmask8)__builtin_ia32_vcmpbf16128_mask((__v8bf)(__m128bh)(__A), \ + (__v8bf)(__m128bh)(__B), \ + (int)(__P), (__mmask8)(__U))) #define _mm256_mask_fpclass_pbh_mask(__U, __A, imm) \ - 
((__mmask16)__builtin_ia32_vfpclasspbf16256_mask( \ + ((__mmask16)__builtin_ia32_vfpclassbf16256_mask( \ (__v16bf)(__m256bh)(__A), (int)(imm), (__mmask16)(__U))) #define _mm256_fpclass_pbh_mask(__A, imm) \ - ((__mmask16)__builtin_ia32_vfpclasspbf16256_mask( \ + ((__mmask16)__builtin_ia32_vfpclassbf16256_mask( \ (__v16bf)(__m256bh)(__A), (int)(imm), (__mmask16) - 1)) #define _mm_mask_fpclass_pbh_mask(__U, __A, imm) \ - ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask( \ - (__v8bf)(__m128bh)(__A), (int)(imm), (__mmask8)(__U))) + ((__mmask8)__builtin_ia32_vfpclassbf16128_mask((__v8bf)(__m128bh)(__A), \ + (int)(imm), (__mmask8)(__U))) #define _mm_fpclass_pbh_mask(__A, imm) \ - ((__mmask8)__builtin_ia32_vfpclasspbf16128_mask((__v8bf)(__m128bh)(__A), \ - (int)(imm), (__mmask8) - 1)) + ((__mmask8)__builtin_ia32_vfpclassbf16128_mask((__v8bf)(__m128bh)(__A), \ + (int)(imm), (__mmask8) - 1)) static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_scalef_pbh(__m256bh __A, __m256bh __B) { - return (__m256bh)__builtin_ia32_vscalefpbf16256_mask( + return (__m256bh)__builtin_ia32_vscalefbf16256_mask( (__v16bf)__A, (__v16bf)__B, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pbh( __m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { - return (__m256bh)__builtin_ia32_vscalefpbf16256_mask( + return (__m256bh)__builtin_ia32_vscalefbf16256_mask( (__v16bf)__A, (__v16bf)__B, (__v16bf)__W, (__mmask16)__U); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { - return (__m256bh)__builtin_ia32_vscalefpbf16256_mask( + return (__m256bh)__builtin_ia32_vscalefbf16256_mask( (__v16bf)__A, (__v16bf)__B, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_scalef_pbh(__m128bh __A, __m128bh __B) { - return (__m128bh)__builtin_ia32_vscalefpbf16128_mask( + return (__m128bh)__builtin_ia32_vscalefbf16128_mask( 
(__v8bf)__A, (__v8bf)__B, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { - return (__m128bh)__builtin_ia32_vscalefpbf16128_mask( + return (__m128bh)__builtin_ia32_vscalefbf16128_mask( (__v8bf)__A, (__v8bf)__B, (__v8bf)__W, (__mmask8)__U); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { - return (__m128bh)__builtin_ia32_vscalefpbf16128_mask( + return (__m128bh)__builtin_ia32_vscalefbf16128_mask( (__v8bf)__A, (__v8bf)__B, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_rcp_pbh(__m256bh __A) { - return (__m256bh)__builtin_ia32_vrcppbf16256_mask( + return (__m256bh)__builtin_ia32_vrcpbf16256_mask( (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_rcp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { - return (__m256bh)__builtin_ia32_vrcppbf16256_mask((__v16bf)__A, (__v16bf)__W, - (__mmask16)__U); + return (__m256bh)__builtin_ia32_vrcpbf16256_mask((__v16bf)__A, (__v16bf)__W, + (__mmask16)__U); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp_pbh(__mmask16 __U, __m256bh __A) { - return (__m256bh)__builtin_ia32_vrcppbf16256_mask( + return (__m256bh)__builtin_ia32_vrcpbf16256_mask( (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_rcp_pbh(__m128bh __A) { - return (__m128bh)__builtin_ia32_vrcppbf16128_mask( + return (__m128bh)__builtin_ia32_vrcpbf16128_mask( (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_rcp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { - return (__m128bh)__builtin_ia32_vrcppbf16128_mask((__v8bf)__A, (__v8bf)__W, - (__mmask8)__U); + return 
(__m128bh)__builtin_ia32_vrcpbf16128_mask((__v8bf)__A, (__v8bf)__W, + (__mmask8)__U); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_rcp_pbh(__mmask8 __U, __m128bh __A) { - return (__m128bh)__builtin_ia32_vrcppbf16128_mask( + return (__m128bh)__builtin_ia32_vrcpbf16128_mask( (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_getexp_pbh(__m256bh __A) { - return (__m256bh)__builtin_ia32_vgetexppbf16256_mask( + return (__m256bh)__builtin_ia32_vgetexpbf16256_mask( (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { - return (__m256bh)__builtin_ia32_vgetexppbf16256_mask( + return (__m256bh)__builtin_ia32_vgetexpbf16256_mask( (__v16bf)__A, (__v16bf)__W, (__mmask16)__U); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pbh(__mmask16 __U, __m256bh __A) { - return (__m256bh)__builtin_ia32_vgetexppbf16256_mask( + return (__m256bh)__builtin_ia32_vgetexpbf16256_mask( (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_getexp_pbh(__m128bh __A) { - return (__m128bh)__builtin_ia32_vgetexppbf16128_mask( + return (__m128bh)__builtin_ia32_vgetexpbf16128_mask( (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { - return (__m128bh)__builtin_ia32_vgetexppbf16128_mask((__v8bf)__A, (__v8bf)__W, - (__mmask8)__U); + return (__m128bh)__builtin_ia32_vgetexpbf16128_mask((__v8bf)__A, (__v8bf)__W, + (__mmask8)__U); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pbh(__mmask8 __U, __m128bh __A) { - return (__m128bh)__builtin_ia32_vgetexppbf16128_mask( + return (__m128bh)__builtin_ia32_vgetexpbf16128_mask( (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); } static 
__inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_rsqrt_pbh(__m256bh __A) { - return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask( + return (__m256bh)__builtin_ia32_vrsqrtbf16256_mask( (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { - return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask( - (__v16bf)__A, (__v16bf)__W, (__mmask16)__U); + return (__m256bh)__builtin_ia32_vrsqrtbf16256_mask((__v16bf)__A, (__v16bf)__W, + (__mmask16)__U); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt_pbh(__mmask16 __U, __m256bh __A) { - return (__m256bh)__builtin_ia32_vrsqrtpbf16256_mask( + return (__m256bh)__builtin_ia32_vrsqrtbf16256_mask( (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_rsqrt_pbh(__m128bh __A) { - return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask( + return (__m128bh)__builtin_ia32_vrsqrtbf16128_mask( (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { - return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask((__v8bf)__A, (__v8bf)__W, - (__mmask8)__U); + return (__m128bh)__builtin_ia32_vrsqrtbf16128_mask((__v8bf)__A, (__v8bf)__W, + (__mmask8)__U); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) { - return (__m128bh)__builtin_ia32_vrsqrtpbf16128_mask( + return (__m128bh)__builtin_ia32_vrsqrtbf16128_mask( (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); } -#define _mm256_reducene_pbh(__A, imm) \ - ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \ +#define _mm256_reduce_pbh(__A, imm) \ + ((__m256bh)__builtin_ia32_vreducebf16256_mask( \ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_undefined_pbh(), \ (__mmask16) - 1)) -#define _mm256_mask_reducene_pbh(__W, __U, __A, imm) \ 
- ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \ +#define _mm256_mask_reduce_pbh(__W, __U, __A, imm) \ + ((__m256bh)__builtin_ia32_vreducebf16256_mask( \ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)(__m256bh)(__W), \ (__mmask16)(__U))) -#define _mm256_maskz_reducene_pbh(__U, __A, imm) \ - ((__m256bh)__builtin_ia32_vreducenepbf16256_mask( \ +#define _mm256_maskz_reduce_pbh(__U, __A, imm) \ + ((__m256bh)__builtin_ia32_vreducebf16256_mask( \ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \ (__mmask16)(__U))) -#define _mm_reducene_pbh(__A, imm) \ - ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \ +#define _mm_reduce_pbh(__A, imm) \ + ((__m128bh)__builtin_ia32_vreducebf16128_mask( \ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_undefined_pbh(), \ (__mmask8) - 1)) -#define _mm_mask_reducene_pbh(__W, __U, __A, imm) \ - ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \ +#define _mm_mask_reduce_pbh(__W, __U, __A, imm) \ + ((__m128bh)__builtin_ia32_vreducebf16128_mask( \ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)(__m128bh)(__W), \ (__mmask8)(__U))) -#define _mm_maskz_reducene_pbh(__U, __A, imm) \ - ((__m128bh)__builtin_ia32_vreducenepbf16128_mask( \ +#define _mm_maskz_reduce_pbh(__U, __A, imm) \ + ((__m128bh)__builtin_ia32_vreducebf16128_mask( \ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \ (__mmask8)(__U))) -#define _mm256_roundscalene_pbh(__A, imm) \ - ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \ +#define _mm256_roundscale_pbh(__A, imm) \ + ((__m256bh)__builtin_ia32_vrndscalebf16_256_mask( \ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \ (__mmask16) - 1)) -#define _mm256_mask_roundscalene_pbh(__W, __U, __A, imm) \ - ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \ +#define _mm256_mask_roundscale_pbh(__W, __U, __A, imm) \ + ((__m256bh)__builtin_ia32_vrndscalebf16_256_mask( \ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)(__m256bh)(__W), \ (__mmask16)(__U))) -#define 
_mm256_maskz_roundscalene_pbh(__U, __A, imm) \ - ((__m256bh)__builtin_ia32_vrndscalenepbf16_256_mask( \ +#define _mm256_maskz_roundscale_pbh(__U, __A, imm) \ + ((__m256bh)__builtin_ia32_vrndscalebf16_256_mask( \ (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \ (__mmask16)(__U))) -#define _mm_roundscalene_pbh(__A, imm) \ - ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \ +#define _mm_roundscale_pbh(__A, imm) \ + ((__m128bh)__builtin_ia32_vrndscalebf16_128_mask( \ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \ (__mmask8) - 1)) -#define _mm_mask_roundscalene_pbh(__W, __U, __A, imm) \ - ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \ +#define _mm_mask_roundscale_pbh(__W, __U, __A, imm) \ + ((__m128bh)__builtin_ia32_vrndscalebf16_128_mask( \ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)(__m128bh)(__W), \ (__mmask8)(__U))) -#define _mm_maskz_roundscalene_pbh(__U, __A, imm) \ - ((__m128bh)__builtin_ia32_vrndscalenepbf16_128_mask( \ +#define _mm_maskz_roundscale_pbh(__U, __A, imm) \ + ((__m128bh)__builtin_ia32_vrndscalebf16_128_mask( \ (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \ (__mmask8)(__U))) #define _mm256_getmant_pbh(__A, __B, __C) \ - ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \ + ((__m256bh)__builtin_ia32_vgetmantbf16256_mask( \ (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \ (__v16bf)_mm256_undefined_pbh(), (__mmask16) - 1)) #define _mm256_mask_getmant_pbh(__W, __U, __A, __B, __C) \ - ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \ + ((__m256bh)__builtin_ia32_vgetmantbf16256_mask( \ (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \ (__v16bf)(__m256bh)(__W), (__mmask16)(__U))) #define _mm256_maskz_getmant_pbh(__U, __A, __B, __C) \ - ((__m256bh)__builtin_ia32_vgetmantpbf16256_mask( \ + ((__m256bh)__builtin_ia32_vgetmantbf16256_mask( \ (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \ (__v16bf)_mm256_setzero_pbh(), (__mmask16)(__U))) #define 
_mm_getmant_pbh(__A, __B, __C) \ - ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \ + ((__m128bh)__builtin_ia32_vgetmantbf16128_mask( \ (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \ (__v8bf)_mm_undefined_pbh(), (__mmask8) - 1)) #define _mm_mask_getmant_pbh(__W, __U, __A, __B, __C) \ - ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \ + ((__m128bh)__builtin_ia32_vgetmantbf16128_mask( \ (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \ (__v8bf)(__m128bh)(__W), (__mmask8)(__U))) #define _mm_maskz_getmant_pbh(__U, __A, __B, __C) \ - ((__m128bh)__builtin_ia32_vgetmantpbf16128_mask( \ + ((__m128bh)__builtin_ia32_vgetmantbf16128_mask( \ (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \ (__v8bf)_mm_setzero_pbh(), (__mmask8)(__U))) static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sqrt_pbh(__m256bh __A) { - return (__m256bh)__builtin_ia32_vsqrtnepbf16256((__v16bf)__A); + return (__m256bh)__builtin_ia32_vsqrtbf16256((__v16bf)__A); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 @@ -839,7 +835,7 @@ _mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) { } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sqrt_pbh(__m128bh __A) { - return (__m128bh)__builtin_ia32_vsqrtnepbf16((__v8bf)__A); + return (__m128bh)__builtin_ia32_vsqrtbf16((__v8bf)__A); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 @@ -855,232 +851,230 @@ _mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) { } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_fmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { +_mm256_fmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B, (__v16bf)__C); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddne_pbh( - __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_fmadd_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_selectpbf_256( 
(__mmask16)__U, - _mm256_fmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), - (__v16bf)__A); + _mm256_fmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__A); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddne_pbh( +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pbh( __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { return (__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), - (__v16bf)__C); + _mm256_fmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__C); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddne_pbh( +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh( __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + _mm256_fmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)_mm256_setzero_pbh()); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_fmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { +_mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B, -(__v16bf)__C); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubne_pbh( - __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_fmsub_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), - (__v16bf)__A); + _mm256_fmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__A); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubne_pbh( +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pbh( __m256bh __A, __m256bh __B, __m256bh __C, 
__mmask16 __U) { return (__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), - (__v16bf)__C); + _mm256_fmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__C); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubne_pbh( +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh( __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + _mm256_fmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)_mm256_setzero_pbh()); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_fnmaddne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { +_mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B, (__v16bf)__C); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmaddne_pbh( +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pbh( __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fnmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + _mm256_fnmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__A); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmaddne_pbh( +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pbh( __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { return (__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fnmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + _mm256_fnmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__C); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmaddne_pbh( +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh( __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { return 
(__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fnmaddne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + _mm256_fnmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)_mm256_setzero_pbh()); } static __inline__ __m256bh __DEFAULT_FN_ATTRS256 -_mm256_fnmsubne_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { +_mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B, -(__v16bf)__C); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsubne_pbh( +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pbh( __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fnmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + _mm256_fnmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__A); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsubne_pbh( +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pbh( __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { return (__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fnmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + _mm256_fnmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__C); } -static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsubne_pbh( +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh( __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { return (__m256bh)__builtin_ia32_selectpbf_256( (__mmask16)__U, - _mm256_fnmsubne_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + _mm256_fnmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)_mm256_setzero_pbh()); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmaddne_pbh(__m128bh __A, - __m128bh __B, - __m128bh __C) { +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A, + __m128bh __B, + __m128bh __C) { return 
(__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B, (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask_fmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { +_mm_mask_fmadd_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__mmask8)__U, _mm_fmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)__A); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask3_fmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { +_mm_mask3_fmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__mmask8)__U, _mm_fmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_maskz_fmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { +_mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__mmask8)__U, _mm_fmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)_mm_setzero_pbh()); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsubne_pbh(__m128bh __A, - __m128bh __B, - __m128bh __C) { +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A, + __m128bh __B, + __m128bh __C) { return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B, -(__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask_fmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { +_mm_mask_fmsub_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + 
(__mmask8)__U, _mm_fmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)__A); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask3_fmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { +_mm_mask3_fmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__mmask8)__U, _mm_fmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_maskz_fmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { +_mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__mmask8)__U, _mm_fmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)_mm_setzero_pbh()); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A, + __m128bh __B, + __m128bh __C) { return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B, (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask_fnmaddne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { +_mm_mask_fnmadd_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fnmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__mmask8)__U, _mm_fnmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)__A); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask3_fnmaddne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { +_mm_mask3_fnmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fnmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + 
(__mmask8)__U, _mm_fnmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_maskz_fnmaddne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { +_mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fnmaddne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__mmask8)__U, _mm_fnmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)_mm_setzero_pbh()); } -static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C) { +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A, + __m128bh __B, + __m128bh __C) { return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B, -(__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask_fnmsubne_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { +_mm_mask_fnmsub_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fnmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__mmask8)__U, _mm_fnmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)__A); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_mask3_fnmsubne_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { +_mm_mask3_fnmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fnmsubne_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__mmask8)__U, _mm_fnmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)__C); } static __inline__ __m128bh __DEFAULT_FN_ATTRS128 -_mm_maskz_fnmsubne_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { +_mm_maskz_fnmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { return (__m128bh)__builtin_ia32_selectpbf_128( - (__mmask8)__U, _mm_fnmsubne_pbh((__v8bf)__A, (__v8bf)__B, 
(__v8bf)__C), + (__mmask8)__U, _mm_fnmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), (__v8bf)_mm_setzero_pbh()); } diff --git a/clang/lib/Headers/avx10_2convertintrin.h b/clang/lib/Headers/avx10_2convertintrin.h index efe8477cbbf9b..c67a5b890f195 100644 --- a/clang/lib/Headers/avx10_2convertintrin.h +++ b/clang/lib/Headers/avx10_2convertintrin.h @@ -77,516 +77,508 @@ _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) { (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()), \ (__mmask16)(U), (const int)(R))) -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtbiasph_pbf8(__m128i __A, __m128h __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_bf8(__m128i __A, + __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbiasph_pbf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { +_mm_mask_cvtbiasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtbiasph_pbf8(__mmask8 __U, __m128i __A, __m128h __B) { +_mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtbiasph_pbf8(__m256i __A, __m256h __B) { +_mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_pbf8( +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_bf8( __m128i __W, __mmask16 __U, __m256i __A, 
__m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtbiasph_pbf8(__mmask16 __U, __m256i __A, __m256h __B) { +_mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtbiassph_pbf8(__m128i __A, __m128h __B) { +_mm_cvtbiassph_bf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbiassph_pbf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { +_mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtbiassph_pbf8(__mmask8 __U, __m128i __A, __m128h __B) { +_mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtbiassph_pbf8(__m256i __A, __m256h __B) { +_mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_pbf8( +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_bf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, 
(__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtbiassph_pbf8(__mmask16 __U, __m256i __A, __m256h __B) { +_mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtbiasph_phf8(__m128i __A, __m128h __B) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_hf8(__m128i __A, + __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbiasph_phf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { +_mm_mask_cvtbiasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtbiasph_phf8(__mmask8 __U, __m128i __A, __m128h __B) { +_mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtbiasph_phf8(__m256i __A, __m256h __B) { +_mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_phf8( +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_hf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } 
static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtbiasph_phf8(__mmask16 __U, __m256i __A, __m256h __B) { +_mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtbiassph_phf8(__m128i __A, __m128h __B) { +_mm_cvtbiassph_hf8(__m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtbiassph_phf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { +_mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtbiassph_phf8(__mmask8 __U, __m128i __A, __m128h __B) { +_mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtbiassph_phf8(__m256i __A, __m256h __B) { +_mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_phf8( +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_hf8( __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtbiassph_phf8(__mmask16 __U, 
__m256i __A, __m256h __B) { +_mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtne2ph_pbf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvtne2ph2bf8_128((__v8hf)(__A), - (__v8hf)(__B)); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_bf8(__m128h __A, + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), (__v8hf)(__B)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtne2ph_pbf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { +_mm_mask_cvt2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvtne2ph_pbf8(__A, __B), (__v16qi)__W); + (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtne2ph_pbf8(__mmask16 __U, __m128h __A, __m128h __B) { +_mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvtne2ph_pbf8(__A, __B), + (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)(__m128i)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtne2ph_pbf8(__m256h __A, __m256h __B) { - return (__m256i)__builtin_ia32_vcvtne2ph2bf8_256((__v16hf)(__A), - (__v16hf)(__B)); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_bf8(__m256h __A, + __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A), + (__v16hf)(__B)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtne2ph_pbf8( - __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { 
return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvtne2ph_pbf8(__A, __B), (__v32qi)__W); + (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtne2ph_pbf8(__mmask32 __U, __m256h __A, __m256h __B) { +_mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvtne2ph_pbf8(__A, __B), + (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtnes2ph_pbf8(__m128h __A, __m128h __B) { - return (__m128i)__builtin_ia32_vcvtne2ph2bf8s_128((__v8hf)(__A), - (__v8hf)(__B)); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_bf8(__m128h __A, + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), (__v8hf)(__B)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtnes2ph_pbf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { +_mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvtnes2ph_pbf8(__A, __B), (__v16qi)__W); + (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtnes2ph_pbf8(__mmask16 __U, __m128h __A, __m128h __B) { +_mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvtnes2ph_pbf8(__A, __B), + (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)(__m128i)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtnes2ph_pbf8(__m256h __A, __m256h __B) { - return (__m256i)__builtin_ia32_vcvtne2ph2bf8s_256((__v16hf)(__A), - (__v16hf)(__B)); +_mm256_cvts2ph_bf8(__m256h __A, __m256h __B) { + return 
(__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A), + (__v16hf)(__B)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtnes2ph_pbf8( - __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_pbf8(__A, __B), (__v32qi)__W); + (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtnes2ph_pbf8(__mmask32 __U, __m256h __A, __m256h __B) { +_mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_pbf8(__A, __B), + (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtne2ph_phf8(__m128h __A, - __m128h __B) { - return (__m128i)__builtin_ia32_vcvtne2ph2hf8_128((__v8hf)(__A), - (__v8hf)(__B)); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_hf8(__m128h __A, + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), (__v8hf)(__B)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtne2ph_phf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { +_mm_mask_cvt2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvtne2ph_phf8(__A, __B), (__v16qi)__W); + (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtne2ph_phf8(__mmask16 __U, __m128h __A, __m128h __B) { +_mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvtne2ph_phf8(__A, __B), + (__mmask16)__U, 
(__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)(__m128i)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtne2ph_phf8(__m256h __A, __m256h __B) { - return (__m256i)__builtin_ia32_vcvtne2ph2hf8_256((__v16hf)(__A), - (__v16hf)(__B)); +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_hf8(__m256h __A, + __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A), + (__v16hf)(__B)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtne2ph_phf8( - __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvtne2ph_phf8(__A, __B), (__v32qi)__W); + (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtne2ph_phf8(__mmask32 __U, __m256h __A, __m256h __B) { +_mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvtne2ph_phf8(__A, __B), + (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_cvtnes2ph_phf8(__m128h __A, __m128h __B) { - return (__m128i)__builtin_ia32_vcvtne2ph2hf8s_128((__v8hf)(__A), - (__v8hf)(__B)); +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_hf8(__m128h __A, + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), (__v8hf)(__B)); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtnes2ph_phf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { +_mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvtnes2ph_phf8(__A, __B), (__v16qi)__W); + (__mmask16)__U, 
(__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtnes2ph_phf8(__mmask16 __U, __m128h __A, __m128h __B) { +_mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { return (__m128i)__builtin_ia32_selectb_128( - (__mmask16)__U, (__v16qi)_mm_cvtnes2ph_phf8(__A, __B), + (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)(__m128i)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_cvtnes2ph_phf8(__m256h __A, __m256h __B) { - return (__m256i)__builtin_ia32_vcvtne2ph2hf8s_256((__v16hf)(__A), - (__v16hf)(__B)); +_mm256_cvts2ph_hf8(__m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A), + (__v16hf)(__B)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtnes2ph_phf8( - __m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_phf8(__A, __B), (__v32qi)__W); + (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtnes2ph_phf8(__mmask32 __U, __m256h __A, __m256h __B) { +_mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { return (__m256i)__builtin_ia32_selectb_256( - (__mmask16)__U, (__v32qi)_mm256_cvtnes2ph_phf8(__A, __B), + (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)(__m256i)_mm256_setzero_si256()); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtnehf8_ph(__m128i __A) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvthf8(__m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (__mmask8)-1); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 -_mm_mask_cvtnehf8_ph(__m128h __W, __mmask8 __U, __m128i __A) { 
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvthf8(__m128h __W, + __mmask8 __U, + __m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)__W, (__mmask8)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtnehf8_ph(__mmask8 __U, __m128i __A) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvthf8(__mmask8 __U, + __m128i __A) { return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (__mmask8)__U); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 -_mm256_cvtnehf8_ph(__m128i __A) { +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvthf8(__m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (__mmask16)-1); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtnehf8_ph(__m256h __W, __mmask16 __U, __m128i __A) { +_mm256_mask_cvthf8(__m256h __W, __mmask16 __U, __m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)__W, (__mmask16)__U); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtnehf8_ph(__mmask16 __U, __m128i __A) { +_mm256_maskz_cvthf8(__mmask16 __U, __m128i __A) { return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtneph_pbf8(__m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8_128_mask( +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_bf8(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtneph_pbf8(__m128i __W, __mmask8 __U, __m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8_128_mask( +_mm_mask_cvtph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { + return 
(__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtneph_pbf8(__mmask8 __U, __m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8_128_mask( +_mm_maskz_cvtph_bf8(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtneph_pbf8(__m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8_256_mask( +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_bf8(__m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtneph_pbf8(__m128i __W, __mmask16 __U, __m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8_256_mask( +_mm256_mask_cvtph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtneph_pbf8(__mmask16 __U, __m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8_256_mask( +_mm256_maskz_cvtph_bf8(__mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtnesph_pbf8(__m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8s_128_mask( +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_bf8(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtnesph_pbf8(__m128i __W, __mmask8 __U, __m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8s_128_mask( 
+_mm_mask_cvtsph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtnesph_pbf8(__mmask8 __U, __m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8s_128_mask( +_mm_maskz_cvtsph_bf8(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtnesph_pbf8(__m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8s_256_mask( +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_bf8(__m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtnesph_pbf8(__m128i __W, __mmask16 __U, __m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8s_256_mask( +_mm256_mask_cvtsph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtnesph_pbf8(__mmask16 __U, __m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2bf8s_256_mask( +_mm256_maskz_cvtsph_bf8(__mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtneph_phf8(__m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8_128_mask( +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_hf8(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtneph_phf8(__m128i __W, __mmask8 __U, 
__m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8_128_mask( +_mm_mask_cvtph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtneph_phf8(__mmask8 __U, __m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8_128_mask( +_mm_maskz_cvtph_hf8(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtneph_phf8(__m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8_256_mask( +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_hf8(__m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtneph_phf8(__m128i __W, __mmask16 __U, __m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8_256_mask( +_mm256_mask_cvtph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtneph_phf8(__mmask16 __U, __m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8_256_mask( +_mm256_maskz_cvtph_hf8(__mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtnesph_phf8(__m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8s_128_mask( +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_hf8(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); } static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 -_mm_mask_cvtnesph_phf8(__m128i __W, __mmask8 __U, __m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8s_128_mask( +_mm_mask_cvtsph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtnesph_phf8(__mmask8 __U, __m128h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8s_128_mask( +_mm_maskz_cvtsph_hf8(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_cvtnesph_phf8(__m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8s_256_mask( +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_hf8(__m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtnesph_phf8(__m128i __W, __mmask16 __U, __m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8s_256_mask( +_mm256_mask_cvtsph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); } static __inline__ __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtnesph_phf8(__mmask16 __U, __m256h __A) { - return (__m128i)__builtin_ia32_vcvtneph2hf8s_256_mask( +_mm256_maskz_cvtsph_hf8(__mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); } -static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtpbf8_ph(__m128i __A) { +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtbf8_ph(__m128i __A) { return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A), 8)); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 
-_mm_mask_cvtpbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) { +_mm_mask_cvtbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) { return _mm_castsi128_ph( _mm_mask_slli_epi16((__m128i)__S, __U, _mm_cvtepi8_epi16(__A), 8)); } static __inline__ __m128h __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtpbf8_ph(__mmask8 __U, __m128i __A) { +_mm_maskz_cvtbf8_ph(__mmask8 __U, __m128i __A) { return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U, __A), 8)); } -static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtpbf8_ph(__m128i __A) { +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtbf8_ph(__m128i __A) { return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A), 8)); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) { +_mm256_mask_cvtbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) { return _mm256_castsi256_ph( _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8)); } static __inline__ __m256h __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtpbf8_ph(__mmask16 __U, __m128i __A) { +_mm256_maskz_cvtbf8_ph(__mmask16 __U, __m128i __A) { return _mm256_castsi256_ph( _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8)); } diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index a23ad40884f24..a29b73f97ab7e 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -11,6 +11,8 @@ /// //===----------------------------------------------------------------------===// +#include "clang/Basic/AttributeCommonInfo.h" +#include "clang/Basic/Attributes.h" #include "clang/Basic/CharInfo.h" #include "clang/Basic/DirectoryEntry.h" #include "clang/Basic/FileManager.h" @@ -97,7 +99,8 @@ SourceRange Preprocessor::DiscardUntilEndOfDirective(Token &Tmp) { enum MacroDiag { MD_NoWarn, //> Not a reserved identifier MD_KeywordDef, //> Macro hides keyword, enabled by default - MD_ReservedMacro //> #define of #undef reserved id, disabled by default + 
MD_ReservedMacro, //> #define of #undef reserved id, disabled by default + MD_ReservedAttributeIdentifier }; /// Enumerates possible %select values for the pp_err_elif_after_else and @@ -173,6 +176,22 @@ static bool isLanguageDefinedBuiltin(const SourceManager &SourceMgr, return false; } +static bool isReservedCXXAttributeName(Preprocessor &PP, IdentifierInfo *II) { + const LangOptions &Lang = PP.getLangOpts(); + if (Lang.CPlusPlus && + hasAttribute(AttributeCommonInfo::AS_CXX11, /* Scope*/ nullptr, II, + PP.getTargetInfo(), Lang, /*CheckPlugins*/ false) > 0) { + AttributeCommonInfo::AttrArgsInfo AttrArgsInfo = + AttributeCommonInfo::getCXX11AttrArgsInfo(II); + if (AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Required) + return PP.isNextPPTokenLParen(); + + return !PP.isNextPPTokenLParen() || + AttrArgsInfo == AttributeCommonInfo::AttrArgsInfo::Optional; + } + return false; +} + static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) { const LangOptions &Lang = PP.getLangOpts(); StringRef Text = II->getName(); @@ -182,6 +201,8 @@ static MacroDiag shouldWarnOnMacroDef(Preprocessor &PP, IdentifierInfo *II) { return MD_KeywordDef; if (Lang.CPlusPlus11 && (Text == "override" || Text == "final")) return MD_KeywordDef; + if (isReservedCXXAttributeName(PP, II)) + return MD_ReservedAttributeIdentifier; return MD_NoWarn; } @@ -190,6 +211,8 @@ static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) { // Do not warn on keyword undef. It is generally harmless and widely used. 
if (isReservedInAllContexts(II->isReserved(Lang))) return MD_ReservedMacro; + if (isReservedCXXAttributeName(PP, II)) + return MD_ReservedAttributeIdentifier; return MD_NoWarn; } @@ -365,6 +388,9 @@ bool Preprocessor::CheckMacroName(Token &MacroNameTok, MacroUse isDefineUndef, } if (D == MD_ReservedMacro) Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_id); + if (D == MD_ReservedAttributeIdentifier) + Diag(MacroNameTok, diag::warn_pp_macro_is_reserved_attribute_id) + << II->getName(); } // Okay, we got a good identifier. diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 347c13da0ad21..9cf29668f251f 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1804,8 +1804,9 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { diag::err_feature_check_malformed); if (!II) return false; - else if (II->getBuiltinID() != 0) { - switch (II->getBuiltinID()) { + auto BuiltinID = II->getBuiltinID(); + if (BuiltinID != 0) { + switch (BuiltinID) { case Builtin::BI__builtin_cpu_is: return getTargetInfo().supportsCpuIs(); case Builtin::BI__builtin_cpu_init: @@ -1818,8 +1819,11 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) { // usual allocation and deallocation functions. Required by libc++ return 201802; default: + // __has_builtin should return false for aux builtins. 
+ if (getBuiltinInfo().isAuxBuiltinID(BuiltinID)) + return false; return Builtin::evaluateRequiredTargetFeatures( - getBuiltinInfo().getRequiredFeatures(II->getBuiltinID()), + getBuiltinInfo().getRequiredFeatures(BuiltinID), getTargetInfo().getTargetOpts().FeatureMap); } return true; diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index e18e3c197383e..5f8ffa71607bb 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -2914,7 +2914,57 @@ static void CollectEnclosingNamespace(Sema::AssociatedNamespaceSet &Namespaces, while (!Ctx->isFileContext() || Ctx->isInlineNamespace()) Ctx = Ctx->getParent(); - Namespaces.insert(Ctx->getPrimaryContext()); + // Actually it is fine to always do `Namespaces.insert(Ctx);` simply. But it + // may cause more allocations in Namespaces and more unnecessary lookups. So + // we'd like to insert the representative namespace only. + DeclContext *PrimaryCtx = Ctx->getPrimaryContext(); + Decl *PrimaryD = cast(PrimaryCtx); + Decl *D = cast(Ctx); + ASTContext &AST = D->getASTContext(); + + // TODO: Technically it is better to insert one namespace per module. e.g., + // + // ``` + // //--- first.cppm + // export module first; + // namespace ns { ... } // first namespace + // + // //--- m-partA.cppm + // export module m:partA; + // import first; + // + // namespace ns { ... } + // namespace ns { ... } + // + // //--- m-partB.cppm + // export module m:partB; + // import first; + // import :partA; + // + // namespace ns { ... } + // namespace ns { ... } + // + // ... + // + // //--- m-partN.cppm + // export module m:partN; + // import first; + // import :partA; + // ... + // import :part$(N-1); + // + // namespace ns { ... } + // namespace ns { ... } + // + // consume(ns::any_decl); // the lookup + // ``` + // + // We should only insert once for all namespaces in module m. 
+ if (D->isInNamedModule() && + !AST.isInSameModule(D->getOwningModule(), PrimaryD->getOwningModule())) + Namespaces.insert(Ctx); + else + Namespaces.insert(PrimaryCtx); } // Add the associated classes and namespaces for argument-dependent @@ -3675,7 +3725,9 @@ Sema::LookupLiteralOperator(Scope *S, LookupResult &R, TemplateArgumentLoc Arg(TemplateArgument(StringLit), StringLit); if (CheckTemplateArgument( Params->getParam(0), Arg, FD, R.getNameLoc(), R.getNameLoc(), - 0, SugaredChecked, CanonicalChecked, CTAK_Specified) || + 0, SugaredChecked, CanonicalChecked, CTAK_Specified, + /*PartialOrdering=*/false, + /*MatchedPackOnParmToNonPackOnArg=*/nullptr) || Trap.hasErrorOccurred()) IsTemplate = false; } diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 23056ca5deba3..6ae9c51c06b31 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -6917,7 +6917,8 @@ void Sema::AddOverloadCandidate( OverloadCandidateSet &CandidateSet, bool SuppressUserConversions, bool PartialOverloading, bool AllowExplicit, bool AllowExplicitConversions, ADLCallKind IsADLCandidate, ConversionSequenceList EarlyConversions, - OverloadCandidateParamOrder PO, bool AggregateCandidateDeduction) { + OverloadCandidateParamOrder PO, bool AggregateCandidateDeduction, + bool HasMatchedPackOnParmToNonPackOnArg) { const FunctionProtoType *Proto = dyn_cast(Function->getType()->getAs()); assert(Proto && "Functions without a prototype cannot be overloaded"); @@ -6936,7 +6937,8 @@ void Sema::AddOverloadCandidate( AddMethodCandidate(Method, FoundDecl, Method->getParent(), QualType(), Expr::Classification::makeSimpleLValue(), Args, CandidateSet, SuppressUserConversions, - PartialOverloading, EarlyConversions, PO); + PartialOverloading, EarlyConversions, PO, + HasMatchedPackOnParmToNonPackOnArg); return; } // We treat a constructor like a non-member function, since its object @@ -6979,6 +6981,8 @@ void Sema::AddOverloadCandidate( 
CandidateSet.getRewriteInfo().getRewriteKind(Function, PO); Candidate.IsADLCandidate = llvm::to_underlying(IsADLCandidate); Candidate.ExplicitCallArguments = Args.size(); + Candidate.HasMatchedPackOnParmToNonPackOnArg = + HasMatchedPackOnParmToNonPackOnArg; // Explicit functions are not actually candidates at all if we're not // allowing them in this context, but keep them around so we can point @@ -7521,16 +7525,13 @@ void Sema::AddMethodCandidate(DeclAccessPair FoundDecl, QualType ObjectType, } } -void -Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl, - CXXRecordDecl *ActingContext, QualType ObjectType, - Expr::Classification ObjectClassification, - ArrayRef Args, - OverloadCandidateSet &CandidateSet, - bool SuppressUserConversions, - bool PartialOverloading, - ConversionSequenceList EarlyConversions, - OverloadCandidateParamOrder PO) { +void Sema::AddMethodCandidate( + CXXMethodDecl *Method, DeclAccessPair FoundDecl, + CXXRecordDecl *ActingContext, QualType ObjectType, + Expr::Classification ObjectClassification, ArrayRef Args, + OverloadCandidateSet &CandidateSet, bool SuppressUserConversions, + bool PartialOverloading, ConversionSequenceList EarlyConversions, + OverloadCandidateParamOrder PO, bool HasMatchedPackOnParmToNonPackOnArg) { const FunctionProtoType *Proto = dyn_cast(Method->getType()->getAs()); assert(Proto && "Methods without a prototype cannot be overloaded"); @@ -7561,6 +7562,8 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl, Candidate.TookAddressOfOverload = CandidateSet.getKind() == OverloadCandidateSet::CSK_AddressOfOverloadSet; Candidate.ExplicitCallArguments = Args.size(); + Candidate.HasMatchedPackOnParmToNonPackOnArg = + HasMatchedPackOnParmToNonPackOnArg; bool IgnoreExplicitObject = (Method->isExplicitObjectMemberFunction() && @@ -7731,8 +7734,8 @@ void Sema::AddMethodTemplateCandidate( ConversionSequenceList Conversions; if (TemplateDeductionResult Result = DeduceTemplateArguments( 
MethodTmpl, ExplicitTemplateArgs, Args, Specialization, Info, - PartialOverloading, /*AggregateDeductionCandidate=*/false, ObjectType, - ObjectClassification, + PartialOverloading, /*AggregateDeductionCandidate=*/false, + /*PartialOrdering=*/false, ObjectType, ObjectClassification, [&](ArrayRef ParamTypes) { return CheckNonDependentConversions( MethodTmpl, ParamTypes, Args, CandidateSet, Conversions, @@ -7770,7 +7773,8 @@ void Sema::AddMethodTemplateCandidate( AddMethodCandidate(cast(Specialization), FoundDecl, ActingContext, ObjectType, ObjectClassification, Args, CandidateSet, SuppressUserConversions, PartialOverloading, - Conversions, PO); + Conversions, PO, + Info.hasMatchedPackOnParmToNonPackOnArg()); } /// Determine whether a given function template has a simple explicit specifier @@ -7816,6 +7820,7 @@ void Sema::AddTemplateOverloadCandidate( if (TemplateDeductionResult Result = DeduceTemplateArguments( FunctionTemplate, ExplicitTemplateArgs, Args, Specialization, Info, PartialOverloading, AggregateCandidateDeduction, + /*PartialOrdering=*/false, /*ObjectType=*/QualType(), /*ObjectClassification=*/Expr::Classification(), [&](ArrayRef ParamTypes) { @@ -7856,7 +7861,8 @@ void Sema::AddTemplateOverloadCandidate( Specialization, FoundDecl, Args, CandidateSet, SuppressUserConversions, PartialOverloading, AllowExplicit, /*AllowExplicitConversions=*/false, IsADLCandidate, Conversions, PO, - Info.AggregateDeductionCandidateHasMismatchedArity); + Info.AggregateDeductionCandidateHasMismatchedArity, + Info.hasMatchedPackOnParmToNonPackOnArg()); } bool Sema::CheckNonDependentConversions( @@ -7978,7 +7984,8 @@ void Sema::AddConversionCandidate( CXXConversionDecl *Conversion, DeclAccessPair FoundDecl, CXXRecordDecl *ActingContext, Expr *From, QualType ToType, OverloadCandidateSet &CandidateSet, bool AllowObjCConversionOnExplicit, - bool AllowExplicit, bool AllowResultConversion) { + bool AllowExplicit, bool AllowResultConversion, + bool HasMatchedPackOnParmToNonPackOnArg) 
{ assert(!Conversion->getDescribedFunctionTemplate() && "Conversion function templates use AddTemplateConversionCandidate"); QualType ConvType = Conversion->getConversionType().getNonReferenceType(); @@ -8023,6 +8030,8 @@ void Sema::AddConversionCandidate( Candidate.FinalConversion.setAllToTypes(ToType); Candidate.Viable = true; Candidate.ExplicitCallArguments = 1; + Candidate.HasMatchedPackOnParmToNonPackOnArg = + HasMatchedPackOnParmToNonPackOnArg; // Explicit functions are not actually candidates at all if we're not // allowing them in this context, but keep them around so we can point @@ -8224,7 +8233,8 @@ void Sema::AddTemplateConversionCandidate( assert(Specialization && "Missing function template specialization?"); AddConversionCandidate(Specialization, FoundDecl, ActingDC, From, ToType, CandidateSet, AllowObjCConversionOnExplicit, - AllowExplicit, AllowResultConversion); + AllowExplicit, AllowResultConversion, + Info.hasMatchedPackOnParmToNonPackOnArg()); } void Sema::AddSurrogateCandidate(CXXConversionDecl *Conversion, @@ -10576,6 +10586,10 @@ bool clang::isBetterOverloadCandidate( isa(Cand2.Function)) return isa(Cand1.Function); + if (Cand1.HasMatchedPackOnParmToNonPackOnArg != + Cand2.HasMatchedPackOnParmToNonPackOnArg) + return Cand2.HasMatchedPackOnParmToNonPackOnArg; + // -- F1 is a non-template function and F2 is a function template // specialization, or, if not that, bool Cand1IsSpecialization = Cand1.Function && diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 50b479052a25f..38196c5c2bc12 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -5204,7 +5204,8 @@ bool Sema::CheckTemplateArgument( unsigned ArgumentPackIndex, SmallVectorImpl &SugaredConverted, SmallVectorImpl &CanonicalConverted, - CheckTemplateArgumentKind CTAK) { + CheckTemplateArgumentKind CTAK, bool PartialOrdering, + bool *MatchedPackOnParmToNonPackOnArg) { // Check template type parameters. 
if (TemplateTypeParmDecl *TTP = dyn_cast(Param)) return CheckTemplateTypeArgument(TTP, Arg, SugaredConverted, @@ -5419,8 +5420,8 @@ bool Sema::CheckTemplateArgument( case TemplateArgument::Template: case TemplateArgument::TemplateExpansion: - if (CheckTemplateTemplateArgument(TempParm, Params, Arg, - /*IsDeduced=*/CTAK != CTAK_Specified)) + if (CheckTemplateTemplateArgument(TempParm, Params, Arg, PartialOrdering, + MatchedPackOnParmToNonPackOnArg)) return true; SugaredConverted.push_back(Arg.getArgument()); @@ -5494,7 +5495,7 @@ bool Sema::CheckTemplateArgumentList( SmallVectorImpl &SugaredConverted, SmallVectorImpl &CanonicalConverted, bool UpdateArgsWithConversions, bool *ConstraintsNotSatisfied, - bool PartialOrderingTTP) { + bool PartialOrderingTTP, bool *MatchedPackOnParmToNonPackOnArg) { if (ConstraintsNotSatisfied) *ConstraintsNotSatisfied = false; @@ -5508,7 +5509,7 @@ bool Sema::CheckTemplateArgumentList( SourceLocation RAngleLoc = NewArgs.getRAngleLoc(); - // C++ [temp.arg]p1: + // C++23 [temp.arg.general]p1: // [...] The type and form of each template-argument specified in // a template-id shall match the type and form specified for the // corresponding parameter declared by the template in its @@ -5527,8 +5528,7 @@ bool Sema::CheckTemplateArgumentList( DefaultArgs && ParamIdx >= DefaultArgs.StartPos) { // All written arguments should have been consumed by this point. assert(ArgIdx == NumArgs && "bad default argument deduction"); - // FIXME: Don't ignore parameter packs. - if (ParamIdx == DefaultArgs.StartPos && !(*Param)->isParameterPack()) { + if (ParamIdx == DefaultArgs.StartPos) { assert(Param + DefaultArgs.Args.size() <= ParamEnd); // Default arguments from a DeducedTemplateName are already converted. for (const TemplateArgument &DefArg : DefaultArgs.Args) { @@ -5570,60 +5570,69 @@ bool Sema::CheckTemplateArgumentList( } if (ArgIdx < NumArgs) { - // Check the template argument we were given. 
- if (CheckTemplateArgument(*Param, NewArgs[ArgIdx], Template, TemplateLoc, - RAngleLoc, SugaredArgumentPack.size(), - SugaredConverted, CanonicalConverted, - CTAK_Specified)) - return true; - - CanonicalConverted.back().setIsDefaulted( - clang::isSubstitutedDefaultArgument( - Context, NewArgs[ArgIdx].getArgument(), *Param, - CanonicalConverted, Params->getDepth())); - - bool PackExpansionIntoNonPack = - NewArgs[ArgIdx].getArgument().isPackExpansion() && - (!(*Param)->isTemplateParameterPack() || getExpandedPackSize(*Param)); - // CWG1430: Don't diagnose this pack expansion when partial - // ordering template template parameters. Some uses of the template could - // be valid, and invalid uses will be diagnosed later during - // instantiation. - if (PackExpansionIntoNonPack && !PartialOrderingTTP && - (isa(Template) || - isa(Template))) { - // CWG1430: we have a pack expansion as an argument to an - // alias template, and it's not part of a parameter pack. This - // can't be canonicalized, so reject it now. - // As for concepts - we cannot normalize constraints where this - // situation exists. - Diag(NewArgs[ArgIdx].getLocation(), - diag::err_template_expansion_into_fixed_list) - << (isa(Template) ? 
1 : 0) - << NewArgs[ArgIdx].getSourceRange(); - NoteTemplateParameterLocation(**Param); - return true; + TemplateArgumentLoc &ArgLoc = NewArgs[ArgIdx]; + bool NonPackParameter = + !(*Param)->isTemplateParameterPack() || getExpandedPackSize(*Param); + bool ArgIsExpansion = ArgLoc.getArgument().isPackExpansion(); + + if (ArgIsExpansion && PartialOrderingTTP) { + SmallVector Args(ParamEnd - Param); + for (TemplateParameterList::iterator First = Param; Param != ParamEnd; + ++Param) { + TemplateArgument &Arg = Args[Param - First]; + Arg = ArgLoc.getArgument(); + if (!(*Param)->isTemplateParameterPack() || + getExpandedPackSize(*Param)) + Arg = Arg.getPackExpansionPattern(); + TemplateArgumentLoc NewArgLoc(Arg, ArgLoc.getLocInfo()); + if (CheckTemplateArgument(*Param, NewArgLoc, Template, TemplateLoc, + RAngleLoc, SugaredArgumentPack.size(), + SugaredConverted, CanonicalConverted, + CTAK_Specified, /*PartialOrdering=*/false, + MatchedPackOnParmToNonPackOnArg)) + return true; + Arg = NewArgLoc.getArgument(); + CanonicalConverted.back().setIsDefaulted( + clang::isSubstitutedDefaultArgument(Context, Arg, *Param, + CanonicalConverted, + Params->getDepth())); + } + ArgLoc = + TemplateArgumentLoc(TemplateArgument::CreatePackCopy(Context, Args), + ArgLoc.getLocInfo()); + } else { + if (CheckTemplateArgument(*Param, ArgLoc, Template, TemplateLoc, + RAngleLoc, SugaredArgumentPack.size(), + SugaredConverted, CanonicalConverted, + CTAK_Specified, /*PartialOrdering=*/false, + MatchedPackOnParmToNonPackOnArg)) + return true; + CanonicalConverted.back().setIsDefaulted( + clang::isSubstitutedDefaultArgument(Context, ArgLoc.getArgument(), + *Param, CanonicalConverted, + Params->getDepth())); + if (ArgIsExpansion && NonPackParameter) { + // CWG1430/CWG2686: we have a pack expansion as an argument to an + // alias template or concept, and it's not part of a parameter pack. + // This can't be canonicalized, so reject it now. 
+ if (isa(Template)) { + Diag(ArgLoc.getLocation(), + diag::err_template_expansion_into_fixed_list) + << (isa(Template) ? 1 : 0) + << ArgLoc.getSourceRange(); + NoteTemplateParameterLocation(**Param); + return true; + } + } } // We're now done with this argument. ++ArgIdx; - if ((*Param)->isTemplateParameterPack()) { - // The template parameter was a template parameter pack, so take the - // deduced argument and place it on the argument pack. Note that we - // stay on the same template parameter so that we can deduce more - // arguments. - SugaredArgumentPack.push_back(SugaredConverted.pop_back_val()); - CanonicalArgumentPack.push_back(CanonicalConverted.pop_back_val()); - } else { - // Move to the next template parameter. - ++Param; - } + if (ArgIsExpansion && (PartialOrderingTTP || NonPackParameter)) { + // Directly convert the remaining arguments, because we don't know what + // parameters they'll match up with. - // If we just saw a pack expansion into a non-pack, then directly convert - // the remaining arguments, because we don't know what parameters they'll - // match up with. - if (PackExpansionIntoNonPack) { if (!SugaredArgumentPack.empty()) { // If we were part way through filling in an expanded parameter pack, // fall back to just producing individual arguments. @@ -5649,6 +5658,17 @@ bool Sema::CheckTemplateArgumentList( return false; } + if ((*Param)->isTemplateParameterPack()) { + // The template parameter was a template parameter pack, so take the + // deduced argument and place it on the argument pack. Note that we + // stay on the same template parameter so that we can deduce more + // arguments. + SugaredArgumentPack.push_back(SugaredConverted.pop_back_val()); + CanonicalArgumentPack.push_back(CanonicalConverted.pop_back_val()); + } else { + // Move to the next template parameter. + ++Param; + } continue; } @@ -5732,7 +5752,8 @@ bool Sema::CheckTemplateArgumentList( // Check the default template argument. 
if (CheckTemplateArgument(*Param, Arg, Template, TemplateLoc, RAngleLoc, 0, SugaredConverted, CanonicalConverted, - CTAK_Specified)) + CTAK_Specified, /*PartialOrdering=*/false, + /*MatchedPackOnParmToNonPackOnArg=*/nullptr)) return true; SugaredConverted.back().setIsDefaulted(true); @@ -5753,8 +5774,9 @@ bool Sema::CheckTemplateArgumentList( // pack expansions; they might be empty. This can happen even if // PartialTemplateArgs is false (the list of arguments is complete but // still dependent). - if (ArgIdx < NumArgs && CurrentInstantiationScope && - CurrentInstantiationScope->getPartiallySubstitutedPack()) { + if (PartialOrderingTTP || + (CurrentInstantiationScope && + CurrentInstantiationScope->getPartiallySubstitutedPack())) { while (ArgIdx < NumArgs && NewArgs[ArgIdx].getArgument().isPackExpansion()) { const TemplateArgument &Arg = NewArgs[ArgIdx++].getArgument(); @@ -7321,10 +7343,10 @@ static void DiagnoseTemplateParameterListArityMismatch( Sema &S, TemplateParameterList *New, TemplateParameterList *Old, Sema::TemplateParameterListEqualKind Kind, SourceLocation TemplateArgLoc); -bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param, - TemplateParameterList *Params, - TemplateArgumentLoc &Arg, - bool IsDeduced) { +bool Sema::CheckTemplateTemplateArgument( + TemplateTemplateParmDecl *Param, TemplateParameterList *Params, + TemplateArgumentLoc &Arg, bool PartialOrdering, + bool *MatchedPackOnParmToNonPackOnArg) { TemplateName Name = Arg.getArgument().getAsTemplateOrTemplatePattern(); auto [Template, DefaultArgs] = Name.getTemplateDeclAndDefaultArgs(); if (!Template) { @@ -7359,64 +7381,47 @@ bool Sema::CheckTemplateTemplateArgument(TemplateTemplateParmDecl *Param, << Template; } + if (!getLangOpts().RelaxedTemplateTemplateArgs) + return !TemplateParameterListsAreEqual( + Template->getTemplateParameters(), Params, /*Complain=*/true, + TPL_TemplateTemplateArgumentMatch, Arg.getLocation()); + // C++1z [temp.arg.template]p3: (DR 150) // A 
template-argument matches a template template-parameter P when P // is at least as specialized as the template-argument A. - if (getLangOpts().RelaxedTemplateTemplateArgs) { - // Quick check for the common case: - // If P contains a parameter pack, then A [...] matches P if each of A's - // template parameters matches the corresponding template parameter in - // the template-parameter-list of P. - if (TemplateParameterListsAreEqual( - Template->getTemplateParameters(), Params, false, - TPL_TemplateTemplateArgumentMatch, Arg.getLocation()) && - // If the argument has no associated constraints, then the parameter is - // definitely at least as specialized as the argument. - // Otherwise - we need a more thorough check. - !Template->hasAssociatedConstraints()) - return false; - - if (isTemplateTemplateParameterAtLeastAsSpecializedAs( - Params, Template, DefaultArgs, Arg.getLocation(), IsDeduced)) { - // P2113 - // C++20[temp.func.order]p2 - // [...] If both deductions succeed, the partial ordering selects the - // more constrained template (if one exists) as determined below. - SmallVector ParamsAC, TemplateAC; - Params->getAssociatedConstraints(ParamsAC); - // C++2a[temp.arg.template]p3 - // [...] In this comparison, if P is unconstrained, the constraints on A - // are not considered. - if (ParamsAC.empty()) - return false; + if (!isTemplateTemplateParameterAtLeastAsSpecializedAs( + Params, Param, Template, DefaultArgs, Arg.getLocation(), + PartialOrdering, MatchedPackOnParmToNonPackOnArg)) + return true; + // P2113 + // C++20[temp.func.order]p2 + // [...] If both deductions succeed, the partial ordering selects the + // more constrained template (if one exists) as determined below. + SmallVector ParamsAC, TemplateAC; + Params->getAssociatedConstraints(ParamsAC); + // C++20[temp.arg.template]p3 + // [...] In this comparison, if P is unconstrained, the constraints on A + // are not considered. 
+ if (ParamsAC.empty()) + return false; - Template->getAssociatedConstraints(TemplateAC); + Template->getAssociatedConstraints(TemplateAC); - bool IsParamAtLeastAsConstrained; - if (IsAtLeastAsConstrained(Param, ParamsAC, Template, TemplateAC, - IsParamAtLeastAsConstrained)) - return true; - if (!IsParamAtLeastAsConstrained) { - Diag(Arg.getLocation(), - diag::err_template_template_parameter_not_at_least_as_constrained) - << Template << Param << Arg.getSourceRange(); - Diag(Param->getLocation(), diag::note_entity_declared_at) << Param; - Diag(Template->getLocation(), diag::note_entity_declared_at) - << Template; - MaybeEmitAmbiguousAtomicConstraintsDiagnostic(Param, ParamsAC, Template, - TemplateAC); - return true; - } - return false; - } - // FIXME: Produce better diagnostics for deduction failures. + bool IsParamAtLeastAsConstrained; + if (IsAtLeastAsConstrained(Param, ParamsAC, Template, TemplateAC, + IsParamAtLeastAsConstrained)) + return true; + if (!IsParamAtLeastAsConstrained) { + Diag(Arg.getLocation(), + diag::err_template_template_parameter_not_at_least_as_constrained) + << Template << Param << Arg.getSourceRange(); + Diag(Param->getLocation(), diag::note_entity_declared_at) << Param; + Diag(Template->getLocation(), diag::note_entity_declared_at) << Template; + MaybeEmitAmbiguousAtomicConstraintsDiagnostic(Param, ParamsAC, Template, + TemplateAC); + return true; } - - return !TemplateParameterListsAreEqual(Template->getTemplateParameters(), - Params, - true, - TPL_TemplateTemplateArgumentMatch, - Arg.getLocation()); + return false; } static Sema::SemaDiagnosticBuilder noteLocation(Sema &S, const NamedDecl &Decl, @@ -9816,11 +9821,14 @@ DeclResult Sema::ActOnExplicitInstantiation( // Check that the template argument list is well-formed for this // template. 
+ bool PrimaryHasMatchedPackOnParmToNonPackOnArg = false; SmallVector SugaredConverted, CanonicalConverted; - if (CheckTemplateArgumentList(ClassTemplate, TemplateNameLoc, TemplateArgs, - /*DefaultArgs=*/{}, false, SugaredConverted, - CanonicalConverted, - /*UpdateArgsWithConversions=*/true)) + if (CheckTemplateArgumentList( + ClassTemplate, TemplateNameLoc, TemplateArgs, + /*DefaultArgs=*/{}, false, SugaredConverted, CanonicalConverted, + /*UpdateArgsWithConversions=*/true, + /*ConstraintsNotSatisfied=*/nullptr, /*PartialOrderingTTP=*/false, + &PrimaryHasMatchedPackOnParmToNonPackOnArg)) return true; // Find the class template specialization declaration that @@ -9941,7 +9949,9 @@ DeclResult Sema::ActOnExplicitInstantiation( = cast_or_null( Specialization->getDefinition()); if (!Def) - InstantiateClassTemplateSpecialization(TemplateNameLoc, Specialization, TSK); + InstantiateClassTemplateSpecialization( + TemplateNameLoc, Specialization, TSK, + /*Complain=*/true, PrimaryHasMatchedPackOnParmToNonPackOnArg); else if (TSK == TSK_ExplicitInstantiationDefinition) { MarkVTableUsed(TemplateNameLoc, Specialization, true); Specialization->setPointOfInstantiation(Def->getPointOfInstantiation()); diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 1c1f6e30ab7b8..7882d7a755d34 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -145,7 +145,9 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch( PartialOrderingKind POK, bool DeducedFromArrayBound, bool *HasDeducedAnyParam); -enum class PackFold { ParameterToArgument, ArgumentToParameter }; +/// What directions packs are allowed to match non-packs. 
+enum class PackFold { ParameterToArgument, ArgumentToParameter, Both }; + static TemplateDeductionResult DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, ArrayRef Ps, @@ -1715,7 +1717,21 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch( DeducedTemplateArgument Result = checkDeducedTemplateArguments(S.Context, Deduced[Index], NewDeduced); if (Result.isNull()) { - Info.Param = cast(TemplateParams->getParam(Index)); + // We can also get inconsistencies when matching NTTP type. + switch (NamedDecl *Param = TemplateParams->getParam(Index); + Param->getKind()) { + case Decl::TemplateTypeParm: + Info.Param = cast(Param); + break; + case Decl::NonTypeTemplateParm: + Info.Param = cast(Param); + break; + case Decl::TemplateTemplateParm: + Info.Param = cast(Param); + break; + default: + llvm_unreachable("unexpected kind"); + } Info.FirstArg = Deduced[Index]; Info.SecondArg = NewDeduced; return TemplateDeductionResult::Inconsistent; @@ -2555,8 +2571,31 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, if (const NonTypeTemplateParmDecl *NTTP = getDeducedParameterFromExpr(Info, P.getAsExpr())) { switch (A.getKind()) { + case TemplateArgument::Expression: { + const Expr *E = A.getAsExpr(); + // When checking NTTP, if either the parameter or the argument is + // dependent, as there would be otherwise nothing to deduce, we force + // the argument to the parameter type using this dependent implicit + // cast, in order to maintain invariants. Now we can deduce the + // resulting type from the original type, and deduce the original type + // against the parameter we are checking. + if (const auto *ICE = dyn_cast(E); + ICE && ICE->getCastKind() == clang::CK_Dependent) { + E = ICE->getSubExpr(); + if (auto Result = DeduceTemplateArgumentsByTypeMatch( + S, TemplateParams, ICE->getType(), E->getType(), Info, + Deduced, TDF_SkipNonDependent, + PartialOrdering ? 
PartialOrderingKind::NonCall + : PartialOrderingKind::None, + /*DeducedFromArrayBound=*/false, HasDeducedAnyParam); + Result != TemplateDeductionResult::Success) + return Result; + } + return DeduceNonTypeTemplateArgument( + S, TemplateParams, NTTP, DeducedTemplateArgument(A), E->getType(), + Info, PartialOrdering, Deduced, HasDeducedAnyParam); + } case TemplateArgument::Integral: - case TemplateArgument::Expression: case TemplateArgument::StructuralValue: return DeduceNonTypeTemplateArgument( S, TemplateParams, NTTP, DeducedTemplateArgument(A), @@ -2645,50 +2684,72 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, SmallVectorImpl &Deduced, bool NumberOfArgumentsMustMatch, bool PartialOrdering, PackFold PackFold, bool *HasDeducedAnyParam) { - if (PackFold == PackFold::ArgumentToParameter) - std::swap(Ps, As); + bool FoldPackParameter = PackFold == PackFold::ParameterToArgument || + PackFold == PackFold::Both, + FoldPackArgument = PackFold == PackFold::ArgumentToParameter || + PackFold == PackFold::Both; + // C++0x [temp.deduct.type]p9: // If the template argument list of P contains a pack expansion that is not // the last template argument, the entire template argument list is a // non-deduced context. - if (hasPackExpansionBeforeEnd(Ps)) + if (FoldPackParameter && hasPackExpansionBeforeEnd(Ps)) return TemplateDeductionResult::Success; // C++0x [temp.deduct.type]p9: // If P has a form that contains or , then each argument Pi of the // respective template argument list P is compared with the corresponding // argument Ai of the corresponding template argument list of A. - unsigned ArgIdx = 0, ParamIdx = 0; - for (; hasTemplateArgumentForDeduction(Ps, ParamIdx); ++ParamIdx) { - const TemplateArgument &P = Ps[ParamIdx]; - if (!P.isPackExpansion()) { + for (unsigned ArgIdx = 0, ParamIdx = 0; /**/; /**/) { + if (!hasTemplateArgumentForDeduction(Ps, ParamIdx)) + return !FoldPackParameter && hasTemplateArgumentForDeduction(As, ArgIdx) + ? 
TemplateDeductionResult::MiscellaneousDeductionFailure + : TemplateDeductionResult::Success; + + if (!Ps[ParamIdx].isPackExpansion()) { // The simple case: deduce template arguments by matching Pi and Ai. // Check whether we have enough arguments. if (!hasTemplateArgumentForDeduction(As, ArgIdx)) - return NumberOfArgumentsMustMatch + return !FoldPackArgument && NumberOfArgumentsMustMatch ? TemplateDeductionResult::MiscellaneousDeductionFailure : TemplateDeductionResult::Success; - // C++1z [temp.deduct.type]p9: - // During partial ordering, if Ai was originally a pack expansion [and] - // Pi is not a pack expansion, template argument deduction fails. - if (As[ArgIdx].isPackExpansion()) - return TemplateDeductionResult::MiscellaneousDeductionFailure; + if (As[ArgIdx].isPackExpansion()) { + // C++1z [temp.deduct.type]p9: + // During partial ordering, if Ai was originally a pack expansion + // [and] Pi is not a pack expansion, template argument deduction + // fails. + if (!FoldPackArgument) + return TemplateDeductionResult::MiscellaneousDeductionFailure; + + TemplateArgument Pattern = As[ArgIdx].getPackExpansionPattern(); + for (;;) { + // Deduce template parameters from the pattern. + if (auto Result = DeduceTemplateArguments( + S, TemplateParams, Ps[ParamIdx], Pattern, Info, + PartialOrdering, Deduced, HasDeducedAnyParam); + Result != TemplateDeductionResult::Success) + return Result; - // Perform deduction for this Pi/Ai pair. - TemplateArgument Pi = P, Ai = As[ArgIdx]; - if (PackFold == PackFold::ArgumentToParameter) - std::swap(Pi, Ai); - if (auto Result = DeduceTemplateArguments(S, TemplateParams, Pi, Ai, Info, - PartialOrdering, Deduced, - HasDeducedAnyParam); - Result != TemplateDeductionResult::Success) - return Result; + ++ParamIdx; + if (!hasTemplateArgumentForDeduction(Ps, ParamIdx)) + return TemplateDeductionResult::Success; + if (Ps[ParamIdx].isPackExpansion()) + break; + } + } else { + // Perform deduction for this Pi/Ai pair. 
+ if (auto Result = DeduceTemplateArguments( + S, TemplateParams, Ps[ParamIdx], As[ArgIdx], Info, + PartialOrdering, Deduced, HasDeducedAnyParam); + Result != TemplateDeductionResult::Success) + return Result; - // Move to the next argument. - ++ArgIdx; - continue; + ++ArgIdx; + ++ParamIdx; + continue; + } } // The parameter is a pack expansion. @@ -2698,7 +2759,7 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, // each remaining argument in the template argument list of A. Each // comparison deduces template arguments for subsequent positions in the // template parameter packs expanded by Pi. - TemplateArgument Pattern = P.getPackExpansionPattern(); + TemplateArgument Pattern = Ps[ParamIdx].getPackExpansionPattern(); // Prepare to deduce the packs within the pattern. PackDeductionScope PackScope(S, TemplateParams, Deduced, Info, Pattern); @@ -2709,13 +2770,16 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, for (; hasTemplateArgumentForDeduction(As, ArgIdx) && PackScope.hasNextElement(); ++ArgIdx) { - TemplateArgument Pi = Pattern, Ai = As[ArgIdx]; - if (PackFold == PackFold::ArgumentToParameter) - std::swap(Pi, Ai); + if (!As[ArgIdx].isPackExpansion()) { + if (!FoldPackParameter) + return TemplateDeductionResult::MiscellaneousDeductionFailure; + if (FoldPackArgument) + Info.setMatchedPackOnParmToNonPackOnArg(); + } // Deduce template arguments from the pattern. - if (auto Result = DeduceTemplateArguments(S, TemplateParams, Pi, Ai, Info, - PartialOrdering, Deduced, - HasDeducedAnyParam); + if (auto Result = DeduceTemplateArguments( + S, TemplateParams, Pattern, As[ArgIdx], Info, PartialOrdering, + Deduced, HasDeducedAnyParam); Result != TemplateDeductionResult::Success) return Result; @@ -2724,12 +2788,8 @@ DeduceTemplateArguments(Sema &S, TemplateParameterList *TemplateParams, // Build argument packs for each of the parameter packs expanded by this // pack expansion. 
- if (auto Result = PackScope.finish(); - Result != TemplateDeductionResult::Success) - return Result; + return PackScope.finish(); } - - return TemplateDeductionResult::Success; } TemplateDeductionResult Sema::DeduceTemplateArguments( @@ -2898,7 +2958,7 @@ Sema::getIdentityTemplateArgumentLoc(NamedDecl *TemplateParm, /// fully-converted template arguments. static bool ConvertDeducedTemplateArgument( Sema &S, NamedDecl *Param, DeducedTemplateArgument Arg, NamedDecl *Template, - TemplateDeductionInfo &Info, bool IsDeduced, + TemplateDeductionInfo &Info, bool IsDeduced, bool PartialOrdering, SmallVectorImpl &SugaredOutput, SmallVectorImpl &CanonicalOutput) { auto ConvertArg = [&](DeducedTemplateArgument Arg, @@ -2909,15 +2969,20 @@ static bool ConvertDeducedTemplateArgument( TemplateArgumentLoc ArgLoc = S.getTrivialTemplateArgumentLoc( Arg, QualType(), Info.getLocation(), Param); + bool MatchedPackOnParmToNonPackOnArg = false; // Check the template argument, converting it as necessary. - return S.CheckTemplateArgument( + auto Res = S.CheckTemplateArgument( Param, ArgLoc, Template, Template->getLocation(), Template->getSourceRange().getEnd(), ArgumentPackIndex, SugaredOutput, CanonicalOutput, IsDeduced ? (Arg.wasDeducedFromArrayBound() ? 
Sema::CTAK_DeducedFromArrayBound : Sema::CTAK_Deduced) - : Sema::CTAK_Specified); + : Sema::CTAK_Specified, + PartialOrdering, &MatchedPackOnParmToNonPackOnArg); + if (MatchedPackOnParmToNonPackOnArg) + Info.setMatchedPackOnParmToNonPackOnArg(); + return Res; }; if (Arg.getKind() == TemplateArgument::Pack) { @@ -3000,9 +3065,9 @@ static TemplateDeductionResult ConvertDeducedTemplateArguments( SmallVectorImpl &Deduced, TemplateDeductionInfo &Info, SmallVectorImpl &SugaredBuilder, - SmallVectorImpl &CanonicalBuilder, - LocalInstantiationScope *CurrentInstantiationScope = nullptr, - unsigned NumAlreadyConverted = 0, bool *IsIncomplete = nullptr) { + SmallVectorImpl &CanonicalBuilder, bool PartialOrdering, + LocalInstantiationScope *CurrentInstantiationScope, + unsigned NumAlreadyConverted, bool *IsIncomplete) { TemplateParameterList *TemplateParams = Template->getTemplateParameters(); for (unsigned I = 0, N = TemplateParams->size(); I != N; ++I) { @@ -3045,8 +3110,8 @@ static TemplateDeductionResult ConvertDeducedTemplateArguments( // We may have deduced this argument, so it still needs to be // checked and converted. if (ConvertDeducedTemplateArgument(S, Param, Deduced[I], Template, Info, - IsDeduced, SugaredBuilder, - CanonicalBuilder)) { + IsDeduced, PartialOrdering, + SugaredBuilder, CanonicalBuilder)) { Info.Param = makeTemplateParameter(Param); // FIXME: These template arguments are temporary. Free them! Info.reset( @@ -3112,7 +3177,9 @@ static TemplateDeductionResult ConvertDeducedTemplateArguments( // Check whether we can actually use the default argument. 
if (S.CheckTemplateArgument( Param, DefArg, TD, TD->getLocation(), TD->getSourceRange().getEnd(), - 0, SugaredBuilder, CanonicalBuilder, Sema::CTAK_Specified)) { + /*ArgumentPackIndex=*/0, SugaredBuilder, CanonicalBuilder, + Sema::CTAK_Specified, /*PartialOrdering=*/false, + /*MatchedPackOnParmToNonPackOnArg=*/nullptr)) { Info.Param = makeTemplateParameter( const_cast(TemplateParams->getParam(I))); // FIXME: These template arguments are temporary. Free them! @@ -3220,7 +3287,9 @@ FinishTemplateArgumentDeduction( SmallVector SugaredBuilder, CanonicalBuilder; if (auto Result = ConvertDeducedTemplateArguments( S, Partial, IsPartialOrdering, Deduced, Info, SugaredBuilder, - CanonicalBuilder); + CanonicalBuilder, IsPartialOrdering, + /*CurrentInstantiationScope=*/nullptr, /*NumAlreadyConverted=*/0, + /*IsIncomplete=*/nullptr); Result != TemplateDeductionResult::Success) return Result; @@ -3261,16 +3330,20 @@ FinishTemplateArgumentDeduction( return TemplateDeductionResult::SubstitutionFailure; } + bool MatchedPackOnParmToNonPackOnArg = false; bool ConstraintsNotSatisfied; SmallVector SugaredConvertedInstArgs, CanonicalConvertedInstArgs; if (S.CheckTemplateArgumentList( Template, Partial->getLocation(), InstArgs, /*DefaultArgs=*/{}, false, SugaredConvertedInstArgs, CanonicalConvertedInstArgs, - /*UpdateArgsWithConversions=*/true, &ConstraintsNotSatisfied)) + /*UpdateArgsWithConversions=*/true, &ConstraintsNotSatisfied, + /*PartialOrderingTTP=*/false, &MatchedPackOnParmToNonPackOnArg)) return ConstraintsNotSatisfied ? TemplateDeductionResult::ConstraintsNotSatisfied : TemplateDeductionResult::SubstitutionFailure; + if (MatchedPackOnParmToNonPackOnArg) + Info.setMatchedPackOnParmToNonPackOnArg(); TemplateParameterList *TemplateParams = Template->getTemplateParameters(); for (unsigned I = 0, E = TemplateParams->size(); I != E; ++I) { @@ -3308,7 +3381,6 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( // Unevaluated SFINAE context. 
EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::Unevaluated); - Sema::SFINAETrap Trap(S); Sema::ContextRAII SavedContext(S, getAsDeclContextOrEnclosing(Template)); @@ -3317,28 +3389,69 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( // explicitly specified, template argument deduction fails. SmallVector SugaredBuilder, CanonicalBuilder; if (auto Result = ConvertDeducedTemplateArguments( - S, Template, /*IsDeduced*/ PartialOrdering, Deduced, Info, - SugaredBuilder, CanonicalBuilder, + S, Template, /*IsDeduced=*/PartialOrdering, Deduced, Info, + SugaredBuilder, CanonicalBuilder, PartialOrdering, /*CurrentInstantiationScope=*/nullptr, - /*NumAlreadyConverted=*/0U); + /*NumAlreadyConverted=*/0U, /*IsIncomplete=*/nullptr); Result != TemplateDeductionResult::Success) return Result; // Check that we produced the correct argument list. - TemplateParameterList *TemplateParams = Template->getTemplateParameters(); - for (unsigned I = 0, E = TemplateParams->size(); I != E; ++I) { - TemplateArgument InstArg = CanonicalBuilder[I]; - if (!isSameTemplateArg(S.Context, TemplateArgs[I], InstArg, PartialOrdering, - /*PackExpansionMatchesPack=*/true)) { - Info.Param = makeTemplateParameter(TemplateParams->getParam(I)); - Info.FirstArg = TemplateArgs[I]; - Info.SecondArg = InstArg; - return TemplateDeductionResult::NonDeducedMismatch; + SmallVector, 4> PsStack{TemplateArgs}, + AsStack{CanonicalBuilder}; + for (;;) { + auto take = [](SmallVectorImpl> &Stack) + -> std::tuple &, TemplateArgument> { + while (!Stack.empty()) { + auto &Xs = Stack.back(); + if (Xs.empty()) { + Stack.pop_back(); + continue; + } + auto &X = Xs.front(); + if (X.getKind() == TemplateArgument::Pack) { + Stack.emplace_back(X.getPackAsArray()); + Xs = Xs.drop_front(); + continue; + } + assert(!X.isNull()); + return {Xs, X}; + } + static constexpr ArrayRef None; + return {const_cast &>(None), + TemplateArgument()}; + }; + auto [Ps, P] = take(PsStack); + auto [As, 
A] = take(AsStack); + if (P.isNull() && A.isNull()) + break; + TemplateArgument PP = P.isPackExpansion() ? P.getPackExpansionPattern() : P, + PA = A.isPackExpansion() ? A.getPackExpansionPattern() : A; + if (!isSameTemplateArg(S.Context, PP, PA, /*PartialOrdering=*/false)) { + if (!P.isPackExpansion() && !A.isPackExpansion()) { + Info.Param = + makeTemplateParameter(Template->getTemplateParameters()->getParam( + (PsStack.empty() ? TemplateArgs.end() + : PsStack.front().begin()) - + TemplateArgs.begin())); + Info.FirstArg = P; + Info.SecondArg = A; + return TemplateDeductionResult::NonDeducedMismatch; + } + if (P.isPackExpansion()) { + Ps = Ps.drop_front(); + continue; + } + if (A.isPackExpansion()) { + As = As.drop_front(); + continue; + } } + Ps = Ps.drop_front(P.isPackExpansion() ? 0 : 1); + As = As.drop_front(A.isPackExpansion() && !P.isPackExpansion() ? 0 : 1); } - - if (Trap.hasErrorOccurred()) - return TemplateDeductionResult::SubstitutionFailure; + assert(PsStack.empty()); + assert(AsStack.empty()); if (!PartialOrdering) { if (auto Result = CheckDeducedArgumentConstraints( @@ -3360,7 +3473,6 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( // Unevaluated SFINAE context. EnterExpressionEvaluationContext Unevaluated( S, Sema::ExpressionEvaluationContext::Unevaluated); - Sema::SFINAETrap Trap(S); Sema::ContextRAII SavedContext(S, getAsDeclContextOrEnclosing(TD)); @@ -3369,20 +3481,15 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( // explicitly specified, template argument deduction fails. 
SmallVector SugaredBuilder, CanonicalBuilder; if (auto Result = ConvertDeducedTemplateArguments( - S, TD, /*IsPartialOrdering=*/false, Deduced, Info, SugaredBuilder, - CanonicalBuilder); - Result != TemplateDeductionResult::Success) - return Result; - - if (Trap.hasErrorOccurred()) - return TemplateDeductionResult::SubstitutionFailure; - - if (auto Result = CheckDeducedArgumentConstraints(S, TD, SugaredBuilder, - CanonicalBuilder, Info); + S, TD, /*IsDeduced=*/false, Deduced, Info, SugaredBuilder, + CanonicalBuilder, /*PartialOrdering=*/false, + /*CurrentInstantiationScope=*/nullptr, /*NumAlreadyConverted=*/0, + /*IsIncomplete=*/nullptr); Result != TemplateDeductionResult::Success) return Result; - return TemplateDeductionResult::Success; + return ::CheckDeducedArgumentConstraints(S, TD, SugaredBuilder, + CanonicalBuilder, Info); } /// Perform template argument deduction to determine whether the given template @@ -3429,16 +3536,20 @@ DeduceTemplateArguments(Sema &S, T *Partial, if (Inst.isInvalid()) return TemplateDeductionResult::InstantiationDepth; - if (Trap.hasErrorOccurred()) - return TemplateDeductionResult::SubstitutionFailure; - TemplateDeductionResult Result; S.runWithSufficientStackSpace(Info.getLocation(), [&] { Result = ::FinishTemplateArgumentDeduction(S, Partial, /*IsPartialOrdering=*/false, TemplateArgs, Deduced, Info); }); - return Result; + + if (Result != TemplateDeductionResult::Success) + return Result; + + if (Trap.hasErrorOccurred()) + return TemplateDeductionResult::SubstitutionFailure; + + return TemplateDeductionResult::Success; } TemplateDeductionResult @@ -3494,14 +3605,18 @@ Sema::DeduceTemplateArgumentsFromType(TemplateDecl *TD, QualType FromType, if (Inst.isInvalid()) return TemplateDeductionResult::InstantiationDepth; - if (Trap.hasErrorOccurred()) - return TemplateDeductionResult::SubstitutionFailure; - TemplateDeductionResult Result; runWithSufficientStackSpace(Info.getLocation(), [&] { Result = 
::FinishTemplateArgumentDeduction(*this, TD, Deduced, Info); }); - return Result; + + if (Result != TemplateDeductionResult::Success) + return Result; + + if (Trap.hasErrorOccurred()) + return TemplateDeductionResult::SubstitutionFailure; + + return TemplateDeductionResult::Success; } /// Determine whether the given type T is a simple-template-id type. @@ -3907,7 +4022,8 @@ TemplateDeductionResult Sema::FinishTemplateArgumentDeduction( unsigned NumExplicitlySpecified, FunctionDecl *&Specialization, TemplateDeductionInfo &Info, SmallVectorImpl const *OriginalCallArgs, - bool PartialOverloading, llvm::function_ref CheckNonDependent) { + bool PartialOverloading, bool PartialOrdering, + llvm::function_ref CheckNonDependent) { // Unevaluated SFINAE context. EnterExpressionEvaluationContext Unevaluated( *this, Sema::ExpressionEvaluationContext::Unevaluated); @@ -3930,9 +4046,10 @@ TemplateDeductionResult Sema::FinishTemplateArgumentDeduction( bool IsIncomplete = false; SmallVector SugaredBuilder, CanonicalBuilder; if (auto Result = ConvertDeducedTemplateArguments( - *this, FunctionTemplate, /*IsDeduced*/ true, Deduced, Info, - SugaredBuilder, CanonicalBuilder, CurrentInstantiationScope, - NumExplicitlySpecified, PartialOverloading ? &IsIncomplete : nullptr); + *this, FunctionTemplate, /*IsDeduced=*/true, Deduced, Info, + SugaredBuilder, CanonicalBuilder, PartialOrdering, + CurrentInstantiationScope, NumExplicitlySpecified, + PartialOverloading ? 
&IsIncomplete : nullptr); Result != TemplateDeductionResult::Success) return Result; @@ -4463,7 +4580,8 @@ TemplateDeductionResult Sema::DeduceTemplateArguments( TemplateArgumentListInfo *ExplicitTemplateArgs, ArrayRef Args, FunctionDecl *&Specialization, TemplateDeductionInfo &Info, bool PartialOverloading, bool AggregateDeductionCandidate, - QualType ObjectType, Expr::Classification ObjectClassification, + bool PartialOrdering, QualType ObjectType, + Expr::Classification ObjectClassification, llvm::function_ref)> CheckNonDependent) { if (FunctionTemplate->isInvalidDecl()) return TemplateDeductionResult::Invalid; @@ -4678,7 +4796,8 @@ TemplateDeductionResult Sema::DeduceTemplateArguments( runWithSufficientStackSpace(Info.getLocation(), [&] { Result = FinishTemplateArgumentDeduction( FunctionTemplate, Deduced, NumExplicitlySpecified, Specialization, Info, - &OriginalCallArgs, PartialOverloading, [&, CallingCtx]() { + &OriginalCallArgs, PartialOverloading, PartialOrdering, + [&, CallingCtx]() { ContextRAII SavedContext(*this, CallingCtx); return CheckNonDependent(ParamTypesForArgChecking); }); @@ -4790,9 +4909,10 @@ TemplateDeductionResult Sema::DeduceTemplateArguments( TemplateDeductionResult Result; runWithSufficientStackSpace(Info.getLocation(), [&] { - Result = FinishTemplateArgumentDeduction(FunctionTemplate, Deduced, - NumExplicitlySpecified, - Specialization, Info); + Result = FinishTemplateArgumentDeduction( + FunctionTemplate, Deduced, NumExplicitlySpecified, Specialization, Info, + /*OriginalCallArgs=*/nullptr, /*PartialOverloading=*/false, + /*PartialOrdering=*/true); }); if (Result != TemplateDeductionResult::Success) return Result; @@ -4972,9 +5092,10 @@ TemplateDeductionResult Sema::DeduceTemplateArguments( FunctionDecl *ConversionSpecialized = nullptr; TemplateDeductionResult Result; runWithSufficientStackSpace(Info.getLocation(), [&] { - Result = FinishTemplateArgumentDeduction(ConversionTemplate, Deduced, 0, - ConversionSpecialized, Info, - 
&OriginalCallArgs); + Result = FinishTemplateArgumentDeduction( + ConversionTemplate, Deduced, 0, ConversionSpecialized, Info, + &OriginalCallArgs, /*PartialOverloading=*/false, + /*PartialOrdering=*/false); }); Specialization = cast_or_null(ConversionSpecialized); return Result; @@ -5551,7 +5672,8 @@ static TemplateDeductionResult FinishTemplateArgumentDeduction( SmallVector SugaredBuilder, CanonicalBuilder; if (auto Result = ConvertDeducedTemplateArguments( S, FTD, /*IsDeduced=*/true, Deduced, Info, SugaredBuilder, - CanonicalBuilder, /*CurrentInstantiationScope=*/nullptr, + CanonicalBuilder, /*PartialOrdering=*/true, + /*CurrentInstantiationScope=*/nullptr, /*NumAlreadyConverted=*/0, &IsIncomplete); Result != TemplateDeductionResult::Success) return Result; @@ -6141,14 +6263,23 @@ static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2, return false; const auto *TST1 = cast(T1); - bool AtLeastAsSpecialized; + + Sema::SFINAETrap Trap(S); + + TemplateDeductionResult Result; S.runWithSufficientStackSpace(Info.getLocation(), [&] { - AtLeastAsSpecialized = - FinishTemplateArgumentDeduction( - S, P2, /*IsPartialOrdering=*/true, TST1->template_arguments(), - Deduced, Info) == TemplateDeductionResult::Success; + Result = ::FinishTemplateArgumentDeduction( + S, P2, /*IsPartialOrdering=*/true, TST1->template_arguments(), Deduced, + Info); }); - return AtLeastAsSpecialized; + + if (Result != TemplateDeductionResult::Success) + return false; + + if (Trap.hasErrorOccurred()) + return false; + + return true; } namespace { @@ -6386,8 +6517,9 @@ bool Sema::isMoreSpecializedThanPrimary( } bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( - TemplateParameterList *P, TemplateDecl *AArg, - const DefaultArguments &DefaultArgs, SourceLocation Loc, bool IsDeduced) { + TemplateParameterList *P, TemplateDecl *PArg, TemplateDecl *AArg, + const DefaultArguments &DefaultArgs, SourceLocation ArgLoc, + bool PartialOrdering, bool *MatchedPackOnParmToNonPackOnArg) { 
// C++1z [temp.arg.template]p4: (DR 150) // A template template-parameter P is at least as specialized as a // template template-argument A if, given the following rewrite to two @@ -6399,6 +6531,12 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( // TemplateParameterList *A = AArg->getTemplateParameters(); + Sema::InstantiatingTemplate Inst( + *this, ArgLoc, Sema::InstantiatingTemplate::PartialOrderingTTP(), PArg, + SourceRange(P->getTemplateLoc(), P->getRAngleLoc())); + if (Inst.isInvalid()) + return false; + // Given an invented class template X with the template parameter list of // A (including default arguments): // - Each function template has a single function parameter whose type is @@ -6427,22 +6565,20 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( } PArgs.clear(); - SFINAETrap Trap(*this); // C++1z [temp.arg.template]p3: // If the rewrite produces an invalid type, then P is not at least as // specialized as A. - SmallVector SugaredPArgs; - if (CheckTemplateArgumentList(AArg, Loc, PArgList, DefaultArgs, false, - SugaredPArgs, PArgs, - /*UpdateArgsWithConversions=*/true, - /*ConstraintsNotSatisfied=*/nullptr, - /*PartialOrderTTP=*/true) || - Trap.hasErrorOccurred()) + SmallVector CanonicalPArgs; + if (CheckTemplateArgumentList( + AArg, ArgLoc, PArgList, DefaultArgs, false, PArgs, CanonicalPArgs, + /*UpdateArgsWithConversions=*/true, + /*ConstraintsNotSatisfied=*/nullptr, + /*PartialOrderingTTP=*/true, MatchedPackOnParmToNonPackOnArg)) return false; } // Determine whether P1 is at least as specialized as P2. - TemplateDeductionInfo Info(Loc, A->getDepth()); + TemplateDeductionInfo Info(ArgLoc, A->getDepth()); SmallVector Deduced; Deduced.resize(A->size()); @@ -6457,29 +6593,92 @@ bool Sema::isTemplateTemplateParameterAtLeastAsSpecializedAs( // be inverted between Ps and As. 
On non-deduced context, matching needs to // happen both ways, according to [temp.arg.template]p3, but this is // currently implemented as a special case elsewhere. - if (::DeduceTemplateArguments(*this, A, AArgs, PArgs, Info, Deduced, - /*NumberOfArgumentsMustMatch=*/false, - /*PartialOrdering=*/true, - IsDeduced ? PackFold::ArgumentToParameter - : PackFold::ParameterToArgument, - /*HasDeducedAnyParam=*/nullptr) != - TemplateDeductionResult::Success) + switch (::DeduceTemplateArguments( + *this, A, AArgs, PArgs, Info, Deduced, + /*NumberOfArgumentsMustMatch=*/false, /*PartialOrdering=*/true, + PartialOrdering ? PackFold::ArgumentToParameter : PackFold::Both, + /*HasDeducedAnyParam=*/nullptr)) { + case clang::TemplateDeductionResult::Success: + if (MatchedPackOnParmToNonPackOnArg && + Info.hasMatchedPackOnParmToNonPackOnArg()) + *MatchedPackOnParmToNonPackOnArg = true; + break; + + case TemplateDeductionResult::MiscellaneousDeductionFailure: + Diag(AArg->getLocation(), diag::err_template_param_list_different_arity) + << (A->size() > P->size()) << /*isTemplateTemplateParameter=*/true + << SourceRange(A->getTemplateLoc(), P->getRAngleLoc()); return false; + case TemplateDeductionResult::NonDeducedMismatch: + Diag(AArg->getLocation(), diag::err_non_deduced_mismatch) + << Info.FirstArg << Info.SecondArg; + return false; + case TemplateDeductionResult::Inconsistent: + Diag(getAsNamedDecl(Info.Param)->getLocation(), + diag::err_inconsistent_deduction) + << Info.FirstArg << Info.SecondArg; + return false; + case TemplateDeductionResult::AlreadyDiagnosed: + return false; + + // None of these should happen for a plain deduction. 
+ case TemplateDeductionResult::Invalid: + case TemplateDeductionResult::InstantiationDepth: + case TemplateDeductionResult::Incomplete: + case TemplateDeductionResult::IncompletePack: + case TemplateDeductionResult::Underqualified: + case TemplateDeductionResult::SubstitutionFailure: + case TemplateDeductionResult::DeducedMismatch: + case TemplateDeductionResult::DeducedMismatchNested: + case TemplateDeductionResult::TooManyArguments: + case TemplateDeductionResult::TooFewArguments: + case TemplateDeductionResult::InvalidExplicitArguments: + case TemplateDeductionResult::NonDependentConversionFailure: + case TemplateDeductionResult::ConstraintsNotSatisfied: + case TemplateDeductionResult::CUDATargetMismatch: + llvm_unreachable("Unexpected Result"); + } SmallVector DeducedArgs(Deduced.begin(), Deduced.end()); - Sema::InstantiatingTemplate Inst(*this, Info.getLocation(), AArg, DeducedArgs, - Info); - if (Inst.isInvalid()) - return false; - bool AtLeastAsSpecialized; + TemplateDeductionResult TDK; runWithSufficientStackSpace(Info.getLocation(), [&] { - AtLeastAsSpecialized = - ::FinishTemplateArgumentDeduction( - *this, AArg, /*IsPartialOrdering=*/true, PArgs, Deduced, Info) == - TemplateDeductionResult::Success; + TDK = ::FinishTemplateArgumentDeduction( + *this, AArg, /*IsPartialOrdering=*/true, PArgs, Deduced, Info); }); - return AtLeastAsSpecialized; + switch (TDK) { + case TemplateDeductionResult::Success: + return true; + + // It doesn't seem possible to get a non-deduced mismatch when partial + // ordering TTPs. + case TemplateDeductionResult::NonDeducedMismatch: + llvm_unreachable("Unexpected NonDeducedMismatch"); + + // Substitution failures should have already been diagnosed. + case TemplateDeductionResult::AlreadyDiagnosed: + case TemplateDeductionResult::SubstitutionFailure: + case TemplateDeductionResult::InstantiationDepth: + return false; + + // None of these should happen when just converting deduced arguments. 
+ case TemplateDeductionResult::Invalid: + case TemplateDeductionResult::Incomplete: + case TemplateDeductionResult::IncompletePack: + case TemplateDeductionResult::Inconsistent: + case TemplateDeductionResult::Underqualified: + case TemplateDeductionResult::DeducedMismatch: + case TemplateDeductionResult::DeducedMismatchNested: + case TemplateDeductionResult::TooManyArguments: + case TemplateDeductionResult::TooFewArguments: + case TemplateDeductionResult::InvalidExplicitArguments: + case TemplateDeductionResult::NonDependentConversionFailure: + case TemplateDeductionResult::ConstraintsNotSatisfied: + case TemplateDeductionResult::MiscellaneousDeductionFailure: + case TemplateDeductionResult::CUDATargetMismatch: + llvm_unreachable("Unexpected Result"); + } + llvm_unreachable("Unexpected TDK"); } namespace { diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 839c4e8a28220..3dc5696bd3821 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -575,6 +575,7 @@ bool Sema::CodeSynthesisContext::isInstantiationRecord() const { case LambdaExpressionSubstitution: case BuildingDeductionGuides: case TypeAliasTemplateInstantiation: + case PartialOrderingTTP: return false; // This function should never be called when Kind's value is Memoization. 
@@ -805,6 +806,11 @@ Sema::InstantiatingTemplate::InstantiatingTemplate( SemaRef, CodeSynthesisContext::BuildingDeductionGuides, PointOfInstantiation, InstantiationRange, Entity) {} +Sema::InstantiatingTemplate::InstantiatingTemplate( + Sema &SemaRef, SourceLocation ArgLoc, PartialOrderingTTP, + TemplateDecl *PArg, SourceRange InstantiationRange) + : InstantiatingTemplate(SemaRef, CodeSynthesisContext::PartialOrderingTTP, + ArgLoc, InstantiationRange, PArg) {} void Sema::pushCodeSynthesisContext(CodeSynthesisContext Ctx) { Ctx.SavedInNonInstantiationSFINAEContext = InNonInstantiationSFINAEContext; @@ -1243,6 +1249,14 @@ void Sema::PrintInstantiationStack() { << cast(Active->Entity) << Active->InstantiationRange; break; + case CodeSynthesisContext::PartialOrderingTTP: + Diags.Report(Active->PointOfInstantiation, + diag::note_template_arg_template_params_mismatch); + if (SourceLocation ParamLoc = Active->Entity->getLocation(); + ParamLoc.isValid()) + Diags.Report(ParamLoc, diag::note_template_prev_declaration) + << /*isTemplateTemplateParam=*/true << Active->InstantiationRange; + break; } } } @@ -1285,6 +1299,7 @@ std::optional Sema::isSFINAEContext() const { case CodeSynthesisContext::PriorTemplateArgumentSubstitution: case CodeSynthesisContext::DefaultTemplateArgumentChecking: case CodeSynthesisContext::RewritingOperatorAsSpaceship: + case CodeSynthesisContext::PartialOrderingTTP: // A default template argument instantiation and substitution into // template parameters with arguments for prior parameters may or may // not be a SFINAE context; look further up the stack. @@ -4039,11 +4054,11 @@ bool Sema::usesPartialOrExplicitSpecialization( /// Get the instantiation pattern to use to instantiate the definition of a /// given ClassTemplateSpecializationDecl (either the pattern of the primary /// template or of a partial specialization). 
-static ActionResult -getPatternForClassTemplateSpecialization( +static ActionResult getPatternForClassTemplateSpecialization( Sema &S, SourceLocation PointOfInstantiation, ClassTemplateSpecializationDecl *ClassTemplateSpec, - TemplateSpecializationKind TSK) { + TemplateSpecializationKind TSK, + bool PrimaryHasMatchedPackOnParmToNonPackOnArg) { Sema::InstantiatingTemplate Inst(S, PointOfInstantiation, ClassTemplateSpec); if (Inst.isInvalid()) return {/*Invalid=*/true}; @@ -4066,7 +4081,7 @@ getPatternForClassTemplateSpecialization( // specialization with the template argument lists of the partial // specializations. typedef PartialSpecMatchResult MatchResult; - SmallVector Matched; + SmallVector Matched, ExtraMatched; SmallVector PartialSpecs; Template->getPartialSpecializations(PartialSpecs); TemplateSpecCandidateSet FailedCandidates(PointOfInstantiation); @@ -4096,11 +4111,13 @@ getPatternForClassTemplateSpecialization( MakeDeductionFailureInfo(S.Context, Result, Info)); (void)Result; } else { - Matched.push_back(PartialSpecMatchResult()); - Matched.back().Partial = Partial; - Matched.back().Args = Info.takeCanonical(); + auto &List = + Info.hasMatchedPackOnParmToNonPackOnArg() ? ExtraMatched : Matched; + List.push_back(MatchResult{Partial, Info.takeCanonical()}); } } + if (Matched.empty() && PrimaryHasMatchedPackOnParmToNonPackOnArg) + Matched = std::move(ExtraMatched); // If we're dealing with a member template where the template parameters // have been instantiated, this provides the original template parameters @@ -4203,7 +4220,8 @@ getPatternForClassTemplateSpecialization( bool Sema::InstantiateClassTemplateSpecialization( SourceLocation PointOfInstantiation, ClassTemplateSpecializationDecl *ClassTemplateSpec, - TemplateSpecializationKind TSK, bool Complain) { + TemplateSpecializationKind TSK, bool Complain, + bool PrimaryHasMatchedPackOnParmToNonPackOnArg) { // Perform the actual instantiation on the canonical declaration. 
ClassTemplateSpec = cast( ClassTemplateSpec->getCanonicalDecl()); @@ -4211,8 +4229,9 @@ bool Sema::InstantiateClassTemplateSpecialization( return true; ActionResult Pattern = - getPatternForClassTemplateSpecialization(*this, PointOfInstantiation, - ClassTemplateSpec, TSK); + getPatternForClassTemplateSpecialization( + *this, PointOfInstantiation, ClassTemplateSpec, TSK, + PrimaryHasMatchedPackOnParmToNonPackOnArg); if (!Pattern.isUsable()) return Pattern.isInvalid(); diff --git a/clang/lib/Sema/SemaX86.cpp b/clang/lib/Sema/SemaX86.cpp index 7feca138e3e2c..392d60f640984 100644 --- a/clang/lib/Sema/SemaX86.cpp +++ b/clang/lib/Sema/SemaX86.cpp @@ -931,9 +931,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_rndscaleps_mask: case X86::BI__builtin_ia32_rndscalepd_mask: case X86::BI__builtin_ia32_rndscaleph_mask: - case X86::BI__builtin_ia32_vrndscalenepbf16_128_mask: - case X86::BI__builtin_ia32_vrndscalenepbf16_256_mask: - case X86::BI__builtin_ia32_vrndscalenepbf16_mask: + case X86::BI__builtin_ia32_vrndscalebf16_128_mask: + case X86::BI__builtin_ia32_vrndscalebf16_256_mask: + case X86::BI__builtin_ia32_vrndscalebf16_mask: case X86::BI__builtin_ia32_reducepd128_mask: case X86::BI__builtin_ia32_reducepd256_mask: case X86::BI__builtin_ia32_reducepd512_mask: @@ -943,9 +943,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_reduceph128_mask: case X86::BI__builtin_ia32_reduceph256_mask: case X86::BI__builtin_ia32_reduceph512_mask: - case X86::BI__builtin_ia32_vreducenepbf16128_mask: - case X86::BI__builtin_ia32_vreducenepbf16256_mask: - case X86::BI__builtin_ia32_vreducenepbf16512_mask: + case X86::BI__builtin_ia32_vreducebf16128_mask: + case X86::BI__builtin_ia32_vreducebf16256_mask: + case X86::BI__builtin_ia32_vreducebf16512_mask: case X86::BI__builtin_ia32_vreducepd256_round_mask: case X86::BI__builtin_ia32_vreduceps256_round_mask: case 
X86::BI__builtin_ia32_vreduceph256_round_mask: @@ -973,9 +973,9 @@ bool SemaX86::CheckBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case X86::BI__builtin_ia32_fpclassph128_mask: case X86::BI__builtin_ia32_fpclassph256_mask: case X86::BI__builtin_ia32_fpclassph512_mask: - case X86::BI__builtin_ia32_vfpclasspbf16128_mask: - case X86::BI__builtin_ia32_vfpclasspbf16256_mask: - case X86::BI__builtin_ia32_vfpclasspbf16512_mask: + case X86::BI__builtin_ia32_vfpclassbf16128_mask: + case X86::BI__builtin_ia32_vfpclassbf16256_mask: + case X86::BI__builtin_ia32_vfpclassbf16512_mask: case X86::BI__builtin_ia32_fpclasssd_mask: case X86::BI__builtin_ia32_fpclassss_mask: case X86::BI__builtin_ia32_fpclasssh_mask: diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 54570dedb0b22..8b9ba04dce91c 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -27,6 +27,20 @@ using namespace clang; using namespace serialization; +//===----------------------------------------------------------------------===// +// Utility functions +//===----------------------------------------------------------------------===// + +namespace { + +// Helper function that returns true if the decl passed in the argument is +// a definition in a dependent context. +template bool isDefinitionInDependentContext(DT *D) { + return D->isDependentContext() && D->isThisDeclarationADefinition(); +} + +} // namespace + //===----------------------------------------------------------------------===// // Declaration serialization //===----------------------------------------------------------------------===// @@ -801,14 +815,14 @@ void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { } if (D->getFriendObjectKind()) { - // For a function defined inline within a class template, we have to force - // the canonical definition to be the one inside the canonical definition of - // the template.
Remember this relation to deserialize them together. - if (auto *RD = dyn_cast(D->getLexicalParent())) - if (RD->isDependentContext() && RD->isThisDeclarationADefinition()) { - Writer.RelatedDeclsMap[Writer.GetDeclRef(RD)].push_back( - Writer.GetDeclRef(D)); - } + // For a friend function defined inline within a class template, we have to + // force the definition to be the one inside the definition of the template + // class. Remember this relation to deserialize them together. + if (auto *RD = dyn_cast(D->getLexicalParent()); + RD && isDefinitionInDependentContext(RD)) { + Writer.RelatedDeclsMap[Writer.GetDeclRef(RD)].push_back( + Writer.GetDeclRef(D)); + } } Record.push_back(D->param_size()); @@ -1583,9 +1597,10 @@ void ASTDeclWriter::VisitCXXRecordDecl(CXXRecordDecl *D) { } else { Record.push_back(0); } - // For lambdas inside canonical FunctionDecl remember the mapping. - if (auto FD = llvm::dyn_cast_or_null(D->getDeclContext()); - FD && FD->isCanonicalDecl()) { + // For lambdas inside template functions, remember the mapping to + // deserialize them together. 
+ if (auto *FD = llvm::dyn_cast_or_null(D->getDeclContext()); + FD && isDefinitionInDependentContext(FD)) { Writer.RelatedDeclsMap[Writer.GetDeclRef(FD)].push_back( Writer.GetDeclRef(D)); } diff --git a/clang/test/AST/ByteCode/new-delete.cpp b/clang/test/AST/ByteCode/new-delete.cpp index 8466e9b88782f..a8f073aa03fc1 100644 --- a/clang/test/AST/ByteCode/new-delete.cpp +++ b/clang/test/AST/ByteCode/new-delete.cpp @@ -602,8 +602,7 @@ namespace std { using size_t = decltype(sizeof(0)); template struct allocator { constexpr T *allocate(size_t N) { - return (T*)__builtin_operator_new(sizeof(T) * N); // both-note 2{{allocation performed here}} \ - // #alloc + return (T*)__builtin_operator_new(sizeof(T) * N); // #alloc } constexpr void deallocate(void *p) { __builtin_operator_delete(p); // both-note 2{{std::allocator<...>::deallocate' used to delete pointer to object allocated with 'new'}} \ @@ -641,7 +640,7 @@ namespace OperatorNewDelete { p = new int[1]; // both-note {{heap allocation performed here}} break; case 2: - p = std::allocator().allocate(1); + p = std::allocator().allocate(1); // both-note 2{{heap allocation performed here}} break; } switch (dealloc_kind) { @@ -838,6 +837,26 @@ namespace ToplevelScopeInTemplateArg { } } +template +struct SS { + constexpr SS(unsigned long long N) + : data(nullptr){ + data = alloc.allocate(N); // #call + for(std::size_t i = 0; i < N; i ++) + std::construct_at(data + i, i); // #construct_call + } + constexpr T operator[](std::size_t i) const { + return data[i]; + } + + constexpr ~SS() { + alloc.deallocate(data); + } + std::allocator alloc; + T* data; +}; +constexpr unsigned short ssmall = SS(100)[42]; + #else /// Make sure we reject this prior to C++20 constexpr int a() { // both-error {{never produces a constant expression}} diff --git a/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-0x.cpp b/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-0x.cpp index 19793fe826372..54fcfccad6f52 100644 --- 
a/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-0x.cpp +++ b/clang/test/CXX/temp/temp.arg/temp.arg.template/p3-0x.cpp @@ -2,13 +2,13 @@ template struct eval; // expected-note 3{{template is declared here}} -template