From 5de2d189e6ad466a1f0616195e8c524a4eb3cbc0 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Tue, 23 Feb 2021 09:47:15 -0800 Subject: [PATCH 001/784] [Diagnose] Unify MCContext and LLVMContext diagnosing The situation with inline asm/MC error reporting is kind of messy at the moment. The errors from MC layout are not reliably propagated, and users have to specify an inline asm handler separately to get inline asm diagnostics. The latter issue is not a correctness issue but could be improved. * Kill the LLVMContext inline asm diagnostic handler and migrate it to use DiagnosticInfo/DiagnosticHandler. * Introduce `DiagnosticInfoSrcMgr` to diagnose SourceMgr-backed errors. This covers use cases like inline asm, MC, and any clients using SourceMgr. * Move AsmPrinter::SrcMgrDiagInfo and its instance to MCContext. The next step is to combine MCContext::SrcMgr and MCContext::InlineSrcMgr because in all use cases, only one of them is used. * If LLVMContext is available, let MCContext use LLVMContext's diagnostic handler; if LLVMContext is not available, MCContext uses its own default diagnostic handler, which just prints the SMDiagnostic. * Change a few clients (Clang, llc, lldb) to use the new way of reporting. 
Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D97449 --- .../clang/Basic/DiagnosticCategories.td | 1 + .../clang/Basic/DiagnosticFrontendKinds.td | 4 + clang/include/clang/Basic/DiagnosticGroups.td | 1 + clang/lib/CodeGen/CodeGenAction.cpp | 165 +++++++----------- lldb/source/Expression/IRExecutionUnit.cpp | 32 ++-- llvm/include/llvm/CodeGen/AsmPrinter.h | 12 -- llvm/include/llvm/IR/DiagnosticInfo.h | 45 +++++ llvm/include/llvm/IR/LLVMContext.h | 21 --- llvm/include/llvm/MC/MCContext.h | 28 ++- .../AsmPrinter/AsmPrinterInlineAsm.cpp | 70 ++------ llvm/lib/CodeGen/MachineModuleInfo.cpp | 37 ++++ llvm/lib/IR/DiagnosticInfo.cpp | 4 + llvm/lib/IR/LLVMContext.cpp | 20 --- llvm/lib/IR/LLVMContextImpl.h | 3 - llvm/lib/MC/MCContext.cpp | 90 +++++++--- llvm/lib/MC/MCParser/AsmParser.cpp | 15 +- llvm/test/CodeGen/AMDGPU/lds-initializer.ll | 4 +- .../CodeGen/AMDGPU/lds-zero-initializer.ll | 6 +- llvm/test/CodeGen/XCore/section-name.ll | 2 +- llvm/tools/llc/llc.cpp | 30 ++-- 20 files changed, 311 insertions(+), 279 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticCategories.td b/clang/include/clang/Basic/DiagnosticCategories.td index d7203173790e..fb6bdd710741 100644 --- a/clang/include/clang/Basic/DiagnosticCategories.td +++ b/clang/include/clang/Basic/DiagnosticCategories.td @@ -7,4 +7,5 @@ //===----------------------------------------------------------------------===// class CatInlineAsm : DiagCategory<"Inline Assembly Issue">; +class CatSourceMgr : DiagCategory<"SourceMgr Reported Issue">; class CatBackend : DiagCategory<"Backend Issue">; diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index b9f8c78e43da..831f906ffac8 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -19,6 +19,10 @@ def err_fe_inline_asm : Error<"%0">, CatInlineAsm; def warn_fe_inline_asm : Warning<"%0">, CatInlineAsm, 
InGroup; def note_fe_inline_asm : Note<"%0">, CatInlineAsm; def note_fe_inline_asm_here : Note<"instantiated into assembly here">; +def err_fe_source_mgr : Error<"%0">, CatSourceMgr; +def warn_fe_source_mgr : Warning<"%0">, CatSourceMgr, InGroup; +def note_fe_source_mgr : Note<"%0">, CatSourceMgr; +def remark_fe_source_mgr: Remark<"%0">, CatSourceMgr, InGroup; def err_fe_cannot_link_module : Error<"cannot link module '%0': %1">, DefaultFatal; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 1a1ce66656f5..81d78c69cc44 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1145,6 +1145,7 @@ def OpenMP : DiagGroup<"openmp", [ // Backend warnings. def BackendInlineAsm : DiagGroup<"inline-asm">; +def BackendSourceMgr : DiagGroup<"source-mgr">; def BackendFrameLargerThanEQ : DiagGroup<"frame-larger-than=">; def BackendPlugin : DiagGroup<"backend-plugin">; def RemarkBackendPlugin : DiagGroup<"remark-backend-plugin">; diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 6853926f4362..ff56b2902c54 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -301,14 +301,7 @@ namespace clang { if (!getModule()) return; - // Install an inline asm handler so that diagnostics get printed through - // our diagnostics hooks. 
LLVMContext &Ctx = getModule()->getContext(); - LLVMContext::InlineAsmDiagHandlerTy OldHandler = - Ctx.getInlineAsmDiagnosticHandler(); - void *OldContext = Ctx.getInlineAsmDiagnosticContext(); - Ctx.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, this); - std::unique_ptr OldDiagnosticHandler = Ctx.getDiagnosticHandler(); Ctx.setDiagnosticHandler(std::make_unique( @@ -342,8 +335,6 @@ namespace clang { LangOpts, C.getTargetInfo().getDataLayout(), getModule(), Action, std::move(AsmOutStream)); - Ctx.setInlineAsmDiagnosticHandler(OldHandler, OldContext); - Ctx.setDiagnosticHandler(std::move(OldDiagnosticHandler)); if (OptRecordFile) @@ -377,12 +368,6 @@ namespace clang { Gen->HandleVTable(RD); } - static void InlineAsmDiagHandler(const llvm::SMDiagnostic &SM,void *Context, - unsigned LocCookie) { - SourceLocation Loc = SourceLocation::getFromRawEncoding(LocCookie); - ((BackendConsumer*)Context)->InlineAsmDiagHandler2(SM, Loc); - } - /// Get the best possible source location to represent a diagnostic that /// may have associated debug info. const FullSourceLoc @@ -390,14 +375,13 @@ namespace clang { bool &BadDebugInfo, StringRef &Filename, unsigned &Line, unsigned &Column) const; - void InlineAsmDiagHandler2(const llvm::SMDiagnostic &, - SourceLocation LocCookie); - void DiagnosticHandlerImpl(const llvm::DiagnosticInfo &DI); /// Specialized handler for InlineAsm diagnostic. /// \return True if the diagnostic has been successfully reported, false /// otherwise. bool InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D); + /// Specialized handler for diagnostics reported using SMDiagnostic. + void SrcMgrDiagHandler(const llvm::DiagnosticInfoSrcMgr &D); /// Specialized handler for StackSize diagnostic. /// \return True if the diagnostic has been successfully reported, false /// otherwise. 
@@ -456,64 +440,6 @@ static FullSourceLoc ConvertBackendLocation(const llvm::SMDiagnostic &D, return FullSourceLoc(NewLoc, CSM); } - -/// InlineAsmDiagHandler2 - This function is invoked when the backend hits an -/// error parsing inline asm. The SMDiagnostic indicates the error relative to -/// the temporary memory buffer that the inline asm parser has set up. -void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, - SourceLocation LocCookie) { - // There are a couple of different kinds of errors we could get here. First, - // we re-format the SMDiagnostic in terms of a clang diagnostic. - - // Strip "error: " off the start of the message string. - StringRef Message = D.getMessage(); - if (Message.startswith("error: ")) - Message = Message.substr(7); - - // If the SMDiagnostic has an inline asm source location, translate it. - FullSourceLoc Loc; - if (D.getLoc() != SMLoc()) - Loc = ConvertBackendLocation(D, Context->getSourceManager()); - - unsigned DiagID; - switch (D.getKind()) { - case llvm::SourceMgr::DK_Error: - DiagID = diag::err_fe_inline_asm; - break; - case llvm::SourceMgr::DK_Warning: - DiagID = diag::warn_fe_inline_asm; - break; - case llvm::SourceMgr::DK_Note: - DiagID = diag::note_fe_inline_asm; - break; - case llvm::SourceMgr::DK_Remark: - llvm_unreachable("remarks unexpected"); - } - // If this problem has clang-level source location information, report the - // issue in the source with a note showing the instantiated - // code. - if (LocCookie.isValid()) { - Diags.Report(LocCookie, DiagID).AddString(Message); - - if (D.getLoc().isValid()) { - DiagnosticBuilder B = Diags.Report(Loc, diag::note_fe_inline_asm_here); - // Convert the SMDiagnostic ranges into SourceRange and attach them - // to the diagnostic. 
- for (const std::pair &Range : D.getRanges()) { - unsigned Column = D.getColumnNo(); - B << SourceRange(Loc.getLocWithOffset(Range.first - Column), - Loc.getLocWithOffset(Range.second - Column)); - } - } - return; - } - - // Otherwise, report the backend issue as occurring in the generated .s file. - // If Loc is invalid, we still need to report the issue, it just gets no - // location info. - Diags.Report(Loc, DiagID).AddString(Message); -} - #define ComputeDiagID(Severity, GroupName, DiagID) \ do { \ switch (Severity) { \ @@ -550,6 +476,65 @@ void BackendConsumer::InlineAsmDiagHandler2(const llvm::SMDiagnostic &D, } \ } while (false) +void BackendConsumer::SrcMgrDiagHandler(const llvm::DiagnosticInfoSrcMgr &DI) { + const llvm::SMDiagnostic &D = DI.getSMDiag(); + + unsigned DiagID; + if (DI.isInlineAsmDiag()) + ComputeDiagID(DI.getSeverity(), inline_asm, DiagID); + else + ComputeDiagID(DI.getSeverity(), source_mgr, DiagID); + + // This is for the empty BackendConsumer that uses the clang diagnostic + // handler for IR input files. + if (!Context) { + D.print(nullptr, llvm::errs()); + Diags.Report(DiagID).AddString("cannot compile inline asm"); + return; + } + + // There are a couple of different kinds of errors we could get here. + // First, we re-format the SMDiagnostic in terms of a clang diagnostic. + + // Strip "error: " off the start of the message string. + StringRef Message = D.getMessage(); + (void)Message.consume_front("error: "); + + // If the SMDiagnostic has an inline asm source location, translate it. + FullSourceLoc Loc; + if (D.getLoc() != SMLoc()) + Loc = ConvertBackendLocation(D, Context->getSourceManager()); + + // If this problem has clang-level source location information, report the + // issue in the source with a note showing the instantiated + // code. 
+ if (DI.isInlineAsmDiag()) { + SourceLocation LocCookie = + SourceLocation::getFromRawEncoding(DI.getLocCookie()); + if (LocCookie.isValid()) { + Diags.Report(LocCookie, DiagID).AddString(Message); + + if (D.getLoc().isValid()) { + DiagnosticBuilder B = Diags.Report(Loc, diag::note_fe_inline_asm_here); + // Convert the SMDiagnostic ranges into SourceRange and attach them + // to the diagnostic. + for (const std::pair &Range : D.getRanges()) { + unsigned Column = D.getColumnNo(); + B << SourceRange(Loc.getLocWithOffset(Range.first - Column), + Loc.getLocWithOffset(Range.second - Column)); + } + } + return; + } + } + + // Otherwise, report the backend issue as occurring in the generated .s file. + // If Loc is invalid, we still need to report the issue, it just gets no + // location info. + Diags.Report(Loc, DiagID).AddString(Message); + return; +} + bool BackendConsumer::InlineAsmDiagHandler(const llvm::DiagnosticInfoInlineAsm &D) { unsigned DiagID; @@ -783,6 +768,9 @@ void BackendConsumer::DiagnosticHandlerImpl(const DiagnosticInfo &DI) { return; ComputeDiagID(Severity, inline_asm, DiagID); break; + case llvm::DK_SrcMgr: + SrcMgrDiagHandler(cast(DI)); + return; case llvm::DK_StackSize: if (StackSizeDiagHandler(cast(DI))) return; @@ -979,30 +967,6 @@ CodeGenAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { return std::move(Result); } -static void BitcodeInlineAsmDiagHandler(const llvm::SMDiagnostic &SM, - void *Context, - unsigned LocCookie) { - SM.print(nullptr, llvm::errs()); - - auto Diags = static_cast(Context); - unsigned DiagID; - switch (SM.getKind()) { - case llvm::SourceMgr::DK_Error: - DiagID = diag::err_fe_inline_asm; - break; - case llvm::SourceMgr::DK_Warning: - DiagID = diag::warn_fe_inline_asm; - break; - case llvm::SourceMgr::DK_Note: - DiagID = diag::note_fe_inline_asm; - break; - case llvm::SourceMgr::DK_Remark: - llvm_unreachable("remarks unexpected"); - } - - Diags->Report(DiagID).AddString("cannot compile inline asm"); -} - 
std::unique_ptr CodeGenAction::loadModule(MemoryBufferRef MBRef) { CompilerInstance &CI = getCompilerInstance(); @@ -1105,7 +1069,6 @@ void CodeGenAction::ExecuteAction() { EmbedBitcode(TheModule.get(), CodeGenOpts, *MainFile); LLVMContext &Ctx = TheModule->getContext(); - Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler, &Diagnostics); // Restore any diagnostic handler previously set before returning from this // function. diff --git a/lldb/source/Expression/IRExecutionUnit.cpp b/lldb/source/Expression/IRExecutionUnit.cpp index 538935da52ee..b19c1d787093 100644 --- a/lldb/source/Expression/IRExecutionUnit.cpp +++ b/lldb/source/Expression/IRExecutionUnit.cpp @@ -9,6 +9,8 @@ #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticHandler.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/SourceMgr.h" @@ -200,16 +202,26 @@ Status IRExecutionUnit::DisassembleFunction(Stream &stream, return ret; } -static void ReportInlineAsmError(const llvm::SMDiagnostic &diagnostic, - void *Context, unsigned LocCookie) { - Status *err = static_cast(Context); +namespace { +struct IRExecDiagnosticHandler : public llvm::DiagnosticHandler { + Status *err; + IRExecDiagnosticHandler(Status *err) : err(err) {} + bool handleDiagnostics(const llvm::DiagnosticInfo &DI) override { + if (DI.getKind() == llvm::DK_SrcMgr) { + const auto &DISM = llvm::cast(DI); + if (err && err->Success()) { + err->SetErrorToGenericError(); + err->SetErrorStringWithFormat( + "Inline assembly error: %s", + DISM.getSMDiag().getMessage().str().c_str()); + } + return true; + } - if (err && err->Success()) { - err->SetErrorToGenericError(); - err->SetErrorStringWithFormat("Inline assembly error: %s", - diagnostic.getMessage().str().c_str()); + return false; } -} +}; +} // namespace void 
IRExecutionUnit::ReportSymbolLookupError(ConstString name) { m_failed_lookups.push_back(name); @@ -257,8 +269,8 @@ void IRExecutionUnit::GetRunnableInfo(Status &error, lldb::addr_t &func_addr, LLDB_LOGF(log, "Module being sent to JIT: \n%s", s.c_str()); } - m_module_up->getContext().setInlineAsmDiagnosticHandler(ReportInlineAsmError, - &error); + m_module_up->getContext().setDiagnosticHandler( + std::make_unique(&error)); llvm::EngineBuilder builder(std::move(m_module_up)); llvm::Triple triple(m_module->getTargetTriple()); diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h index da48bfd0889b..dcd9cca0d81d 100644 --- a/llvm/include/llvm/CodeGen/AsmPrinter.h +++ b/llvm/include/llvm/CodeGen/AsmPrinter.h @@ -185,14 +185,6 @@ class AsmPrinter : public MachineFunctionPass { std::vector Handlers; size_t NumUserHandlers = 0; -public: - struct SrcMgrDiagInfo { - SourceMgr SrcMgr; - std::vector LocInfos; - LLVMContext::InlineAsmDiagHandlerTy DiagHandler; - void *DiagContext; - }; - private: /// If generated on the fly this own the instance. std::unique_ptr OwnedMDT; @@ -200,10 +192,6 @@ class AsmPrinter : public MachineFunctionPass { /// If generated on the fly this own the instance. std::unique_ptr OwnedMLI; - /// Structure for generating diagnostics for inline assembly. Only initialised - /// when necessary. - mutable std::unique_ptr DiagInfo; - /// If the target supports dwarf debug info, this pointer is non-null. 
DwarfDebug *DD = nullptr; diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 889720735631..793b84cb032b 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -78,6 +78,7 @@ enum DiagnosticKind { DK_MIRParser, DK_PGOProfile, DK_Unsupported, + DK_SrcMgr, DK_FirstPluginKind // Must be last value to work with // getNextAvailablePluginDiagnosticKind }; @@ -916,6 +917,7 @@ class OptimizationRemarkAnalysisAliasing : public OptimizationRemarkAnalysis { }; /// Diagnostic information for machine IR parser. +// FIXME: Remove this, use DiagnosticInfoSrcMgr instead. class DiagnosticInfoMIRParser : public DiagnosticInfo { const SMDiagnostic &Diagnostic; @@ -1015,6 +1017,49 @@ class DiagnosticInfoUnsupported : public DiagnosticInfoWithLocationBase { void print(DiagnosticPrinter &DP) const override; }; +static DiagnosticSeverity getDiagnosticSeverity(SourceMgr::DiagKind DK) { + switch (DK) { + case llvm::SourceMgr::DK_Error: + return DS_Error; + break; + case llvm::SourceMgr::DK_Warning: + return DS_Warning; + break; + case llvm::SourceMgr::DK_Note: + return DS_Note; + break; + case llvm::SourceMgr::DK_Remark: + return DS_Remark; + break; + } + llvm_unreachable("unknown SourceMgr::DiagKind"); +} + +/// Diagnostic information for SMDiagnostic reporting. +class DiagnosticInfoSrcMgr : public DiagnosticInfo { + const SMDiagnostic &Diagnostic; + + // For inlineasm !srcloc translation. 
+ bool InlineAsmDiag; + unsigned LocCookie; + +public: + DiagnosticInfoSrcMgr(const SMDiagnostic &Diagnostic, + bool InlineAsmDiag = true, unsigned LocCookie = 0) + : DiagnosticInfo(DK_SrcMgr, getDiagnosticSeverity(Diagnostic.getKind())), + Diagnostic(Diagnostic), InlineAsmDiag(InlineAsmDiag), + LocCookie(LocCookie) {} + + bool isInlineAsmDiag() const { return InlineAsmDiag; } + const SMDiagnostic &getSMDiag() const { return Diagnostic; } + unsigned getLocCookie() const { return LocCookie; } + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_SrcMgr; + } +}; + } // end namespace llvm #endif // LLVM_IR_DIAGNOSTICINFO_H diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index a352deda6bc8..3bd889485dd1 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -153,31 +153,10 @@ class LLVMContext { void enableDebugTypeODRUniquing(); void disableDebugTypeODRUniquing(); - using InlineAsmDiagHandlerTy = void (*)(const SMDiagnostic&, void *Context, - unsigned LocCookie); - /// Defines the type of a yield callback. /// \see LLVMContext::setYieldCallback. using YieldCallbackTy = void (*)(LLVMContext *Context, void *OpaqueHandle); - /// setInlineAsmDiagnosticHandler - This method sets a handler that is invoked - /// when problems with inline asm are detected by the backend. The first - /// argument is a function pointer and the second is a context pointer that - /// gets passed into the DiagHandler. - /// - /// LLVMContext doesn't take ownership or interpret either of these - /// pointers. - void setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler, - void *DiagContext = nullptr); - - /// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by - /// setInlineAsmDiagnosticHandler. 
- InlineAsmDiagHandlerTy getInlineAsmDiagnosticHandler() const; - - /// getInlineAsmDiagnosticContext - Return the diagnostic context set by - /// setInlineAsmDiagnosticHandler. - void *getInlineAsmDiagnosticContext() const; - /// setDiagnosticHandlerCallBack - This method sets a handler call back /// that is invoked when the backend needs to report anything to the user. /// The first argument is a function pointer and the second is a context pointer diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index a06aabe835bc..33874e83ead7 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -59,6 +60,8 @@ namespace llvm { class MCSymbolELF; class MCSymbolWasm; class MCSymbolXCOFF; + class MDNode; + class SMDiagnostic; class SMLoc; class SourceMgr; @@ -68,13 +71,19 @@ namespace llvm { class MCContext { public: using SymbolTable = StringMap; + using DiagHandlerTy = + std::function &)>; private: /// The SourceMgr for this object, if any. const SourceMgr *SrcMgr; /// The SourceMgr for inline assembly, if any. - SourceMgr *InlineSrcMgr; + std::unique_ptr InlineSrcMgr; + std::vector LocInfos; + + DiagHandlerTy DiagHandler; /// The MCAsmInfo for this target. 
const MCAsmInfo *MAI; @@ -299,6 +308,9 @@ namespace llvm { bool HadError = false; + void reportCommon(SMLoc Loc, + std::function); + MCSymbol *createSymbolImpl(const StringMapEntry *Name, bool CanBeUnnamed); MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix, @@ -363,7 +375,15 @@ namespace llvm { const SourceMgr *getSourceManager() const { return SrcMgr; } - void setInlineSourceManager(SourceMgr *SM) { InlineSrcMgr = SM; } + void initInlineSourceManager(); + SourceMgr *getInlineSourceManager() { + assert(InlineSrcMgr); + return InlineSrcMgr.get(); + } + std::vector &getLocInfos() { return LocInfos; } + void setDiagnosticHandler(DiagHandlerTy DiagHandler) { + this->DiagHandler = DiagHandler; + } const MCAsmInfo *getAsmInfo() const { return MAI; } @@ -748,13 +768,13 @@ namespace llvm { void deallocate(void *Ptr) {} bool hadError() { return HadError; } + void diagnose(const SMDiagnostic &SMD); void reportError(SMLoc L, const Twine &Msg); void reportWarning(SMLoc L, const Twine &Msg); // Unrecoverable error has occurred. Display the best diagnostic we can // and bail via exit(1). For now, most MC backend errors are unrecoverable. // FIXME: We should really do something about that. - LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L, - const Twine &Msg); + LLVM_ATTRIBUTE_NORETURN void reportFatalError(SMLoc L, const Twine &Msg); const MCAsmMacro *lookupMacro(StringRef Name) { StringMap::iterator I = MacroMap.find(Name); diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index b3327aff082a..8d04f7e9ca4f 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -39,54 +39,12 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" -/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an -/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo -/// struct above. 
-static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { - AsmPrinter::SrcMgrDiagInfo *DiagInfo = - static_cast(diagInfo); - assert(DiagInfo && "Diagnostic context not passed down?"); - - // Look up a LocInfo for the buffer this diagnostic is coming from. - unsigned BufNum = DiagInfo->SrcMgr.FindBufferContainingLoc(Diag.getLoc()); - const MDNode *LocInfo = nullptr; - if (BufNum > 0 && BufNum <= DiagInfo->LocInfos.size()) - LocInfo = DiagInfo->LocInfos[BufNum-1]; - - // If the inline asm had metadata associated with it, pull out a location - // cookie corresponding to which line the error occurred on. - unsigned LocCookie = 0; - if (LocInfo) { - unsigned ErrorLine = Diag.getLineNo()-1; - if (ErrorLine >= LocInfo->getNumOperands()) - ErrorLine = 0; - - if (LocInfo->getNumOperands() != 0) - if (const ConstantInt *CI = - mdconst::dyn_extract(LocInfo->getOperand(ErrorLine))) - LocCookie = CI->getZExtValue(); - } - - DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie); -} - unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr, const MDNode *LocMDNode) const { - if (!DiagInfo) { - DiagInfo = std::make_unique(); - - MCContext &Context = MMI->getContext(); - Context.setInlineSourceManager(&DiagInfo->SrcMgr); - - LLVMContext &LLVMCtx = MMI->getModule()->getContext(); - if (LLVMCtx.getInlineAsmDiagnosticHandler()) { - DiagInfo->DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); - DiagInfo->DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); - DiagInfo->SrcMgr.setDiagHandler(srcMgrDiagHandler, DiagInfo.get()); - } - } - - SourceMgr &SrcMgr = DiagInfo->SrcMgr; + MCContext &Context = MMI->getContext(); + Context.initInlineSourceManager(); + SourceMgr &SrcMgr = *Context.getInlineSourceManager(); + std::vector &LocInfos = Context.getLocInfos(); std::unique_ptr Buffer; // The inline asm source manager will outlive AsmStr, so make a copy of the @@ -98,8 +56,8 @@ unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr, // Store 
LocMDNode in DiagInfo, using BufNum as an identifier. if (LocMDNode) { - DiagInfo->LocInfos.resize(BufNum); - DiagInfo->LocInfos[BufNum - 1] = LocMDNode; + LocInfos.resize(BufNum); + LocInfos[BufNum - 1] = LocMDNode; } return BufNum; @@ -134,10 +92,11 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, } unsigned BufNum = addInlineAsmDiagBuffer(Str, LocMDNode); - DiagInfo->SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths); + SourceMgr &SrcMgr = *MMI->getContext().getInlineSourceManager(); + SrcMgr.setIncludeDirs(MCOptions.IASSearchPaths); - std::unique_ptr Parser(createMCAsmParser( - DiagInfo->SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); + std::unique_ptr Parser( + createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI, BufNum)); // Do not use assembler-level information for parsing inline assembly. OutStreamer->setUseAssemblerInfoForParsing(false); @@ -162,12 +121,9 @@ void AsmPrinter::emitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, emitInlineAsmStart(); // Don't implicitly switch to the text section before the asm. 
- int Res = Parser->Run(/*NoInitialTextSection*/ true, - /*NoFinalize*/ true); + (void)Parser->Run(/*NoInitialTextSection*/ true, + /*NoFinalize*/ true); emitInlineAsmEnd(STI, &TAP->getSTI()); - - if (Res && !DiagInfo->DiagHandler) - report_fatal_error("Error parsing inline asm\n"); } static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, @@ -572,7 +528,7 @@ void AsmPrinter::emitInlineAsm(const MachineInstr *MI) const { if (!RestrRegs.empty()) { unsigned BufNum = addInlineAsmDiagBuffer(OS.str(), LocMD); - auto &SrcMgr = DiagInfo->SrcMgr; + auto &SrcMgr = *MMI->getContext().getInlineSourceManager(); SMLoc Loc = SMLoc::getFromPointer( SrcMgr.getMemoryBuffer(BufNum)->getBuffer().begin()); diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 0379dbd0dced..8a3aef390b58 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -16,7 +16,9 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" @@ -315,9 +317,44 @@ INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo", "Machine Module Information", false, false) char MachineModuleInfoWrapperPass::ID = 0; +static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr, + std::vector &LocInfos) { + // Look up a LocInfo for the buffer this diagnostic is coming from. + unsigned BufNum = SrcMgr.FindBufferContainingLoc(SMD.getLoc()); + const MDNode *LocInfo = nullptr; + if (BufNum > 0 && BufNum <= LocInfos.size()) + LocInfo = LocInfos[BufNum - 1]; + + // If the inline asm had metadata associated with it, pull out a location + // cookie corresponding to which line the error occurred on. 
+ unsigned LocCookie = 0; + if (LocInfo) { + unsigned ErrorLine = SMD.getLineNo() - 1; + if (ErrorLine >= LocInfo->getNumOperands()) + ErrorLine = 0; + + if (LocInfo->getNumOperands() != 0) + if (const ConstantInt *CI = + mdconst::dyn_extract(LocInfo->getOperand(ErrorLine))) + LocCookie = CI->getZExtValue(); + } + + return LocCookie; +} + bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { MMI.initialize(); MMI.TheModule = &M; + // FIXME: Do this for new pass manager. + LLVMContext &Ctx = M.getContext(); + MMI.getContext().setDiagnosticHandler( + [&Ctx](const SMDiagnostic &SMD, bool IsInlineAsm, const SourceMgr &SrcMgr, + std::vector &LocInfos) { + unsigned LocCookie = 0; + if (IsInlineAsm) + LocCookie = getLocCookie(SMD, SrcMgr, LocInfos); + Ctx.diagnose(DiagnosticInfoSrcMgr(SMD, IsInlineAsm, LocCookie)); + }); MMI.DbgInfoAvailable = !M.debug_compile_units().empty(); return false; } diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp index 749b798945bf..1bdaff526333 100644 --- a/llvm/lib/IR/DiagnosticInfo.cpp +++ b/llvm/lib/IR/DiagnosticInfo.cpp @@ -330,6 +330,10 @@ void DiagnosticInfoMIRParser::print(DiagnosticPrinter &DP) const { DP << Diagnostic; } +void DiagnosticInfoSrcMgr::print(DiagnosticPrinter &DP) const { + DP << Diagnostic; +} + DiagnosticInfoOptimizationFailure::DiagnosticInfoOptimizationFailure( const char *PassName, StringRef RemarkName, const DiagnosticLocation &Loc, const Value *CodeRegion) diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp index 280db8c9bb6d..79002fb1b1bc 100644 --- a/llvm/lib/IR/LLVMContext.cpp +++ b/llvm/lib/IR/LLVMContext.cpp @@ -111,26 +111,6 @@ void LLVMContext::removeModule(Module *M) { // Recoverable Backend Errors //===----------------------------------------------------------------------===// -void LLVMContext:: -setInlineAsmDiagnosticHandler(InlineAsmDiagHandlerTy DiagHandler, - void *DiagContext) { - pImpl->InlineAsmDiagHandler = DiagHandler; - 
pImpl->InlineAsmDiagContext = DiagContext; -} - -/// getInlineAsmDiagnosticHandler - Return the diagnostic handler set by -/// setInlineAsmDiagnosticHandler. -LLVMContext::InlineAsmDiagHandlerTy -LLVMContext::getInlineAsmDiagnosticHandler() const { - return pImpl->InlineAsmDiagHandler; -} - -/// getInlineAsmDiagnosticContext - Return the diagnostic context set by -/// setInlineAsmDiagnosticHandler. -void *LLVMContext::getInlineAsmDiagnosticContext() const { - return pImpl->InlineAsmDiagContext; -} - void LLVMContext::setDiagnosticHandlerCallBack( DiagnosticHandler::DiagnosticHandlerTy DiagnosticHandler, void *DiagnosticContext, bool RespectFilters) { diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h index 6d5588352dfb..4f7e4c23ce28 100644 --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1314,9 +1314,6 @@ class LLVMContextImpl { /// will be automatically deleted if this context is deleted. SmallPtrSet OwnedModules; - LLVMContext::InlineAsmDiagHandlerTy InlineAsmDiagHandler = nullptr; - void *InlineAsmDiagContext = nullptr; - /// The main remark streamer used by all the other streamers (e.g. IR, MIR, /// frontends, etc.). This should only be used by the specific streamers, and /// never directly. 
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index 0902a15979cd..50f412789527 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -57,11 +57,16 @@ AsSecureLogFileName("as-secure-log-file-name", "AS_SECURE_LOG_FILE env variable)"), cl::init(getenv("AS_SECURE_LOG_FILE")), cl::Hidden); +static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &, + std::vector &) { + SMD.print(nullptr, errs()); +} + MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, const MCObjectFileInfo *mofi, const SourceMgr *mgr, MCTargetOptions const *TargetOpts, bool DoAutoReset) - : SrcMgr(mgr), InlineSrcMgr(nullptr), MAI(mai), MRI(mri), MOFI(mofi), - Symbols(Allocator), UsedNames(Allocator), + : SrcMgr(mgr), InlineSrcMgr(nullptr), DiagHandler(defaultDiagHandler), + MAI(mai), MRI(mri), MOFI(mofi), Symbols(Allocator), UsedNames(Allocator), InlineAsmUsedLabelNames(Allocator), CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0), AutoReset(DoAutoReset), TargetOptions(TargetOpts) { @@ -80,11 +85,21 @@ MCContext::~MCContext() { // we don't need to free them here. 
} +void MCContext::initInlineSourceManager() { + if (!InlineSrcMgr) + InlineSrcMgr.reset(new SourceMgr()); +} + //===----------------------------------------------------------------------===// // Module Lifetime Management //===----------------------------------------------------------------------===// void MCContext::reset() { + SrcMgr = nullptr; + InlineSrcMgr.release(); + LocInfos.clear(); + DiagHandler = defaultDiagHandler; + // Call the destructors so the fragments are freed COFFAllocator.DestroyAll(); ELFAllocator.DestroyAll(); @@ -835,32 +850,67 @@ CodeViewContext &MCContext::getCVContext() { // Error Reporting //===----------------------------------------------------------------------===// +void MCContext::diagnose(const SMDiagnostic &SMD) { + assert(DiagHandler && "MCContext::DiagHandler is not set"); + bool UseInlineSrcMgr = false; + const SourceMgr *SMP = nullptr; + if (SrcMgr) { + SMP = SrcMgr; + } else if (InlineSrcMgr) { + SMP = InlineSrcMgr.get(); + UseInlineSrcMgr = true; + } else + llvm_unreachable("Either SourceMgr should be available"); + DiagHandler(SMD, UseInlineSrcMgr, *SMP, LocInfos); +} + +void MCContext::reportCommon( + SMLoc Loc, + std::function GetMessage) { + // * MCContext::SrcMgr is null when the MC layer emits machine code for input + // other than assembly file, say, for .c/.cpp/.ll/.bc. + // * MCContext::InlineSrcMgr is null when the inline asm is not used. + // * A default SourceMgr is needed for diagnosing when both MCContext::SrcMgr + // and MCContext::InlineSrcMgr are null. + SourceMgr SM; + const SourceMgr *SMP = &SM; + bool UseInlineSrcMgr = false; + + // FIXME: Simplify these by combining InlineSrcMgr & SrcMgr. + // For MC-only execution, only SrcMgr is used; + // For non MC-only execution, InlineSrcMgr is only ctor'd if there is + // inline asm in the IR. 
+ if (Loc.isValid()) { + if (SrcMgr) { + SMP = SrcMgr; + } else if (InlineSrcMgr) { + SMP = InlineSrcMgr.get(); + UseInlineSrcMgr = true; + } else + llvm_unreachable("Either SourceMgr should be available"); + } + + SMDiagnostic D; + GetMessage(D, SMP); + DiagHandler(D, UseInlineSrcMgr, *SMP, LocInfos); +} + void MCContext::reportError(SMLoc Loc, const Twine &Msg) { HadError = true; - - // If we have a source manager use it. Otherwise, try using the inline source - // manager. - // If that fails, construct a temporary SourceMgr. - if (SrcMgr) - SrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg); - else if (InlineSrcMgr) - InlineSrcMgr->PrintMessage(Loc, SourceMgr::DK_Error, Msg); - else - SourceMgr().PrintMessage(Loc, SourceMgr::DK_Error, Msg); + reportCommon(Loc, [&](SMDiagnostic &D, const SourceMgr *SMP) { + D = SMP->GetMessage(Loc, SourceMgr::DK_Error, Msg); + }); } void MCContext::reportWarning(SMLoc Loc, const Twine &Msg) { if (TargetOptions && TargetOptions->MCNoWarn) return; - if (TargetOptions && TargetOptions->MCFatalWarnings) + if (TargetOptions && TargetOptions->MCFatalWarnings) { reportError(Loc, Msg); - else { - // If we have a source manager use it. Otherwise, try using the inline - // source manager. - if (SrcMgr) - SrcMgr->PrintMessage(Loc, SourceMgr::DK_Warning, Msg); - else if (InlineSrcMgr) - InlineSrcMgr->PrintMessage(Loc, SourceMgr::DK_Warning, Msg); + } else { + reportCommon(Loc, [&](SMDiagnostic &D, const SourceMgr *SMP) { + D = SMP->GetMessage(Loc, SourceMgr::DK_Warning, Msg); + }); } } diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 3d2f0d83d2ad..4a7d82d845df 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -2348,7 +2348,7 @@ bool AsmParser::parseCppHashLineFilenameComment(SMLoc L, bool SaveLocInfo) { /// will use the last parsed cpp hash line filename comment /// for the Filename and LineNo if any in the diagnostic. 
void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { - const AsmParser *Parser = static_cast(Context); + auto *Parser = static_cast(Context); raw_ostream &OS = errs(); const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr(); @@ -2369,12 +2369,8 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { // If we have not parsed a cpp hash line filename comment or the source // manager changed or buffer changed (like in a nested include) then just // print the normal diagnostic using its Filename and LineNo. - if (!Parser->CppHashInfo.LineNumber || &DiagSrcMgr != &Parser->SrcMgr || - DiagBuf != CppHashBuf) { - if (Parser->SavedDiagHandler) - Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext); - else - Diag.print(nullptr, OS); + if (!Parser->CppHashInfo.LineNumber || DiagBuf != CppHashBuf) { + Parser->getContext().diagnose(Diag); return; } @@ -2393,10 +2389,7 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(), Diag.getLineContents(), Diag.getRanges()); - if (Parser->SavedDiagHandler) - Parser->SavedDiagHandler(NewDiag, Parser->SavedDiagContext); - else - NewDiag.print(nullptr, OS); + Parser->getContext().diagnose(NewDiag); } // FIXME: This is mostly duplicated from the function in AsmLexer.cpp. 
The diff --git a/llvm/test/CodeGen/AMDGPU/lds-initializer.ll b/llvm/test/CodeGen/AMDGPU/lds-initializer.ll index 2c2ccd0582ac..8b46b4c3e6b2 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-initializer.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti < %s -o /dev/null 2>&1 | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tahiti < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga < %s -o /dev/null 2>&1 | FileCheck %s ; CHECK: lds: unsupported initializer for address space diff --git a/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll index 8b3eeb125900..63b52fa169bf 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-zero-initializer.ll @@ -1,7 +1,7 @@ -; RUN: llc -march=amdgcn -mcpu=tahiti < %s -o /dev/null 2>&1 | FileCheck %s -; RUN: llc -march=amdgcn -mcpu=tonga < %s -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tahiti -filetype=null < %s 2>&1 | FileCheck %s +; RUN: not llc -march=amdgcn -mcpu=tonga -filetype=null < %s 2>&1 | FileCheck %s -; CHECK: lds: unsupported initializer for address space +; CHECK: error: lds: unsupported initializer for address space @lds = addrspace(3) global [256 x i32] zeroinitializer diff --git a/llvm/test/CodeGen/XCore/section-name.ll b/llvm/test/CodeGen/XCore/section-name.ll index 5f91702a2847..e7ff1aed1a98 100644 --- a/llvm/test/CodeGen/XCore/section-name.ll +++ b/llvm/test/CodeGen/XCore/section-name.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=xcore -o /dev/null 2>&1 | FileCheck %s +; RUN: not llc < %s -march=xcore -o /dev/null 2>&1 | FileCheck %s @bar = internal global i32 zeroinitializer diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index 7640aa4948c7..2e8865a98b46 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -290,6 +290,22 
@@ struct LLCDiagnosticHandler : public DiagnosticHandler { bool *HasError; LLCDiagnosticHandler(bool *HasErrorPtr) : HasError(HasErrorPtr) {} bool handleDiagnostics(const DiagnosticInfo &DI) override { + if (DI.getKind() == llvm::DK_SrcMgr) { + const auto &DISM = cast(DI); + const SMDiagnostic &SMD = DISM.getSMDiag(); + + if (SMD.getKind() == SourceMgr::DK_Error) + *HasError = true; + + SMD.print(nullptr, errs()); + + // For testing purposes, we print the LocCookie here. + if (DISM.isInlineAsmDiag() && DISM.getLocCookie()) + WithColor::note() << "!srcloc = " << DISM.getLocCookie() << "\n"; + + return true; + } + if (DI.getSeverity() == DS_Error) *HasError = true; @@ -305,19 +321,6 @@ struct LLCDiagnosticHandler : public DiagnosticHandler { } }; -static void InlineAsmDiagHandler(const SMDiagnostic &SMD, void *Context, - unsigned LocCookie) { - bool *HasError = static_cast(Context); - if (SMD.getKind() == SourceMgr::DK_Error) - *HasError = true; - - SMD.print(nullptr, errs()); - - // For testing purposes, we print the LocCookie here. - if (LocCookie) - WithColor::note() << "!srcloc = " << LocCookie << "\n"; -} - // main - Entry point for the llc compiler. // int main(int argc, char **argv) { @@ -367,7 +370,6 @@ int main(int argc, char **argv) { bool HasError = false; Context.setDiagnosticHandler( std::make_unique(&HasError)); - Context.setInlineAsmDiagnosticHandler(InlineAsmDiagHandler, &HasError); Expected> RemarksFileOrErr = setupLLVMOptimizationRemarks(Context, RemarksFilename, RemarksPasses, From 1490f6b72c30f690b18018ceefd499562b255efa Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Mon, 1 Mar 2021 16:06:30 -0800 Subject: [PATCH 002/784] Fix build 5de2d189e6ad4 Remove source_mgr remark diagnose kind. 
--- clang/include/clang/Basic/DiagnosticFrontendKinds.td | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 831f906ffac8..3086e922d9ed 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -22,7 +22,6 @@ def note_fe_inline_asm_here : Note<"instantiated into assembly here">; def err_fe_source_mgr : Error<"%0">, CatSourceMgr; def warn_fe_source_mgr : Warning<"%0">, CatSourceMgr, InGroup; def note_fe_source_mgr : Note<"%0">, CatSourceMgr; -def remark_fe_source_mgr: Remark<"%0">, CatSourceMgr, InGroup; def err_fe_cannot_link_module : Error<"cannot link module '%0': %1">, DefaultFatal; From 74ca5c2c1e0214f82b1c19e0c9f8f30fa59ffbf3 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Sun, 28 Feb 2021 18:15:28 -0800 Subject: [PATCH 003/784] [lld][WebAssembly] Convert lld/test/wasm/data-layout.ll to asm. NFC. Part of ongoing conversion work started in https://reviews.llvm.org/D80361. 
Differential Revision: https://reviews.llvm.org/D97651 --- lld/test/wasm/data-layout.ll | 163 ----------------------------- lld/test/wasm/data-layout.s | 195 +++++++++++++++++++++++++++++++++++ 2 files changed, 195 insertions(+), 163 deletions(-) delete mode 100644 lld/test/wasm/data-layout.ll create mode 100644 lld/test/wasm/data-layout.s diff --git a/lld/test/wasm/data-layout.ll b/lld/test/wasm/data-layout.ll deleted file mode 100644 index 0074517925e9..000000000000 --- a/lld/test/wasm/data-layout.ll +++ /dev/null @@ -1,163 +0,0 @@ -; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/hello.s -o %t.hello32.o -; RUN: llc -mtriple=wasm32-unknown-unknown -filetype=obj %s -o %t32.o -; RUN: wasm-ld -m wasm32 -no-gc-sections --export=__data_end --export=__heap_base --allow-undefined --no-entry -o %t32.wasm %t32.o %t.hello32.o -; RUN: obj2yaml %t32.wasm | FileCheck --check-prefixes CHECK,CHK32 %s - -; RUN: llvm-mc -filetype=obj -triple=wasm64-unknown-unknown %p/Inputs/hello.s -o %t.hello64.o -; RUN: llc -mtriple=wasm64-unknown-unknown -filetype=obj %s -o %t64.o -; RUN: wasm-ld -m wasm64 -no-gc-sections --export=__data_end --export=__heap_base --allow-undefined --no-entry -o %t64.wasm %t64.o %t.hello64.o -; RUN: obj2yaml %t64.wasm | FileCheck --check-prefixes CHECK,CHK64 %s - -@foo = hidden global i32 1, align 4 -@aligned_bar = hidden global i32 3, align 16 - -@hello_str = external global i8* -@external_ref = global i8** @hello_str, align 8 - -%struct.s = type { i32, i32 } -@local_struct = hidden global %struct.s zeroinitializer, align 4 -@local_struct_internal_ptr = hidden local_unnamed_addr global i32* getelementptr inbounds (%struct.s, %struct.s* @local_struct, i32 0, i32 1), align 4 - -; CHECK: - Type: MEMORY -; CHECK-NEXT: Memories: -; CHK32-NEXT: - Initial: 0x2 -; CHK64-NEXT: - Flags: [ IS_64 ] -; CHK64-NEXT: Initial: 0x2 -; CHECK-NEXT: - Type: GLOBAL -; CHECK-NEXT: Globals: -; CHECK-NEXT: - Index: 0 -; CHK32-NEXT: Type: I32 -; CHK64-NEXT: Type: 
I64 -; CHECK-NEXT: Mutable: true -; CHECK-NEXT: InitExpr: -; CHK32-NEXT: Opcode: I32_CONST -; CHK64-NEXT: Opcode: I64_CONST -; CHECK-NEXT: Value: 66624 -; CHECK-NEXT: - Index: 1 -; CHECK-NEXT: Type: I32 -; CHECK-NEXT: Mutable: false -; CHECK-NEXT: InitExpr: -; CHECK-NEXT: Opcode: I32_CONST -; CHK32-NEXT: Value: 1080 -; CHK64-NEXT: Value: 1088 -; CHECK-NEXT: - Index: 2 -; CHECK-NEXT: Type: I32 -; CHECK-NEXT: Mutable: false -; CHECK-NEXT: InitExpr: -; CHECK-NEXT: Opcode: I32_CONST -; CHECK-NEXT: Value: 66624 - -; CHECK: - Type: DATA -; CHECK-NEXT: Segments: -; CHECK-NEXT: - SectionOffset: 7 -; CHECK-NEXT: InitFlags: 0 -; CHECK-NEXT: Offset: -; CHK32-NEXT: Opcode: I32_CONST -; CHK64-NEXT: Opcode: I64_CONST -; CHECK-NEXT: Value: 1024 -; CHECK-NEXT: Content: 68656C6C6F0A00 -; CHECK-NEXT: - SectionOffset: 20 -; CHECK-NEXT: InitFlags: 0 -; CHECK-NEXT: Offset: -; CHK32-NEXT: Opcode: I32_CONST -; CHK64-NEXT: Opcode: I64_CONST -; CHECK-NEXT: Value: 1040 - - -; RUN: wasm-ld -no-gc-sections --allow-undefined --no-entry \ -; RUN: --initial-memory=131072 --max-memory=131072 -o %t_max.wasm %t32.o \ -; RUN: %t.hello32.o -; RUN: obj2yaml %t_max.wasm | FileCheck %s -check-prefix=CHECK-MAX - -; CHECK-MAX: - Type: MEMORY -; CHECK-MAX-NEXT: Memories: -; CHECK-MAX-NEXT: - Flags: [ HAS_MAX ] -; CHECK-MAX-NEXT: Initial: 0x2 -; CHECK-MAX-NEXT: Maximum: 0x2 - -; RUN: wasm-ld -no-gc-sections --allow-undefined --no-entry --shared-memory \ -; RUN: --features=atomics,bulk-memory --initial-memory=131072 \ -; RUN: --max-memory=131072 -o %t_max.wasm %t32.o %t.hello32.o -; RUN: obj2yaml %t_max.wasm | FileCheck %s -check-prefix=CHECK-SHARED - -; CHECK-SHARED: - Type: MEMORY -; CHECK-SHARED-NEXT: Memories: -; CHECK-SHARED-NEXT: - Flags: [ HAS_MAX, IS_SHARED ] -; CHECK-SHARED-NEXT: Initial: 0x2 -; CHECK-SHARED-NEXT: Maximum: 0x2 - -; RUN: wasm-ld --relocatable -o %t_reloc.wasm %t32.o %t.hello32.o -; RUN: obj2yaml %t_reloc.wasm | FileCheck %s -check-prefix=RELOC - -; RELOC: - Type: DATA -; RELOC-NEXT: 
Relocations: -; RELOC-NEXT: - Type: R_WASM_MEMORY_ADDR_I32 -; RELOC-NEXT: Index: 3 -; RELOC-NEXT: Offset: 0x24 -; RELOC-NEXT: - Type: R_WASM_MEMORY_ADDR_I32 -; RELOC-NEXT: Index: 4 -; RELOC-NEXT: Offset: 0x2D -; RELOC-NEXT: Addend: 4 -; RELOC-NEXT: Segments: -; RELOC-NEXT: - SectionOffset: 6 -; RELOC-NEXT: InitFlags: 0 -; RELOC-NEXT: Offset: -; RELOC-NEXT: Opcode: I32_CONST -; RELOC-NEXT: Value: 0 -; RELOC-NEXT: Content: 68656C6C6F0A00 -; RELOC-NEXT: - SectionOffset: 18 -; RELOC-NEXT: InitFlags: 0 -; RELOC-NEXT: Offset: -; RELOC-NEXT: Opcode: I32_CONST -; RELOC-NEXT: Value: 8 -; RELOC-NEXT: Content: '01000000' -; RELOC-NEXT: - SectionOffset: 27 -; RELOC-NEXT: InitFlags: 0 -; RELOC-NEXT: Offset: -; RELOC-NEXT: Opcode: I32_CONST -; RELOC-NEXT: Value: 16 -; RELOC-NEXT: Content: '03000000' -; RELOC-NEXT: - SectionOffset: 36 -; RELOC-NEXT: InitFlags: 0 -; RELOC-NEXT: Offset: -; RELOC-NEXT: Opcode: I32_CONST -; RELOC-NEXT: Value: 24 -; RELOC-NEXT: Content: '00000000' -; RELOC-NEXT: - SectionOffset: 45 -; RELOC-NEXT: InitFlags: 0 -; RELOC-NEXT: Offset: -; RELOC-NEXT: Opcode: I32_CONST -; RELOC-NEXT: Value: 28 -; RELOC-NEXT: Content: '24000000' -; RELOC-NEXT: - SectionOffset: 54 -; RELOC-NEXT: InitFlags: 0 -; RELOC-NEXT: Offset: -; RELOC-NEXT: Opcode: I32_CONST -; RELOC-NEXT: Value: 32 -; RELOC-NEXT: Content: '0000000000000000' - -; RELOC: SymbolTable: -; RELOC-NEXT: - Index: 0 -; RELOC-NEXT: Kind: DATA -; RELOC-NEXT: Name: foo -; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN ] -; RELOC-NEXT: Segment: 1 -; RELOC-NEXT: Size: 4 -; RELOC-NEXT: - Index: 1 -; RELOC-NEXT: Kind: DATA -; RELOC-NEXT: Name: aligned_bar -; RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN ] -; RELOC-NEXT: Segment: 2 -; RELOC-NEXT: Size: 4 -; RELOC-NEXT: - Index: 2 -; RELOC-NEXT: Kind: DATA -; RELOC-NEXT: Name: external_ref -; RELOC-NEXT: Flags: [ ] -; RELOC-NEXT: Segment: 3 -; RELOC-NEXT: Size: 4 -; RELOC-NEXT: - Index: 3 -; RELOC-NEXT: Kind: DATA -; RELOC-NEXT: Name: hello_str -; RELOC-NEXT: Flags: [ ] -; RELOC-NEXT: 
Segment: 0 -; RELOC-NEXT: Size: 7 diff --git a/lld/test/wasm/data-layout.s b/lld/test/wasm/data-layout.s new file mode 100644 index 000000000000..72a859b8a0a1 --- /dev/null +++ b/lld/test/wasm/data-layout.s @@ -0,0 +1,195 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/hello.s -o %t.hello32.o +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %s -o %t32.o +# RUN: wasm-ld -m wasm32 -no-gc-sections --export=__data_end --export=__heap_base --allow-undefined --no-entry -o %t32.wasm %t32.o %t.hello32.o +# RUN: obj2yaml %t32.wasm | FileCheck --check-prefixes CHECK,CHK32 %s +# +# RUN: llvm-mc -filetype=obj -triple=wasm64-unknown-unknown %p/Inputs/hello.s -o %t.hello64.o +# RUN: llvm-mc -filetype=obj -triple=wasm64-unknown-unknown %s -o %t64.o +# RUN: wasm-ld -m wasm64 -no-gc-sections --export=__data_end --export=__heap_base --allow-undefined --no-entry -o %t64.wasm %t64.o %t.hello64.o +# RUN: obj2yaml %t64.wasm | FileCheck --check-prefixes CHECK,CHK64 %s + + .section .data.foo,"",@ + .globl foo + .hidden foo + .p2align 2 +foo: + .int32 1 + .size foo, 4 + + + .section .data.aligned_bar,"",@ + .globl aligned_bar + .hidden aligned_bar + .p2align 4 +aligned_bar: + .int32 3 + .size aligned_bar, 4 + + + .section .data.external_ref,"",@ + .globl external_ref + .p2align 3 +external_ref: + .int32 hello_str + .size external_ref, 4 + + + .section .bss.local_struct,"",@ + .globl local_struct + .hidden local_struct + .p2align 2 +local_struct: + .skip 8 + .size local_struct, 8 + + + .section .data.local_struct_internal_ptr,"",@ + .globl local_struct_internal_ptr + .hidden local_struct_internal_ptr + .p2align 2 +local_struct_internal_ptr: + .int32 local_struct+4 + .size local_struct_internal_ptr, 4 + .size hello_str, 4 + +# CHECK: - Type: MEMORY +# CHECK-NEXT: Memories: +# CHK64-NEXT: - Flags: [ IS_64 ] +# CHECK-NEXT: Initial: 0x2 +# CHECK-NEXT: - Type: GLOBAL +# CHECK-NEXT: Globals: +# CHECK-NEXT: - Index: 0 +# CHK32-NEXT: Type: I32 +# CHK64-NEXT: 
Type: I64 +# CHECK-NEXT: Mutable: true +# CHECK-NEXT: InitExpr: +# CHK32-NEXT: Opcode: I32_CONST +# CHK64-NEXT: Opcode: I64_CONST +# CHECK-NEXT: Value: 66624 +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Type: I32 +# CHECK-NEXT: Mutable: false +# CHECK-NEXT: InitExpr: +# CHECK-NEXT: Opcode: I32_CONST +# CHECK-NEXT: Value: 1080 +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Type: I32 +# CHECK-NEXT: Mutable: false +# CHECK-NEXT: InitExpr: +# CHECK-NEXT: Opcode: I32_CONST +# CHECK-NEXT: Value: 66624 + +# CHECK: - Type: DATA +# CHECK-NEXT: Segments: +# CHECK-NEXT: - SectionOffset: 7 +# CHECK-NEXT: InitFlags: 0 +# CHECK-NEXT: Offset: +# CHK32-NEXT: Opcode: I32_CONST +# CHK64-NEXT: Opcode: I64_CONST +# CHECK-NEXT: Value: 1024 +# CHECK-NEXT: Content: 68656C6C6F0A00 +# CHECK-NEXT: - SectionOffset: 20 +# CHECK-NEXT: InitFlags: 0 +# CHECK-NEXT: Offset: +# CHK32-NEXT: Opcode: I32_CONST +# CHK64-NEXT: Opcode: I64_CONST +# CHECK-NEXT: Value: 1040 + + +# RUN: wasm-ld -no-gc-sections --allow-undefined --no-entry \ +# RUN: --initial-memory=131072 --max-memory=131072 -o %t_max.wasm %t32.o \ +# RUN: %t.hello32.o +# RUN: obj2yaml %t_max.wasm | FileCheck %s -check-prefix=CHECK-MAX + +# CHECK-MAX: - Type: MEMORY +# CHECK-MAX-NEXT: Memories: +# CHECK-MAX-NEXT: - Flags: [ HAS_MAX ] +# CHECK-MAX-NEXT: Initial: 0x2 +# CHECK-MAX-NEXT: Maximum: 0x2 + +# RUN: wasm-ld -no-gc-sections --allow-undefined --no-entry --shared-memory \ +# RUN: --features=atomics,bulk-memory --initial-memory=131072 \ +# RUN: --max-memory=131072 -o %t_max.wasm %t32.o %t.hello32.o +# RUN: obj2yaml %t_max.wasm | FileCheck %s -check-prefix=CHECK-SHARED + +# CHECK-SHARED: - Type: MEMORY +# CHECK-SHARED-NEXT: Memories: +# CHECK-SHARED-NEXT: - Flags: [ HAS_MAX, IS_SHARED ] +# CHECK-SHARED-NEXT: Initial: 0x2 +# CHECK-SHARED-NEXT: Maximum: 0x2 + +# XUN: wasm-ld --relocatable -o %t_reloc.wasm %t32.o %t.hello32.o +# XUN: obj2yaml %t_reloc.wasm | FileCheck %s -check-prefix=RELOC + +# RELOC: - Type: DATA +# RELOC-NEXT: Relocations: +# 
RELOC-NEXT: - Type: R_WASM_MEMORY_ADDR_I32 +# RELOC-NEXT: Index: 3 +# RELOC-NEXT: Offset: 0x24 +# RELOC-NEXT: - Type: R_WASM_MEMORY_ADDR_I32 +# RELOC-NEXT: Index: 4 +# RELOC-NEXT: Offset: 0x2D +# RELOC-NEXT: Addend: 4 +# RELOC-NEXT: Segments: +# RELOC-NEXT: - SectionOffset: 6 +# RELOC-NEXT: InitFlags: 0 +# RELOC-NEXT: Offset: +# RELOC-NEXT: Opcode: I32_CONST +# RELOC-NEXT: Value: 0 +# RELOC-NEXT: Content: 68656C6C6F0A00 +# RELOC-NEXT: - SectionOffset: 18 +# RELOC-NEXT: InitFlags: 0 +# RELOC-NEXT: Offset: +# RELOC-NEXT: Opcode: I32_CONST +# RELOC-NEXT: Value: 8 +# RELOC-NEXT: Content: '01000000' +# RELOC-NEXT: - SectionOffset: 27 +# RELOC-NEXT: InitFlags: 0 +# RELOC-NEXT: Offset: +# RELOC-NEXT: Opcode: I32_CONST +# RELOC-NEXT: Value: 16 +# RELOC-NEXT: Content: '03000000' +# RELOC-NEXT: - SectionOffset: 36 +# RELOC-NEXT: InitFlags: 0 +# RELOC-NEXT: Offset: +# RELOC-NEXT: Opcode: I32_CONST +# RELOC-NEXT: Value: 24 +# RELOC-NEXT: Content: '00000000' +# RELOC-NEXT: - SectionOffset: 45 +# RELOC-NEXT: InitFlags: 0 +# RELOC-NEXT: Offset: +# RELOC-NEXT: Opcode: I32_CONST +# RELOC-NEXT: Value: 28 +# RELOC-NEXT: Content: '24000000' +# RELOC-NEXT: - SectionOffset: 54 +# RELOC-NEXT: InitFlags: 0 +# RELOC-NEXT: Offset: +# RELOC-NEXT: Opcode: I32_CONST +# RELOC-NEXT: Value: 32 +# RELOC-NEXT: Content: '0000000000000000' + +# RELOC: SymbolTable: +# RELOC-NEXT: - Index: 0 +# RELOC-NEXT: Kind: DATA +# RELOC-NEXT: Name: foo +# RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN ] +# RELOC-NEXT: Segment: 1 +# RELOC-NEXT: Size: 4 +# RELOC-NEXT: - Index: 1 +# RELOC-NEXT: Kind: DATA +# RELOC-NEXT: Name: aligned_bar +# RELOC-NEXT: Flags: [ VISIBILITY_HIDDEN ] +# RELOC-NEXT: Segment: 2 +# RELOC-NEXT: Size: 4 +# RELOC-NEXT: - Index: 2 +# RELOC-NEXT: Kind: DATA +# RELOC-NEXT: Name: external_ref +# RELOC-NEXT: Flags: [ ] +# RELOC-NEXT: Segment: 3 +# RELOC-NEXT: Size: 4 +# RELOC-NEXT: - Index: 3 +# RELOC-NEXT: Kind: DATA +# RELOC-NEXT: Name: hello_str +# RELOC-NEXT: Flags: [ ] +# RELOC-NEXT: Segment: 0 +# 
RELOC-NEXT: Size: 7 From 7d09e1d7cf27ce781e83f9d388a7a3e1e6487ead Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Fri, 26 Feb 2021 16:54:15 -0800 Subject: [PATCH 004/784] [lld][WebAssembly] Minor refactor in preparation for SHF_STRINGS support. NFC. A couple of small changes to match the ELF linker in preparation for adding support for string merging. Differential Revision: https://reviews.llvm.org/D97654 --- lld/wasm/InputChunks.h | 12 +++++++----- lld/wasm/OutputSegment.h | 4 ++-- lld/wasm/Writer.cpp | 5 +++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/lld/wasm/InputChunks.h b/lld/wasm/InputChunks.h index 77a057395a48..fe30fea62d1c 100644 --- a/lld/wasm/InputChunks.h +++ b/lld/wasm/InputChunks.h @@ -42,7 +42,7 @@ class InputChunk { virtual uint32_t getSize() const { return data().size(); } virtual uint32_t getInputSize() const { return getSize(); }; - virtual void writeTo(uint8_t *sectionStart) const; + virtual void writeTo(uint8_t *buf) const; ArrayRef getRelocations() const { return relocations; } void setRelocations(ArrayRef rs) { relocations = rs; } @@ -98,13 +98,14 @@ class InputChunk { class InputSegment : public InputChunk { public: InputSegment(const WasmSegment &seg, ObjFile *f) - : InputChunk(f, InputChunk::DataSegment), segment(seg) {} + : InputChunk(f, InputChunk::DataSegment), segment(seg) { + alignment = segment.Data.Alignment; + } static bool classof(const InputChunk *c) { return c->kind() == DataSegment; } void generateRelocationCode(raw_ostream &os) const; - uint32_t getAlignment() const { return segment.Data.Alignment; } StringRef getName() const override { return segment.Data.Name; } StringRef getDebugName() const override { return StringRef(); } uint32_t getComdat() const override { return segment.Data.Comdat; } @@ -114,7 +115,8 @@ class InputSegment : public InputChunk { uint64_t getVA(uint64_t offset = 0) const; const OutputSegment *outputSeg = nullptr; - int32_t outputSegmentOffset = 0; + uint32_t outputSegmentOffset = 0; +
uint32_t alignment = 0; protected: ArrayRef data() const override { return segment.Data.Content; } @@ -137,7 +139,7 @@ class InputFunction : public InputChunk { c->kind() == InputChunk::SyntheticFunction; } - void writeTo(uint8_t *sectionStart) const override; + void writeTo(uint8_t *buf) const override; StringRef getName() const override { return function->SymbolName; } StringRef getDebugName() const override { return function->DebugName; } llvm::Optional getExportName() const { diff --git a/lld/wasm/OutputSegment.h b/lld/wasm/OutputSegment.h index f6985ce855e6..30553b9883e2 100644 --- a/lld/wasm/OutputSegment.h +++ b/lld/wasm/OutputSegment.h @@ -22,8 +22,8 @@ class OutputSegment { public: OutputSegment(StringRef n) : name(n) {} - void addInputSegment(InputSegment *inSeg, uint32_t forceAlignment = 0) { - uint32_t segAlign = std::max(forceAlignment, inSeg->getAlignment()); + void addInputSegment(InputSegment *inSeg) { + uint32_t segAlign = inSeg->alignment; alignment = std::max(alignment, segAlign); inputSegments.push_back(inSeg); size = llvm::alignTo(size, 1ULL << segAlign); diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index af2cbdb7146f..a1bd142ce83a 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -890,12 +890,13 @@ void Writer::combineOutputSegments() { } bool first = true; for (InputSegment *inSeg : s->inputSegments) { - uint32_t alignment = first ? 
s->alignment : 0; + if (first) + inSeg->alignment = std::max(inSeg->alignment, s->alignment); first = false; #ifndef NDEBUG uint64_t oldVA = inSeg->getVA(); #endif - combined->addInputSegment(inSeg, alignment); + combined->addInputSegment(inSeg); #ifndef NDEBUG uint64_t newVA = inSeg->getVA(); assert(oldVA == newVA); From 103928252e2fa178783059a3bec8b92af55ffaf4 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Mon, 1 Mar 2021 16:49:26 -0800 Subject: [PATCH 005/784] Fix memleak for 5de2d189e6ad4 Fix typo `release` -> `reset` --- llvm/lib/MC/MCContext.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index 50f412789527..b0eb47069d7e 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -96,7 +96,7 @@ void MCContext::initInlineSourceManager() { void MCContext::reset() { SrcMgr = nullptr; - InlineSrcMgr.release(); + InlineSrcMgr.reset(); LocInfos.clear(); DiagHandler = defaultDiagHandler; From b17d46430fce665d23661e23ade3ca57c3791836 Mon Sep 17 00:00:00 2001 From: Kamlesh Kumar Date: Tue, 2 Mar 2021 06:57:54 +0530 Subject: [PATCH 006/784] [libunwind] This adds support in libunwind for rv32 hard float and soft-float for both rv32 and rv64. 
Differential Revision: https://reviews.llvm.org/D80690 --- libunwind/include/__libunwind_config.h | 17 +++- libunwind/src/Registers.hpp | 84 +++++++++++---- libunwind/src/UnwindRegistersRestore.S | 132 ++++++++++++------------ libunwind/src/UnwindRegistersSave.S | 136 ++++++++++++------------- libunwind/src/assembly.h | 29 ++++++ libunwind/src/libunwind.cpp | 2 +- 6 files changed, 239 insertions(+), 161 deletions(-) diff --git a/libunwind/include/__libunwind_config.h b/libunwind/include/__libunwind_config.h index 34ac6f717d6e..a50ba053884b 100644 --- a/libunwind/include/__libunwind_config.h +++ b/libunwind/include/__libunwind_config.h @@ -131,12 +131,19 @@ #define _LIBUNWIND_CONTEXT_SIZE 16 #define _LIBUNWIND_CURSOR_SIZE 23 # elif defined(__riscv) -# if __riscv_xlen == 64 -# define _LIBUNWIND_TARGET_RISCV 1 -# define _LIBUNWIND_CONTEXT_SIZE 64 -# define _LIBUNWIND_CURSOR_SIZE 76 +# define _LIBUNWIND_TARGET_RISCV 1 +# if defined(__riscv_flen) +# define RISCV_FLEN __riscv_flen # else -# error "Unsupported RISC-V ABI" +# define RISCV_FLEN 0 +# endif +# define _LIBUNWIND_CONTEXT_SIZE (32 * (__riscv_xlen + RISCV_FLEN) / 64) +# if __riscv_xlen == 32 +# define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 7) +# elif __riscv_xlen == 64 +# define _LIBUNWIND_CURSOR_SIZE (_LIBUNWIND_CONTEXT_SIZE + 12) +# else +# error "Unsupported RISC-V ABI" # endif # define _LIBUNWIND_HIGHEST_DWARF_REGISTER _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV # elif defined(__ve__) diff --git a/libunwind/src/Registers.hpp b/libunwind/src/Registers.hpp index de8e067b9d0c..1d23f97aedfb 100644 --- a/libunwind/src/Registers.hpp +++ b/libunwind/src/Registers.hpp @@ -3728,19 +3728,42 @@ inline const char *Registers_hexagon::getRegisterName(int regNum) { #if defined(_LIBUNWIND_TARGET_RISCV) -/// Registers_riscv holds the register state of a thread in a 64-bit RISC-V +/// Registers_riscv holds the register state of a thread in a RISC-V /// process. 
+ +# if __riscv_xlen == 32 +typedef uint32_t reg_t; +# elif __riscv_xlen == 64 +typedef uint64_t reg_t; +# else +# error "Unsupported __riscv_xlen" +# endif + +# if defined(__riscv_flen) +# if __riscv_flen == 64 +typedef double fp_t; +# elif __riscv_flen == 32 +typedef float fp_t; +# else +# error "Unsupported __riscv_flen" +# endif +# else +// This is just to suppress an undeclared-identifier error for fp_t. +typedef double fp_t; +# endif + +/// Registers_riscv holds the register state of a thread. class _LIBUNWIND_HIDDEN Registers_riscv { public: Registers_riscv(); Registers_riscv(const void *registers); bool validRegister(int num) const; - uint64_t getRegister(int num) const; - void setRegister(int num, uint64_t value); + reg_t getRegister(int num) const; + void setRegister(int num, reg_t value); bool validFloatRegister(int num) const; - double getFloatRegister(int num) const; - void setFloatRegister(int num, double value); + fp_t getFloatRegister(int num) const; + void setFloatRegister(int num, fp_t value); bool validVectorRegister(int num) const; v128 getVectorRegister(int num) const; void setVectorRegister(int num, v128 value); @@ -3749,31 +3772,45 @@ class _LIBUNWIND_HIDDEN Registers_riscv { static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_RISCV; } static int getArch() { return REGISTERS_RISCV; } - uint64_t getSP() const { return _registers[2]; } - void setSP(uint64_t value) { _registers[2] = value; } - uint64_t getIP() const { return _registers[0]; } - void setIP(uint64_t value) { _registers[0] = value; } + reg_t getSP() const { return _registers[2]; } + void setSP(reg_t value) { _registers[2] = value; } + reg_t getIP() const { return _registers[0]; } + void setIP(reg_t value) { _registers[0] = value; } private: // _registers[0] holds the pc - uint64_t _registers[32]; - double _floats[32]; + reg_t _registers[32]; +# if defined(__riscv_flen) + fp_t _floats[32]; +# endif }; inline Registers_riscv::Registers_riscv(const void *registers) {
static_assert((check_fit::does_fit), "riscv registers do not fit into unw_context_t"); memcpy(&_registers, registers, sizeof(_registers)); +# if __riscv_xlen == 32 + static_assert(sizeof(_registers) == 0x80, + "expected float registers to be at offset 128"); +# elif __riscv_xlen == 64 static_assert(sizeof(_registers) == 0x100, "expected float registers to be at offset 256"); +# else +# error "Unexpected float registers." +# endif + +# if defined(__riscv_flen) memcpy(_floats, static_cast(registers) + sizeof(_registers), sizeof(_floats)); +# endif } inline Registers_riscv::Registers_riscv() { memset(&_registers, 0, sizeof(_registers)); +# if defined(__riscv_flen) memset(&_floats, 0, sizeof(_floats)); +# endif } inline bool Registers_riscv::validRegister(int regNum) const { @@ -3788,7 +3825,7 @@ inline bool Registers_riscv::validRegister(int regNum) const { return true; } -inline uint64_t Registers_riscv::getRegister(int regNum) const { +inline reg_t Registers_riscv::getRegister(int regNum) const { if (regNum == UNW_REG_IP) return _registers[0]; if (regNum == UNW_REG_SP) @@ -3800,7 +3837,7 @@ inline uint64_t Registers_riscv::getRegister(int regNum) const { _LIBUNWIND_ABORT("unsupported riscv register"); } -inline void Registers_riscv::setRegister(int regNum, uint64_t value) { +inline void Registers_riscv::setRegister(int regNum, reg_t value) { if (regNum == UNW_REG_IP) _registers[0] = value; else if (regNum == UNW_REG_SP) @@ -3954,32 +3991,37 @@ inline const char *Registers_riscv::getRegisterName(int regNum) { } inline bool Registers_riscv::validFloatRegister(int regNum) const { +# if defined(__riscv_flen) if (regNum < UNW_RISCV_F0) return false; if (regNum > UNW_RISCV_F31) return false; return true; +# else + (void)regNum; + return false; +# endif } -inline double Registers_riscv::getFloatRegister(int regNum) const { -#if defined(__riscv_flen) && __riscv_flen == 64 +inline fp_t Registers_riscv::getFloatRegister(int regNum) const { +# if defined(__riscv_flen) 
assert(validFloatRegister(regNum)); return _floats[regNum - UNW_RISCV_F0]; -#else +# else (void)regNum; _LIBUNWIND_ABORT("libunwind not built with float support"); -#endif +# endif } -inline void Registers_riscv::setFloatRegister(int regNum, double value) { -#if defined(__riscv_flen) && __riscv_flen == 64 +inline void Registers_riscv::setFloatRegister(int regNum, fp_t value) { +# if defined(__riscv_flen) assert(validFloatRegister(regNum)); _floats[regNum - UNW_RISCV_F0] = value; -#else +# else (void)regNum; (void)value; _LIBUNWIND_ABORT("libunwind not built with float support"); -#endif +# endif } inline bool Registers_riscv::validVectorRegister(int) const { diff --git a/libunwind/src/UnwindRegistersRestore.S b/libunwind/src/UnwindRegistersRestore.S index 289afe98b0b2..6d12d93cb102 100644 --- a/libunwind/src/UnwindRegistersRestore.S +++ b/libunwind/src/UnwindRegistersRestore.S @@ -1072,7 +1072,7 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_sparc6jumptoEv) jmp %o7 nop -#elif defined(__riscv) && __riscv_xlen == 64 +#elif defined(__riscv) // // void libunwind::Registers_riscv::jumpto() @@ -1082,74 +1082,74 @@ DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_sparc6jumptoEv) // .p2align 2 DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_riscv6jumptoEv) -#if defined(__riscv_flen) && __riscv_flen == 64 - fld f0, (8 * 32 + 8 * 0)(a0) - fld f1, (8 * 32 + 8 * 1)(a0) - fld f2, (8 * 32 + 8 * 2)(a0) - fld f3, (8 * 32 + 8 * 3)(a0) - fld f4, (8 * 32 + 8 * 4)(a0) - fld f5, (8 * 32 + 8 * 5)(a0) - fld f6, (8 * 32 + 8 * 6)(a0) - fld f7, (8 * 32 + 8 * 7)(a0) - fld f8, (8 * 32 + 8 * 8)(a0) - fld f9, (8 * 32 + 8 * 9)(a0) - fld f10, (8 * 32 + 8 * 10)(a0) - fld f11, (8 * 32 + 8 * 11)(a0) - fld f12, (8 * 32 + 8 * 12)(a0) - fld f13, (8 * 32 + 8 * 13)(a0) - fld f14, (8 * 32 + 8 * 14)(a0) - fld f15, (8 * 32 + 8 * 15)(a0) - fld f16, (8 * 32 + 8 * 16)(a0) - fld f17, (8 * 32 + 8 * 17)(a0) - fld f18, (8 * 32 + 8 * 18)(a0) - fld f19, (8 * 32 + 8 * 19)(a0) - fld f20, (8 * 32 + 8 * 
20)(a0) - fld f21, (8 * 32 + 8 * 21)(a0) - fld f22, (8 * 32 + 8 * 22)(a0) - fld f23, (8 * 32 + 8 * 23)(a0) - fld f24, (8 * 32 + 8 * 24)(a0) - fld f25, (8 * 32 + 8 * 25)(a0) - fld f26, (8 * 32 + 8 * 26)(a0) - fld f27, (8 * 32 + 8 * 27)(a0) - fld f28, (8 * 32 + 8 * 28)(a0) - fld f29, (8 * 32 + 8 * 29)(a0) - fld f30, (8 * 32 + 8 * 30)(a0) - fld f31, (8 * 32 + 8 * 31)(a0) -#endif +# if defined(__riscv_flen) + FLOAD f0, (RISCV_FOFFSET + RISCV_FSIZE * 0)(a0) + FLOAD f1, (RISCV_FOFFSET + RISCV_FSIZE * 1)(a0) + FLOAD f2, (RISCV_FOFFSET + RISCV_FSIZE * 2)(a0) + FLOAD f3, (RISCV_FOFFSET + RISCV_FSIZE * 3)(a0) + FLOAD f4, (RISCV_FOFFSET + RISCV_FSIZE * 4)(a0) + FLOAD f5, (RISCV_FOFFSET + RISCV_FSIZE * 5)(a0) + FLOAD f6, (RISCV_FOFFSET + RISCV_FSIZE * 6)(a0) + FLOAD f7, (RISCV_FOFFSET + RISCV_FSIZE * 7)(a0) + FLOAD f8, (RISCV_FOFFSET + RISCV_FSIZE * 8)(a0) + FLOAD f9, (RISCV_FOFFSET + RISCV_FSIZE * 9)(a0) + FLOAD f10, (RISCV_FOFFSET + RISCV_FSIZE * 10)(a0) + FLOAD f11, (RISCV_FOFFSET + RISCV_FSIZE * 11)(a0) + FLOAD f12, (RISCV_FOFFSET + RISCV_FSIZE * 12)(a0) + FLOAD f13, (RISCV_FOFFSET + RISCV_FSIZE * 13)(a0) + FLOAD f14, (RISCV_FOFFSET + RISCV_FSIZE * 14)(a0) + FLOAD f15, (RISCV_FOFFSET + RISCV_FSIZE * 15)(a0) + FLOAD f16, (RISCV_FOFFSET + RISCV_FSIZE * 16)(a0) + FLOAD f17, (RISCV_FOFFSET + RISCV_FSIZE * 17)(a0) + FLOAD f18, (RISCV_FOFFSET + RISCV_FSIZE * 18)(a0) + FLOAD f19, (RISCV_FOFFSET + RISCV_FSIZE * 19)(a0) + FLOAD f20, (RISCV_FOFFSET + RISCV_FSIZE * 20)(a0) + FLOAD f21, (RISCV_FOFFSET + RISCV_FSIZE * 21)(a0) + FLOAD f22, (RISCV_FOFFSET + RISCV_FSIZE * 22)(a0) + FLOAD f23, (RISCV_FOFFSET + RISCV_FSIZE * 23)(a0) + FLOAD f24, (RISCV_FOFFSET + RISCV_FSIZE * 24)(a0) + FLOAD f25, (RISCV_FOFFSET + RISCV_FSIZE * 25)(a0) + FLOAD f26, (RISCV_FOFFSET + RISCV_FSIZE * 26)(a0) + FLOAD f27, (RISCV_FOFFSET + RISCV_FSIZE * 27)(a0) + FLOAD f28, (RISCV_FOFFSET + RISCV_FSIZE * 28)(a0) + FLOAD f29, (RISCV_FOFFSET + RISCV_FSIZE * 29)(a0) + FLOAD f30, (RISCV_FOFFSET + RISCV_FSIZE * 30)(a0) 
+ FLOAD f31, (RISCV_FOFFSET + RISCV_FSIZE * 31)(a0) +# endif // x0 is zero - ld x1, (8 * 0)(a0) // restore pc into ra - ld x2, (8 * 2)(a0) - ld x3, (8 * 3)(a0) - ld x4, (8 * 4)(a0) - ld x5, (8 * 5)(a0) - ld x6, (8 * 6)(a0) - ld x7, (8 * 7)(a0) - ld x8, (8 * 8)(a0) - ld x9, (8 * 9)(a0) + ILOAD x1, (RISCV_ISIZE * 0)(a0) // restore pc into ra + ILOAD x2, (RISCV_ISIZE * 2)(a0) + ILOAD x3, (RISCV_ISIZE * 3)(a0) + ILOAD x4, (RISCV_ISIZE * 4)(a0) + ILOAD x5, (RISCV_ISIZE * 5)(a0) + ILOAD x6, (RISCV_ISIZE * 6)(a0) + ILOAD x7, (RISCV_ISIZE * 7)(a0) + ILOAD x8, (RISCV_ISIZE * 8)(a0) + ILOAD x9, (RISCV_ISIZE * 9)(a0) // skip a0 for now - ld x11, (8 * 11)(a0) - ld x12, (8 * 12)(a0) - ld x13, (8 * 13)(a0) - ld x14, (8 * 14)(a0) - ld x15, (8 * 15)(a0) - ld x16, (8 * 16)(a0) - ld x17, (8 * 17)(a0) - ld x18, (8 * 18)(a0) - ld x19, (8 * 19)(a0) - ld x20, (8 * 20)(a0) - ld x21, (8 * 21)(a0) - ld x22, (8 * 22)(a0) - ld x23, (8 * 23)(a0) - ld x24, (8 * 24)(a0) - ld x25, (8 * 25)(a0) - ld x26, (8 * 26)(a0) - ld x27, (8 * 27)(a0) - ld x28, (8 * 28)(a0) - ld x29, (8 * 29)(a0) - ld x30, (8 * 30)(a0) - ld x31, (8 * 31)(a0) - ld x10, (8 * 10)(a0) // restore a0 + ILOAD x11, (RISCV_ISIZE * 11)(a0) + ILOAD x12, (RISCV_ISIZE * 12)(a0) + ILOAD x13, (RISCV_ISIZE * 13)(a0) + ILOAD x14, (RISCV_ISIZE * 14)(a0) + ILOAD x15, (RISCV_ISIZE * 15)(a0) + ILOAD x16, (RISCV_ISIZE * 16)(a0) + ILOAD x17, (RISCV_ISIZE * 17)(a0) + ILOAD x18, (RISCV_ISIZE * 18)(a0) + ILOAD x19, (RISCV_ISIZE * 19)(a0) + ILOAD x20, (RISCV_ISIZE * 20)(a0) + ILOAD x21, (RISCV_ISIZE * 21)(a0) + ILOAD x22, (RISCV_ISIZE * 22)(a0) + ILOAD x23, (RISCV_ISIZE * 23)(a0) + ILOAD x24, (RISCV_ISIZE * 24)(a0) + ILOAD x25, (RISCV_ISIZE * 25)(a0) + ILOAD x26, (RISCV_ISIZE * 26)(a0) + ILOAD x27, (RISCV_ISIZE * 27)(a0) + ILOAD x28, (RISCV_ISIZE * 28)(a0) + ILOAD x29, (RISCV_ISIZE * 29)(a0) + ILOAD x30, (RISCV_ISIZE * 30)(a0) + ILOAD x31, (RISCV_ISIZE * 31)(a0) + ILOAD x10, (RISCV_ISIZE * 10)(a0) // restore a0 ret // jump to ra diff --git 
a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S index 94fc8365455d..ef39ac1a9b6d 100644 --- a/libunwind/src/UnwindRegistersSave.S +++ b/libunwind/src/UnwindRegistersSave.S @@ -1026,7 +1026,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) jmp %o7 clr %o0 // return UNW_ESUCCESS -#elif defined(__riscv) && __riscv_xlen == 64 +#elif defined(__riscv) # # extern int __unw_getcontext(unw_context_t* thread_state) @@ -1035,73 +1035,73 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) # thread_state pointer is in a0 # DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) - sd x1, (8 * 0)(a0) // store ra as pc - sd x1, (8 * 1)(a0) - sd x2, (8 * 2)(a0) - sd x3, (8 * 3)(a0) - sd x4, (8 * 4)(a0) - sd x5, (8 * 5)(a0) - sd x6, (8 * 6)(a0) - sd x7, (8 * 7)(a0) - sd x8, (8 * 8)(a0) - sd x9, (8 * 9)(a0) - sd x10, (8 * 10)(a0) - sd x11, (8 * 11)(a0) - sd x12, (8 * 12)(a0) - sd x13, (8 * 13)(a0) - sd x14, (8 * 14)(a0) - sd x15, (8 * 15)(a0) - sd x16, (8 * 16)(a0) - sd x17, (8 * 17)(a0) - sd x18, (8 * 18)(a0) - sd x19, (8 * 19)(a0) - sd x20, (8 * 20)(a0) - sd x21, (8 * 21)(a0) - sd x22, (8 * 22)(a0) - sd x23, (8 * 23)(a0) - sd x24, (8 * 24)(a0) - sd x25, (8 * 25)(a0) - sd x26, (8 * 26)(a0) - sd x27, (8 * 27)(a0) - sd x28, (8 * 28)(a0) - sd x29, (8 * 29)(a0) - sd x30, (8 * 30)(a0) - sd x31, (8 * 31)(a0) - -#if defined(__riscv_flen) && __riscv_flen == 64 - fsd f0, (8 * 32 + 8 * 0)(a0) - fsd f1, (8 * 32 + 8 * 1)(a0) - fsd f2, (8 * 32 + 8 * 2)(a0) - fsd f3, (8 * 32 + 8 * 3)(a0) - fsd f4, (8 * 32 + 8 * 4)(a0) - fsd f5, (8 * 32 + 8 * 5)(a0) - fsd f6, (8 * 32 + 8 * 6)(a0) - fsd f7, (8 * 32 + 8 * 7)(a0) - fsd f8, (8 * 32 + 8 * 8)(a0) - fsd f9, (8 * 32 + 8 * 9)(a0) - fsd f10, (8 * 32 + 8 * 10)(a0) - fsd f11, (8 * 32 + 8 * 11)(a0) - fsd f12, (8 * 32 + 8 * 12)(a0) - fsd f13, (8 * 32 + 8 * 13)(a0) - fsd f14, (8 * 32 + 8 * 14)(a0) - fsd f15, (8 * 32 + 8 * 15)(a0) - fsd f16, (8 * 32 + 8 * 16)(a0) - fsd f17, (8 * 32 + 8 * 17)(a0) - fsd f18, (8 * 32 + 8 * 18)(a0) - fsd f19, (8 * 32 + 8 * 
19)(a0) - fsd f20, (8 * 32 + 8 * 20)(a0) - fsd f21, (8 * 32 + 8 * 21)(a0) - fsd f22, (8 * 32 + 8 * 22)(a0) - fsd f23, (8 * 32 + 8 * 23)(a0) - fsd f24, (8 * 32 + 8 * 24)(a0) - fsd f25, (8 * 32 + 8 * 25)(a0) - fsd f26, (8 * 32 + 8 * 26)(a0) - fsd f27, (8 * 32 + 8 * 27)(a0) - fsd f28, (8 * 32 + 8 * 28)(a0) - fsd f29, (8 * 32 + 8 * 29)(a0) - fsd f30, (8 * 32 + 8 * 30)(a0) - fsd f31, (8 * 32 + 8 * 31)(a0) -#endif + ISTORE x1, (RISCV_ISIZE * 0)(a0) // store ra as pc + ISTORE x1, (RISCV_ISIZE * 1)(a0) + ISTORE x2, (RISCV_ISIZE * 2)(a0) + ISTORE x3, (RISCV_ISIZE * 3)(a0) + ISTORE x4, (RISCV_ISIZE * 4)(a0) + ISTORE x5, (RISCV_ISIZE * 5)(a0) + ISTORE x6, (RISCV_ISIZE * 6)(a0) + ISTORE x7, (RISCV_ISIZE * 7)(a0) + ISTORE x8, (RISCV_ISIZE * 8)(a0) + ISTORE x9, (RISCV_ISIZE * 9)(a0) + ISTORE x10, (RISCV_ISIZE * 10)(a0) + ISTORE x11, (RISCV_ISIZE * 11)(a0) + ISTORE x12, (RISCV_ISIZE * 12)(a0) + ISTORE x13, (RISCV_ISIZE * 13)(a0) + ISTORE x14, (RISCV_ISIZE * 14)(a0) + ISTORE x15, (RISCV_ISIZE * 15)(a0) + ISTORE x16, (RISCV_ISIZE * 16)(a0) + ISTORE x17, (RISCV_ISIZE * 17)(a0) + ISTORE x18, (RISCV_ISIZE * 18)(a0) + ISTORE x19, (RISCV_ISIZE * 19)(a0) + ISTORE x20, (RISCV_ISIZE * 20)(a0) + ISTORE x21, (RISCV_ISIZE * 21)(a0) + ISTORE x22, (RISCV_ISIZE * 22)(a0) + ISTORE x23, (RISCV_ISIZE * 23)(a0) + ISTORE x24, (RISCV_ISIZE * 24)(a0) + ISTORE x25, (RISCV_ISIZE * 25)(a0) + ISTORE x26, (RISCV_ISIZE * 26)(a0) + ISTORE x27, (RISCV_ISIZE * 27)(a0) + ISTORE x28, (RISCV_ISIZE * 28)(a0) + ISTORE x29, (RISCV_ISIZE * 29)(a0) + ISTORE x30, (RISCV_ISIZE * 30)(a0) + ISTORE x31, (RISCV_ISIZE * 31)(a0) + +# if defined(__riscv_flen) + FSTORE f0, (RISCV_FOFFSET + RISCV_FSIZE * 0)(a0) + FSTORE f1, (RISCV_FOFFSET + RISCV_FSIZE * 1)(a0) + FSTORE f2, (RISCV_FOFFSET + RISCV_FSIZE * 2)(a0) + FSTORE f3, (RISCV_FOFFSET + RISCV_FSIZE * 3)(a0) + FSTORE f4, (RISCV_FOFFSET + RISCV_FSIZE * 4)(a0) + FSTORE f5, (RISCV_FOFFSET + RISCV_FSIZE * 5)(a0) + FSTORE f6, (RISCV_FOFFSET + RISCV_FSIZE * 6)(a0) + FSTORE f7, 
(RISCV_FOFFSET + RISCV_FSIZE * 7)(a0) + FSTORE f8, (RISCV_FOFFSET + RISCV_FSIZE * 8)(a0) + FSTORE f9, (RISCV_FOFFSET + RISCV_FSIZE * 9)(a0) + FSTORE f10, (RISCV_FOFFSET + RISCV_FSIZE * 10)(a0) + FSTORE f11, (RISCV_FOFFSET + RISCV_FSIZE * 11)(a0) + FSTORE f12, (RISCV_FOFFSET + RISCV_FSIZE * 12)(a0) + FSTORE f13, (RISCV_FOFFSET + RISCV_FSIZE * 13)(a0) + FSTORE f14, (RISCV_FOFFSET + RISCV_FSIZE * 14)(a0) + FSTORE f15, (RISCV_FOFFSET + RISCV_FSIZE * 15)(a0) + FSTORE f16, (RISCV_FOFFSET + RISCV_FSIZE * 16)(a0) + FSTORE f17, (RISCV_FOFFSET + RISCV_FSIZE * 17)(a0) + FSTORE f18, (RISCV_FOFFSET + RISCV_FSIZE * 18)(a0) + FSTORE f19, (RISCV_FOFFSET + RISCV_FSIZE * 19)(a0) + FSTORE f20, (RISCV_FOFFSET + RISCV_FSIZE * 20)(a0) + FSTORE f21, (RISCV_FOFFSET + RISCV_FSIZE * 21)(a0) + FSTORE f22, (RISCV_FOFFSET + RISCV_FSIZE * 22)(a0) + FSTORE f23, (RISCV_FOFFSET + RISCV_FSIZE * 23)(a0) + FSTORE f24, (RISCV_FOFFSET + RISCV_FSIZE * 24)(a0) + FSTORE f25, (RISCV_FOFFSET + RISCV_FSIZE * 25)(a0) + FSTORE f26, (RISCV_FOFFSET + RISCV_FSIZE * 26)(a0) + FSTORE f27, (RISCV_FOFFSET + RISCV_FSIZE * 27)(a0) + FSTORE f28, (RISCV_FOFFSET + RISCV_FSIZE * 28)(a0) + FSTORE f29, (RISCV_FOFFSET + RISCV_FSIZE * 29)(a0) + FSTORE f30, (RISCV_FOFFSET + RISCV_FSIZE * 30)(a0) + FSTORE f31, (RISCV_FOFFSET + RISCV_FSIZE * 31)(a0) +# endif li a0, 0 // return UNW_ESUCCESS ret // jump to ra diff --git a/libunwind/src/assembly.h b/libunwind/src/assembly.h index dcd38198501c..f5ca35c0c189 100644 --- a/libunwind/src/assembly.h +++ b/libunwind/src/assembly.h @@ -27,6 +27,35 @@ #define PPC64_OFFS_V 824 #elif defined(__APPLE__) && defined(__aarch64__) #define SEPARATOR %% +#elif defined(__riscv) +# define RISCV_ISIZE (__riscv_xlen / 8) +# define RISCV_FOFFSET (RISCV_ISIZE * 32) +# if defined(__riscv_flen) +# define RISCV_FSIZE (__riscv_flen / 8) +# endif + +# if __riscv_xlen == 64 +# define ILOAD ld +# define ISTORE sd +# elif __riscv_xlen == 32 +# define ILOAD lw +# define ISTORE sw +# else +# error "Unsupported 
__riscv_xlen" +# endif + +# if defined(__riscv_flen) +# if __riscv_flen == 64 +# define FLOAD fld +# define FSTORE fsd +# elif __riscv_flen == 32 +# define FLOAD flw +# define FSTORE fsw +# else +# error "Unsupported __riscv_flen" +# endif +# endif +# define SEPARATOR ; #else #define SEPARATOR ; #endif diff --git a/libunwind/src/libunwind.cpp b/libunwind/src/libunwind.cpp index c21461b1f480..eb2623e56430 100644 --- a/libunwind/src/libunwind.cpp +++ b/libunwind/src/libunwind.cpp @@ -60,7 +60,7 @@ _LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, # warning The MIPS architecture is not supported with this ABI and environment! #elif defined(__sparc__) # define REGISTER_KIND Registers_sparc -#elif defined(__riscv) && __riscv_xlen == 64 +#elif defined(__riscv) # define REGISTER_KIND Registers_riscv #elif defined(__ve__) # define REGISTER_KIND Registers_ve From c35105055ee4565ee6726d5b155538dd5c0307d3 Mon Sep 17 00:00:00 2001 From: Jian Cai Date: Mon, 1 Mar 2021 15:03:42 -0800 Subject: [PATCH 007/784] [ARM] support symbolic expressions as branch target in b.w Currently ARM backend validates the range of branch targets before the layout of fragments is finalized. This causes build failure if symbolic expressions are used, with the exception of a single symbolic value. For example, "b.w ." works but "b.w . + 2" currently fails to assemble. This fixes the issue by delaying this check (in ARMAsmParser::validateInstruction) of b.w instructions until the symbol expressions are resolved (in ARMAsmBackend::adjustFixupValue). 
Link: https://github.com/ClangBuiltLinux/linux/issues/1286 Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D97568 --- llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 5 ++++- llvm/test/MC/ARM/thumb2-b.w-target.s | 12 ++++++++++++ llvm/test/MC/ARM/thumb2-branch-ranges.s | 6 ++++++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MC/ARM/thumb2-b.w-target.s diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index b64637cfb37b..9a2d94e3972c 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -7950,7 +7950,10 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst, break; case ARM::t2B: { int op = (Operands[2]->isImm()) ? 2 : 3; - if (!static_cast(*Operands[op]).isSignedOffset<24, 1>()) + ARMOperand &Operand = static_cast(*Operands[op]); + // Delay the checks of symbolic expressions until they are resolved. + if (!isa(Operand.getImm()) && + !Operand.isSignedOffset<24, 1>()) return Error(Operands[op]->getStartLoc(), "branch target out of range"); break; } diff --git a/llvm/test/MC/ARM/thumb2-b.w-target.s b/llvm/test/MC/ARM/thumb2-b.w-target.s new file mode 100644 index 000000000000..1323730552f0 --- /dev/null +++ b/llvm/test/MC/ARM/thumb2-b.w-target.s @@ -0,0 +1,12 @@ +// RUN: llvm-mc -triple=thumbv7 -filetype=obj %s | llvm-objdump --triple=thumbv7 -d - | FileCheck %s + +.syntax unified + +// CHECK-LABEL: start +// CHECK-NEXT: b.w #16777208 +// CHECK-NEXT: b.w #2 +start: + b.w start - 1f + 0x1000000 +1: + b.w . 
+ (2f - 1b + 2) +2: diff --git a/llvm/test/MC/ARM/thumb2-branch-ranges.s b/llvm/test/MC/ARM/thumb2-branch-ranges.s index 6f537d85b449..83cb0259d741 100644 --- a/llvm/test/MC/ARM/thumb2-branch-ranges.s +++ b/llvm/test/MC/ARM/thumb2-branch-ranges.s @@ -94,3 +94,9 @@ start6: // CHECK: [[@LINE+2]]:{{[0-9]}}: error: Relocation out of range // CHECK-LABEL: beq.w start6 beq.w start6 + +start7: +// branch to thumb function resolved at assembly time +// CHECK: [[#@LINE+1]]:{{[0-9]}}: error: Relocation out of range + b.w start8 - start7 + 0x1000000 +start8: From 38a34e207f30747a4b0288d97ce67e422bf5f363 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 1 Mar 2021 19:23:26 -0600 Subject: [PATCH 008/784] [PowerPC] Use modulo arithmetic for vec_extract in altivec.h These interfaces are not covered in the ELFv2 ABI but are rather implemented to emulate those available in GCC/XLC. However, the ones in the other compilers are documented to perform modulo arithmetic on the element number. This patch just brings clang in line with the other compilers at -O0 (with optimization, clang already does the right thing). 
--- clang/lib/Headers/altivec.h | 58 +++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 4d50d47d51b5..402f3b389496 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -12915,73 +12915,75 @@ vec_vxor(vector bool long long __a, vector bool long long __b) { /* vec_extract */ static __inline__ signed char __ATTRS_o_ai vec_extract(vector signed char __a, - int __b) { - return __a[__b]; + unsigned int __b) { + return __a[__b & 0xf]; } static __inline__ unsigned char __ATTRS_o_ai -vec_extract(vector unsigned char __a, int __b) { - return __a[__b]; +vec_extract(vector unsigned char __a, unsigned int __b) { + return __a[__b & 0xf]; } static __inline__ unsigned char __ATTRS_o_ai vec_extract(vector bool char __a, - int __b) { - return __a[__b]; + unsigned int __b) { + return __a[__b & 0xf]; } static __inline__ signed short __ATTRS_o_ai vec_extract(vector signed short __a, - int __b) { - return __a[__b]; + unsigned int __b) { + return __a[__b & 0x7]; } static __inline__ unsigned short __ATTRS_o_ai -vec_extract(vector unsigned short __a, int __b) { - return __a[__b]; +vec_extract(vector unsigned short __a, unsigned int __b) { + return __a[__b & 0x7]; } static __inline__ unsigned short __ATTRS_o_ai vec_extract(vector bool short __a, - int __b) { - return __a[__b]; + unsigned int __b) { + return __a[__b & 0x7]; } static __inline__ signed int __ATTRS_o_ai vec_extract(vector signed int __a, - int __b) { - return __a[__b]; + unsigned int __b) { + return __a[__b & 0x3]; } static __inline__ unsigned int __ATTRS_o_ai vec_extract(vector unsigned int __a, - int __b) { - return __a[__b]; + unsigned int __b) { + return __a[__b & 0x3]; } static __inline__ unsigned int __ATTRS_o_ai vec_extract(vector bool int __a, - int __b) { - return __a[__b]; + unsigned int __b) { + return __a[__b & 0x3]; } #ifdef __VSX__ static __inline__ signed long long 
__ATTRS_o_ai -vec_extract(vector signed long long __a, int __b) { - return __a[__b]; +vec_extract(vector signed long long __a, unsigned int __b) { + return __a[__b & 0x1]; } static __inline__ unsigned long long __ATTRS_o_ai -vec_extract(vector unsigned long long __a, int __b) { - return __a[__b]; +vec_extract(vector unsigned long long __a, unsigned int __b) { + return __a[__b & 0x1]; } static __inline__ unsigned long long __ATTRS_o_ai -vec_extract(vector bool long long __a, int __b) { - return __a[__b]; +vec_extract(vector bool long long __a, unsigned int __b) { + return __a[__b & 0x1]; } -static __inline__ double __ATTRS_o_ai vec_extract(vector double __a, int __b) { - return __a[__b]; +static __inline__ double __ATTRS_o_ai vec_extract(vector double __a, + unsigned int __b) { + return __a[__b & 0x1]; } #endif -static __inline__ float __ATTRS_o_ai vec_extract(vector float __a, int __b) { - return __a[__b]; +static __inline__ float __ATTRS_o_ai vec_extract(vector float __a, + unsigned int __b) { + return __a[__b & 0x3]; } #ifdef __POWER9_VECTOR__ From 93c5e6bb49ca502d266700dd292e3873dfa51bb6 Mon Sep 17 00:00:00 2001 From: Peter Steinfeld Date: Mon, 1 Mar 2021 16:59:08 -0800 Subject: [PATCH 009/784] [flang] Detect circularly defined interfaces of procedures It's possible to define a procedure whose interface depends on a procedure which has an interface that depends on the original procedure. Such a circular definition was causing the compiler to fall into an infinite loop when resolving the name of the second procedure. It's also possible to create circular dependency chains of more than two procedures. I fixed this by adding the function HasCycle() to the class DeclarationVisitor and calling it from DeclareProcEntity() to detect procedures with such circularly defined interfaces. I marked the associated symbols of such procedures by calling SetError() on them. When processing subsequent procedures, I called HasError() before attempting to analyze their interfaces. 
Unfortunately, this did not work. With help from Tim, we determined that the SymbolSet used to track the erroneous symbols was instantiated using a "<" operator which was defined using the location of the name of the procedure. But the location of the procedure name was being changed by a call to ReplaceName() between the times that the calls to SetError() and HasError() were made. This caused HasError() to incorrectly report that a symbol was not in the set of erroneous symbols. I fixed this by changing SymbolSet to be an unordered set that uses the contents of the name of the symbol as the basis for its hash function. This works because the contents of the name of the symbol are preserved by ReplaceName() even though its location changes. I also fixed the error message used when reporting recursively defined dummy procedure arguments. I also added tests that will crash the compiler without this change. Note that the "<" operator is used in other contexts, for example, in the map of characterized procedures, maps of items in equivalence sets, maps of structure constructor values, ... All of these situations happen after name resolution has been completed and all calls to ReplaceName() have already happened and thus are not subject to the problem I ran into when ReplaceName() was called when processing procedure entities. Note also that the implementation of the "<" operator uses the relative location in the cooked character stream as the basis of its implementation. This is potentially problematic when symbols from different compilation units (for example symbols originating in .mod files) are put into the same map since their names will appear in two different source streams which may not be allocated in the same relative positions in memory. But I was unable to create a test that caused a problem. Using a direct comparison of the content of the name of the symbol in the "<" operator has problems. Symbols in enclosing or parallel scopes can have the same name. 
Also using the location of the symbol in the cooked character stream has the advantage that it preserves the order of the symbols in a structure constructor constant, which makes matching the values with the symbols relatively easy. This change supersedes D97201. Differential Revision: https://reviews.llvm.org/D97749 --- flang/include/flang/Semantics/semantics.h | 2 +- flang/include/flang/Semantics/symbol.h | 13 ++++-- flang/lib/Evaluate/characteristics.cpp | 2 +- flang/lib/Semantics/resolve-names.cpp | 56 +++++++++++++++++------ flang/test/Semantics/resolve102.f90 | 31 +++++++++++-- 5 files changed, 81 insertions(+), 23 deletions(-) diff --git a/flang/include/flang/Semantics/semantics.h b/flang/include/flang/Semantics/semantics.h index 4f4bfc7fea2d..e6202c666429 100644 --- a/flang/include/flang/Semantics/semantics.h +++ b/flang/include/flang/Semantics/semantics.h @@ -199,7 +199,7 @@ class SemanticsContext { IndexVarKind kind; }; std::map activeIndexVars_; - std::set errorSymbols_; + SymbolSet errorSymbols_; std::set tempNames_; }; diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 6bc889fd2873..55859696be52 100644 --- a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -15,9 +15,10 @@ #include "flang/Common/reference.h" #include "llvm/ADT/DenseMapInfo.h" #include +#include #include #include -#include +#include #include namespace llvm { @@ -595,7 +596,7 @@ class Symbol { bool operator==(const Symbol &that) const { return this == &that; } bool operator!=(const Symbol &that) const { return !(*this == that); } bool operator<(const Symbol &that) const { - // For sets of symbols: collate them by source location + // For maps of symbols: collate them by source location return name_.begin() < that.name_.begin(); } @@ -765,7 +766,13 @@ inline bool operator<(SymbolRef x, SymbolRef y) { return *x < *y; } inline bool operator<(MutableSymbolRef x, MutableSymbolRef y) { return *x < *y; } 
-using SymbolSet = std::set; +struct SymbolHash { + std::size_t operator()(SymbolRef symRef) const { + std::hash hasher; + return hasher(symRef->name().ToString()); + } +}; +using SymbolSet = std::unordered_set; } // namespace Fortran::semantics diff --git a/flang/lib/Evaluate/characteristics.cpp b/flang/lib/Evaluate/characteristics.cpp index 1e8370928f8a..9b15e3e8a1cb 100644 --- a/flang/lib/Evaluate/characteristics.cpp +++ b/flang/lib/Evaluate/characteristics.cpp @@ -369,7 +369,7 @@ static std::optional CharacterizeProcedure( std::string procsList{GetSeenProcs(seenProcs)}; context.messages().Say(symbol.name(), "Procedure '%s' is recursively defined. Procedures in the cycle:" - " '%s'"_err_en_US, + " %s"_err_en_US, symbol.name(), procsList); return std::nullopt; } diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 7f14121d40b2..cf71f475159d 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1003,6 +1003,7 @@ class DeclarationVisitor : public ArraySpecVisitor, context().SetError(symbol); return symbol; } + bool HasCycle(const Symbol &, const ProcInterface &); }; // Resolve construct entities and statement entities. 
@@ -2132,7 +2133,7 @@ static bool NeedsType(const Symbol &symbol) { void ScopeHandler::ApplyImplicitRules( Symbol &symbol, bool allowForwardReference) { - if (!NeedsType(symbol)) { + if (context().HasError(symbol) || !NeedsType(symbol)) { return; } if (const DeclTypeSpec * type{GetImplicitType(symbol)}) { @@ -3641,6 +3642,35 @@ Symbol &DeclarationVisitor::DeclareUnknownEntity( } } +bool DeclarationVisitor::HasCycle( + const Symbol &procSymbol, const ProcInterface &interface) { + SymbolSet procsInCycle; + procsInCycle.insert(procSymbol); + const ProcInterface *thisInterface{&interface}; + bool haveInterface{true}; + while (haveInterface) { + haveInterface = false; + if (const Symbol * interfaceSymbol{thisInterface->symbol()}) { + if (procsInCycle.count(*interfaceSymbol) > 0) { + for (const auto procInCycle : procsInCycle) { + Say(procInCycle->name(), + "The interface for procedure '%s' is recursively " + "defined"_err_en_US, + procInCycle->name()); + context().SetError(*procInCycle); + } + return true; + } else if (const auto *procDetails{ + interfaceSymbol->detailsIf()}) { + haveInterface = true; + thisInterface = &procDetails->interface(); + procsInCycle.insert(*interfaceSymbol); + } + } + } + return false; +} + Symbol &DeclarationVisitor::DeclareProcEntity( const parser::Name &name, Attrs attrs, const ProcInterface &interface) { Symbol &symbol{DeclareEntity(name, attrs)}; @@ -3650,20 +3680,20 @@ Symbol &DeclarationVisitor::DeclareProcEntity( "The interface for procedure '%s' has already been " "declared"_err_en_US); context().SetError(symbol); - } else { - if (interface.type()) { + } else if (HasCycle(symbol, interface)) { + return symbol; + } else if (interface.type()) { + symbol.set(Symbol::Flag::Function); + } else if (interface.symbol()) { + if (interface.symbol()->test(Symbol::Flag::Function)) { symbol.set(Symbol::Flag::Function); - } else if (interface.symbol()) { - if (interface.symbol()->test(Symbol::Flag::Function)) { - 
symbol.set(Symbol::Flag::Function); - } else if (interface.symbol()->test(Symbol::Flag::Subroutine)) { - symbol.set(Symbol::Flag::Subroutine); - } + } else if (interface.symbol()->test(Symbol::Flag::Subroutine)) { + symbol.set(Symbol::Flag::Subroutine); } - details->set_interface(interface); - SetBindNameOn(symbol); - SetPassNameOn(symbol); } + details->set_interface(interface); + SetBindNameOn(symbol); + SetPassNameOn(symbol); } return symbol; } @@ -5005,7 +5035,7 @@ Symbol *DeclarationVisitor::NoteInterfaceName(const parser::Name &name) { void DeclarationVisitor::CheckExplicitInterface(const parser::Name &name) { if (const Symbol * symbol{name.symbol}) { - if (!symbol->HasExplicitInterface()) { + if (!context().HasError(*symbol) && !symbol->HasExplicitInterface()) { Say(name, "'%s' must be an abstract interface or a procedure with " "an explicit interface"_err_en_US, diff --git a/flang/test/Semantics/resolve102.f90 b/flang/test/Semantics/resolve102.f90 index d6894dbd43ab..69ec8b2e1c57 100644 --- a/flang/test/Semantics/resolve102.f90 +++ b/flang/test/Semantics/resolve102.f90 @@ -1,7 +1,7 @@ ! RUN: %S/test_errors.sh %s %t %f18 ! Tests for circularly defined procedures -!ERROR: Procedure 'sub' is recursively defined. Procedures in the cycle: ''sub', 'p2'' +!ERROR: Procedure 'sub' is recursively defined. Procedures in the cycle: 'p2', 'sub' subroutine sub(p2) PROCEDURE(sub) :: p2 @@ -9,7 +9,7 @@ subroutine sub(p2) end subroutine subroutine circular - !ERROR: Procedure 'p' is recursively defined. Procedures in the cycle: ''p', 'sub', 'p2'' + !ERROR: Procedure 'p' is recursively defined. Procedures in the cycle: 'p2', 'p', 'sub' procedure(sub) :: p call p(sub) @@ -21,7 +21,7 @@ subroutine sub(p2) end subroutine circular program iface - !ERROR: Procedure 'p' is recursively defined. Procedures in the cycle: ''p', 'sub', 'p2'' + !ERROR: Procedure 'p' is recursively defined. 
Procedures in the cycle: 'p2', 'p', 'sub' procedure(sub) :: p interface subroutine sub(p2) @@ -38,7 +38,7 @@ Program mutual Call p(sub) contains - !ERROR: Procedure 'sub1' is recursively defined. Procedures in the cycle: ''p', 'sub1', 'arg'' + !ERROR: Procedure 'sub1' is recursively defined. Procedures in the cycle: 'p', 'arg', 'sub1' Subroutine sub1(arg) procedure(sub1) :: arg End Subroutine @@ -54,7 +54,7 @@ Program mutual1 Call p(sub) contains - !ERROR: Procedure 'sub1' is recursively defined. Procedures in the cycle: ''p', 'sub1', 'arg', 'sub', 'p2'' + !ERROR: Procedure 'sub1' is recursively defined. Procedures in the cycle: 'p2', 'sub', 'p', 'arg', 'sub1' Subroutine sub1(arg) procedure(sub) :: arg End Subroutine @@ -63,3 +63,24 @@ Subroutine sub(p2) Procedure(sub1) :: p2 End Subroutine End Program + +program twoCycle + !ERROR: The interface for procedure 'p1' is recursively defined + !ERROR: The interface for procedure 'p2' is recursively defined + procedure(p1) p2 + procedure(p2) p1 + call p1 + call p2 +end program + +program threeCycle + !ERROR: The interface for procedure 'p1' is recursively defined + !ERROR: The interface for procedure 'p2' is recursively defined + procedure(p1) p2 + !ERROR: The interface for procedure 'p3' is recursively defined + procedure(p2) p3 + procedure(p3) p1 + call p1 + call p2 + call p3 +end program From 1ff93618e58df210def48d26878c20a1b414d900 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 1 Mar 2021 21:38:41 -0600 Subject: [PATCH 010/784] [PowerPC] Add missing overloads of vec_promote to altivec.h The VSX-only overloads (for 8-byte element vectors) are missing. Add the missing overloads and convert element numbering to modulo arithmetic to match GCC and XLC. 
--- clang/lib/Headers/altivec.h | 36 +++++++++++++++++++++------ clang/test/CodeGen/builtins-ppc-vsx.c | 19 ++++++++++++++ 2 files changed, 48 insertions(+), 7 deletions(-) diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 402f3b389496..935eac3c8672 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -14024,49 +14024,71 @@ static __inline__ void __ATTRS_o_ai vec_stvrxl(vector float __a, int __b, static __inline__ vector signed char __ATTRS_o_ai vec_promote(signed char __a, int __b) { vector signed char __res = (vector signed char)(0); - __res[__b] = __a; + __res[__b & 0x7] = __a; return __res; } static __inline__ vector unsigned char __ATTRS_o_ai vec_promote(unsigned char __a, int __b) { vector unsigned char __res = (vector unsigned char)(0); - __res[__b] = __a; + __res[__b & 0x7] = __a; return __res; } static __inline__ vector short __ATTRS_o_ai vec_promote(short __a, int __b) { vector short __res = (vector short)(0); - __res[__b] = __a; + __res[__b & 0x7] = __a; return __res; } static __inline__ vector unsigned short __ATTRS_o_ai vec_promote(unsigned short __a, int __b) { vector unsigned short __res = (vector unsigned short)(0); - __res[__b] = __a; + __res[__b & 0x7] = __a; return __res; } static __inline__ vector int __ATTRS_o_ai vec_promote(int __a, int __b) { vector int __res = (vector int)(0); - __res[__b] = __a; + __res[__b & 0x3] = __a; return __res; } static __inline__ vector unsigned int __ATTRS_o_ai vec_promote(unsigned int __a, int __b) { vector unsigned int __res = (vector unsigned int)(0); - __res[__b] = __a; + __res[__b & 0x3] = __a; return __res; } static __inline__ vector float __ATTRS_o_ai vec_promote(float __a, int __b) { vector float __res = (vector float)(0); - __res[__b] = __a; + __res[__b & 0x3] = __a; return __res; } +#ifdef __VSX__ +static __inline__ vector double __ATTRS_o_ai vec_promote(double __a, int __b) { + vector double __res = (vector double)(0); + __res[__b & 0x1] = __a; + return 
__res; +} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_promote(signed long long __a, int __b) { + vector signed long long __res = (vector signed long long)(0); + __res[__b & 0x1] = __a; + return __res; +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_promote(unsigned long long __a, int __b) { + vector unsigned long long __res = (vector unsigned long long)(0); + __res[__b & 0x1] = __a; + return __res; +} +#endif + /* vec_splats */ static __inline__ vector signed char __ATTRS_o_ai vec_splats(signed char __a) { diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c index bd0e66e69800..53370cb3949e 100644 --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -22,6 +22,7 @@ vector signed long long vsll = { 255LL, -937LL }; vector unsigned long long vull = { 1447LL, 2894LL }; double d = 23.4; signed long long sll = 618LL; +unsigned long long ull = 618ULL; float af[4] = {23.4f, 56.7f, 89.0f, 12.3f}; double ad[2] = {23.4, 56.7}; signed char asc[16] = { -8, 9, -10, 11, -12, 13, -14, 15, @@ -1851,6 +1852,24 @@ res_vsc = vec_xxsldwi(vsc, vsc, 0); res_vuc = vec_xxsldwi(vuc, vuc, 1); // CHECK: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> // CHECK-LE: shufflevector <4 x i32> %{{[0-9]+}}, <4 x i32> %{{[0-9]+}}, <4 x i32> + +res_vd = vec_promote(d, 0); +// CHECK: store <2 x double> zeroinitializer +// CHECK: insertelement <2 x double> +// CHECK-LE: store <2 x double> zeroinitializer +// CHECK-LE: insertelement <2 x double> + +res_vsll = vec_promote(sll, 0); +// CHECK: store <2 x i64> zeroinitializer +// CHECK: insertelement <2 x i64> +// CHECK-LE: store <2 x i64> zeroinitializer +// CHECK-LE: insertelement <2 x i64> + +res_vull = vec_promote(ull, 0); +// CHECK: store <2 x i64> zeroinitializer +// CHECK: insertelement <2 x i64> +// CHECK-LE: store <2 x i64> zeroinitializer +// CHECK-LE: insertelement <2 x i64> } // The return type of the call 
expression may be different from the return type of the shufflevector. From ea1a1ebbc673d810f1abf6cb58a40b5ec916ff07 Mon Sep 17 00:00:00 2001 From: Ta-Wei Tu Date: Tue, 2 Mar 2021 11:42:48 +0800 Subject: [PATCH 011/784] [NFC] Use std::swap in LoopInterchange --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index b6e071fe5faa..0162bf1307af 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -186,12 +186,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, // matrix by exchanging the two columns. static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx, unsigned ToIndx) { - unsigned numRows = DepMatrix.size(); - for (unsigned i = 0; i < numRows; ++i) { - char TmpVal = DepMatrix[i][ToIndx]; - DepMatrix[i][ToIndx] = DepMatrix[i][FromIndx]; - DepMatrix[i][FromIndx] = TmpVal; - } + for (unsigned I = 0, E = DepMatrix.size(); I < E; ++I) + std::swap(DepMatrix[I][ToIndx], DepMatrix[I][FromIndx]); } // Checks if outermost non '=','S'or'I' dependence in the dependence matrix is From 7c724a896f93c97fe75db6f37b0995c9b35e0b82 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 1 Mar 2021 16:13:13 -0800 Subject: [PATCH 012/784] [AMDGPU] Do not check max-bb for a single block callee -amdgpu-inline-max-bb option could lead to a suboptimal codegen preventing inlining of really simple functions including pure wrapper calls. Relax the cutoff by allowing to call a function with a single block on the grounds that it will not increase total number of blocks after inlining. 
Differential Revision: https://reviews.llvm.org/D97744 --- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 4 +- llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll | 44 ++++++++++++++----- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index eb2733ff0310..6728b07fb48b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -1149,7 +1149,9 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller, // Hack to make compile times reasonable. if (InlineMaxBB && !Callee->hasFnAttribute(Attribute::InlineHint)) { - // Single BB does not increase total BB amount, thus subtract 1. + // Single BB does not increase total BB amount. + if (Callee->size() == 1) + return true; size_t BBSize = Caller->size() + Callee->size() - 1; return BBSize <= InlineMaxBB; } diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll index 9b0803bbcb99..7d02f393dc6e 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll @@ -1,7 +1,8 @@ -; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -inline-threshold=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INL1 %s -; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INLDEF %s -; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default' -S -inline-threshold=1 < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INL1 %s -; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default' -S < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-INLDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -inline-threshold=1 < %s | FileCheck -check-prefixes=GCN,GCN-INL1,GCN-MAXBBDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S < %s | FileCheck 
-check-prefixes=GCN,GCN-INLDEF,GCN-MAXBBDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default' -S -inline-threshold=1 < %s | FileCheck -check-prefixes=GCN,GCN-INL1,GCN-MAXBBDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default' -S < %s | FileCheck -check-prefixes=GCN,GCN-INLDEF,GCN-MAXBBDEF %s +; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default' -S -amdgpu-inline-max-bb=1 < %s | FileCheck -check-prefixes=GCN,GCN-MAXBB1 %s define coldcc float @foo(float %x, float %y) { entry: @@ -57,12 +58,14 @@ entry: } ; GCN: define amdgpu_kernel void @test_inliner( -; GCN-INL1: %c1 = tail call coldcc float @foo( -; GCN-INLDEF: %cmp.i = fcmp ogt float %tmp2, 0.000000e+00 -; GCN: %div.i{{[0-9]*}} = fdiv float 1.000000e+00, %c -; GCN: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i -; GCN: call void @foo_noinline( -; GCN: tail call float @_Z3sinf( +; GCN-INL1: %c1 = tail call coldcc float @foo( +; GCN-INLDEF: %cmp.i = fcmp ogt float %tmp2, 0.000000e+00 +; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 1.000000e+00, %c +; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i +; GCN-MAXBB1: call coldcc void @foo_private_ptr +; GCN-MAXBB1: call coldcc void @foo_private_ptr2 +; GCN: call void @foo_noinline( +; GCN: tail call float @_Z3sinf( define amdgpu_kernel void @test_inliner(float addrspace(1)* nocapture %a, i32 %n) { entry: %pvt_arr = alloca [64 x float], align 4, addrspace(5) @@ -95,7 +98,8 @@ entry: } ; GCN: define amdgpu_kernel void @test_inliner_multi_pvt_ptr( -; GCN: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i +; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i +; GCN-MAXBB1: call coldcc void @foo_private_ptr2 define amdgpu_kernel void @test_inliner_multi_pvt_ptr(float addrspace(1)* nocapture %a, i32 %n, float %v) { entry: %pvt_arr1 = alloca [32 x float], align 4, addrspace(5) @@ -147,6 +151,24 @@ entry: ret void } +; GCN: define amdgpu_kernel void @test_inliner_maxbb_singlebb( 
+; GCN: tail call float @_Z3sinf +define amdgpu_kernel void @test_inliner_maxbb_singlebb(float addrspace(1)* nocapture %a, i32 %n) { +entry: + %cmp = icmp eq i32 %n, 1 + br i1 %cmp, label %bb.1, label %bb.2 + br label %bb.1 + +bb.1: + store float 1.0, float* undef + br label %bb.2 + +bb.2: + %c = call float @sin_wrapper(float 1.0) + store float %c, float addrspace(1)* %a + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare float @_Z3sinf(float) #1 From d36a15de1ff4d24e772233406d602c5f0b370f54 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Fri, 26 Feb 2021 13:11:02 -0800 Subject: [PATCH 013/784] [mlir][linalg] Memoize indexing map generation. Differential Revision: https://reviews.llvm.org/D97602 --- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 7 +++++-- .../mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp | 9 ++++++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index acc8ff1807c1..46e5780e151f 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -2333,8 +2333,11 @@ static void printNamedStructuredOpResults(OpAsmPrinter &p, template static void printNamedStructuredOp(OpAsmPrinter &p, NamedStructuredOpType op) { p << op.getOperationName(); - p.printOptionalAttrDict(op->getAttrs(), - /*elidedAttrs=*/{"operand_segment_sizes"}); + p.printOptionalAttrDict( + op->getAttrs(), + /*elidedAttrs=*/{"operand_segment_sizes", + // See generated code in mlir-linalg-yaml-gen.cpp + "linalg.memoized_indexing_maps"}); // Printing is shared with generic ops, except for the region and // attributes. 
diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp index 5578ff52d477..1dddc57f25d3 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp @@ -651,11 +651,18 @@ static SmallVector getSymbolBindings({0} self) { // {2}: Statements static const char structuredOpIndexingMapsFormat[] = R"FMT( ArrayAttr {0}::indexing_maps() { + static const char memoizeAttr[] = "linalg.memoized_indexing_maps"; + ArrayAttr cached = getOperation()->getAttrOfType(memoizeAttr); + if (cached) + return cached; + MLIRContext *context = getContext(); auto symbolBindings = getSymbolBindings(*this); SmallVector maps; {2} - return Builder(context).getAffineMapArrayAttr(maps); + cached = Builder(context).getAffineMapArrayAttr(maps); + getOperation()->setAttr(memoizeAttr, cached); + return cached; } )FMT"; From 6d2fd3d9cdd6ed24784ec47741e7e70c236a140e Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Mon, 1 Mar 2021 21:19:39 -0800 Subject: [PATCH 014/784] [mlir][linalg] Replace monomorphic contraction ops with polymorphic variants. * Moves `batch_matmul`, `matmul`, `matvec`, `vecmat`, `dot` to the new mechanism. * This is not just an NFC change, in addition to using a new code generation mechanism, it also activates symbolic casting, allowing mixed precision operands and results. * These definitions were generated from DSL by the tool: https://github.com/stellaraccident/mlir-linalgpy/blob/main/mlir_linalg/oplib/core.py (will be upstreamed in a subsequent set of changes).
Reviewed By: nicolasvasilache, ThomasRaoux Differential Revision: https://reviews.llvm.org/D97719 --- .../Linalg/IR/LinalgNamedStructuredOps.yaml | 255 +++++++++++++++++- .../Linalg/IR/LinalgNamedStructuredOpsSpec.tc | 30 --- .../generalize-named-polymorphic-ops.mlir | 18 +- 3 files changed, 259 insertions(+), 44 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml index 93bc5760ed0c..5752af9bea9a 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -1,12 +1,12 @@ --- !LinalgOpConfig metadata: !LinalgOpMetadata - name: polymorphic_matmul - cpp_op_name: PolymorphicMatmulOp + name: matmul + cpp_op_name: MatmulOp doc: |- - Type polymorphic matrix multiplication. + Performs a matrix multiplacation of two 2D inputs. - This op is presently here to test a new path for generation and will replace - the existing 'matmul' op when ready. Do not use. + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. implements: - LinalgContractionOpInterface structured_op: !LinalgStructuredOpConfig @@ -60,4 +60,249 @@ structured_op: !LinalgStructuredOpConfig operands: - !ScalarExpression scalar_arg: B +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: batch_matmul + cpp_op_name: BatchMatmulOp + doc: |- + Performs a batched matrix multiplacation of two 3D inputs. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + implements: + - LinalgContractionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - ! + name: A + usage: input + shape: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s3)> + element_type_var: T1 + - ! 
+ name: B + usage: input + shape: affine_map<()[s0, s1, s2, s3] -> (s0, s3, s2)> + element_type_var: T2 + - ! + name: C + usage: output + shape: affine_map<()[s0, s1, s2, s3] -> (s0, s1, s2)> + element_type_var: U + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d3, d2)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3] -> (d0, d1, d2)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + assignments: + - !ScalarAssign + arg: C + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: C + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: A + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: B +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: matvec + cpp_op_name: MatvecOp + doc: |- + Performs a matrix-vector multiplication. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + implements: + - LinalgContractionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - ! + name: A + usage: input + shape: affine_map<()[s0, s1] -> (s0, s1)> + element_type_var: T1 + - ! + name: y + usage: input + shape: affine_map<()[s0, s1] -> (s1)> + element_type_var: T2 + - ! 
+ name: x + usage: output + shape: affine_map<()[s0, s1] -> (s0)> + element_type_var: U + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1)[s0, s1] -> (d0, d1)> + - affine_map<(d0, d1)[s0, s1] -> (d1)> + - affine_map<(d0, d1)[s0, s1] -> (d0)> + iterator_types: + - parallel + - reduction + assignments: + - !ScalarAssign + arg: x + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: x + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: A + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: y +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: vecmat + cpp_op_name: VecmatOp + doc: |- + Performs a vector-matrix multiplacation. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + implements: + - LinalgContractionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - ! + name: y + usage: input + shape: affine_map<()[s0, s1] -> (s1)> + element_type_var: T1 + - ! + name: A + usage: input + shape: affine_map<()[s0, s1] -> (s1, s0)> + element_type_var: T2 + - ! 
+ name: x + usage: output + shape: affine_map<()[s0, s1] -> (s0)> + element_type_var: U + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1)[s0, s1] -> (d1)> + - affine_map<(d0, d1)[s0, s1] -> (d1, d0)> + - affine_map<(d0, d1)[s0, s1] -> (d0)> + iterator_types: + - parallel + - reduction + assignments: + - !ScalarAssign + arg: x + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: x + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: y + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: A +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: dot + cpp_op_name: DotOp + doc: |- + Performs a dot product of two vectors to a scalar result. + + Numeric casting is performed on the operands to the inner multiply, promoting + them to the same data type as the accumulator/output. + implements: + - LinalgContractionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - ! + name: A + usage: input + shape: affine_map<()[s0] -> (s0)> + element_type_var: T1 + - ! + name: B + usage: input + shape: affine_map<()[s0] -> (s0)> + element_type_var: T2 + - ! 
+ name: C + usage: output + shape: affine_map<()[s0] -> ()> + element_type_var: U + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0)[s0] -> (d0)> + - affine_map<(d0)[s0] -> (d0)> + - affine_map<(d0)[s0] -> ()> + iterator_types: + - reduction + assignments: + - !ScalarAssign + arg: C + value: !ScalarExpression + scalar_apply: + fn_name: add + operands: + - !ScalarExpression + scalar_arg: C + - !ScalarExpression + scalar_apply: + fn_name: mul + operands: + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: A + - !ScalarExpression + symbolic_cast: + type_var: U + operands: + - !ScalarExpression + scalar_arg: B diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc index 338cc6eaa4d6..37b972b73cf5 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc @@ -1,9 +1,3 @@ -ods_def -implements_interface : -def matmul(A: f32(M, K), B: f32(K, N)) -> (C: f32(M, N)) { - C(m, n) = std_addf(C(m, n), std_mulf(A(m, k), B(k, n))); -} - ods_def implements_interface : def matmul_column_major(A: f32(K, M), B: f32(N, K)) -> (C: f32(N, M)) { @@ -30,12 +24,6 @@ def matmul_i32_i32_i32(A: i32(M, K), B: i32(K, N)) -> (C: i32(M, N)) { C(m, n) = std_addi(C(m, n), std_muli(A(m, k), B(k, n))); } -ods_def -implements_interface : -def matvec(A: f32(M, N), y: f32(N)) -> (x: f32(M)) { - x(m) = std_addf(x(m), std_mulf(A(m, n), y(n))); -} - ods_def implements_interface : def matvec_i8_i8_i32(A: i8(M, N), y: i8(N)) -> (x: i32(M)) { @@ -54,12 +42,6 @@ def matvec_i32_i32_i32(A: i32(M, N), y: i32(N)) -> (x: i32(M)) { x(m) = std_addi(x(m), std_muli(A(m, n), y(n))); } -ods_def -implements_interface : -def vecmat(y: f32(M), A: f32(M, N)) -> (x: f32(N)) { - x(n) = std_addf(x(n), std_mulf(y(m), A(m, n))); -} - ods_def 
implements_interface : def vecmat_i8_i8_i32(y: i8(M), A: i8(M, N)) -> (x: i32(N)) { @@ -78,12 +60,6 @@ def vecmat_i32_i32_i32(y: i32(M), A: i32(M, N)) -> (x: i32(N)) { x(n) = std_addi(x(n), std_muli(y(m), A(m, n))); } -ods_def -implements_interface : -def dot(A: f32(M), B: f32(M)) -> (C: f32()) { - C() = std_addf(C(), std_mulf(A(m), B(m))); -} - ods_def implements_interface : def dot_i8_i8_i32(A: i8(M), B: i8(M)) -> (C: i32()) { @@ -102,12 +78,6 @@ def dot_i32_i32_i32(A: i32(M), B: i32(M)) -> (C: i32()) { C() = std_addi(C(), std_muli(A(m), B(m))); } -ods_def -implements_interface : -def batch_matmul(A: f32(Batch, M, K), B: f32(Batch, K, N)) -> (C: f32(Batch, M, N)) { - C(b, m, n) = std_addf(C(b, m, n), std_mulf(A(b, m, k), B(b, k, n))); -} - ods_def implements_interface : def batch_matmul_i8_i8_i32(A: i8(Batch, M, K), B: i8(Batch, K, N)) -> (C: i32(Batch, M, N)) { diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir index fc1183ec0d85..251dfe609606 100644 --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -1,7 +1,7 @@ // RUN: mlir-opt %s -split-input-file -linalg-generalize-named-ops | FileCheck %s func @generalize_matmul_tensor_f32(%A : tensor<16x8xf32>, %B: tensor<8x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.polymorphic_matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) + %0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -16,7 +16,7 @@ func @generalize_matmul_tensor_f32(%A : tensor<16x8xf32>, %B: tensor<8x32xf32>, // ----- func @generalize_matmul_tensor_i32(%A : tensor<16x8xi32>, %B: tensor<8x32xi32>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { - %0 = linalg.polymorphic_matmul ins(%A, %B: tensor<16x8xi32>, tensor<8x32xi32>) + %0 = linalg.matmul ins(%A, %B: 
tensor<16x8xi32>, tensor<8x32xi32>) outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -31,7 +31,7 @@ func @generalize_matmul_tensor_i32(%A : tensor<16x8xi32>, %B: tensor<8x32xi32>, // ----- // Verifies floating point to integer cast. func @generalize_matmul_tensor_f32_f32_i16(%A : tensor<16x8xf32>, %B: tensor<8x32xf32>, %C: tensor<16x32xi16>) -> tensor<16x32xi16> { - %0 = linalg.polymorphic_matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) + %0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) outs(%C: tensor<16x32xi16>) -> tensor<16x32xi16> return %0: tensor<16x32xi16> } @@ -48,7 +48,7 @@ func @generalize_matmul_tensor_f32_f32_i16(%A : tensor<16x8xf32>, %B: tensor<8x3 // ----- // Verifies sign extension cast. func @generalize_matmul_tensor_i8_i8_i32(%A : tensor<16x8xi8>, %B: tensor<8x32xi8>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { - %0 = linalg.polymorphic_matmul ins(%A, %B: tensor<16x8xi8>, tensor<8x32xi8>) + %0 = linalg.matmul ins(%A, %B: tensor<16x8xi8>, tensor<8x32xi8>) outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -65,7 +65,7 @@ func @generalize_matmul_tensor_i8_i8_i32(%A : tensor<16x8xi8>, %B: tensor<8x32xi // ----- // Verifies that different argument types is legal. func @generalize_matmul_tensor_i8_i16_i32(%A : tensor<16x8xi8>, %B: tensor<8x32xi16>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { - %0 = linalg.polymorphic_matmul ins(%A, %B: tensor<16x8xi8>, tensor<8x32xi16>) + %0 = linalg.matmul ins(%A, %B: tensor<16x8xi8>, tensor<8x32xi16>) outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -82,7 +82,7 @@ func @generalize_matmul_tensor_i8_i16_i32(%A : tensor<16x8xi8>, %B: tensor<8x32x // ----- // Somewhat non-sensical but checks integer truncation cast. 
func @generalize_matmul_tensor_i32_i32_i16(%A : tensor<16x8xi32>, %B: tensor<8x32xi32>, %C: tensor<16x32xi16>) -> tensor<16x32xi16> { - %0 = linalg.polymorphic_matmul ins(%A, %B: tensor<16x8xi32>, tensor<8x32xi32>) + %0 = linalg.matmul ins(%A, %B: tensor<16x8xi32>, tensor<8x32xi32>) outs(%C: tensor<16x32xi16>) -> tensor<16x32xi16> return %0: tensor<16x32xi16> } @@ -99,7 +99,7 @@ func @generalize_matmul_tensor_i32_i32_i16(%A : tensor<16x8xi32>, %B: tensor<8x3 // ----- // Verifies integer to floating point cast. func @generalize_matmul_tensor_i8_i8_f32(%A : tensor<16x8xi8>, %B: tensor<8x32xi8>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.polymorphic_matmul ins(%A, %B: tensor<16x8xi8>, tensor<8x32xi8>) + %0 = linalg.matmul ins(%A, %B: tensor<16x8xi8>, tensor<8x32xi8>) outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -116,7 +116,7 @@ func @generalize_matmul_tensor_i8_i8_f32(%A : tensor<16x8xi8>, %B: tensor<8x32xi // ----- // Verifies floating point extension cast. func @generalize_matmul_tensor_f16_f16_f32(%A : tensor<16x8xf16>, %B: tensor<8x32xf16>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.polymorphic_matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf16>) + %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf16>) outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -133,7 +133,7 @@ func @generalize_matmul_tensor_f16_f16_f32(%A : tensor<16x8xf16>, %B: tensor<8x3 // ----- // Verifies floating point truncation. 
func @generalize_matmul_tensor_f64_f64_f32(%A : tensor<16x8xf64>, %B: tensor<8x32xf64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.polymorphic_matmul ins(%A, %B: tensor<16x8xf64>, tensor<8x32xf64>) + %0 = linalg.matmul ins(%A, %B: tensor<16x8xf64>, tensor<8x32xf64>) outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } From 9e2579dbf434e996b3d35f27b5a1762019cf27bb Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Mon, 1 Mar 2021 13:55:17 -0800 Subject: [PATCH 015/784] Fix infinite recursion during IR emission if a constant-initialized lifetime-extended temporary object's initializer refers back to the same object. `GetAddrOfGlobalTemporary` previously tried to emit the initializer of a global temporary before updating the global temporary map. Emitting the initializer could recurse back into `GetAddrOfGlobalTemporary` for the same temporary, resulting in an infinite recursion. Reviewed By: rjmccall Differential Revision: https://reviews.llvm.org/D97733 --- clang/lib/CodeGen/CodeGenModule.cpp | 29 ++++++++++++++++++++++++--- clang/test/CodeGenCXX/temporaries.cpp | 11 ++++++++++ 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 750439dd6844..765138bc798f 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5325,8 +5325,21 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( CharUnits Align = getContext().getTypeAlignInChars(MaterializedType); - if (llvm::Constant *Slot = MaterializedGlobalTemporaryMap[E]) - return ConstantAddress(Slot, Align); + auto InsertResult = MaterializedGlobalTemporaryMap.insert({E, nullptr}); + if (!InsertResult.second) { + // We've seen this before: either we already created it or we're in the + // process of doing so. + if (!InsertResult.first->second) { + // We recursively re-entered this function, probably during emission of + // the initializer. 
Create a placeholder. We'll clean this up in the + // outer call, at the end of this function. + llvm::Type *Type = getTypes().ConvertTypeForMem(MaterializedType); + InsertResult.first->second = new llvm::GlobalVariable( + getModule(), Type, false, llvm::GlobalVariable::InternalLinkage, + nullptr); + } + return ConstantAddress(InsertResult.first->second, Align); + } // FIXME: If an externally-visible declaration extends multiple temporaries, // we need to give each temporary the same name in every translation unit (and @@ -5405,7 +5418,17 @@ ConstantAddress CodeGenModule::GetAddrOfGlobalTemporary( *this, GV, AddrSpace, LangAS::Default, Type->getPointerTo( getContext().getTargetAddressSpace(LangAS::Default))); - MaterializedGlobalTemporaryMap[E] = CV; + + // Update the map with the new temporary. If we created a placeholder above, + // replace it with the new global now. + llvm::Constant *&Entry = MaterializedGlobalTemporaryMap[E]; + if (Entry) { + Entry->replaceAllUsesWith( + llvm::ConstantExpr::getBitCast(CV, Entry->getType())); + llvm::cast(Entry)->eraseFromParent(); + } + Entry = CV; + return ConstantAddress(CV, Align); } diff --git a/clang/test/CodeGenCXX/temporaries.cpp b/clang/test/CodeGenCXX/temporaries.cpp index edbaa7619221..3ce350d03f48 100644 --- a/clang/test/CodeGenCXX/temporaries.cpp +++ b/clang/test/CodeGenCXX/temporaries.cpp @@ -53,6 +53,17 @@ namespace BraceInit { // CHECK: @_ZN9BraceInit1xE ={{.*}} constant i32* @_ZGRN9BraceInit1xE_ } +namespace RefTempSubobject { + struct SelfReferential { + int *p = ints; + int ints[3] = {1, 2, 3}; + }; + + // CHECK: @_ZGRN16RefTempSubobject2srE_ = internal global { i32*, [3 x i32] } { {{.*}} getelementptr {{.*}} @_ZGRN16RefTempSubobject2srE_ {{.*}}, [3 x i32] [i32 1, i32 2, i32 3] } + // CHECK: @_ZN16RefTempSubobject2srE = {{.*}} constant {{.*}} @_ZGRN16RefTempSubobject2srE_ + constexpr const SelfReferential &sr = SelfReferential(); +} + struct A { A(); ~A(); From 4fd3347d6e4b0c873c789528e1c9a1b55990d1b6 Mon Sep 
17 00:00:00 2001 From: Kazu Hirata Date: Mon, 1 Mar 2021 23:40:29 -0800 Subject: [PATCH 016/784] [lldb] Fix typos in documentation (NFC) --- lldb/docs/design/overview.rst | 2 +- lldb/docs/doxygen.cfg.in | 2 +- lldb/docs/lldb-for-gdb-users.txt | 2 +- lldb/docs/lldb-platform-packets.txt | 2 +- lldb/docs/python_api_enums.rst | 2 +- lldb/docs/resources/build.rst | 2 +- lldb/docs/resources/caveats.rst | 2 +- lldb/docs/resources/contributing.rst | 2 +- lldb/docs/resources/test.rst | 2 +- lldb/docs/use/python-reference.rst | 2 +- lldb/docs/use/tutorial.rst | 4 ++-- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lldb/docs/design/overview.rst b/lldb/docs/design/overview.rst index 72eac56d6c3e..af98658c401d 100644 --- a/lldb/docs/design/overview.rst +++ b/lldb/docs/design/overview.rst @@ -72,7 +72,7 @@ Core ---- The Core source files contain basic functionality that is required in the -debugger as well as the class represeting the debugger it self (Debugger). A +debugger as well as the class representing the debugger itself (Debugger). A wide variety of classes are implemented: - Address (section offset addressing) diff --git a/lldb/docs/doxygen.cfg.in b/lldb/docs/doxygen.cfg.in index 7228a1ea4a81..7750d89fd267 100644 --- a/lldb/docs/doxygen.cfg.in +++ b/lldb/docs/doxygen.cfg.in @@ -1434,7 +1434,7 @@ PERL_PATH = #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will -# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option is superseded by the HAVE_DOT option below. This is only a # fallback. 
It is recommended to install and use dot, since it yields more diff --git a/lldb/docs/lldb-for-gdb-users.txt b/lldb/docs/lldb-for-gdb-users.txt index d505d639192d..e5eae376bb48 100644 --- a/lldb/docs/lldb-for-gdb-users.txt +++ b/lldb/docs/lldb-for-gdb-users.txt @@ -162,7 +162,7 @@ Current breakpoints: 1.1: where = Sketch`-[SKTGraphicView alignLeftEdges:] + 33 at /Projects/Sketch/SKTGraphicView.m:1405, address = 0x0000000100010d5b, resolved, hit count = 0 Note that each "logical" breakpoint can have multiple "locations". -The logical breakpoint has an integer id, and it's locations have an +The logical breakpoint has an integer id, and its locations have an id within their parent breakpoint (the two are joined by a ".", e.g. 1.1 in the example above.) diff --git a/lldb/docs/lldb-platform-packets.txt b/lldb/docs/lldb-platform-packets.txt index 5deb005aabf2..9a1444afef05 100644 --- a/lldb/docs/lldb-platform-packets.txt +++ b/lldb/docs/lldb-platform-packets.txt @@ -113,7 +113,7 @@ incompatible with the flags that gdb specifies. // 3. {optional} working directory ascii-hex encoded // // Response is F followed by the return value of the command (base 16), -// followed by a another number, followed by the output of the command +// followed by another number, followed by the output of the command / in binary-escaped-data encoding. //---------------------------------------------------------------------- diff --git a/lldb/docs/python_api_enums.rst b/lldb/docs/python_api_enums.rst index b00ac47bd1fa..a05647f61ca8 100644 --- a/lldb/docs/python_api_enums.rst +++ b/lldb/docs/python_api_enums.rst @@ -496,7 +496,7 @@ limit the amount of information that gets parsed to only the information that is requested. These bits also can indicate what actually did get resolved during query function calls. 
-Each definition corresponds to a one of the member variables +Each definition corresponds to one of the member variables in this class, and requests that that item be resolved, or indicates that the member did get resolved. diff --git a/lldb/docs/resources/build.rst b/lldb/docs/resources/build.rst index 7c2a4317a87d..cb774d76c629 100644 --- a/lldb/docs/resources/build.rst +++ b/lldb/docs/resources/build.rst @@ -133,7 +133,7 @@ macOS Building LLDB with CMake ------------------------ -The LLVM project is migrating to a single monolithic respository for LLVM and +The LLVM project is migrating to a single monolithic repository for LLVM and its subprojects. This is the recommended way to build LLDB. Check out the source-tree with git: diff --git a/lldb/docs/resources/caveats.rst b/lldb/docs/resources/caveats.rst index 2f37a6821ca5..e46c364a5402 100644 --- a/lldb/docs/resources/caveats.rst +++ b/lldb/docs/resources/caveats.rst @@ -21,7 +21,7 @@ against Python comes with some constraints to be aware of. use it from Python 2 and vice versa. 2. It is not possible to build and link LLDB against one distribution on - Python and use it through a interpreter coming from another distribution. + Python and use it through an interpreter coming from another distribution. For example, on macOS, if you build and link against Python from python.org, you cannot import the lldb module from the Python interpreter installed with Homebrew. diff --git a/lldb/docs/resources/contributing.rst b/lldb/docs/resources/contributing.rst index fb7ad964fd95..26b13a5b745c 100644 --- a/lldb/docs/resources/contributing.rst +++ b/lldb/docs/resources/contributing.rst @@ -8,7 +8,7 @@ Please refer to the `LLVM Getting Started Guide `_ for general information on how to get started on the LLVM project. A detailed explanation on how to build and test LLDB can be found in the `build instructions `_ and `test -instructions `_ respecitvely. +instructions `_ respectively. 
Contributing to LLDB -------------------- diff --git a/lldb/docs/resources/test.rst b/lldb/docs/resources/test.rst index e70e60e614f4..e066f8e209a1 100644 --- a/lldb/docs/resources/test.rst +++ b/lldb/docs/resources/test.rst @@ -365,7 +365,7 @@ Running tests in QEMU System Emulation Environment QEMU can be used to test LLDB in an emulation environment in the absence of actual hardware. `QEMU based testing `_ -page describes how to setup a emulation environment using QEMU helper scripts +page describes how to setup an emulation environment using QEMU helper scripts found under llvm-project/lldb/scripts/lldb-test-qemu. These scripts currently work with Arm or AArch64, but support for other architectures can be added easily. diff --git a/lldb/docs/use/python-reference.rst b/lldb/docs/use/python-reference.rst index 85a7b152f8aa..0225d45e1ac8 100644 --- a/lldb/docs/use/python-reference.rst +++ b/lldb/docs/use/python-reference.rst @@ -787,7 +787,7 @@ Writing lldb frame recognizers in Python Frame recognizers allow for retrieving information about special frames based on ABI, arguments or other special properties of that frame, even without source code or debug info. Currently, one use case is to extract function -arguments that would otherwise be unaccesible, or augment existing arguments. +arguments that would otherwise be inaccessible, or augment existing arguments. Adding a custom frame recognizer is done by implementing a Python class and using the 'frame recognizer add' command. The Python class should have a diff --git a/lldb/docs/use/tutorial.rst b/lldb/docs/use/tutorial.rst index fc41a7ab6a70..30c25fa4381f 100644 --- a/lldb/docs/use/tutorial.rst +++ b/lldb/docs/use/tutorial.rst @@ -246,7 +246,7 @@ breakpoint on all the methods that implement that selector in the classes in your program. Similarly, a file and line breakpoint might result in multiple locations if that file and line were inlined in different places in your code. 
-The logical breakpoint has an integer id, and it's locations have an id within +The logical breakpoint has an integer id, and its locations have an id within their parent breakpoint (the two are joined by a ".", e.g. 1.1 in the example above.) @@ -324,7 +324,7 @@ do: Breakpoint Names ---------------- -Breakpoints carry two orthognal sets of information: one specifies where to set the breakpoint, and the other how to react when the breakpoint is hit. The latter set of information (e.g. commands, conditions, hit-count, auto-continue...) we call breakpoint options. +Breakpoints carry two orthogonal sets of information: one specifies where to set the breakpoint, and the other how to react when the breakpoint is hit. The latter set of information (e.g. commands, conditions, hit-count, auto-continue...) we call breakpoint options. It is fairly common to want to apply one set of options to a number of breakpoints. For instance, you might want to check that self == nil and if it is, print a backtrace and continue, on a number of methods. 
One convenient way to do that would be to make all the breakpoints, then configure the options with: From 3a80088357d14e90e5cfefeb2718bf4024d43d1b Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 1 Mar 2021 23:40:31 -0800 Subject: [PATCH 017/784] [readobj] Use ListSeparator (NFC) --- llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp | 35 ++++++--------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp index 5995a09514c8..3491ea464b5e 100644 --- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp +++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp @@ -184,31 +184,16 @@ void Decoder::printRegisters(const std::pair &RegisterMask) const uint16_t VFPMask = std::get<1>(RegisterMask); OS << '{'; - bool Comma = false; - for (unsigned RI = 0, RE = 11; RI < RE; ++RI) { - if (GPRMask & (1 << RI)) { - if (Comma) - OS << ", "; - OS << GPRRegisterNames[RI]; - Comma = true; - } - } - for (unsigned RI = 0, RE = 32; RI < RE; ++RI) { - if (VFPMask & (1 << RI)) { - if (Comma) - OS << ", "; - OS << "d" << unsigned(RI); - Comma = true; - } - } - for (unsigned RI = 11, RE = 16; RI < RE; ++RI) { - if (GPRMask & (1 << RI)) { - if (Comma) - OS << ", "; - OS << GPRRegisterNames[RI]; - Comma = true; - } - } + ListSeparator LS; + for (unsigned RI = 0, RE = 11; RI < RE; ++RI) + if (GPRMask & (1 << RI)) + OS << LS << GPRRegisterNames[RI]; + for (unsigned RI = 0, RE = 32; RI < RE; ++RI) + if (VFPMask & (1 << RI)) + OS << LS << "d" << unsigned(RI); + for (unsigned RI = 11, RE = 16; RI < RE; ++RI) + if (GPRMask & (1 << RI)) + OS << LS << GPRRegisterNames[RI]; OS << '}'; } From 4444b343d7e208e6e6f7ee885d380d90df1c231d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Mon, 1 Mar 2021 23:40:32 -0800 Subject: [PATCH 018/784] [IR] Use range-based for loops (NFC) --- llvm/lib/IR/AsmWriter.cpp | 5 ++--- llvm/lib/IR/Core.cpp | 5 ++--- llvm/lib/IR/ModuleSummaryIndex.cpp | 7 +++---- llvm/lib/IR/Verifier.cpp | 
7 +++---- 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index 43aba4efb6ea..3ae3bf430d2f 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -4373,9 +4373,8 @@ void AssemblyWriter::writeMDNode(unsigned Slot, const MDNode *Node) { void AssemblyWriter::writeAllMDNodes() { SmallVector Nodes; Nodes.resize(Machine.mdn_size()); - for (SlotTracker::mdn_iterator I = Machine.mdn_begin(), E = Machine.mdn_end(); - I != E; ++I) - Nodes[I->second] = cast(I->first); + for (auto &I : llvm::make_range(Machine.mdn_begin(), Machine.mdn_end())) + Nodes[I.second] = cast(I.first); for (unsigned i = 0, e = Nodes.size(); i != e; ++i) { writeMDNode(i, Nodes[i]); diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index 6631a4ac2248..7398a7efd8cd 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -3280,9 +3280,8 @@ unsigned LLVMGetNumHandlers(LLVMValueRef CatchSwitch) { void LLVMGetHandlers(LLVMValueRef CatchSwitch, LLVMBasicBlockRef *Handlers) { CatchSwitchInst *CSI = unwrap(CatchSwitch); - for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(), - E = CSI->handler_end(); I != E; ++I) - *Handlers++ = wrap(*I); + for (const BasicBlock *H : CSI->handlers()) + *Handlers++ = wrap(H); } LLVMValueRef LLVMGetParentCatchSwitch(LLVMValueRef CatchPad) { diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index c2ded87c4947..f4ac6caf4f93 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -292,10 +292,9 @@ void ModuleSummaryIndex::propagateAttributes( if (!IsDSOLocal) // Mark the flag in all summaries false so that we can do quick check // without going through the whole list. 
- llvm::for_each(P.second.SummaryList, - [](const std::unique_ptr &Summary) { - return Summary->setDSOLocal(false); - }); + for (const std::unique_ptr &Summary : + P.second.SummaryList) + Summary->setDSOLocal(false); } setWithAttributePropagation(); setWithDSOLocalPropagation(); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index c382600683e5..47bfbfb19524 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -3671,10 +3671,9 @@ void Verifier::visitStoreInst(StoreInst &SI) { /// Check that SwiftErrorVal is used as a swifterror argument in CS. void Verifier::verifySwiftErrorCall(CallBase &Call, const Value *SwiftErrorVal) { - unsigned Idx = 0; - for (auto I = Call.arg_begin(), E = Call.arg_end(); I != E; ++I, ++Idx) { - if (*I == SwiftErrorVal) { - Assert(Call.paramHasAttr(Idx, Attribute::SwiftError), + for (const auto &I : llvm::enumerate(Call.args())) { + if (I.value() == SwiftErrorVal) { + Assert(Call.paramHasAttr(I.index(), Attribute::SwiftError), "swifterror value when used in a callsite should be marked " "with swifterror attribute", SwiftErrorVal, Call); From 37eca08e5bcfbe926176412a4a0acf5b963da7e6 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Feb 2021 18:54:21 +0300 Subject: [PATCH 019/784] [mlir][NFC] Rename `MemRefType::getMemorySpace` to `getMemorySpaceAsInt` Just a pure method renaming. It is a preparation step for replacing "memory space as raw integer" with more generic "memory space as attribute", which will be done in separate commit. The `MemRefType::getMemorySpace` method will return `Attribute` and become the main API, while `getMemorySpaceAsInt` will be declared as deprecated and will be replaced in all in-tree dialects (also in separate commits). 
Reviewed By: mehdi_amini, rriddle Differential Revision: https://reviews.llvm.org/D97476 --- .../mlir/Dialect/Affine/IR/AffineOps.h | 4 +-- .../include/mlir/Dialect/StandardOps/IR/Ops.h | 4 +-- mlir/include/mlir/IR/BuiltinTypes.h | 4 +-- mlir/lib/CAPI/IR/BuiltinTypes.cpp | 4 +-- .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 3 +- .../StandardToLLVM/StandardToLLVM.cpp | 28 ++++++++++--------- .../StandardToSPIRV/StandardToSPIRV.cpp | 5 ++-- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 4 +-- .../VectorToROCDL/VectorToROCDL.cpp | 4 +-- .../Conversion/VectorToSCF/VectorToSCF.cpp | 8 ++++-- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 2 +- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 6 ++-- .../SPIRV/Transforms/SPIRVConversion.cpp | 3 +- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 20 +++++++------ mlir/lib/Dialect/Vector/VectorOps.cpp | 6 ++-- mlir/lib/IR/AsmPrinter.cpp | 8 +++--- mlir/lib/IR/BuiltinTypes.cpp | 8 +++--- mlir/lib/Transforms/LoopFusion.cpp | 2 +- mlir/lib/Transforms/Utils/LoopUtils.cpp | 4 +-- 19 files changed, 68 insertions(+), 59 deletions(-) diff --git a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h index 29fc305b682a..10216da70ab2 100644 --- a/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h +++ b/mlir/include/mlir/Dialect/Affine/IR/AffineOps.h @@ -120,7 +120,7 @@ class AffineDmaStartOp /// Returns the memory space of the src memref. unsigned getSrcMemorySpace() { - return getSrcMemRef().getType().cast().getMemorySpace(); + return getSrcMemRef().getType().cast().getMemorySpaceAsInt(); } /// Returns the operand index of the dst memref. @@ -141,7 +141,7 @@ class AffineDmaStartOp /// Returns the memory space of the src memref. unsigned getDstMemorySpace() { - return getDstMemRef().getType().cast().getMemorySpace(); + return getDstMemRef().getType().cast().getMemorySpaceAsInt(); } /// Returns the affine map used to access the dst memref. 
diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h index 9a253f8e814a..241f4ed9fa84 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.h @@ -177,10 +177,10 @@ class DmaStartOp return getDstMemRef().getType().cast().getRank(); } unsigned getSrcMemorySpace() { - return getSrcMemRef().getType().cast().getMemorySpace(); + return getSrcMemRef().getType().cast().getMemorySpaceAsInt(); } unsigned getDstMemorySpace() { - return getDstMemRef().getType().cast().getMemorySpace(); + return getDstMemRef().getType().cast().getMemorySpaceAsInt(); } // Returns the destination memref indices for this DMA operation. diff --git a/mlir/include/mlir/IR/BuiltinTypes.h b/mlir/include/mlir/IR/BuiltinTypes.h index e3b8d597a2a7..61836b11fee8 100644 --- a/mlir/include/mlir/IR/BuiltinTypes.h +++ b/mlir/include/mlir/IR/BuiltinTypes.h @@ -293,7 +293,7 @@ class BaseMemRefType : public ShapedType { static bool classof(Type type); /// Returns the memory space in which data referred to by this memref resides. 
- unsigned getMemorySpace() const; + unsigned getMemorySpaceAsInt() const; }; //===----------------------------------------------------------------------===// @@ -314,7 +314,7 @@ class MemRefType : public Type::TypeBase shape, Type elementType) diff --git a/mlir/lib/CAPI/IR/BuiltinTypes.cpp b/mlir/lib/CAPI/IR/BuiltinTypes.cpp index 10cdbc4b1658..e4442ac4c567 100644 --- a/mlir/lib/CAPI/IR/BuiltinTypes.cpp +++ b/mlir/lib/CAPI/IR/BuiltinTypes.cpp @@ -270,7 +270,7 @@ MlirAffineMap mlirMemRefTypeGetAffineMap(MlirType type, intptr_t pos) { } unsigned mlirMemRefTypeGetMemorySpace(MlirType type) { - return unwrap(type).cast().getMemorySpace(); + return unwrap(type).cast().getMemorySpaceAsInt(); } bool mlirTypeIsAUnrankedMemRef(MlirType type) { @@ -289,7 +289,7 @@ MlirType mlirUnrankedMemRefTypeGetChecked(MlirLocation loc, } unsigned mlirUnrankedMemrefGetMemorySpace(MlirType type) { - return unwrap(type).cast().getMemorySpace(); + return unwrap(type).cast().getMemorySpaceAsInt(); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index 3deec2242c5e..bc4f65182bdb 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -118,7 +118,8 @@ struct LowerGpuOpsToNVVMOpsPass /// converter drops the private memory space to support the use case above. 
LLVMTypeConverter converter(m.getContext(), options); converter.addConversion([&](MemRefType type) -> Optional { - if (type.getMemorySpace() != gpu::GPUDialect::getPrivateAddressSpace()) + if (type.getMemorySpaceAsInt() != + gpu::GPUDialect::getPrivateAddressSpace()) return llvm::None; return converter.convertType(MemRefType::Builder(type).setMemorySpace(0)); }); diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp index ce2590f2cdfa..3e11a5ef1a14 100644 --- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp +++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp @@ -316,7 +316,8 @@ LLVMTypeConverter::getMemRefDescriptorFields(MemRefType type, Type elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; - auto ptrTy = LLVM::LLVMPointerType::get(elementType, type.getMemorySpace()); + auto ptrTy = + LLVM::LLVMPointerType::get(elementType, type.getMemorySpaceAsInt()); auto indexTy = getIndexType(); SmallVector results = {ptrTy, ptrTy, indexTy}; @@ -388,7 +389,7 @@ Type LLVMTypeConverter::convertMemRefToBarePtr(BaseMemRefType type) { Type elementType = unwrap(convertType(type.getElementType())); if (!elementType) return {}; - return LLVM::LLVMPointerType::get(elementType, type.getMemorySpace()); + return LLVM::LLVMPointerType::get(elementType, type.getMemorySpaceAsInt()); } /// Convert an n-D vector type to an LLVM vector type via (n-1)-D array type @@ -1081,7 +1082,8 @@ bool ConvertToLLVMPattern::isConvertibleAndHasIdentityMaps( Type ConvertToLLVMPattern::getElementPtrType(MemRefType type) const { auto elementType = type.getElementType(); auto structElementType = unwrap(typeConverter->convertType(elementType)); - return LLVM::LLVMPointerType::get(structElementType, type.getMemorySpace()); + return LLVM::LLVMPointerType::get(structElementType, + type.getMemorySpaceAsInt()); } void ConvertToLLVMPattern::getMemRefDescriptorSizes( @@ -1899,7 +1901,7 @@ struct 
AllocOpLowering : public AllocLikeOpLowering { Value alignedPtr = allocatedPtr; if (alignment) { - auto intPtrType = getIntPtrType(memRefType.getMemorySpace()); + auto intPtrType = getIntPtrType(memRefType.getMemorySpaceAsInt()); // Compute the aligned type pointer. Value allocatedInt = rewriter.create(loc, intPtrType, allocatedPtr); @@ -2247,7 +2249,7 @@ struct GlobalMemrefOpLowering : public ConvertOpToLLVMPattern { rewriter.replaceOpWithNewOp( global, arrayTy, global.constant(), linkage, global.sym_name(), - initialValue, type.getMemorySpace()); + initialValue, type.getMemorySpaceAsInt()); return success(); } }; @@ -2266,7 +2268,7 @@ struct GetGlobalMemrefOpLowering : public AllocLikeOpLowering { Operation *op) const override { auto getGlobalOp = cast(op); MemRefType type = getGlobalOp.result().getType().cast(); - unsigned memSpace = type.getMemorySpace(); + unsigned memSpace = type.getMemorySpaceAsInt(); Type arrayTy = convertGlobalMemrefTypeToLLVM(type, *getTypeConverter()); auto addressOf = rewriter.create( @@ -2462,7 +2464,7 @@ static void extractPointersAndOffset(Location loc, } unsigned memorySpace = - operandType.cast().getMemorySpace(); + operandType.cast().getMemorySpaceAsInt(); Type elementType = operandType.cast().getElementType(); Type llvmElementType = unwrap(typeConverter.convertType(elementType)); Type elementPtrPtrType = LLVM::LLVMPointerType::get( @@ -2591,7 +2593,7 @@ struct MemRefReshapeOpLowering // Extract address space and element type. auto targetType = reshapeOp.getResult().getType().cast(); - unsigned addressSpace = targetType.getMemorySpace(); + unsigned addressSpace = targetType.getMemorySpaceAsInt(); Type elementType = targetType.getElementType(); // Create the unranked memref descriptor that holds the ranked one. 
The @@ -2751,7 +2753,7 @@ struct DimOpLowering : public ConvertOpToLLVMPattern { auto unrankedMemRefType = operandType.cast(); auto scalarMemRefType = MemRefType::get({}, unrankedMemRefType.getElementType()); - unsigned addressSpace = unrankedMemRefType.getMemorySpace(); + unsigned addressSpace = unrankedMemRefType.getMemorySpaceAsInt(); // Extract pointer to the underlying ranked descriptor and bitcast it to a // memref descriptor pointer to minimize the number of GEP @@ -3265,7 +3267,7 @@ struct SubViewOpLowering : public ConvertOpToLLVMPattern { Value bitcastPtr = rewriter.create( loc, LLVM::LLVMPointerType::get(targetElementTy, - viewMemRefType.getMemorySpace()), + viewMemRefType.getMemorySpaceAsInt()), extracted); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); @@ -3274,7 +3276,7 @@ struct SubViewOpLowering : public ConvertOpToLLVMPattern { bitcastPtr = rewriter.create( loc, LLVM::LLVMPointerType::get(targetElementTy, - viewMemRefType.getMemorySpace()), + viewMemRefType.getMemorySpaceAsInt()), extracted); targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); @@ -3491,7 +3493,7 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { Value bitcastPtr = rewriter.create( loc, LLVM::LLVMPointerType::get(targetElementTy, - srcMemRefType.getMemorySpace()), + srcMemRefType.getMemorySpaceAsInt()), allocatedPtr); targetMemRef.setAllocatedPtr(rewriter, loc, bitcastPtr); @@ -3502,7 +3504,7 @@ struct ViewOpLowering : public ConvertOpToLLVMPattern { bitcastPtr = rewriter.create( loc, LLVM::LLVMPointerType::get(targetElementTy, - srcMemRefType.getMemorySpace()), + srcMemRefType.getMemorySpaceAsInt()), alignedPtr); targetMemRef.setAlignedPtr(rewriter, loc, bitcastPtr); diff --git a/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp index c2db461cae79..e07934b16fe3 100644 --- a/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp @@ 
-194,7 +194,7 @@ static bool isAllocationSupported(MemRefType t) { // shape and int or float or vector of int or float element type. if (!(t.hasStaticShape() && SPIRVTypeConverter::getMemorySpaceForStorageClass( - spirv::StorageClass::Workgroup) == t.getMemorySpace())) + spirv::StorageClass::Workgroup) == t.getMemorySpaceAsInt())) return false; Type elementType = t.getElementType(); if (auto vecType = elementType.dyn_cast()) @@ -207,7 +207,8 @@ static bool isAllocationSupported(MemRefType t) { /// type. Returns None on failure. static Optional getAtomicOpScope(MemRefType t) { Optional storageClass = - SPIRVTypeConverter::getStorageClassForMemorySpace(t.getMemorySpace()); + SPIRVTypeConverter::getStorageClassForMemorySpace( + t.getMemorySpaceAsInt()); if (!storageClass) return {}; switch (*storageClass) { diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index d567e065479d..ac0e3fc003d1 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -188,7 +188,7 @@ static LogicalResult getIndexedPtrs(ConversionPatternRewriter &rewriter, SmallVector strides; auto successStrides = getStridesAndOffset(memRefType, strides, offset); if (failed(successStrides) || strides.back() != 1 || - memRefType.getMemorySpace() != 0) + memRefType.getMemorySpaceAsInt() != 0) return failure(); auto pType = MemRefDescriptor(memref).getElementPtrType(); auto ptrsType = LLVM::getFixedVectorType(pType, vType.getDimSize(0)); @@ -200,7 +200,7 @@ static LogicalResult getIndexedPtrs(ConversionPatternRewriter &rewriter, // will be in the same address space as the incoming memref type. 
static Value castDataPtr(ConversionPatternRewriter &rewriter, Location loc, Value ptr, MemRefType memRefType, Type vt) { - auto pType = LLVM::LLVMPointerType::get(vt, memRefType.getMemorySpace()); + auto pType = LLVM::LLVMPointerType::get(vt, memRefType.getMemorySpaceAsInt()); return rewriter.create(loc, pType, ptr); } diff --git a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp index 005e7b30ea7c..42c072626cf5 100644 --- a/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp +++ b/mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp @@ -94,8 +94,8 @@ class VectorTransferConversion : public ConvertOpToLLVMPattern { // MUBUF instruction operate only on addresspace 0(unified) or 1(global) // In case of 3(LDS): fall back to vector->llvm pass // In case of 5(VGPR): wrong - if ((memRefType.getMemorySpace() != 0) && - (memRefType.getMemorySpace() != 1)) + if ((memRefType.getMemorySpaceAsInt() != 0) && + (memRefType.getMemorySpaceAsInt() != 1)) return failure(); // Note that the dataPtr starts at the offset address specified by diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index 503a2f001345..724d0afdf67b 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -115,9 +115,11 @@ class NDTransferOpHelper { VectorType::get(vectorType.getShape().take_back(minorRank), vectorType.getElementType()); /// Memref of minor vector type is used for individual transfers. 
- memRefMinorVectorType = MemRefType::get( - majorVectorType.getShape(), minorVectorType, {}, - xferOp.getShapedType().template cast().getMemorySpace()); + memRefMinorVectorType = + MemRefType::get(majorVectorType.getShape(), minorVectorType, {}, + xferOp.getShapedType() + .template cast() + .getMemorySpaceAsInt()); } LogicalResult doReplace(); diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index e72119545001..27fde9b87405 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -727,7 +727,7 @@ static LogicalResult verifyAttributions(Operation *op, if (!type) return op->emitOpError() << "expected memref type in attribution"; - if (type.getMemorySpace() != memorySpace) { + if (type.getMemorySpaceAsInt() != memorySpace) { return op->emitOpError() << "expected memory space " << memorySpace << " in attribution"; } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 1361fa06546c..d63e7753f93e 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -1345,7 +1345,7 @@ static LogicalResult verifyCast(DialectCastOp op, Type llvmType, Type type, if (!memrefType.hasStaticShape()) return op->emitOpError( "unexpected bare pointer for dynamically shaped memref"); - if (memrefType.getMemorySpace() != ptrType.getAddressSpace()) + if (memrefType.getMemorySpaceAsInt() != ptrType.getAddressSpace()) return op->emitError("invalid conversion between memref and pointer in " "different memory spaces"); @@ -1369,7 +1369,7 @@ static LogicalResult verifyCast(DialectCastOp op, Type llvmType, Type type, // The first two elements are pointers to the element type. 
auto allocatedPtr = structType.getBody()[0].dyn_cast(); if (!allocatedPtr || - allocatedPtr.getAddressSpace() != memrefType.getMemorySpace()) + allocatedPtr.getAddressSpace() != memrefType.getMemorySpaceAsInt()) return op->emitOpError("expected first element of a memref descriptor to " "be a pointer in the address space of the memref"); if (failed(verifyCast(op, allocatedPtr.getElementType(), @@ -1378,7 +1378,7 @@ static LogicalResult verifyCast(DialectCastOp op, Type llvmType, Type type, auto alignedPtr = structType.getBody()[1].dyn_cast(); if (!alignedPtr || - alignedPtr.getAddressSpace() != memrefType.getMemorySpace()) + alignedPtr.getAddressSpace() != memrefType.getMemorySpaceAsInt()) return op->emitOpError( "expected second element of a memref descriptor to " "be a pointer in the address space of the memref"); diff --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp index a91244eef23c..47269f4d5ec2 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp @@ -344,7 +344,8 @@ static Optional convertTensorType(const spirv::TargetEnv &targetEnv, static Optional convertMemrefType(const spirv::TargetEnv &targetEnv, MemRefType type) { Optional storageClass = - SPIRVTypeConverter::getStorageClassForMemorySpace(type.getMemorySpace()); + SPIRVTypeConverter::getStorageClassForMemorySpace( + type.getMemorySpaceAsInt()); if (!storageClass) { LLVM_DEBUG(llvm::dbgs() << type << " illegal: cannot convert memory space\n"); diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 539252af5cf9..536d71d89d4f 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -2096,7 +2096,7 @@ bool MemRefCastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { if (!checkCompatible(aStride.value(), bStrides[aStride.index()])) return false; } - if 
(aT.getMemorySpace() != bT.getMemorySpace()) + if (aT.getMemorySpaceAsInt() != bT.getMemorySpaceAsInt()) return false; // They must have the same rank, and any specified dimensions must match. @@ -2123,8 +2123,10 @@ bool MemRefCastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { if (aEltType != bEltType) return false; - auto aMemSpace = (aT) ? aT.getMemorySpace() : uaT.getMemorySpace(); - auto bMemSpace = (bT) ? bT.getMemorySpace() : ubT.getMemorySpace(); + auto aMemSpace = + (aT) ? aT.getMemorySpaceAsInt() : uaT.getMemorySpaceAsInt(); + auto bMemSpace = + (bT) ? bT.getMemorySpaceAsInt() : ubT.getMemorySpaceAsInt(); if (aMemSpace != bMemSpace) return false; @@ -2201,7 +2203,7 @@ static LogicalResult verify(MemRefReinterpretCastOp op) { // The source and result memrefs should be in the same memory space. auto srcType = op.source().getType().cast(); auto resultType = op.getType().cast(); - if (srcType.getMemorySpace() != resultType.getMemorySpace()) + if (srcType.getMemorySpaceAsInt() != resultType.getMemorySpaceAsInt()) return op.emitError("different memory spaces specified for source type ") << srcType << " and result memref type " << resultType; if (srcType.getElementType() != resultType.getElementType()) @@ -2875,7 +2877,7 @@ Type SubViewOp::inferResultType(MemRefType sourceMemRefType, staticSizes, sourceMemRefType.getElementType(), makeStridedLinearLayoutMap(targetStrides, targetOffset, sourceMemRefType.getContext()), - sourceMemRefType.getMemorySpace()); + sourceMemRefType.getMemorySpaceAsInt()); } Type SubViewOp::inferResultType(MemRefType sourceMemRefType, @@ -2932,7 +2934,7 @@ Type SubViewOp::inferRankReducedResultType( map = getProjectedMap(maps.front(), dimsToProject); inferredType = MemRefType::get(projectedShape, inferredType.getElementType(), map, - inferredType.getMemorySpace()); + inferredType.getMemorySpaceAsInt()); } return inferredType; } @@ -3154,7 +3156,7 @@ isRankReducedType(Type originalType, Type candidateReducedType, // Strided 
layout logic is relevant for MemRefType only. MemRefType original = originalType.cast(); MemRefType candidateReduced = candidateReducedType.cast(); - if (original.getMemorySpace() != candidateReduced.getMemorySpace()) + if (original.getMemorySpaceAsInt() != candidateReduced.getMemorySpaceAsInt()) return SubViewVerificationResult::MemSpaceMismatch; llvm::SmallDenseSet unusedDims = optionalUnusedDimsMask.getValue(); @@ -3228,7 +3230,7 @@ static LogicalResult verify(SubViewOp op) { MemRefType subViewType = op.getType(); // The base memref and the view memref should be in the same memory space. - if (baseType.getMemorySpace() != subViewType.getMemorySpace()) + if (baseType.getMemorySpaceAsInt() != subViewType.getMemorySpaceAsInt()) return op.emitError("different memory spaces specified for base memref " "type ") << baseType << " and subview memref type " << subViewType; @@ -4179,7 +4181,7 @@ static LogicalResult verify(ViewOp op) { return op.emitError("unsupported map for result memref type ") << viewType; // The base memref and the view memref should be in the same memory space. 
- if (baseType.getMemorySpace() != viewType.getMemorySpace()) + if (baseType.getMemorySpaceAsInt() != viewType.getMemorySpaceAsInt()) return op.emitError("different memory spaces specified for base memref " "type ") << baseType << " and view memref type " << viewType; diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp index 4702626c3e8c..b08a696281fa 100644 --- a/mlir/lib/Dialect/Vector/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/VectorOps.cpp @@ -3174,7 +3174,7 @@ void TypeCastOp::build(OpBuilder &builder, OperationState &result, VectorType::get(extractShape(memRefType), getElementTypeOrSelf(getElementTypeOrSelf(memRefType))); result.addTypes( - MemRefType::get({}, vectorType, {}, memRefType.getMemorySpace())); + MemRefType::get({}, vectorType, {}, memRefType.getMemorySpaceAsInt())); } static LogicalResult verify(TypeCastOp op) { @@ -3183,8 +3183,8 @@ static LogicalResult verify(TypeCastOp op) { return op.emitOpError("expects operand to be a memref with no layout"); if (!op.getResultMemRefType().getAffineMaps().empty()) return op.emitOpError("expects result to be a memref with no layout"); - if (op.getResultMemRefType().getMemorySpace() != - op.getMemRefType().getMemorySpace()) + if (op.getResultMemRefType().getMemorySpaceAsInt() != + op.getMemRefType().getMemorySpaceAsInt()) return op.emitOpError("expects result in same memory space"); auto sourceType = op.getMemRefType(); diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 437d353655ff..c0b20c064a78 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -1882,16 +1882,16 @@ void ModulePrinter::printType(Type type) { printAttribute(AffineMapAttr::get(map)); } // Only print the memory space if it is the non-default one. 
- if (memrefTy.getMemorySpace()) - os << ", " << memrefTy.getMemorySpace(); + if (memrefTy.getMemorySpaceAsInt()) + os << ", " << memrefTy.getMemorySpaceAsInt(); os << '>'; }) .Case([&](UnrankedMemRefType memrefTy) { os << "memref<*x"; printType(memrefTy.getElementType()); // Only print the memory space if it is the non-default one. - if (memrefTy.getMemorySpace()) - os << ", " << memrefTy.getMemorySpace(); + if (memrefTy.getMemorySpaceAsInt()) + os << ", " << memrefTy.getMemorySpaceAsInt(); os << '>'; }) .Case([&](ComplexType complexTy) { diff --git a/mlir/lib/IR/BuiltinTypes.cpp b/mlir/lib/IR/BuiltinTypes.cpp index 9b15854919e0..c84569a53531 100644 --- a/mlir/lib/IR/BuiltinTypes.cpp +++ b/mlir/lib/IR/BuiltinTypes.cpp @@ -206,7 +206,7 @@ ShapedType ShapedType::clone(ArrayRef shape, Type elementType) { if (auto other = dyn_cast()) { MemRefType::Builder b(shape, elementType); - b.setMemorySpace(other.getMemorySpace()); + b.setMemorySpace(other.getMemorySpaceAsInt()); return b; } @@ -229,7 +229,7 @@ ShapedType ShapedType::clone(ArrayRef shape) { if (auto other = dyn_cast()) { MemRefType::Builder b(shape, other.getElementType()); b.setShape(shape); - b.setMemorySpace(other.getMemorySpace()); + b.setMemorySpace(other.getMemorySpaceAsInt()); return b; } @@ -250,7 +250,7 @@ ShapedType ShapedType::clone(Type elementType) { } if (auto other = dyn_cast()) { - return UnrankedMemRefType::get(elementType, other.getMemorySpace()); + return UnrankedMemRefType::get(elementType, other.getMemorySpaceAsInt()); } if (isa()) { @@ -472,7 +472,7 @@ UnrankedTensorType::verify(function_ref emitError, // BaseMemRefType //===----------------------------------------------------------------------===// -unsigned BaseMemRefType::getMemorySpace() const { +unsigned BaseMemRefType::getMemorySpaceAsInt() const { return static_cast(impl)->memorySpace; } diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Transforms/LoopFusion.cpp index 26a20ee365b5..d6d18b3c6f7a 100644 --- 
a/mlir/lib/Transforms/LoopFusion.cpp +++ b/mlir/lib/Transforms/LoopFusion.cpp @@ -947,7 +947,7 @@ static Value createPrivateMemRef(AffineForOp forOp, Operation *srcStoreOpInst, if (bufSize <= localBufSizeThreshold && fastMemorySpace.hasValue()) { newMemSpace = fastMemorySpace.getValue(); } else { - newMemSpace = oldMemRefType.getMemorySpace(); + newMemSpace = oldMemRefType.getMemorySpaceAsInt(); } auto newMemRefType = MemRefType::get(newShape, oldMemRefType.getElementType(), {}, newMemSpace); diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index 77d24fb0c161..71a0fc8e5d89 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -2725,12 +2725,12 @@ uint64_t mlir::affineDataCopyGenerate(Block::iterator begin, // Gather regions to allocate to buffers in faster memory space. if (auto loadOp = dyn_cast(opInst)) { if ((filterMemRef.hasValue() && filterMemRef != loadOp.getMemRef()) || - (loadOp.getMemRefType().getMemorySpace() != + (loadOp.getMemRefType().getMemorySpaceAsInt() != copyOptions.slowMemorySpace)) return; } else if (auto storeOp = dyn_cast(opInst)) { if ((filterMemRef.hasValue() && filterMemRef != storeOp.getMemRef()) || - storeOp.getMemRefType().getMemorySpace() != + storeOp.getMemRefType().getMemorySpaceAsInt() != copyOptions.slowMemorySpace) return; } else { From d6ba8ecb60f2a2365b5fc79f5a1557a2490f7b68 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 2 Mar 2021 08:46:07 +0000 Subject: [PATCH 020/784] [ARM] Add handling of t2LDRSB/t2LDRSH in Constant Island Pass These constant pool loads should be treated similarly to t2LDRB/t2LDRH, acting on the same offset ranges. Add handling and a simple test. 
--- llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 2 + .../CodeGen/Thumb2/constant-islands-ldrsb.mir | 59 +++++++++++++++++++ 2 files changed, 61 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/constant-islands-ldrsb.mir diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 0bb595b21107..a23e65aa9659 100644 --- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -850,7 +850,9 @@ initializeFunctionInfo(const std::vector &CPEMIs) { case ARM::LDRcp: case ARM::t2LDRpci: case ARM::t2LDRHpci: + case ARM::t2LDRSHpci: case ARM::t2LDRBpci: + case ARM::t2LDRSBpci: Bits = 12; // +-offset_12 NegOk = true; break; diff --git a/llvm/test/CodeGen/Thumb2/constant-islands-ldrsb.mir b/llvm/test/CodeGen/Thumb2/constant-islands-ldrsb.mir new file mode 100644 index 000000000000..776f0f705dd2 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/constant-islands-ldrsb.mir @@ -0,0 +1,59 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -run-pass=arm-cp-islands -o - %s | FileCheck %s + +# CHECK-NOT: Unknown addressing mode for CP reference + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define void @t2LDRSBpci() { ret void } + define void @t2LDRSHpci() { ret void } + +... 
+--- +name: t2LDRSBpci +alignment: 4 +tracksRegLiveness: true +constants: + - id: 0 + value: 'i32 0' + alignment: 4 +body: | + bb.0: + $sp = frame-setup tSUBspi $sp, 3, 14 /* CC::al */, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 12 + renamable $r0 = t2LDRSBpci %const.0, 14 /* CC::al */, $noreg :: (dereferenceable load 1, align 4) + renamable $r1 = tMOVr $sp, 14 /* CC::al */, $noreg + tCMPr killed renamable $r1, killed renamable $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr + $r1 = t2MOVi16 target-flags(arm-lo16) @t2LDRSBpci, 14 /* CC::al */, $noreg + renamable $r0 = t2CSINC $zr, $zr, 3, implicit killed $cpsr + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @t2LDRSBpci, 14 /* CC::al */, $noreg + tSTRi killed renamable $r0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store 4) + $sp = frame-destroy tADDspi $sp, 3, 14 /* CC::al */, $noreg + tBX_RET 14 /* CC::al */, $noreg + +... +--- +name: t2LDRSHpci +alignment: 4 +tracksRegLiveness: true +constants: + - id: 0 + value: 'i32 0' + alignment: 4 +body: | + bb.0: + $sp = frame-setup tSUBspi $sp, 3, 14 /* CC::al */, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 12 + renamable $r0 = t2LDRSHpci %const.0, 14 /* CC::al */, $noreg :: (dereferenceable load 1, align 4) + renamable $r1 = tMOVr $sp, 14 /* CC::al */, $noreg + tCMPr killed renamable $r1, killed renamable $r0, 14 /* CC::al */, $noreg, implicit-def $cpsr + $r1 = t2MOVi16 target-flags(arm-lo16) @t2LDRSHpci, 14 /* CC::al */, $noreg + renamable $r0 = t2CSINC $zr, $zr, 3, implicit killed $cpsr + $r1 = t2MOVTi16 killed $r1, target-flags(arm-hi16) @t2LDRSHpci, 14 /* CC::al */, $noreg + tSTRi killed renamable $r0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store 4) + $sp = frame-destroy tADDspi $sp, 3, 14 /* CC::al */, $noreg + tBX_RET 14 /* CC::al */, $noreg + +... 
From 14be3f0e8848ee5a2dc215404c0391de4dc9e252 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Tue, 2 Mar 2021 08:45:53 +0000 Subject: [PATCH 021/784] [debuginfo-tests] Add some optnone tests Add dexter tests using the optnone attribute in various scenarios. Our users have found optnone useful when debugging optimised code. We have these tests downstream (and one upstream already: D89873) and we would like to contribute them if there is any interest. The tests are fairly self-explanatory. Testing optnone with: * optnone-fastmath.cpp: floats and -ffast-math, * optnone-simple-functions: simple functions and integer arithmetic, * optnone-struct-and-methods: a struct with methods, * optnone-vectors-and-functions: templates and integer vector arithmetic. optnone-vectors-and-functions contains two FIXMEs. The first problem is that lldb seems to struggle with evaluating expressions with the templates used here (example below). Perhaps this is PR42920? (lldb) p TypeTraits::NumElements error: :1:1: no template named 'TypeTraits' TypeTraits::NumElements ^ The second is that while lldb cannot evaluate the following expression, gdb can, but it reports that the variable has been optimized away. It does this when compiling at O0 too. llvm-dwarfdump shows that MysteryNumber does have a location. I don't know whether the DIE is bad or if both debuggers just don't support it.
TypeTraits::MysteryNumber DW_TAG_variable DW_AT_specification (0x0000006b "MysteryNumber") DW_AT_location (DW_OP_addr 0x601028) DW_AT_linkage_name ("_ZN10TypeTraitsIDv4_iE13MysteryNumberE") Reviewed By: rnk Differential Revision: https://reviews.llvm.org/D97668 --- .../dexter-tests/optnone-fastmath.cpp | 104 ++++++++++++++ .../dexter-tests/optnone-simple-functions.cpp | 104 ++++++++++++++ .../optnone-struct-and-methods.cpp | 105 ++++++++++++++ .../optnone-vectors-and-functions.cpp | 135 ++++++++++++++++++ 4 files changed, 448 insertions(+) create mode 100644 debuginfo-tests/dexter-tests/optnone-fastmath.cpp create mode 100644 debuginfo-tests/dexter-tests/optnone-simple-functions.cpp create mode 100644 debuginfo-tests/dexter-tests/optnone-struct-and-methods.cpp create mode 100644 debuginfo-tests/dexter-tests/optnone-vectors-and-functions.cpp diff --git a/debuginfo-tests/dexter-tests/optnone-fastmath.cpp b/debuginfo-tests/dexter-tests/optnone-fastmath.cpp new file mode 100644 index 000000000000..084eb9c71ea6 --- /dev/null +++ b/debuginfo-tests/dexter-tests/optnone-fastmath.cpp @@ -0,0 +1,104 @@ +// RUN: %dexter --fail-lt 1.0 -w \ +// RUN: --builder 'clang' --debugger 'lldb' \ +// RUN: --cflags "-ffast-math -O2 -g" -- %s +// RUN: %dexter --fail-lt 1.0 -w \ +// RUN: --builder 'clang' --debugger 'lldb' \ +// RUN: --cflags "-ffast-math -O0 -g" -- %s + +// REQUIRES: lldb +// UNSUPPORTED: system-windows + +//// Check that the debugging experience with __attribute__((optnone)) at O2 +//// matches O0. Test scalar floating point arithmetic with -ffast-math. + +//// Example of strength reduction. +//// The division by 10.0f can be rewritten as a multiply by 0.1f. +//// A / 10.f ==> A * 0.1f +//// This is safe with fastmath since we treat the two operations +//// as equally precise. However we don't want this to happen +//// with optnone. 
+__attribute__((optnone)) +float test_fdiv(float A) { + float result; + result = A / 10.f; // DexLabel('fdiv_assign') + return result; // DexLabel('fdiv_ret') +} +// DexExpectWatchValue('A', 4, on_line='fdiv_assign') +// DexExpectWatchValue('result', '0.400000006', on_line='fdiv_ret') + +//// (A * B) - (A * C) ==> A * (B - C) +__attribute__((optnone)) +float test_distributivity(float A, float B, float C) { + float result; + float op1 = A * B; + float op2 = A * C; // DexLabel('distributivity_op2') + result = op1 - op2; // DexLabel('distributivity_result') + return result; // DexLabel('distributivity_ret') +} +// DexExpectWatchValue('op1', '20', on_line='distributivity_op2') +// DexExpectWatchValue('op2', '24', on_line='distributivity_result') +// DexExpectWatchValue('result', '-4', on_line='distributivity_ret') + +//// (A + B) + C == A + (B + C) +//// therefore, ((A + B) + C) + (A + (B + C))) +//// can be rewritten as +//// 2.0f * ((A + B) + C) +//// Clang is currently unable to spot this optimization +//// opportunity with fastmath. +__attribute__((optnone)) +float test_associativity(float A, float B, float C) { + float result; + float op1 = A + B; + float op2 = B + C; + op1 += C; // DexLabel('associativity_op1') + op2 += A; + result = op1 + op2; // DexLabel('associativity_result') + return result; // DexLabel('associativity_ret') +} +// DexExpectWatchValue('op1', '9', '15', from_line='associativity_op1', to_line='associativity_result') +// DexExpectWatchValue('op2', '11', '15', from_line='associativity_op1', to_line='associativity_result') +// DexExpectWatchValue('result', '30', on_line='associativity_ret') + +//// With fastmath, the ordering of instructions doesn't matter +//// since we work under the assumption that there is no loss +//// in precision. 
This simplifies things for the optimizer which +//// can then decide to reorder instructions and fold +//// redundant operations like this: +//// A += 5.0f +//// A -= 5.0f +//// --> +//// A +//// This function can be simplified to a return A + B. +__attribute__((optnone)) +float test_simplify_fp_operations(float A, float B) { + float result = A + 10.0f; // DexLabel('fp_operations_result') + result += B; // DexLabel('fp_operations_add') + result -= 10.0f; + return result; // DexLabel('fp_operations_ret') +} +// DexExpectWatchValue('A', '8.25', on_line='fp_operations_result') +// DexExpectWatchValue('B', '26.3999996', on_line='fp_operations_result') +// DexExpectWatchValue('result', '18.25', '44.6500015', '34.6500015', from_line='fp_operations_add', to_line='fp_operations_ret') + +//// Again, this is a simple return A + B. +//// Clang is unable to spot the opportunity to fold the code sequence. +__attribute__((optnone)) +float test_simplify_fp_operations_2(float A, float B, float C) { + float result = A + C; // DexLabel('fp_operations_2_result') + result += B; + result -= C; // DexLabel('fp_operations_2_subtract') + return result; // DexLabel('fp_operations_2_ret') +} +// DexExpectWatchValue('A', '9.11999988', on_line='fp_operations_2_result') +// DexExpectWatchValue('B', '61.050003', on_line='fp_operations_2_result') +// DexExpectWatchValue('C', '1002.11102', on_line='fp_operations_2_result') +// DexExpectWatchValue('result', '1072.28101', '70.1699829', from_line='fp_operations_2_subtract', to_line='fp_operations_2_ret') + +int main() { + float result = test_fdiv(4.0f); + result += test_distributivity(4.0f, 5.0f, 6.0f); + result += test_associativity(4.0f, 5.0f, 6.0f); + result += test_simplify_fp_operations(8.25, result); + result += test_simplify_fp_operations_2(9.12, result, 1002.111); + return static_cast(result); +} diff --git a/debuginfo-tests/dexter-tests/optnone-simple-functions.cpp b/debuginfo-tests/dexter-tests/optnone-simple-functions.cpp new file mode 
100644 index 000000000000..7450cd50a0fa --- /dev/null +++ b/debuginfo-tests/dexter-tests/optnone-simple-functions.cpp @@ -0,0 +1,104 @@ +// RUN: %dexter --fail-lt 1.0 -w \ +// RUN: --builder 'clang' --debugger 'lldb' \ +// RUN: --cflags "-O2 -g" -- %s +// RUN: %dexter --fail-lt 1.0 -w \ +// RUN: --builder 'clang' --debugger 'lldb' \ +// RUN: --cflags "-O0 -g" -- %s + +// REQUIRES: lldb +// UNSUPPORTED: system-windows + +//// Check that the debugging experience with __attribute__((optnone)) at O2 +//// matches O0. Test simple functions performing simple arithmetic +//// operations and small loops. + +__attribute__((optnone)) +int test1(int test1_a, int test1_b) { + int test1_result = 0; + // DexLabel('test1_start') + test1_result = test1_a + test1_b; // DexExpectStepOrder(1) + return test1_result; // DexExpectStepOrder(2) + // DexLabel('test1_end') +} +// DexExpectWatchValue('test1_a', 3, from_line='test1_start', to_line='test1_end') +// DexExpectWatchValue('test1_b', 4, from_line='test1_start', to_line='test1_end') +// DexExpectWatchValue('test1_result', 0, 7, from_line='test1_start', to_line='test1_end') + +__attribute__((optnone)) +int test2(int test2_a, int test2_b) { + int test2_result = test2_a + test2_a + test2_a + test2_a; // DexExpectStepOrder(3) + // DexLabel('test2_start') + return test2_a << 2; // DexExpectStepOrder(4) + // DexLabel('test2_end') +} +// DexExpectWatchValue('test2_a', 1, from_line='test2_start', to_line='test2_end') +// DexExpectWatchValue('test2_b', 2, from_line='test2_start', to_line='test2_end') +// DexExpectWatchValue('test2_result', 4, from_line='test2_start', to_line='test2_end') + +__attribute__((optnone)) +int test3(int test3_a, int test3_b) { + int test3_temp1 = 0, test3_temp2 = 0; + // DexLabel('test3_start') + test3_temp1 = test3_a + 5; // DexExpectStepOrder(5) + test3_temp2 = test3_b + 5; // DexExpectStepOrder(6) + if (test3_temp1 > test3_temp2) { // DexExpectStepOrder(7) + test3_temp1 *= test3_temp2; // DexUnreachable() + } + 
return test3_temp1; // DexExpectStepOrder(8) + // DexLabel('test3_end') +} +// DexExpectWatchValue('test3_a', 5, from_line='test3_start', to_line='test3_end') +// DexExpectWatchValue('test3_b', 6, from_line='test3_start', to_line='test3_end') +// DexExpectWatchValue('test3_temp1', 0, 10, from_line='test3_start', to_line='test3_end') +// DexExpectWatchValue('test3_temp2', 0, 11, from_line='test3_start', to_line='test3_end') + +unsigned num_iterations = 4; + +__attribute__((optnone)) +int test4(int test4_a, int test4_b) { + int val1 = 0, val2 = 0; + // DexLabel('test4_start') + + val1 = (test4_a > test4_b) ? test4_a : test4_b; // DexExpectStepOrder(9) + val2 = val1; + val2 += val1; // DexExpectStepOrder(10) + + for (unsigned i=0; i != num_iterations; ++i) { // DexExpectStepOrder(11, 13, 15, 17, 19) + val1--; + val2 += i; + if (val2 % 2 == 0) // DexExpectStepOrder(12, 14, 16, 18) + val2 /= 2; + } + + return (val1 > val2) ? val2 : val1; // DexExpectStepOrder(20) + // DexLabel('test4_end') +} +// DexExpectWatchValue('test4_a', 1, from_line='test4_start', to_line='test4_end') +// DexExpectWatchValue('test4_b', 9, from_line='test4_start', to_line='test4_end') +// DexExpectWatchValue('val1', 0, 9, 8, 7, 6, 5, from_line='test4_start', to_line='test4_end') +// DexExpectWatchValue('val2', 0, 9, 18, 9, 10, 5, 7, 10, 5, 9, from_line='test4_start', to_line='test4_end') + +__attribute__((optnone)) +int test5(int test5_val) { + int c = 1; // DexExpectStepOrder(21) + // DexLabel('test5_start') + if (test5_val) // DexExpectStepOrder(22) + c = 5; // DexExpectStepOrder(23) + return c ? 
test5_val : test5_val; // DexExpectStepOrder(24) + // DexLabel('test5_end') +} +// DexExpectWatchValue('test5_val', 7, from_line='test5_start', to_line='test5_end') +// DexExpectWatchValue('c', 1, 5, from_line='test5_start', to_line='test5_end') + +int main() { + int main_result = 0; + // DexLabel('main_start') + main_result = test1(3,4); + main_result += test2(1,2); + main_result += test3(5,6); + main_result += test4(1,9); + main_result += test5(7); + return main_result; + // DexLabel('main_end') +} +// DexExpectWatchValue('main_result', 0, 7, 11, 21, 26, 33, from_line='main_start', to_line='main_end') diff --git a/debuginfo-tests/dexter-tests/optnone-struct-and-methods.cpp b/debuginfo-tests/dexter-tests/optnone-struct-and-methods.cpp new file mode 100644 index 000000000000..522d23926f48 --- /dev/null +++ b/debuginfo-tests/dexter-tests/optnone-struct-and-methods.cpp @@ -0,0 +1,105 @@ +// RUN: %dexter --fail-lt 1.0 -w \ +// RUN: --builder 'clang' --debugger 'lldb' \ +// RUN: --cflags "-g -O2" -v -- %s +// RUN: %dexter --fail-lt 1.0 -w \ +// RUN: --builder 'clang' --debugger 'lldb' \ +// RUN: --cflags "-g -O0" -- %s + +// REQUIRES: lldb +// UNSUPPORTED: system-windows + +//// Check that the debugging experience with __attribute__((optnone)) at O2 +//// matches O0. Test simple structs and methods. 
+ +long a_global_ptr[] = { 0xCAFEBABEL, 0xFEEDBEEFL }; + +namespace { + +struct A { + int a; + float b; + + enum B { + A_VALUE = 0x1, + B_VALUE = 0x2 + }; + + struct some_data { + enum B other_b; + enum B other_other_b; + }; + + struct other_data { + union { + void *raw_ptr; + long *long_ptr; + float *float_ptr; + } a; + struct some_data b; + struct some_data c; + }; +private: + struct other_data _data; + +public: + struct other_data *getOtherData() { return &_data; } + + __attribute__((always_inline,nodebug)) + void setSomeData1(A::B value, A::B other_value) { + struct other_data *data = getOtherData(); + data->b.other_b = value; + data->b.other_other_b = other_value; + } + + __attribute__((always_inline)) + void setSomeData2(A::B value, A::B other_value) { + struct other_data *data = getOtherData(); + data->c.other_b = value; + data->c.other_other_b = other_value; + } + + void setOtherData() { + setSomeData2(A_VALUE, B_VALUE); + getOtherData()->a.long_ptr = &a_global_ptr[0]; + } + + __attribute__((optnone)) + A() { + __builtin_memset(this, 0xFF, sizeof(*this)); + } //DexLabel('break_0') + // DexExpectWatchValue('a', '-1', on_line='break_0') + //// Check b is NaN by comparing it to itself. 
+ // DexExpectWatchValue('this->b == this->b', 'false', on_line='break_0') + // DexExpectWatchValue('_data.a.raw_ptr == -1', 'true', on_line='break_0') + // DexExpectWatchValue('_data.a.float_ptr == -1', 'true', on_line='break_0') + // DexExpectWatchValue('_data.a.float_ptr == -1', 'true', on_line='break_0') + // DexExpectWatchValue('a_global_ptr[0]', 0xcafebabe, on_line='break_0') + // DexExpectWatchValue('a_global_ptr[1]', 0xfeedbeef, on_line='break_0') + + __attribute__((optnone)) + ~A() { + *getOtherData()->a.long_ptr = 0xADDF00DL; + } //DexLabel('break_1') + // DexExpectWatchValue('_data.a.raw_ptr == a_global_ptr', 'true', on_line='break_1') + // DexExpectWatchValue('a_global_ptr[0]', 0xaddf00d, on_line='break_1') + + __attribute__((optnone)) + long getData() { + setSomeData1(B_VALUE, A_VALUE); + setOtherData(); + return getOtherData()->a.long_ptr[1]; //DexLabel('break_2') + } + // DexExpectWatchValue('_data.b.other_b', 'B_VALUE', on_line='break_2') + // DexExpectWatchValue('_data.b.other_other_b', 'A_VALUE', on_line='break_2') +}; + +} // anonymous namespace + +int main() { + int result = 0; + { + A a; + result = a.getData(); + } + return result; +} diff --git a/debuginfo-tests/dexter-tests/optnone-vectors-and-functions.cpp b/debuginfo-tests/dexter-tests/optnone-vectors-and-functions.cpp new file mode 100644 index 000000000000..e8da8e3f0985 --- /dev/null +++ b/debuginfo-tests/dexter-tests/optnone-vectors-and-functions.cpp @@ -0,0 +1,135 @@ +// RUN: %dexter --fail-lt 1.0 -w \ +// RUN: --builder 'clang' --debugger 'lldb' \ +// RUN: --cflags "-g -O2" -v -- %s +// RUN: %dexter --fail-lt 1.0 -w \ +// RUN: --builder 'clang' --debugger 'lldb' \ +// RUN: --cflags "-g -O0" -- %s + +// REQUIRES: lldb +// UNSUPPORTED: system-windows + +//// Check that the debugging experience with __attribute__((optnone)) at O2 +//// matches O0. Test simple template functions performing simple arithmetic +//// vector operations and trivial loops. 
+ +typedef int int4 __attribute__((ext_vector_type(4))); +template struct TypeTraits {}; + +template<> +struct TypeTraits { + static const unsigned NumElements = 4; + static const unsigned UnusedField = 0xDEADBEEFU; + static unsigned MysteryNumber; +}; +unsigned TypeTraits::MysteryNumber = 3U; + +template +__attribute__((optnone)) +T test1(T x, T y) { + T tmp = x + y; // DexLabel('break_0') + T tmp2 = tmp + y; + return tmp; // DexLabel('break_1') +} +// DexLimitSteps('1', '1', from_line='break_0', to_line='break_1') +//// FIXME: gdb can print this but lldb cannot. Perhaps PR42920? +// \DexExpectWatchValue('TypeTraits::NumElements', 4, on_line='break_0') +// \DexExpectWatchValue('TypeTraits::UnusedField', 0xdeadbeef, on_line='break_0') +// DexExpectWatchValue('x[0]', 1, on_line='break_0') +// DexExpectWatchValue('x[1]', 2, on_line='break_0') +// DexExpectWatchValue('x[2]', 3, on_line='break_0') +// DexExpectWatchValue('x[3]', 4, on_line='break_0') +// DexExpectWatchValue('y[0]', 5, on_line='break_0') +// DexExpectWatchValue('y[1]', 6, on_line='break_0') +// DexExpectWatchValue('y[2]', 7, on_line='break_0') +// DexExpectWatchValue('y[3]', 8, on_line='break_0') +// DexExpectWatchValue('tmp[0]', 6, on_line='break_1') +// DexExpectWatchValue('tmp[1]', 8, on_line='break_1') +// DexExpectWatchValue('tmp[2]', 10, on_line='break_1') +// DexExpectWatchValue('tmp[3]', 12, on_line='break_1') +// DexExpectWatchValue('tmp2[0]', 11, on_line='break_1') +// DexExpectWatchValue('tmp2[1]', 14, on_line='break_1') +// DexExpectWatchValue('tmp2[2]', 17, on_line='break_1') +// DexExpectWatchValue('tmp2[3]', 20, on_line='break_1') + +template +__attribute__((optnone)) +T test2(T x, T y) { + T tmp = x; + int break_2 = 0; // DexLabel('break_2') + for (unsigned i = 0; i != TypeTraits::NumElements; ++i) { + tmp <<= 1; // DexLabel('break_3') + tmp |= y; + } + + tmp[0] >>= TypeTraits::MysteryNumber; + return tmp; // DexLabel('break_5') +} +// DexLimitSteps('1', '1', on_line='break_2') +// 
DexExpectWatchValue('x[0]', 6, on_line='break_2') +// DexExpectWatchValue('x[1]', 8, on_line='break_2') +// DexExpectWatchValue('x[2]', 10, on_line='break_2') +// DexExpectWatchValue('x[3]', 12, on_line='break_2') +// DexExpectWatchValue('y[0]', 5, on_line='break_2') +// DexExpectWatchValue('y[1]', 6, on_line='break_2') +// DexExpectWatchValue('y[2]', 7, on_line='break_2') +// DexExpectWatchValue('y[3]', 8, on_line='break_2') +// DexExpectWatchValue('tmp[0]', 6, on_line='break_2') +// DexExpectWatchValue('tmp[1]', 8, on_line='break_2') +// DexExpectWatchValue('tmp[2]', 10, on_line='break_2') +// DexExpectWatchValue('tmp[3]', 12, on_line='break_2') +// DexLimitSteps('i', 3, on_line='break_3') +// DexExpectWatchValue('tmp[0]', 63, on_line='break_3') +// DexExpectWatchValue('tmp[1]', 94, on_line='break_3') +// DexExpectWatchValue('tmp[2]', 95, on_line='break_3') +// DexExpectWatchValue('tmp[3]', 120, on_line='break_3') +// DexLimitSteps('i', 3, on_line='break_5') +// DexExpectWatchValue('tmp[0]', 15, on_line='break_5') + +template +__attribute__((optnone)) +T test3(T InVec) { + T result; + for (unsigned i=0; i != TypeTraits::NumElements; ++i) + result[i] = InVec[i]; // DexLabel('break_6') + return result; // DexLabel('break_7') +} +// DexLimitSteps('i', '3', from_line='break_6', to_line='break_7') +// DexExpectWatchValue('InVec[0]', 15, from_line='break_6', to_line='break_7') +// DexExpectWatchValue('InVec[1]', 190, from_line='break_6', to_line='break_7') +// DexExpectWatchValue('InVec[2]', 191, from_line='break_6', to_line='break_7') +// DexExpectWatchValue('InVec[3]', 248, from_line='break_6', to_line='break_7') +// DexExpectWatchValue('result[0]', 15, from_line='break_6', to_line='break_7') +// DexExpectWatchValue('result[1]', 190, from_line='break_6', to_line='break_7') +// DexExpectWatchValue('result[2]', 191, from_line='break_6', to_line='break_7') +// DexExpectWatchValue('result[3]', 248, on_line='break_7') + +template +__attribute__((optnone)) +T test4(T x, T 
y) { + for (unsigned i=0; i != TypeTraits::NumElements; ++i) + x[i] = (x[i] > y[i])? x[i] : y[i] + TypeTraits::MysteryNumber; // DexLabel('break_11') + return x; // DexLabel('break_12') +} +// DexLimitSteps('1', '1', from_line='break_11', to_line='break_12') +//// FIXME: lldb won't print this but gdb unexpectedly says it's optimized out, even at O0. +// \DexExpectWatchValue('TypeTraits::MysteryNumber', 3, on_line='break_11') +// DexExpectWatchValue('i', 0, 1, 2, 3, on_line='break_11') +// DexExpectWatchValue('x[0]', 1, 8, from_line='break_11', to_line='break_12') +// DexExpectWatchValue('x[1]', 2, 9, from_line='break_11', to_line='break_12') +// DexExpectWatchValue('x[2]', 3, 10, from_line='break_11', to_line='break_12') +// DexExpectWatchValue('x[3]', 4, 11, from_line='break_11', to_line='break_12') +// DexExpectWatchValue('y[0]', 5, from_line='break_11', to_line='break_12') +// DexExpectWatchValue('y[1]', 6, from_line='break_11', to_line='break_12') +// DexExpectWatchValue('y[2]', 7, from_line='break_11', to_line='break_12') +// DexExpectWatchValue('y[3]', 8, from_line='break_11', to_line='break_12') + +int main() { + int4 a = (int4){1,2,3,4}; + int4 b = (int4){5,6,7,8}; + + int4 tmp = test1(a,b); + tmp = test2(tmp,b); + tmp = test3(tmp); + tmp += test4(a,b); + return tmp[0]; +} From 438b5bb05a429d697674088d400e4800c1108658 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Mon, 1 Feb 2021 16:49:38 +0100 Subject: [PATCH 022/784] [clangd] Use ML Code completion ranking as default. This makes code completion use a Decision Forest based ranking algorithm to rank completion candidates. [Estimated 6% accuracy boost]. This was previously hidden behind the flag --ranking-model=decision_forest. This patch makes it the default ranking algorithm. Note: this is a generic model, not specialized for any particular project. clangd does not collect or upload data to train code completion. Also treat Keywords separately as they are not recorded by the training set generator.
Differential Revision: https://reviews.llvm.org/D96353 --- clang-tools-extra/clangd/CodeComplete.h | 2 +- clang-tools-extra/clangd/Quality.cpp | 8 ++++++-- .../clangd/unittests/CodeCompleteTests.cpp | 14 +++++++------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/clang-tools-extra/clangd/CodeComplete.h b/clang-tools-extra/clangd/CodeComplete.h index debf71d4117c..40a528caa939 100644 --- a/clang-tools-extra/clangd/CodeComplete.h +++ b/clang-tools-extra/clangd/CodeComplete.h @@ -133,7 +133,7 @@ struct CodeCompleteOptions { enum CodeCompletionRankingModel { Heuristics, DecisionForest, - } RankingModel = Heuristics; + } RankingModel = DecisionForest; /// Callback used to score a CompletionCandidate if DecisionForest ranking /// model is enabled. diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp index b49392bc7d04..99421009c71c 100644 --- a/clang-tools-extra/clangd/Quality.cpp +++ b/clang-tools-extra/clangd/Quality.cpp @@ -580,12 +580,16 @@ evaluateDecisionForest(const SymbolQualitySignals &Quality, // multiplciative boost (like NameMatch). This allows us to weigh the // prediciton score and NameMatch appropriately. Scores.ExcludingName = pow(Base, Evaluate(E)); - // NeedsFixIts is not part of the DecisionForest as generating training - // data that needs fixits is not-feasible. + // Following cases are not part of the generated training dataset: + // - Symbols with `NeedsFixIts`. + // - Forbidden symbols. + // - Keywords: Dataset contains only macros and decls. if (Relevance.NeedsFixIts) Scores.ExcludingName *= 0.5; if (Relevance.Forbidden) Scores.ExcludingName *= 0; + if (Quality.Category == SymbolQualitySignals::Keyword) + Scores.ExcludingName *= 4; // NameMatch should be a multiplier on total score to support rescoring. 
Scores.Total = Relevance.NameMatch * Scores.ExcludingName; diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index b7a40179aa98..0ff1e83b7613 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -647,13 +647,13 @@ TEST(CompletionTest, ScopedWithFilter) { } TEST(CompletionTest, ReferencesAffectRanking) { - auto Results = completions("int main() { abs^ }", {ns("absl"), func("absb")}); - EXPECT_THAT(Results.Completions, - HasSubsequence(Named("absb"), Named("absl"))); - Results = completions("int main() { abs^ }", - {withReferences(10000, ns("absl")), func("absb")}); - EXPECT_THAT(Results.Completions, - HasSubsequence(Named("absl"), Named("absb"))); + EXPECT_THAT(completions("int main() { abs^ }", {func("absA"), func("absB")}) + .Completions, + HasSubsequence(Named("absA"), Named("absB"))); + EXPECT_THAT(completions("int main() { abs^ }", + {func("absA"), withReferences(1000, func("absB"))}) + .Completions, + HasSubsequence(Named("absB"), Named("absA"))); } TEST(CompletionTest, ContextWords) { From bad8e577f9c75c8b84efca79980781599e8e9f86 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 2 Mar 2021 10:09:22 +0100 Subject: [PATCH 023/784] Fix DecisionForestBenchmark.cpp compile errors clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp fails to compile since `"CompletionModel.h"` is auto-generated from clang-tools-extra/clangd/quality/model/features.json, which was changed in https://reviews.llvm.org/D94697 to remove `setFilterLength` and `setIsForbidden`, rename `setFileProximityDistance` and `setSymbolScopeDistance`, and add `setNumNameInContext` and `setFractionNameInContext`. This patch removes calls to the two removed functions, updates calls to the two renamed functions, and adds calls to the two new functions. 
(`20` is an arbitrary choice for the `setNumNameInContext` argument.) It also changes the `FlipCoin` argument from float to double to silence lossy conversion warnings. Note: I don't use this tool but encountered the build errors and took a shot at fixing them. Please holler if there's another recommended solution. Thanks! Reviewed By: usaxena95 Differential Revision: https://reviews.llvm.org/D97620 --- .../CompletionModel/DecisionForestBenchmark.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp b/clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp index 69ce65e08b77..b146def7b36f 100644 --- a/clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp +++ b/clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp @@ -21,7 +21,7 @@ namespace clang { namespace clangd { namespace { std::vector generateRandomDataset(int NumExamples) { - auto FlipCoin = [&](float Probability) { + auto FlipCoin = [&](double Probability) { return rand() % 1000 <= Probability * 1000; }; auto RandInt = [&](int Max) { return rand() % Max; }; @@ -38,15 +38,15 @@ std::vector generateRandomDataset(int NumExamples) { E.setIsImplementationDetail(FlipCoin(0.3)); // Boolean. E.setNumReferences(RandInt(10000)); // Can be large integer. E.setSymbolCategory(RandInt(10)); // 10 Symbol Category. - + E.setNumNameInContext(RandInt(20)); // 0 to ContextWords->size(). + E.setFractionNameInContext(RandFloat(1.0)); // Float in range [0,1]. E.setIsNameInContext(FlipCoin(0.5)); // Boolean. - E.setIsForbidden(FlipCoin(0.1)); // Boolean. E.setIsInBaseClass(FlipCoin(0.3)); // Boolean. - E.setFileProximityDistance( + E.setFileProximityDistanceCost( FlipCoin(0.1) ? 999999 // Sometimes file distance is not available. : RandInt(20)); E.setSemaFileProximityScore(RandFloat(1)); // Float in range [0,1]. 
- E.setSymbolScopeDistance( + E.setSymbolScopeDistanceCost( FlipCoin(0.1) ? 999999 // Sometimes scope distance is not available. : RandInt(20)); E.setSemaSaysInScope(FlipCoin(0.5)); // Boolean. @@ -56,7 +56,6 @@ std::vector generateRandomDataset(int NumExamples) { E.setHadContextType(FlipCoin(0.6)); // Boolean. E.setHadSymbolType(FlipCoin(0.6)); // Boolean. E.setTypeMatchesPreferred(FlipCoin(0.5)); // Boolean. - E.setFilterLength(RandInt(15)); Examples.push_back(E); } return Examples; From 1e34cb008f506a9ec9ca161ed8a738bd957bffd9 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 2 Mar 2021 10:26:14 +0100 Subject: [PATCH 024/784] [AArch64] Mark test depending on -debug as requiring asserts --- llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir b/llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir index e1e893c6383a..ce2d8f02f4cc 100644 --- a/llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir +++ b/llvm/test/CodeGen/AArch64/loh-adrp-add-ldr-clobber.mir @@ -1,4 +1,5 @@ # RUN: llc -o /dev/null %s -mtriple=aarch64-apple-ios -run-pass=aarch64-collect-loh -debug-only=aarch64-collect-loh 2>&1 | FileCheck %s +# REQUIRES: asserts --- | @sym2 = local_unnamed_addr global [10000000 x i32] zeroinitializer, align 8 @sym = local_unnamed_addr global i32 zeroinitializer, align 8 From 365f5e24758826a6ba4e58ad424b321d3a5c49a2 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Tue, 2 Mar 2021 18:34:32 +0900 Subject: [PATCH 025/784] [JumpThreading] Fix tryToUnfoldSelectInCurrBB to treat and/or and its select form equally This is a minor fix to update tryToUnfoldSelectInCurrBB to ignore select form of and/ors because the function does not look into binops as well --- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp 
b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 1a15b8ce041a..05c64f17bd95 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -2874,11 +2874,14 @@ bool JumpThreadingPass::tryToUnfoldSelectInCurrBB(BasicBlock *BB) { continue; auto isUnfoldCandidate = [BB](SelectInst *SI, Value *V) { + using namespace PatternMatch; + // Check if SI is in BB and use V as condition. if (SI->getParent() != BB) return false; Value *Cond = SI->getCondition(); - return (Cond && Cond == V && Cond->getType()->isIntegerTy(1)); + bool IsAndOr = match(SI, m_CombineOr(m_LogicalAnd(), m_LogicalOr())); + return Cond && Cond == V && Cond->getType()->isIntegerTy(1) && !IsAndOr; }; SelectInst *SI = nullptr; From 3fa0e793729a4caca2f35ebe7c0b25a2615b8e8f Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 2 Mar 2021 09:37:26 +0000 Subject: [PATCH 026/784] [OpenCL] Use StringMap instead of std::map As the LLVM Programmer's Manual suggests, use a StringMap instead of an std::map with a StringRef key. 
--- clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp index 2288e2711e6a..3e8c90a00753 100644 --- a/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp +++ b/clang/utils/TableGen/ClangOpenCLBuiltinEmitter.cpp @@ -60,8 +60,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -69,7 +69,6 @@ #include "llvm/TableGen/Record.h" #include "llvm/TableGen/StringMatcher.h" #include "llvm/TableGen/TableGenBackend.h" -#include using namespace llvm; @@ -667,7 +666,7 @@ static void OCL2Qual(Sema &S, const OpenCLTypeStruct &Ty, Records.getAllDerivedDefinitions("ImageType"); // Map an image type name to its 3 access-qualified types (RO, WO, RW). - std::map> ImageTypesMap; + StringMap> ImageTypesMap; for (auto *IT : ImageTypes) { auto Entry = ImageTypesMap.find(IT->getValueAsString("Name")); if (Entry == ImageTypesMap.end()) { @@ -685,11 +684,11 @@ static void OCL2Qual(Sema &S, const OpenCLTypeStruct &Ty, // tells which one is needed. Emit a switch statement that puts the // corresponding QualType into "QT". 
for (const auto &ITE : ImageTypesMap) { - OS << " case OCLT_" << ITE.first.str() << ":\n" + OS << " case OCLT_" << ITE.getKey() << ":\n" << " switch (Ty.AccessQualifier) {\n" << " case OCLAQ_None:\n" << " llvm_unreachable(\"Image without access qualifier\");\n"; - for (const auto &Image : ITE.second) { + for (const auto &Image : ITE.getValue()) { OS << StringSwitch( Image->getValueAsString("AccessQualifier")) .Case("RO", " case OCLAQ_ReadOnly:\n") From f47ff8cff1ede6ee017f4948f25a14e63de18612 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Tue, 2 Mar 2021 10:37:55 +0100 Subject: [PATCH 027/784] [lli] Test debug support in RuntimeDyld with built-in functions When lli runs the below IR, it emits in-memory debug objects and registers them with the GDB JIT interface. The tests dump and check the registered information. IR has limited ability to produce complex output in a portable way. Instead the tests rely on built-in functions implemented in lli. They use a new command line flag `-generate=function-name` to instruct the ORC JIT to expose the built-in function with the given name to the JITed program. `debug-descriptor-elf-minimal.ll` calls `__dump_jit_debug_descriptor()` to reflect the list of debug entries issued for itself after emitting the main module. The output is textual and can be checked straight away. `debug-objects-elf-minimal.ll` calls `__dump_jit_debug_objects()`, which instructs lli to walk through the list of debug entries and append the encountered in-memory objects to the program output. We feed this output into llvm-dwarfdump to parse the DWARF in each file and dump their structures. We can do the same for JITLink once D97335 has landed. 
Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D97694 --- .../OrcLazy/debug-descriptor-elf-minimal.ll | 43 ++++++ .../OrcLazy/debug-objects-elf-minimal.ll | 63 ++++++++ llvm/tools/lli/CMakeLists.txt | 1 + llvm/tools/lli/ExecutionUtils.cpp | 146 ++++++++++++++++++ llvm/tools/lli/ExecutionUtils.h | 60 +++++++ llvm/tools/lli/lli.cpp | 19 +++ 6 files changed, 332 insertions(+) create mode 100644 llvm/test/ExecutionEngine/OrcLazy/debug-descriptor-elf-minimal.ll create mode 100644 llvm/test/ExecutionEngine/OrcLazy/debug-objects-elf-minimal.ll create mode 100644 llvm/tools/lli/ExecutionUtils.cpp create mode 100644 llvm/tools/lli/ExecutionUtils.h diff --git a/llvm/test/ExecutionEngine/OrcLazy/debug-descriptor-elf-minimal.ll b/llvm/test/ExecutionEngine/OrcLazy/debug-descriptor-elf-minimal.ll new file mode 100644 index 000000000000..7542950dbae5 --- /dev/null +++ b/llvm/test/ExecutionEngine/OrcLazy/debug-descriptor-elf-minimal.ll @@ -0,0 +1,43 @@ +; RUN: lli --jit-kind=orc-lazy --per-module-lazy \ +; RUN: --generate=__dump_jit_debug_descriptor %s | FileCheck %s +; +; CHECK: Reading __jit_debug_descriptor at 0x{{.*}} +; CHECK: Version: 1 +; CHECK: Action: JIT_REGISTER_FN +; CHECK: Entry Symbol File Size Previous Entry +; CHECK: [ 0] 0x{{.*}} 0x{{.*}} {{.*}} 0x0000000000000000 + +target triple = "x86_64-unknown-unknown-elf" + +; Built-in symbol provided by the JIT +declare void @__dump_jit_debug_descriptor(i8*) + +; Host-process symbol from the GDB JIT interface +@__jit_debug_descriptor = external global i8, align 1 + +define i32 @main() !dbg !9 { + %1 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + call void @__dump_jit_debug_descriptor(i8* @__jit_debug_descriptor), !dbg !13 + ret i32 0, !dbg !14 +} + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.dbg.cu = !{!5} +!llvm.ident = !{!8} + +!0 = !{i32 2, !"SDK Version", [3 x i32] [i32 10, i32 15, i32 6]} +!1 = !{i32 7, !"Dwarf Version", i32 4} +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = 
!{i32 1, !"wchar_size", i32 4} +!4 = !{i32 7, !"PIC Level", i32 2} +!5 = distinct !DICompileUnit(language: DW_LANG_C99, file: !6, producer: "compiler version", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !7, nameTableKind: None) +!6 = !DIFile(filename: "source-file.c", directory: "/workspace") +!7 = !{} +!8 = !{!"compiler version"} +!9 = distinct !DISubprogram(name: "main", scope: !6, file: !6, line: 4, type: !10, scopeLine: 4, spFlags: DISPFlagDefinition, unit: !5, retainedNodes: !7) +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !DILocation(line: 5, column: 3, scope: !9) +!14 = !DILocation(line: 6, column: 3, scope: !9) diff --git a/llvm/test/ExecutionEngine/OrcLazy/debug-objects-elf-minimal.ll b/llvm/test/ExecutionEngine/OrcLazy/debug-objects-elf-minimal.ll new file mode 100644 index 000000000000..31c4a6665ca2 --- /dev/null +++ b/llvm/test/ExecutionEngine/OrcLazy/debug-objects-elf-minimal.ll @@ -0,0 +1,63 @@ +; RUN: lli --jit-kind=orc-lazy --per-module-lazy \ +; RUN: --generate=__dump_jit_debug_objects %s | llvm-dwarfdump --diff - | FileCheck %s +; +; CHECK: -: file format elf64-x86-64 +; CHECK: .debug_info contents: +; CHECK: 0x00000000: Compile Unit: length = 0x00000047, format = DWARF32, version = 0x0004, abbr_offset = 0x0000, addr_size = 0x08 (next unit at 0x0000004b) +; CHECK: DW_TAG_compile_unit +; CHECK: DW_AT_producer ("compiler version") +; CHECK: DW_AT_language (DW_LANG_C99) +; CHECK: DW_AT_name ("source-file.c") +; CHECK: DW_AT_stmt_list () +; CHECK: DW_AT_comp_dir ("/workspace") +; CHECK: DW_AT_low_pc () +; CHECK: DW_AT_high_pc () +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_low_pc () +; CHECK: DW_AT_high_pc () +; CHECK: DW_AT_frame_base (DW_OP_reg7 RSP) +; CHECK: DW_AT_name ("main") +; CHECK: DW_AT_decl_file ("/workspace/source-file.c") +; CHECK: DW_AT_decl_line (4) +; CHECK: DW_AT_type ("int") +; CHECK: DW_AT_external (true) +; CHECK: 
DW_TAG_base_type +; CHECK: DW_AT_name ("int") +; CHECK: DW_AT_encoding (DW_ATE_signed) +; CHECK: DW_AT_byte_size (0x04) +; CHECK: NULL + +target triple = "x86_64-unknown-unknown-elf" + +; Built-in symbol provided by the JIT +declare void @__dump_jit_debug_objects(i8*) + +; Host-process symbol from the GDB JIT interface +@__jit_debug_descriptor = external global i8, align 1 + +define i32 @main() !dbg !9 { + %1 = alloca i32, align 4 + store i32 0, i32* %1, align 4 + call void @__dump_jit_debug_objects(i8* @__jit_debug_descriptor), !dbg !13 + ret i32 0, !dbg !14 +} + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.dbg.cu = !{!5} +!llvm.ident = !{!8} + +!0 = !{i32 2, !"SDK Version", [3 x i32] [i32 10, i32 15, i32 6]} +!1 = !{i32 7, !"Dwarf Version", i32 4} +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !{i32 1, !"wchar_size", i32 4} +!4 = !{i32 7, !"PIC Level", i32 2} +!5 = distinct !DICompileUnit(language: DW_LANG_C99, file: !6, producer: "compiler version", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !7, nameTableKind: None) +!6 = !DIFile(filename: "source-file.c", directory: "/workspace") +!7 = !{} +!8 = !{!"compiler version"} +!9 = distinct !DISubprogram(name: "main", scope: !6, file: !6, line: 4, type: !10, scopeLine: 4, spFlags: DISPFlagDefinition, unit: !5, retainedNodes: !7) +!10 = !DISubroutineType(types: !11) +!11 = !{!12} +!12 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!13 = !DILocation(line: 5, column: 3, scope: !9) +!14 = !DILocation(line: 6, column: 3, scope: !9) diff --git a/llvm/tools/lli/CMakeLists.txt b/llvm/tools/lli/CMakeLists.txt index 098e9dd3e743..0725cd15f9ab 100644 --- a/llvm/tools/lli/CMakeLists.txt +++ b/llvm/tools/lli/CMakeLists.txt @@ -50,6 +50,7 @@ endif( LLVM_USE_PERF ) add_llvm_tool(lli lli.cpp + ExecutionUtils.cpp DEPENDS intrinsics_gen diff --git a/llvm/tools/lli/ExecutionUtils.cpp b/llvm/tools/lli/ExecutionUtils.cpp new file mode 100644 index 000000000000..55370ed40f2b --- 
/dev/null +++ b/llvm/tools/lli/ExecutionUtils.cpp @@ -0,0 +1,146 @@ +//===---- ExecutionUtils.cpp - Utilities for executing functions in lli ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "ExecutionUtils.h" + +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +// Declarations follow the GDB JIT interface (version 1, 2009) and must match +// those of the DYLD used for testing. See: +// +// llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp +// llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp +// +typedef enum { + JIT_NOACTION = 0, + JIT_REGISTER_FN, + JIT_UNREGISTER_FN +} jit_actions_t; + +struct jit_code_entry { + struct jit_code_entry *next_entry; + struct jit_code_entry *prev_entry; + const char *symfile_addr; + uint64_t symfile_size; +}; + +struct jit_descriptor { + uint32_t version; + // This should be jit_actions_t, but we want to be specific about the + // bit-width. 
+ uint32_t action_flag; + struct jit_code_entry *relevant_entry; + struct jit_code_entry *first_entry; +}; + +namespace llvm { + +template static void outsv(const char *Fmt, Ts &&...Vals) { + outs() << formatv(Fmt, Vals...); +} + +static const char *actionFlagToStr(uint32_t ActionFlag) { + switch (ActionFlag) { + case JIT_NOACTION: + return "JIT_NOACTION"; + case JIT_REGISTER_FN: + return "JIT_REGISTER_FN"; + case JIT_UNREGISTER_FN: + return "JIT_UNREGISTER_FN"; + } + return ""; +} + +// Sample output: +// +// Reading __jit_debug_descriptor at 0x0000000000404048 +// +// Version: 0 +// Action: JIT_REGISTER_FN +// +// Entry Symbol File Size Previous Entry +// [ 0] 0x0000000000451290 0x0000000000002000 200 0x0000000000000000 +// [ 1] 0x0000000000451260 0x0000000000001000 100 0x0000000000451290 +// ... +// +static void dumpDebugDescriptor(void *Addr) { + outsv("Reading __jit_debug_descriptor at {0}\n\n", Addr); + + jit_descriptor *Descriptor = reinterpret_cast(Addr); + outsv("Version: {0}\n", Descriptor->version); + outsv("Action: {0}\n\n", actionFlagToStr(Descriptor->action_flag)); + outsv("{0,11} {1,24} {2,15} {3,14}\n", "Entry", "Symbol File", "Size", + "Previous Entry"); + + unsigned Idx = 0; + for (auto *Entry = Descriptor->first_entry; Entry; Entry = Entry->next_entry) + outsv("[{0,2}] {1:X16} {2:X16} {3,8:D} {4}\n", Idx++, Entry, + reinterpret_cast(Entry->symfile_addr), + Entry->symfile_size, Entry->prev_entry); +} + +static LLIBuiltinFunctionGenerator *Generator = nullptr; + +static void dumpDebugObjects(void *Addr) { + jit_descriptor *Descriptor = reinterpret_cast(Addr); + for (auto *Entry = Descriptor->first_entry; Entry; Entry = Entry->next_entry) + Generator->appendDebugObject(Entry->symfile_addr, Entry->symfile_size); +} + +LLIBuiltinFunctionGenerator::LLIBuiltinFunctionGenerator( + std::vector Enabled, orc::MangleAndInterner &Mangle) + : TestOut(nullptr) { + Generator = this; + for (BuiltinFunctionKind F : Enabled) { + switch (F) { + case 
BuiltinFunctionKind::DumpDebugDescriptor: + expose(Mangle("__dump_jit_debug_descriptor"), &dumpDebugDescriptor); + break; + case BuiltinFunctionKind::DumpDebugObjects: + expose(Mangle("__dump_jit_debug_objects"), &dumpDebugObjects); + TestOut = createToolOutput(); + break; + } + } +} + +Error LLIBuiltinFunctionGenerator::tryToGenerate( + orc::LookupState &LS, orc::LookupKind K, orc::JITDylib &JD, + orc::JITDylibLookupFlags JDLookupFlags, + const orc::SymbolLookupSet &Symbols) { + orc::SymbolMap NewSymbols; + for (const auto &NameFlags : Symbols) { + auto It = BuiltinFunctions.find(NameFlags.first); + if (It != BuiltinFunctions.end()) + NewSymbols.insert(*It); + } + + if (NewSymbols.empty()) + return Error::success(); + + return JD.define(absoluteSymbols(std::move(NewSymbols))); +} + +// static +std::unique_ptr +LLIBuiltinFunctionGenerator::createToolOutput() { + std::error_code EC; + auto TestOut = std::make_unique("-", EC, sys::fs::OF_None); + if (EC) { + errs() << "Error creating tool output file: " << EC.message() << '\n'; + exit(1); + } + return TestOut; +} + +} // namespace llvm diff --git a/llvm/tools/lli/ExecutionUtils.h b/llvm/tools/lli/ExecutionUtils.h new file mode 100644 index 000000000000..fcd1db05cca3 --- /dev/null +++ b/llvm/tools/lli/ExecutionUtils.h @@ -0,0 +1,60 @@ +//===- ExecutionUtils.h - Utilities for executing code in lli ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Contains utilities for executing code in lli. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLI_EXECUTIONUTILS_H +#define LLVM_TOOLS_LLI_EXECUTIONUTILS_H + +#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/Mangling.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ToolOutputFile.h" + +#include +#include + +namespace llvm { + +enum class BuiltinFunctionKind { + DumpDebugDescriptor, + DumpDebugObjects, +}; + +// Utility class to expose symbols for special-purpose functions to the JIT. +class LLIBuiltinFunctionGenerator : public orc::DefinitionGenerator { +public: + LLIBuiltinFunctionGenerator(std::vector Enabled, + orc::MangleAndInterner &Mangle); + + Error tryToGenerate(orc::LookupState &LS, orc::LookupKind K, + orc::JITDylib &JD, orc::JITDylibLookupFlags JDLookupFlags, + const orc::SymbolLookupSet &Symbols) override; + + void appendDebugObject(const char *Addr, size_t Size) { + TestOut->os().write(Addr, Size); + } + +private: + orc::SymbolMap BuiltinFunctions; + std::unique_ptr TestOut; + + template void expose(orc::SymbolStringPtr Name, T *Handler) { + BuiltinFunctions[Name] = JITEvaluatedSymbol( + pointerToJITTargetAddress(Handler), JITSymbolFlags::Exported); + } + + static std::unique_ptr createToolOutput(); +}; + +} // end namespace llvm + +#endif // LLVM_TOOLS_LLI_EXECUTIONUTILS_H diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp index 420a18c3a429..63d0c493eebb 100644 --- a/llvm/tools/lli/lli.cpp +++ b/llvm/tools/lli/lli.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#include "ExecutionUtils.h" #include "RemoteJITUtils.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" @@ -243,6 +244,19 @@ namespace { "will overwrite existing files).")), cl::Hidden); + cl::list GenerateBuiltinFunctions( + "generate", + cl::desc("Provide built-in functions for access by JITed code " + 
"(jit-kind=orc-lazy only)"), + cl::values(clEnumValN(BuiltinFunctionKind::DumpDebugDescriptor, + "__dump_jit_debug_descriptor", + "Dump __jit_debug_descriptor contents to stdout"), + clEnumValN(BuiltinFunctionKind::DumpDebugObjects, + "__dump_jit_debug_objects", + "Dump __jit_debug_descriptor in-memory debug " + "objects as tool output")), + cl::Hidden); + ExitOnError ExitOnErr; } @@ -916,6 +930,11 @@ int runOrcLazyJIT(const char *ProgName) { return Name != MainName; }))); + if (GenerateBuiltinFunctions.size() > 0) + J->getMainJITDylib().addGenerator( + std::make_unique(GenerateBuiltinFunctions, + Mangle)); + // Add the main module. ExitOnErr(J->addLazyIRModule(std::move(MainModule))); From a63daf693ca4b67d8fae8546024d954fb2659c24 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 9 Feb 2021 21:12:30 +0100 Subject: [PATCH 028/784] [lldb] Remote leftover _llgs from TestGdbRemoteConnection.py the suffix will be added when the test is instantiated for llgs and debugserver. --- .../tools/lldb-server/commandline/TestGdbRemoteConnection.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/test/API/tools/lldb-server/commandline/TestGdbRemoteConnection.py b/lldb/test/API/tools/lldb-server/commandline/TestGdbRemoteConnection.py index c9799d1976cb..a82f4a8279b3 100644 --- a/lldb/test/API/tools/lldb-server/commandline/TestGdbRemoteConnection.py +++ b/lldb/test/API/tools/lldb-server/commandline/TestGdbRemoteConnection.py @@ -128,14 +128,14 @@ class TestGdbRemoteConnection(gdbremote_testcase.GdbRemoteTestCaseBase): mydir = TestBase.compute_mydir(__file__) @skipIfRemote # reverse connect is not a supported use case for now - def test_reverse_connect_llgs(self): + def test_reverse_connect(self): # Reverse connect is the default connection method. self.connect_to_debug_monitor() # Verify we can do the handshake. If that works, we'll call it good. 
self.do_handshake(self.sock) @skipIfRemote - def test_named_pipe_llgs(self): + def test_named_pipe(self): family, type, proto, _, addr = socket.getaddrinfo( self.stub_hostname, 0, proto=socket.IPPROTO_TCP)[0] self.sock = socket.socket(family, type, proto) From 1432ab171f9dece77b141d6c11da895275de14be Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 2 Mar 2021 11:00:22 +0100 Subject: [PATCH 029/784] [lldb] Add missing include to Cloneable.h This header is using make_shared so it needs to include <memory>. --- lldb/include/lldb/Utility/Cloneable.h | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/include/lldb/Utility/Cloneable.h b/lldb/include/lldb/Utility/Cloneable.h index 7082f3a59ce5..4c9b7ae340dc 100644 --- a/lldb/include/lldb/Utility/Cloneable.h +++ b/lldb/include/lldb/Utility/Cloneable.h @@ -9,6 +9,7 @@ #ifndef LLDB_UTILITY_CLONEABLE_H #define LLDB_UTILITY_CLONEABLE_H +#include #include namespace lldb_private { From 28f164bca724d52ed35f1ad9dd33f39d94c0b3c8 Mon Sep 17 00:00:00 2001 From: Dmitry Preobrazhensky Date: Tue, 2 Mar 2021 12:59:09 +0300 Subject: [PATCH 030/784] [AMDGPU][MC][GFX9+] Corrected encoding of op_sel_hi for unused operands in VOP3P Corrected encoding of VOP3P op_sel_hi for unused operands. See bug 49363.
Differential Revision: https://reviews.llvm.org/D97689 --- .../AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp | 39 +- llvm/lib/Target/AMDGPU/SIDefines.h | 11 + llvm/lib/Target/AMDGPU/VOPInstructions.td | 6 +- llvm/test/CodeGen/AMDGPU/immv216.ll | 12 +- llvm/test/MC/AMDGPU/expressions-gfx9.s | 4 +- llvm/test/MC/AMDGPU/gfx90a_asm_features.s | 126 +- llvm/test/MC/AMDGPU/gfx9_asm_vop3p.s | 1466 ++++++++--------- llvm/test/MC/AMDGPU/lds_direct.s | 2 +- llvm/test/MC/AMDGPU/literals.s | 4 +- llvm/test/MC/AMDGPU/literalv216.s | 172 +- llvm/test/MC/AMDGPU/vop3-literal.s | 40 +- llvm/test/MC/AMDGPU/vop3p.s | 66 +- .../AMDGPU/gfx90a_dasm_features.txt | 126 +- .../MC/Disassembler/AMDGPU/gfx9_dasm_all.txt | 1216 +++++++------- .../Disassembler/AMDGPU/lds_direct_gfx9.txt | 2 +- .../MC/Disassembler/AMDGPU/literal_gfx9.txt | 4 +- .../Disassembler/AMDGPU/literalv216_gfx10.txt | 68 +- .../MC/Disassembler/AMDGPU/vop3-literal.txt | 18 +- .../MC/Disassembler/AMDGPU/vop3p_opsel.txt | 15 + 19 files changed, 1718 insertions(+), 1679 deletions(-) create mode 100644 llvm/test/MC/Disassembler/AMDGPU/vop3p_opsel.txt diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 3fb11487a98d..9275db67b1cf 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -71,6 +71,9 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { unsigned getAVOperandEncoding(const MCInst &MI, unsigned OpNo, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const override; + +private: + uint64_t getImplicitOpSelHiEncoding(int Opcode) const; }; } // end anonymous namespace @@ -279,28 +282,38 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO, } } +uint64_t SIMCCodeEmitter::getImplicitOpSelHiEncoding(int Opcode) const { + using namespace AMDGPU::VOP3PEncoding; + using namespace AMDGPU::OpName; + + if (AMDGPU::getNamedOperandIdx(Opcode, op_sel_hi) != -1) { + if 
(AMDGPU::getNamedOperandIdx(Opcode, src2) != -1) + return 0; + if (AMDGPU::getNamedOperandIdx(Opcode, src1) != -1) + return OP_SEL_HI_2; + if (AMDGPU::getNamedOperandIdx(Opcode, src0) != -1) + return OP_SEL_HI_1 | OP_SEL_HI_2; + } + return OP_SEL_HI_0 | OP_SEL_HI_1 | OP_SEL_HI_2; +} + void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { verifyInstructionPredicates(MI, computeAvailableFeatures(STI.getFeatureBits())); + int Opcode = MI.getOpcode(); uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI); - const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + const MCInstrDesc &Desc = MCII.get(Opcode); unsigned bytes = Desc.getSize(); - switch (MI.getOpcode()) { - case AMDGPU::V_ACCVGPR_READ_B32_vi: - case AMDGPU::V_ACCVGPR_WRITE_B32_vi: - // Set unused op_sel_hi bits to 1. - // FIXME: This shall be done for all VOP3P but not MAI instructions with - // unused op_sel_hi bits if corresponding operands do not exist. - // accvgpr_read/write are different, however. These are VOP3P, MAI, have - // src0, but do not use op_sel. - Encoding |= (1ull << 14) | (1ull << 59) | (1ull << 60); - break; - default: - break; + // Set unused op_sel_hi bits to 1 for VOP3P and MAI instructions. + // Note that accvgpr_read/write are MAI, have src0, but do not use op_sel. 
+ if ((Desc.TSFlags & SIInstrFlags::VOP3P) || + Opcode == AMDGPU::V_ACCVGPR_READ_B32_vi || + Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_vi) { + Encoding |= getImplicitOpSelHiEncoding(Opcode); } for (unsigned i = 0; i < bytes; i++) { diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index b720cf45acb5..bd47f6b495ff 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -737,6 +737,17 @@ enum Target : unsigned { }; } // namespace Exp + +namespace VOP3PEncoding { + +enum OpSel : uint64_t { + OP_SEL_HI_0 = UINT64_C(1) << 59, + OP_SEL_HI_1 = UINT64_C(1) << 60, + OP_SEL_HI_2 = UINT64_C(1) << 14, +}; + +} // namespace VOP3PEncoding + } // namespace AMDGPU #define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028 diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index 382b02144612..45b64dde69cf 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -317,7 +317,7 @@ class VOP3Pe op, VOPProfile P> : Enc64 { let Inst{12} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{2}, 0); // op_sel(1) let Inst{13} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{2}, 0); // op_sel(2) - let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, 0); // op_sel_hi(2) + let Inst{14} = !if(!and(P.HasSrc2, P.HasOpSel), src2_modifiers{3}, ?); // op_sel_hi(2) let Inst{15} = !if(P.HasClamp, clamp{0}, 0); @@ -326,8 +326,8 @@ class VOP3Pe op, VOPProfile P> : Enc64 { let Inst{40-32} = !if(P.HasSrc0, src0, 0); let Inst{49-41} = !if(P.HasSrc1, src1, 0); let Inst{58-50} = !if(P.HasSrc2, src2, 0); - let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, 0); // op_sel_hi(0) - let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, 0); // op_sel_hi(1) + let Inst{59} = !if(!and(P.HasSrc0, P.HasOpSel), src0_modifiers{3}, ?); // op_sel_hi(0) + let Inst{60} = !if(!and(P.HasSrc1, P.HasOpSel), src1_modifiers{3}, ?); // 
op_sel_hi(1) let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // neg (lo) let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // neg (lo) let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0); // neg (lo) diff --git a/llvm/test/CodeGen/AMDGPU/immv216.ll b/llvm/test/CodeGen/AMDGPU/immv216.ll index 55157f1f1a2e..7cd5def4e5d6 100644 --- a/llvm/test/CodeGen/AMDGPU/immv216.ll +++ b/llvm/test/CodeGen/AMDGPU/immv216.ll @@ -139,11 +139,11 @@ define amdgpu_kernel void @add_inline_imm_0.0_v2f16(<2 x half> addrspace(1)* %ou ; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16: ; GFX10: s_load_dword [[VAL:s[0-9]+]] -; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x0f,0xcc,0x02,0xe0,0x01,0x08] +; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x0f,0xcc,0x02,0xe0,0x01,0x08] ; GFX10: buffer_store_dword [[REG]] ; GFX9: s_load_dword [[VAL:s[0-9]+]] -; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x8f,0xd3,0x04,0xe0,0x01,0x08] +; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x8f,0xd3,0x04,0xe0,0x01,0x08] ; GFX9: buffer_store_dword [[REG]] ; FIXME: Shouldn't need right shift and SDWA, also extra copy @@ -164,11 +164,11 @@ define amdgpu_kernel void @add_inline_imm_0.5_v2f16(<2 x half> addrspace(1)* %ou ; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16: ; GFX10: s_load_dword [[VAL:s[0-9]+]] -; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x0f,0xcc,0x02,0xe2,0x01,0x08] +; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x0f,0xcc,0x02,0xe2,0x01,0x08] ; GFX10: buffer_store_dword [[REG]] ; GFX9: s_load_dword [[VAL:s[0-9]+]] -; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x8f,0xd3,0x04,0xe2,0x01,0x08] +; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: 
[0x00,0x40,0x8f,0xd3,0x04,0xe2,0x01,0x08] ; GFX9: buffer_store_dword [[REG]] ; FIXME: Shouldn't need right shift and SDWA, also extra copy @@ -334,11 +334,11 @@ define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(<2 x half> addrspace } ; GCN-LABEL: {{^}}commute_add_literal_v2f16: -; GFX10: v_pk_add_f16 v0, 0x6400, v0 op_sel_hi:[0,1] ; encoding: [0x00,0x00,0x0f,0xcc,0xff,0x00,0x02,0x10,0x00,0x64,0x00,0x00] +; GFX10: v_pk_add_f16 v0, 0x6400, v0 op_sel_hi:[0,1] ; encoding: [0x00,0x40,0x0f,0xcc,0xff,0x00,0x02,0x10,0x00,0x64,0x00,0x00] ; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]] ; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400 ; encoding -; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0] ; encoding: [0x00,0x00,0x8f,0xd3,0x00,0x09,0x00,0x08] +; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x8f,0xd3,0x00,0x09,0x00,0x08] ; GFX9: buffer_store_dword [[REG]] ; VI-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400 ; encoding diff --git a/llvm/test/MC/AMDGPU/expressions-gfx9.s b/llvm/test/MC/AMDGPU/expressions-gfx9.s index 1305d9eb4d9c..cba9ee0445e7 100644 --- a/llvm/test/MC/AMDGPU/expressions-gfx9.s +++ b/llvm/test/MC/AMDGPU/expressions-gfx9.s @@ -42,7 +42,7 @@ v_mov_b32_sdwa v1, sext(-2+i1) //===----------------------------------------------------------------------===// v_pk_add_u16 v1, v2, v3 op_sel:[2-i1,i1-1] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x18] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] ; encoding: [0x01,0x48,0x8a,0xd3,0x02,0x07,0x02,0x18] v_pk_add_u16 v1, v2, v3 neg_lo:[2-i1,i1-1] -// GFX9: v_pk_add_u16 v1, v2, v3 neg_lo:[1,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x38] +// GFX9: v_pk_add_u16 v1, v2, v3 neg_lo:[1,0] ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x38] diff --git a/llvm/test/MC/AMDGPU/gfx90a_asm_features.s b/llvm/test/MC/AMDGPU/gfx90a_asm_features.s index fecf23b084de..d8196350ea8a 100644 --- 
a/llvm/test/MC/AMDGPU/gfx90a_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx90a_asm_features.s @@ -71,260 +71,260 @@ v_pk_fma_f32 v[8:9], v[0:1], s[0:1], v[4:5] clamp v_pk_fma_f32 v[0:1], v[4:5], v[8:9], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[254:255], v[8:9], v[16:17] ; encoding: [0xfe,0x00,0xb1,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[254:255], v[8:9], v[16:17] ; encoding: [0xfe,0x40,0xb1,0xd3,0x08,0x21,0x02,0x18] v_pk_mul_f32 v[254:255], v[8:9], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[254:255], v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0xfe,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[254:255], v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0xfe,0x21,0x02,0x18] v_pk_mul_f32 v[4:5], v[254:255], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], s[2:3], v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x02,0x20,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], s[2:3], v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x02,0x20,0x02,0x18] v_pk_mul_f32 v[4:5], s[2:3], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], s[100:101], v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x64,0x20,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], s[100:101], v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x64,0x20,0x02,0x18] v_pk_mul_f32 v[4:5], s[100:101], v[16:17] -// GFX90A: v_pk_mul_f32 v[4:5], flat_scratch, v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x66,0x20,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], flat_scratch, v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x66,0x20,0x02,0x18] // NOT-GFX1010: error: instruction not supported on this GPU // NOT-GFX908: error: instruction not supported on this GPU v_pk_mul_f32 v[4:5], flat_scratch, v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], vcc, v[16:17] ; encoding: 
[0x04,0x00,0xb1,0xd3,0x6a,0x20,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], vcc, v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x6a,0x20,0x02,0x18] v_pk_mul_f32 v[4:5], vcc, v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], exec, v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x7e,0x20,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], exec, v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x7e,0x20,0x02,0x18] v_pk_mul_f32 v[4:5], exec, v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[254:255] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0xfd,0x03,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[254:255] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0xfd,0x03,0x18] v_pk_mul_f32 v[4:5], v[8:9], v[254:255] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], s[2:3] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x05,0x00,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], s[2:3] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x05,0x00,0x18] v_pk_mul_f32 v[4:5], v[8:9], s[2:3] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], s[100:101] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0xc9,0x00,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], s[100:101] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0xc9,0x00,0x18] v_pk_mul_f32 v[4:5], v[8:9], s[100:101] -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], flat_scratch ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0xcd,0x00,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], flat_scratch ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0xcd,0x00,0x18] // NOT-GFX1010: error: instruction not supported on this GPU // NOT-GFX908: error: instruction not supported on this GPU v_pk_mul_f32 v[4:5], v[8:9], flat_scratch // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], vcc ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0xd5,0x00,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], vcc ; encoding: 
[0x04,0x40,0xb1,0xd3,0x08,0xd5,0x00,0x18] v_pk_mul_f32 v[4:5], v[8:9], vcc // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], exec ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0xfd,0x00,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], exec ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0xfd,0x00,0x18] v_pk_mul_f32 v[4:5], v[8:9], exec // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x18] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,0] ; encoding: [0x04,0x08,0xb1,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,0] ; encoding: [0x04,0x48,0xb1,0xd3,0x08,0x21,0x02,0x18] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,0] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[0,1] ; encoding: [0x04,0x10,0xb1,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[0,1] ; encoding: [0x04,0x50,0xb1,0xd3,0x08,0x21,0x02,0x18] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[0,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,1] ; encoding: [0x04,0x18,0xb1,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,1] ; encoding: [0x04,0x58,0xb1,0xd3,0x08,0x21,0x02,0x18] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x18] v_pk_mul_f32 v[4:5], 
v[8:9], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,0] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x00] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,0] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x00] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,0] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[1,0] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x08] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[1,0] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x08] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[1,0] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,1] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x10] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,1] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x10] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,0] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x38] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,0] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x38] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,0] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[0,1] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x58] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[0,1] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x58] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[0,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,1] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x78] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] 
neg_lo:[1,1] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x78] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,0] ; encoding: [0x04,0x01,0xb1,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,0] ; encoding: [0x04,0x41,0xb1,0xd3,0x08,0x21,0x02,0x18] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,0] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[0,1] ; encoding: [0x04,0x02,0xb1,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[0,1] ; encoding: [0x04,0x42,0xb1,0xd3,0x08,0x21,0x02,0x18] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[0,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,1] ; encoding: [0x04,0x03,0xb1,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,1] ; encoding: [0x04,0x43,0xb1,0xd3,0x08,0x21,0x02,0x18] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] clamp ; encoding: [0x04,0x80,0xb1,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] clamp ; encoding: [0x04,0xc0,0xb1,0xd3,0x08,0x21,0x02,0x18] v_pk_mul_f32 v[4:5], v[8:9], v[16:17] clamp // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[254:255], v[8:9], v[16:17] ; encoding: [0xfe,0x00,0xb2,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[254:255], v[8:9], v[16:17] ; encoding: [0xfe,0x40,0xb2,0xd3,0x08,0x21,0x02,0x18] v_pk_add_f32 v[254:255], v[8:9], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[254:255], v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0xfe,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], 
v[254:255], v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0xfe,0x21,0x02,0x18] v_pk_add_f32 v[4:5], v[254:255], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], s[2:3], v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x02,0x20,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], s[2:3], v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x02,0x20,0x02,0x18] v_pk_add_f32 v[4:5], s[2:3], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], s[100:101], v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x64,0x20,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], s[100:101], v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x64,0x20,0x02,0x18] v_pk_add_f32 v[4:5], s[100:101], v[16:17] -// GFX90A: v_pk_add_f32 v[4:5], flat_scratch, v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x66,0x20,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], flat_scratch, v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x66,0x20,0x02,0x18] // NOT-GFX1010: error: instruction not supported on this GPU // NOT-GFX908: error: instruction not supported on this GPU v_pk_add_f32 v[4:5], flat_scratch, v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], vcc, v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x6a,0x20,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], vcc, v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x6a,0x20,0x02,0x18] v_pk_add_f32 v[4:5], vcc, v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], exec, v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x7e,0x20,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], exec, v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x7e,0x20,0x02,0x18] v_pk_add_f32 v[4:5], exec, v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[254:255] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0xfd,0x03,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[254:255] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0xfd,0x03,0x18] 
v_pk_add_f32 v[4:5], v[8:9], v[254:255] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], s[2:3] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x05,0x00,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], s[2:3] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x05,0x00,0x18] v_pk_add_f32 v[4:5], v[8:9], s[2:3] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], s[100:101] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0xc9,0x00,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], s[100:101] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0xc9,0x00,0x18] v_pk_add_f32 v[4:5], v[8:9], s[100:101] -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], flat_scratch ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0xcd,0x00,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], flat_scratch ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0xcd,0x00,0x18] // NOT-GFX1010: error: instruction not supported on this GPU // NOT-GFX908: error: instruction not supported on this GPU v_pk_add_f32 v[4:5], v[8:9], flat_scratch // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], vcc ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0xd5,0x00,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], vcc ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0xd5,0x00,0x18] v_pk_add_f32 v[4:5], v[8:9], vcc // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], exec ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0xfd,0x00,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], exec ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0xfd,0x00,0x18] v_pk_add_f32 v[4:5], v[8:9], exec // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x18] v_pk_add_f32 v[4:5], v[8:9], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: 
v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,0] ; encoding: [0x04,0x08,0xb2,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,0] ; encoding: [0x04,0x48,0xb2,0xd3,0x08,0x21,0x02,0x18] v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,0] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[0,1] ; encoding: [0x04,0x10,0xb2,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[0,1] ; encoding: [0x04,0x50,0xb2,0xd3,0x08,0x21,0x02,0x18] v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[0,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,1] ; encoding: [0x04,0x18,0xb2,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,1] ; encoding: [0x04,0x58,0xb2,0xd3,0x08,0x21,0x02,0x18] v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x18] v_pk_add_f32 v[4:5], v[8:9], v[16:17] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,0] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x00] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,0] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x00] v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,0] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[1,0] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x08] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[1,0] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x08] v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[1,0] // NOT-GFX90A: error: 
instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,1] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x10] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,1] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x10] v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,0] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x38] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,0] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x38] v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,0] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[0,1] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x58] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[0,1] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x58] v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[0,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,1] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x78] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,1] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x78] v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,0] ; encoding: [0x04,0x01,0xb2,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,0] ; encoding: [0x04,0x41,0xb2,0xd3,0x08,0x21,0x02,0x18] v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,0] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[0,1] ; encoding: [0x04,0x02,0xb2,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[0,1] ; encoding: 
[0x04,0x42,0xb2,0xd3,0x08,0x21,0x02,0x18] v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[0,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,1] ; encoding: [0x04,0x03,0xb2,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,1] ; encoding: [0x04,0x43,0xb2,0xd3,0x08,0x21,0x02,0x18] v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] clamp ; encoding: [0x04,0x80,0xb2,0xd3,0x08,0x21,0x02,0x18] +// GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] clamp ; encoding: [0x04,0xc0,0xb2,0xd3,0x08,0x21,0x02,0x18] v_pk_add_f32 v[4:5], v[8:9], v[16:17] clamp // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] ; encoding: [0x00,0x00,0xb3,0xd3,0x02,0x09,0x02,0x18] +// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] ; encoding: [0x00,0x40,0xb3,0xd3,0x02,0x09,0x02,0x18] v_pk_mov_b32 v[0:1], v[2:3], v[4:5] -// GFX90A: v_pk_mov_b32 v[0:1], flat_scratch, v[4:5] ; encoding: [0x00,0x00,0xb3,0xd3,0x66,0x08,0x02,0x18] +// GFX90A: v_pk_mov_b32 v[0:1], flat_scratch, v[4:5] ; encoding: [0x00,0x40,0xb3,0xd3,0x66,0x08,0x02,0x18] // NOT-GFX1010: error: instruction not supported on this GPU // NOT-GFX908: error: instruction not supported on this GPU v_pk_mov_b32 v[0:1], flat_scratch, v[4:5] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], vcc ; encoding: [0x00,0x00,0xb3,0xd3,0x02,0xd5,0x00,0x18] +// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], vcc ; encoding: [0x00,0x40,0xb3,0xd3,0x02,0xd5,0x00,0x18] v_pk_mov_b32 v[0:1], v[2:3], vcc // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], s[0:1] ; encoding: [0x00,0x00,0xb3,0xd3,0x02,0x01,0x00,0x18] +// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], s[0:1] ; encoding: [0x00,0x40,0xb3,0xd3,0x02,0x01,0x00,0x18] 
v_pk_mov_b32 v[0:1], v[2:3], s[0:1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel_hi:[0,1] ; encoding: [0x00,0x00,0xb3,0xd3,0x02,0x05,0x02,0x10] +// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel_hi:[0,1] ; encoding: [0x00,0x40,0xb3,0xd3,0x02,0x05,0x02,0x10] v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel_hi:[0,1] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,0] ; encoding: [0x00,0x08,0xb3,0xd3,0x02,0x09,0x02,0x18] +// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,0] ; encoding: [0x00,0x48,0xb3,0xd3,0x02,0x09,0x02,0x18] v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,0] // NOT-GFX90A: error: instruction not supported on this GPU -// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,1] ; encoding: [0x00,0x18,0xb3,0xd3,0x02,0x09,0x02,0x18] +// GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,1] ; encoding: [0x00,0x58,0xb3,0xd3,0x02,0x09,0x02,0x18] v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,1] // GFX90A: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 scc ; encoding: [0x00,0x80,0x09,0xe8,0x00,0x04,0x20,0x80] diff --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop3p.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop3p.s index 690b83bdcbf1..8cb69ad77d81 100644 --- a/llvm/test/MC/AMDGPU/gfx9_asm_vop3p.s +++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop3p.s @@ -187,1033 +187,1033 @@ v_pk_mad_i16 v5, v1, v2, v3 clamp // CHECK: [0x05,0xc0,0x80,0xd3,0x01,0x05,0x0e,0x1c] v_pk_mul_lo_u16 v5, v1, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_lo_u16 v255, v1, v2 -// CHECK: [0xff,0x00,0x81,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x81,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_lo_u16 v5, v255, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0xff,0x05,0x02,0x18] v_pk_mul_lo_u16 v5, s1, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0x04,0x02,0x18] +// CHECK: 
[0x05,0x40,0x81,0xd3,0x01,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, s101, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x65,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x66,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x67,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x6a,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x6b,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x7b,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, m0, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x7c,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x7e,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x7f,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, 0, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x80,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, -1, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0xc1,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0xfb,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0xfc,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0xfd,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, 
src_lds_direct, v2 -// CHECK: [0x05,0x00,0x81,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0xfe,0x04,0x02,0x18] v_pk_mul_lo_u16 v5, v1, v255 -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xff,0x03,0x18] v_pk_mul_lo_u16 v5, v1, s2 -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0x05,0x00,0x18] v_pk_mul_lo_u16 v5, v1, s101 -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xcb,0x00,0x18] v_pk_mul_lo_u16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xcd,0x00,0x18] v_pk_mul_lo_u16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xcf,0x00,0x18] v_pk_mul_lo_u16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xd5,0x00,0x18] v_pk_mul_lo_u16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xd7,0x00,0x18] v_pk_mul_lo_u16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xf7,0x00,0x18] v_pk_mul_lo_u16 v5, v1, m0 -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xf9,0x00,0x18] v_pk_mul_lo_u16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xfd,0x00,0x18] v_pk_mul_lo_u16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xff,0x00,0x18] v_pk_mul_lo_u16 v5, v1, 0 -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0x01,0x01,0x18] v_pk_mul_lo_u16 v5, v1, -1 -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0x83,0x01,0x18] v_pk_mul_lo_u16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xf7,0x01,0x18] +// 
CHECK: [0x05,0x40,0x81,0xd3,0x01,0xf7,0x01,0x18] v_pk_mul_lo_u16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xf9,0x01,0x18] v_pk_mul_lo_u16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0xfb,0x01,0x18] v_pk_mul_lo_u16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_lo_u16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x81,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x81,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_lo_u16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x81,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x81,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_lo_u16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x81,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x81,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_lo_u16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_lo_u16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0x05,0x02,0x00] v_pk_mul_lo_u16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0x05,0x02,0x08] v_pk_mul_lo_u16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x81,0xd3,0x01,0x05,0x02,0x10] v_pk_add_i16 v5, v1, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0x05,0x02,0x18] v_pk_add_i16 v255, v1, v2 -// CHECK: [0xff,0x00,0x82,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x82,0xd3,0x01,0x05,0x02,0x18] v_pk_add_i16 v5, v255, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0xff,0x05,0x02,0x18] v_pk_add_i16 v5, s1, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0x04,0x02,0x18] +// CHECK: 
[0x05,0x40,0x82,0xd3,0x01,0x04,0x02,0x18] v_pk_add_i16 v5, s101, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x65,0x04,0x02,0x18] v_pk_add_i16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x66,0x04,0x02,0x18] v_pk_add_i16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x67,0x04,0x02,0x18] v_pk_add_i16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x6a,0x04,0x02,0x18] v_pk_add_i16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x6b,0x04,0x02,0x18] v_pk_add_i16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x7b,0x04,0x02,0x18] v_pk_add_i16 v5, m0, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x7c,0x04,0x02,0x18] v_pk_add_i16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x7e,0x04,0x02,0x18] v_pk_add_i16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x7f,0x04,0x02,0x18] v_pk_add_i16 v5, 0, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x80,0x04,0x02,0x18] v_pk_add_i16 v5, -1, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0xc1,0x04,0x02,0x18] v_pk_add_i16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0xfb,0x04,0x02,0x18] v_pk_add_i16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0xfc,0x04,0x02,0x18] v_pk_add_i16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x82,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0xfd,0x04,0x02,0x18] v_pk_add_i16 v5, src_lds_direct, v2 -// CHECK: 
[0x05,0x00,0x82,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0xfe,0x04,0x02,0x18] v_pk_add_i16 v5, v1, v255 -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xff,0x03,0x18] v_pk_add_i16 v5, v1, s2 -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0x05,0x00,0x18] v_pk_add_i16 v5, v1, s101 -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xcb,0x00,0x18] v_pk_add_i16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xcd,0x00,0x18] v_pk_add_i16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xcf,0x00,0x18] v_pk_add_i16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xd5,0x00,0x18] v_pk_add_i16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xd7,0x00,0x18] v_pk_add_i16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xf7,0x00,0x18] v_pk_add_i16 v5, v1, m0 -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xf9,0x00,0x18] v_pk_add_i16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xfd,0x00,0x18] v_pk_add_i16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xff,0x00,0x18] v_pk_add_i16 v5, v1, 0 -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0x01,0x01,0x18] v_pk_add_i16 v5, v1, -1 -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0x83,0x01,0x18] v_pk_add_i16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xf7,0x01,0x18] v_pk_add_i16 v5, v1, 
src_execz -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xf9,0x01,0x18] v_pk_add_i16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0xfb,0x01,0x18] v_pk_add_i16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0x05,0x02,0x18] v_pk_add_i16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x82,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x82,0xd3,0x01,0x05,0x02,0x18] v_pk_add_i16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x82,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x82,0xd3,0x01,0x05,0x02,0x18] v_pk_add_i16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x82,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x82,0xd3,0x01,0x05,0x02,0x18] v_pk_add_i16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0x05,0x02,0x18] v_pk_add_i16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0x05,0x02,0x00] v_pk_add_i16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0x05,0x02,0x08] v_pk_add_i16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x82,0xd3,0x01,0x05,0x02,0x10] v_pk_add_i16 v5, v1, v2 clamp -// CHECK: [0x05,0x80,0x82,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0xc0,0x82,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_i16 v5, v1, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_i16 v255, v1, v2 -// CHECK: [0xff,0x00,0x83,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x83,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_i16 v5, v255, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0xff,0x05,0x02,0x18] v_pk_sub_i16 v5, s1, v2 -// CHECK: 
[0x05,0x00,0x83,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0x04,0x02,0x18] v_pk_sub_i16 v5, s101, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x65,0x04,0x02,0x18] v_pk_sub_i16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x66,0x04,0x02,0x18] v_pk_sub_i16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x67,0x04,0x02,0x18] v_pk_sub_i16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x6a,0x04,0x02,0x18] v_pk_sub_i16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x6b,0x04,0x02,0x18] v_pk_sub_i16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x7b,0x04,0x02,0x18] v_pk_sub_i16 v5, m0, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x7c,0x04,0x02,0x18] v_pk_sub_i16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x7e,0x04,0x02,0x18] v_pk_sub_i16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x7f,0x04,0x02,0x18] v_pk_sub_i16 v5, 0, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x80,0x04,0x02,0x18] v_pk_sub_i16 v5, -1, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0xc1,0x04,0x02,0x18] v_pk_sub_i16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0xfb,0x04,0x02,0x18] v_pk_sub_i16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0xfc,0x04,0x02,0x18] v_pk_sub_i16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0xfd,0x04,0x02,0x18] v_pk_sub_i16 v5, 
src_lds_direct, v2 -// CHECK: [0x05,0x00,0x83,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0xfe,0x04,0x02,0x18] v_pk_sub_i16 v5, v1, v255 -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xff,0x03,0x18] v_pk_sub_i16 v5, v1, s2 -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0x05,0x00,0x18] v_pk_sub_i16 v5, v1, s101 -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xcb,0x00,0x18] v_pk_sub_i16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xcd,0x00,0x18] v_pk_sub_i16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xcf,0x00,0x18] v_pk_sub_i16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xd5,0x00,0x18] v_pk_sub_i16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xd7,0x00,0x18] v_pk_sub_i16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xf7,0x00,0x18] v_pk_sub_i16 v5, v1, m0 -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xf9,0x00,0x18] v_pk_sub_i16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xfd,0x00,0x18] v_pk_sub_i16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xff,0x00,0x18] v_pk_sub_i16 v5, v1, 0 -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0x01,0x01,0x18] v_pk_sub_i16 v5, v1, -1 -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0x83,0x01,0x18] v_pk_sub_i16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: 
[0x05,0x40,0x83,0xd3,0x01,0xf7,0x01,0x18] v_pk_sub_i16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xf9,0x01,0x18] v_pk_sub_i16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0xfb,0x01,0x18] v_pk_sub_i16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_i16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x83,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x83,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_i16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x83,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x83,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_i16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x83,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x83,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_i16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_i16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0x05,0x02,0x00] v_pk_sub_i16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0x05,0x02,0x08] v_pk_sub_i16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x83,0xd3,0x01,0x05,0x02,0x10] v_pk_sub_i16 v5, v1, v2 clamp -// CHECK: [0x05,0x80,0x83,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0xc0,0x83,0xd3,0x01,0x05,0x02,0x18] v_pk_lshlrev_b16 v5, v1, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0x05,0x02,0x18] v_pk_lshlrev_b16 v255, v1, v2 -// CHECK: [0xff,0x00,0x84,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x84,0xd3,0x01,0x05,0x02,0x18] v_pk_lshlrev_b16 v5, v255, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0xff,0x05,0x02,0x18] +// CHECK: 
[0x05,0x40,0x84,0xd3,0xff,0x05,0x02,0x18] v_pk_lshlrev_b16 v5, s1, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, s101, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x65,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x66,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x67,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x6a,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x6b,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x7b,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, m0, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x7c,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x7e,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x7f,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, 0, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x80,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, -1, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0xc1,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0xfb,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0xfc,0x04,0x02,0x18] v_pk_lshlrev_b16 
v5, src_scc, v2 -// CHECK: [0x05,0x00,0x84,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0xfd,0x04,0x02,0x18] v_pk_lshlrev_b16 v5, v1, v255 -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xff,0x03,0x18] v_pk_lshlrev_b16 v5, v1, s2 -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0x05,0x00,0x18] v_pk_lshlrev_b16 v5, v1, s101 -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xcb,0x00,0x18] v_pk_lshlrev_b16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xcd,0x00,0x18] v_pk_lshlrev_b16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xcf,0x00,0x18] v_pk_lshlrev_b16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xd5,0x00,0x18] v_pk_lshlrev_b16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xd7,0x00,0x18] v_pk_lshlrev_b16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xf7,0x00,0x18] v_pk_lshlrev_b16 v5, v1, m0 -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xf9,0x00,0x18] v_pk_lshlrev_b16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xfd,0x00,0x18] v_pk_lshlrev_b16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xff,0x00,0x18] v_pk_lshlrev_b16 v5, v1, 0 -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0x01,0x01,0x18] v_pk_lshlrev_b16 v5, v1, -1 -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0x83,0x01,0x18] v_pk_lshlrev_b16 v5, v1, src_vccz -// CHECK: 
[0x05,0x00,0x84,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xf7,0x01,0x18] v_pk_lshlrev_b16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xf9,0x01,0x18] v_pk_lshlrev_b16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0xfb,0x01,0x18] v_pk_lshlrev_b16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0x05,0x02,0x18] v_pk_lshlrev_b16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x84,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x84,0xd3,0x01,0x05,0x02,0x18] v_pk_lshlrev_b16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x84,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x84,0xd3,0x01,0x05,0x02,0x18] v_pk_lshlrev_b16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x84,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x84,0xd3,0x01,0x05,0x02,0x18] v_pk_lshlrev_b16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0x05,0x02,0x18] v_pk_lshlrev_b16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0x05,0x02,0x00] v_pk_lshlrev_b16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0x05,0x02,0x08] v_pk_lshlrev_b16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x84,0xd3,0x01,0x05,0x02,0x10] v_pk_lshrrev_b16 v5, v1, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0x05,0x02,0x18] v_pk_lshrrev_b16 v255, v1, v2 -// CHECK: [0xff,0x00,0x85,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x85,0xd3,0x01,0x05,0x02,0x18] v_pk_lshrrev_b16 v5, v255, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0xff,0x05,0x02,0x18] v_pk_lshrrev_b16 v5, s1, v2 
-// CHECK: [0x05,0x00,0x85,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, s101, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x65,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x66,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x67,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x6a,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x6b,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x7b,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, m0, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x7c,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x7e,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x7f,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, 0, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x80,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, -1, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0xc1,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0xfb,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0xfc,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x85,0xd3,0xfd,0x04,0x02,0x18] +// 
CHECK: [0x05,0x40,0x85,0xd3,0xfd,0x04,0x02,0x18] v_pk_lshrrev_b16 v5, v1, v255 -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xff,0x03,0x18] v_pk_lshrrev_b16 v5, v1, s2 -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0x05,0x00,0x18] v_pk_lshrrev_b16 v5, v1, s101 -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xcb,0x00,0x18] v_pk_lshrrev_b16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xcd,0x00,0x18] v_pk_lshrrev_b16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xcf,0x00,0x18] v_pk_lshrrev_b16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xd5,0x00,0x18] v_pk_lshrrev_b16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xd7,0x00,0x18] v_pk_lshrrev_b16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xf7,0x00,0x18] v_pk_lshrrev_b16 v5, v1, m0 -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xf9,0x00,0x18] v_pk_lshrrev_b16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xfd,0x00,0x18] v_pk_lshrrev_b16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xff,0x00,0x18] v_pk_lshrrev_b16 v5, v1, 0 -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0x01,0x01,0x18] v_pk_lshrrev_b16 v5, v1, -1 -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0x83,0x01,0x18] v_pk_lshrrev_b16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xf7,0x01,0x18] v_pk_lshrrev_b16 
v5, v1, src_execz -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xf9,0x01,0x18] v_pk_lshrrev_b16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0xfb,0x01,0x18] v_pk_lshrrev_b16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0x05,0x02,0x18] v_pk_lshrrev_b16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x85,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x85,0xd3,0x01,0x05,0x02,0x18] v_pk_lshrrev_b16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x85,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x85,0xd3,0x01,0x05,0x02,0x18] v_pk_lshrrev_b16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x85,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x85,0xd3,0x01,0x05,0x02,0x18] v_pk_lshrrev_b16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0x05,0x02,0x18] v_pk_lshrrev_b16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0x05,0x02,0x00] v_pk_lshrrev_b16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0x05,0x02,0x08] v_pk_lshrrev_b16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x85,0xd3,0x01,0x05,0x02,0x10] v_pk_ashrrev_i16 v5, v1, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0x05,0x02,0x18] v_pk_ashrrev_i16 v255, v1, v2 -// CHECK: [0xff,0x00,0x86,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x86,0xd3,0x01,0x05,0x02,0x18] v_pk_ashrrev_i16 v5, v255, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0xff,0x05,0x02,0x18] v_pk_ashrrev_i16 v5, s1, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0x04,0x02,0x18] 
v_pk_ashrrev_i16 v5, s101, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x65,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x66,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x67,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x6a,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x6b,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x7b,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, m0, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x7c,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x7e,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x7f,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, 0, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x80,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, -1, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0xc1,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0xfb,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0xfc,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x86,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0xfd,0x04,0x02,0x18] v_pk_ashrrev_i16 v5, v1, v255 -// CHECK: 
[0x05,0x00,0x86,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xff,0x03,0x18] v_pk_ashrrev_i16 v5, v1, s2 -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0x05,0x00,0x18] v_pk_ashrrev_i16 v5, v1, s101 -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xcb,0x00,0x18] v_pk_ashrrev_i16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xcd,0x00,0x18] v_pk_ashrrev_i16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xcf,0x00,0x18] v_pk_ashrrev_i16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xd5,0x00,0x18] v_pk_ashrrev_i16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xd7,0x00,0x18] v_pk_ashrrev_i16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xf7,0x00,0x18] v_pk_ashrrev_i16 v5, v1, m0 -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xf9,0x00,0x18] v_pk_ashrrev_i16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xfd,0x00,0x18] v_pk_ashrrev_i16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xff,0x00,0x18] v_pk_ashrrev_i16 v5, v1, 0 -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0x01,0x01,0x18] v_pk_ashrrev_i16 v5, v1, -1 -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0x83,0x01,0x18] v_pk_ashrrev_i16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xf7,0x01,0x18] v_pk_ashrrev_i16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: 
[0x05,0x40,0x86,0xd3,0x01,0xf9,0x01,0x18] v_pk_ashrrev_i16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0xfb,0x01,0x18] v_pk_ashrrev_i16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0x05,0x02,0x18] v_pk_ashrrev_i16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x86,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x86,0xd3,0x01,0x05,0x02,0x18] v_pk_ashrrev_i16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x86,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x86,0xd3,0x01,0x05,0x02,0x18] v_pk_ashrrev_i16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x86,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x86,0xd3,0x01,0x05,0x02,0x18] v_pk_ashrrev_i16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0x05,0x02,0x18] v_pk_ashrrev_i16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0x05,0x02,0x00] v_pk_ashrrev_i16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0x05,0x02,0x08] v_pk_ashrrev_i16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x86,0xd3,0x01,0x05,0x02,0x10] v_pk_max_i16 v5, v1, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0x05,0x02,0x18] v_pk_max_i16 v255, v1, v2 -// CHECK: [0xff,0x00,0x87,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x87,0xd3,0x01,0x05,0x02,0x18] v_pk_max_i16 v5, v255, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0xff,0x05,0x02,0x18] v_pk_max_i16 v5, s1, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0x04,0x02,0x18] v_pk_max_i16 v5, s101, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x65,0x04,0x02,0x18] +// CHECK: 
[0x05,0x40,0x87,0xd3,0x65,0x04,0x02,0x18] v_pk_max_i16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x66,0x04,0x02,0x18] v_pk_max_i16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x67,0x04,0x02,0x18] v_pk_max_i16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x6a,0x04,0x02,0x18] v_pk_max_i16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x6b,0x04,0x02,0x18] v_pk_max_i16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x7b,0x04,0x02,0x18] v_pk_max_i16 v5, m0, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x7c,0x04,0x02,0x18] v_pk_max_i16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x7e,0x04,0x02,0x18] v_pk_max_i16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x7f,0x04,0x02,0x18] v_pk_max_i16 v5, 0, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x80,0x04,0x02,0x18] v_pk_max_i16 v5, -1, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0xc1,0x04,0x02,0x18] v_pk_max_i16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0xfb,0x04,0x02,0x18] v_pk_max_i16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0xfc,0x04,0x02,0x18] v_pk_max_i16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0xfd,0x04,0x02,0x18] v_pk_max_i16 v5, src_lds_direct, v2 -// CHECK: [0x05,0x00,0x87,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0xfe,0x04,0x02,0x18] v_pk_max_i16 v5, v1, v255 -// CHECK: 
[0x05,0x00,0x87,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xff,0x03,0x18] v_pk_max_i16 v5, v1, s2 -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0x05,0x00,0x18] v_pk_max_i16 v5, v1, s101 -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xcb,0x00,0x18] v_pk_max_i16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xcd,0x00,0x18] v_pk_max_i16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xcf,0x00,0x18] v_pk_max_i16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xd5,0x00,0x18] v_pk_max_i16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xd7,0x00,0x18] v_pk_max_i16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xf7,0x00,0x18] v_pk_max_i16 v5, v1, m0 -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xf9,0x00,0x18] v_pk_max_i16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xfd,0x00,0x18] v_pk_max_i16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xff,0x00,0x18] v_pk_max_i16 v5, v1, 0 -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0x01,0x01,0x18] v_pk_max_i16 v5, v1, -1 -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0x83,0x01,0x18] v_pk_max_i16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xf7,0x01,0x18] v_pk_max_i16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xf9,0x01,0x18] v_pk_max_i16 v5, v1, 
src_scc -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0xfb,0x01,0x18] v_pk_max_i16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0x05,0x02,0x18] v_pk_max_i16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x87,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x87,0xd3,0x01,0x05,0x02,0x18] v_pk_max_i16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x87,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x87,0xd3,0x01,0x05,0x02,0x18] v_pk_max_i16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x87,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x87,0xd3,0x01,0x05,0x02,0x18] v_pk_max_i16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0x05,0x02,0x18] v_pk_max_i16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0x05,0x02,0x00] v_pk_max_i16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0x05,0x02,0x08] v_pk_max_i16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x87,0xd3,0x01,0x05,0x02,0x10] v_pk_min_i16 v5, v1, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0x05,0x02,0x18] v_pk_min_i16 v255, v1, v2 -// CHECK: [0xff,0x00,0x88,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x88,0xd3,0x01,0x05,0x02,0x18] v_pk_min_i16 v5, v255, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0xff,0x05,0x02,0x18] v_pk_min_i16 v5, s1, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0x04,0x02,0x18] v_pk_min_i16 v5, s101, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x65,0x04,0x02,0x18] v_pk_min_i16 v5, flat_scratch_lo, v2 -// CHECK: 
[0x05,0x00,0x88,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x66,0x04,0x02,0x18] v_pk_min_i16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x67,0x04,0x02,0x18] v_pk_min_i16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x6a,0x04,0x02,0x18] v_pk_min_i16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x6b,0x04,0x02,0x18] v_pk_min_i16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x7b,0x04,0x02,0x18] v_pk_min_i16 v5, m0, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x7c,0x04,0x02,0x18] v_pk_min_i16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x7e,0x04,0x02,0x18] v_pk_min_i16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x7f,0x04,0x02,0x18] v_pk_min_i16 v5, 0, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x80,0x04,0x02,0x18] v_pk_min_i16 v5, -1, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0xc1,0x04,0x02,0x18] v_pk_min_i16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0xfb,0x04,0x02,0x18] v_pk_min_i16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0xfc,0x04,0x02,0x18] v_pk_min_i16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0xfd,0x04,0x02,0x18] v_pk_min_i16 v5, src_lds_direct, v2 -// CHECK: [0x05,0x00,0x88,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0xfe,0x04,0x02,0x18] v_pk_min_i16 v5, v1, v255 -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xff,0x03,0x18] v_pk_min_i16 v5, 
v1, s2 -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0x05,0x00,0x18] v_pk_min_i16 v5, v1, s101 -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xcb,0x00,0x18] v_pk_min_i16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xcd,0x00,0x18] v_pk_min_i16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xcf,0x00,0x18] v_pk_min_i16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xd5,0x00,0x18] v_pk_min_i16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xd7,0x00,0x18] v_pk_min_i16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xf7,0x00,0x18] v_pk_min_i16 v5, v1, m0 -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xf9,0x00,0x18] v_pk_min_i16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xfd,0x00,0x18] v_pk_min_i16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xff,0x00,0x18] v_pk_min_i16 v5, v1, 0 -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0x01,0x01,0x18] v_pk_min_i16 v5, v1, -1 -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0x83,0x01,0x18] v_pk_min_i16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xf7,0x01,0x18] v_pk_min_i16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xf9,0x01,0x18] v_pk_min_i16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0xfb,0x01,0x18] 
v_pk_min_i16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0x05,0x02,0x18] v_pk_min_i16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x88,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x88,0xd3,0x01,0x05,0x02,0x18] v_pk_min_i16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x88,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x88,0xd3,0x01,0x05,0x02,0x18] v_pk_min_i16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x88,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x88,0xd3,0x01,0x05,0x02,0x18] v_pk_min_i16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0x05,0x02,0x18] v_pk_min_i16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0x05,0x02,0x00] v_pk_min_i16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0x05,0x02,0x08] v_pk_min_i16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x88,0xd3,0x01,0x05,0x02,0x10] v_pk_mad_u16 v5, v1, v2, v3 // CHECK: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x1c] @@ -1402,526 +1402,526 @@ v_pk_mad_u16 v5, v1, v2, v3 clamp // CHECK: [0x05,0xc0,0x89,0xd3,0x01,0x05,0x0e,0x1c] v_pk_add_u16 v5, v1, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x02,0x18] v_pk_add_u16 v255, v1, v2 -// CHECK: [0xff,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x8a,0xd3,0x01,0x05,0x02,0x18] v_pk_add_u16 v5, v255, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0xff,0x05,0x02,0x18] v_pk_add_u16 v5, s1, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0x04,0x02,0x18] v_pk_add_u16 v5, s101, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x65,0x04,0x02,0x18] +// CHECK: 
[0x05,0x40,0x8a,0xd3,0x65,0x04,0x02,0x18] v_pk_add_u16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x66,0x04,0x02,0x18] v_pk_add_u16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x67,0x04,0x02,0x18] v_pk_add_u16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x6a,0x04,0x02,0x18] v_pk_add_u16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x6b,0x04,0x02,0x18] v_pk_add_u16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x7b,0x04,0x02,0x18] v_pk_add_u16 v5, m0, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x7c,0x04,0x02,0x18] v_pk_add_u16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x7e,0x04,0x02,0x18] v_pk_add_u16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x7f,0x04,0x02,0x18] v_pk_add_u16 v5, 0, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x80,0x04,0x02,0x18] v_pk_add_u16 v5, -1, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0xc1,0x04,0x02,0x18] v_pk_add_u16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0xfb,0x04,0x02,0x18] v_pk_add_u16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0xfc,0x04,0x02,0x18] v_pk_add_u16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0xfd,0x04,0x02,0x18] v_pk_add_u16 v5, src_lds_direct, v2 -// CHECK: [0x05,0x00,0x8a,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0xfe,0x04,0x02,0x18] v_pk_add_u16 v5, v1, v255 -// CHECK: 
[0x05,0x00,0x8a,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x03,0x18] v_pk_add_u16 v5, v1, s2 -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x00,0x18] v_pk_add_u16 v5, v1, s101 -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xcb,0x00,0x18] v_pk_add_u16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xcd,0x00,0x18] v_pk_add_u16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xcf,0x00,0x18] v_pk_add_u16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xd5,0x00,0x18] v_pk_add_u16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xd7,0x00,0x18] v_pk_add_u16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xf7,0x00,0x18] v_pk_add_u16 v5, v1, m0 -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xf9,0x00,0x18] v_pk_add_u16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xfd,0x00,0x18] v_pk_add_u16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x00,0x18] v_pk_add_u16 v5, v1, 0 -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0x01,0x01,0x18] v_pk_add_u16 v5, v1, -1 -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0x83,0x01,0x18] v_pk_add_u16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xf7,0x01,0x18] v_pk_add_u16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xf9,0x01,0x18] v_pk_add_u16 v5, v1, 
src_scc -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0xfb,0x01,0x18] v_pk_add_u16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x02,0x18] v_pk_add_u16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x8a,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x8a,0xd3,0x01,0x05,0x02,0x18] v_pk_add_u16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x8a,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x8a,0xd3,0x01,0x05,0x02,0x18] v_pk_add_u16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x8a,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x8a,0xd3,0x01,0x05,0x02,0x18] v_pk_add_u16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x02,0x18] v_pk_add_u16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x02,0x00] v_pk_add_u16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x02,0x08] v_pk_add_u16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x02,0x10] v_pk_add_u16 v5, v1, v2 clamp -// CHECK: [0x05,0x80,0x8a,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0xc0,0x8a,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_u16 v5, v1, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_u16 v255, v1, v2 -// CHECK: [0xff,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x8b,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_u16 v5, v255, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0xff,0x05,0x02,0x18] v_pk_sub_u16 v5, s1, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0x04,0x02,0x18] v_pk_sub_u16 v5, s101, v2 -// CHECK: 
[0x05,0x00,0x8b,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x65,0x04,0x02,0x18] v_pk_sub_u16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x66,0x04,0x02,0x18] v_pk_sub_u16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x67,0x04,0x02,0x18] v_pk_sub_u16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x6a,0x04,0x02,0x18] v_pk_sub_u16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x6b,0x04,0x02,0x18] v_pk_sub_u16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x7b,0x04,0x02,0x18] v_pk_sub_u16 v5, m0, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x7c,0x04,0x02,0x18] v_pk_sub_u16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x7e,0x04,0x02,0x18] v_pk_sub_u16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x7f,0x04,0x02,0x18] v_pk_sub_u16 v5, 0, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x80,0x04,0x02,0x18] v_pk_sub_u16 v5, -1, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0xc1,0x04,0x02,0x18] v_pk_sub_u16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0xfb,0x04,0x02,0x18] v_pk_sub_u16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0xfc,0x04,0x02,0x18] v_pk_sub_u16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0xfd,0x04,0x02,0x18] v_pk_sub_u16 v5, src_lds_direct, v2 -// CHECK: [0x05,0x00,0x8b,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0xfe,0x04,0x02,0x18] 
v_pk_sub_u16 v5, v1, v255 -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x03,0x18] v_pk_sub_u16 v5, v1, s2 -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x00,0x18] v_pk_sub_u16 v5, v1, s101 -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xcb,0x00,0x18] v_pk_sub_u16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xcd,0x00,0x18] v_pk_sub_u16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xcf,0x00,0x18] v_pk_sub_u16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xd5,0x00,0x18] v_pk_sub_u16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xd7,0x00,0x18] v_pk_sub_u16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xf7,0x00,0x18] v_pk_sub_u16 v5, v1, m0 -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xf9,0x00,0x18] v_pk_sub_u16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xfd,0x00,0x18] v_pk_sub_u16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x00,0x18] v_pk_sub_u16 v5, v1, 0 -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0x01,0x01,0x18] v_pk_sub_u16 v5, v1, -1 -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0x83,0x01,0x18] v_pk_sub_u16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xf7,0x01,0x18] v_pk_sub_u16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: 
[0x05,0x40,0x8b,0xd3,0x01,0xf9,0x01,0x18] v_pk_sub_u16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0xfb,0x01,0x18] v_pk_sub_u16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_u16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x8b,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x8b,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_u16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x8b,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x8b,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_u16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x8b,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x8b,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_u16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_u16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x02,0x00] v_pk_sub_u16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x02,0x08] v_pk_sub_u16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x02,0x10] v_pk_sub_u16 v5, v1, v2 clamp -// CHECK: [0x05,0x80,0x8b,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0xc0,0x8b,0xd3,0x01,0x05,0x02,0x18] v_pk_max_u16 v5, v1, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x02,0x18] v_pk_max_u16 v255, v1, v2 -// CHECK: [0xff,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x8c,0xd3,0x01,0x05,0x02,0x18] v_pk_max_u16 v5, v255, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0xff,0x05,0x02,0x18] v_pk_max_u16 v5, s1, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0x04,0x02,0x18] 
v_pk_max_u16 v5, s101, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x65,0x04,0x02,0x18] v_pk_max_u16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x66,0x04,0x02,0x18] v_pk_max_u16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x67,0x04,0x02,0x18] v_pk_max_u16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x6a,0x04,0x02,0x18] v_pk_max_u16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x6b,0x04,0x02,0x18] v_pk_max_u16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x7b,0x04,0x02,0x18] v_pk_max_u16 v5, m0, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x7c,0x04,0x02,0x18] v_pk_max_u16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x7e,0x04,0x02,0x18] v_pk_max_u16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x7f,0x04,0x02,0x18] v_pk_max_u16 v5, 0, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x80,0x04,0x02,0x18] v_pk_max_u16 v5, -1, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0xc1,0x04,0x02,0x18] v_pk_max_u16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0xfb,0x04,0x02,0x18] v_pk_max_u16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0xfc,0x04,0x02,0x18] v_pk_max_u16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0xfd,0x04,0x02,0x18] v_pk_max_u16 v5, src_lds_direct, v2 -// CHECK: [0x05,0x00,0x8c,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: 
[0x05,0x40,0x8c,0xd3,0xfe,0x04,0x02,0x18] v_pk_max_u16 v5, v1, v255 -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x03,0x18] v_pk_max_u16 v5, v1, s2 -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x00,0x18] v_pk_max_u16 v5, v1, s101 -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xcb,0x00,0x18] v_pk_max_u16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xcd,0x00,0x18] v_pk_max_u16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xcf,0x00,0x18] v_pk_max_u16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xd5,0x00,0x18] v_pk_max_u16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xd7,0x00,0x18] v_pk_max_u16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xf7,0x00,0x18] v_pk_max_u16 v5, v1, m0 -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xf9,0x00,0x18] v_pk_max_u16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xfd,0x00,0x18] v_pk_max_u16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x00,0x18] v_pk_max_u16 v5, v1, 0 -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0x01,0x01,0x18] v_pk_max_u16 v5, v1, -1 -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0x83,0x01,0x18] v_pk_max_u16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xf7,0x01,0x18] v_pk_max_u16 v5, v1, src_execz -// CHECK: 
[0x05,0x00,0x8c,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xf9,0x01,0x18] v_pk_max_u16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0xfb,0x01,0x18] v_pk_max_u16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x02,0x18] v_pk_max_u16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x8c,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x8c,0xd3,0x01,0x05,0x02,0x18] v_pk_max_u16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x8c,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x8c,0xd3,0x01,0x05,0x02,0x18] v_pk_max_u16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x8c,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x8c,0xd3,0x01,0x05,0x02,0x18] v_pk_max_u16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x02,0x18] v_pk_max_u16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x02,0x00] v_pk_max_u16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x02,0x08] v_pk_max_u16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x02,0x10] v_pk_min_u16 v5, v1, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x02,0x18] v_pk_min_u16 v255, v1, v2 -// CHECK: [0xff,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x8d,0xd3,0x01,0x05,0x02,0x18] v_pk_min_u16 v5, v255, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0xff,0x05,0x02,0x18] v_pk_min_u16 v5, s1, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0x04,0x02,0x18] v_pk_min_u16 v5, s101, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x65,0x04,0x02,0x18] +// CHECK: 
[0x05,0x40,0x8d,0xd3,0x65,0x04,0x02,0x18] v_pk_min_u16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x66,0x04,0x02,0x18] v_pk_min_u16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x67,0x04,0x02,0x18] v_pk_min_u16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x6a,0x04,0x02,0x18] v_pk_min_u16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x6b,0x04,0x02,0x18] v_pk_min_u16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x7b,0x04,0x02,0x18] v_pk_min_u16 v5, m0, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x7c,0x04,0x02,0x18] v_pk_min_u16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x7e,0x04,0x02,0x18] v_pk_min_u16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x7f,0x04,0x02,0x18] v_pk_min_u16 v5, 0, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x80,0x04,0x02,0x18] v_pk_min_u16 v5, -1, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0xc1,0x04,0x02,0x18] v_pk_min_u16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0xfb,0x04,0x02,0x18] v_pk_min_u16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0xfc,0x04,0x02,0x18] v_pk_min_u16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0xfd,0x04,0x02,0x18] v_pk_min_u16 v5, src_lds_direct, v2 -// CHECK: [0x05,0x00,0x8d,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0xfe,0x04,0x02,0x18] v_pk_min_u16 v5, v1, v255 -// CHECK: 
[0x05,0x00,0x8d,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x03,0x18] v_pk_min_u16 v5, v1, s2 -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x00,0x18] v_pk_min_u16 v5, v1, s101 -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xcb,0x00,0x18] v_pk_min_u16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xcd,0x00,0x18] v_pk_min_u16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xcf,0x00,0x18] v_pk_min_u16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xd5,0x00,0x18] v_pk_min_u16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xd7,0x00,0x18] v_pk_min_u16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xf7,0x00,0x18] v_pk_min_u16 v5, v1, m0 -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xf9,0x00,0x18] v_pk_min_u16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xfd,0x00,0x18] v_pk_min_u16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x00,0x18] v_pk_min_u16 v5, v1, 0 -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0x01,0x01,0x18] v_pk_min_u16 v5, v1, -1 -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0x83,0x01,0x18] v_pk_min_u16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xf7,0x01,0x18] v_pk_min_u16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xf9,0x01,0x18] v_pk_min_u16 v5, v1, 
src_scc -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0xfb,0x01,0x18] v_pk_min_u16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x02,0x18] v_pk_min_u16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x8d,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x8d,0xd3,0x01,0x05,0x02,0x18] v_pk_min_u16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x8d,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x8d,0xd3,0x01,0x05,0x02,0x18] v_pk_min_u16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x8d,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x8d,0xd3,0x01,0x05,0x02,0x18] v_pk_min_u16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x02,0x18] v_pk_min_u16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x02,0x00] v_pk_min_u16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x02,0x08] v_pk_min_u16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x02,0x10] v_pk_fma_f16 v5, v1, v2, v3 // CHECK: [0x05,0x40,0x8e,0xd3,0x01,0x05,0x0e,0x1c] @@ -2152,652 +2152,652 @@ v_pk_fma_f16 v5, v1, v2, v3 clamp // CHECK: [0x05,0xc0,0x8e,0xd3,0x01,0x05,0x0e,0x1c] v_pk_add_f16 v5, v1, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_add_f16 v255, v1, v2 -// CHECK: [0xff,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_add_f16 v5, v255, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0xff,0x05,0x02,0x18] v_pk_add_f16 v5, s1, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x04,0x02,0x18] v_pk_add_f16 
v5, s101, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x65,0x04,0x02,0x18] v_pk_add_f16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x66,0x04,0x02,0x18] v_pk_add_f16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x67,0x04,0x02,0x18] v_pk_add_f16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x6a,0x04,0x02,0x18] v_pk_add_f16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x6b,0x04,0x02,0x18] v_pk_add_f16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x7b,0x04,0x02,0x18] v_pk_add_f16 v5, m0, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x7c,0x04,0x02,0x18] v_pk_add_f16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x7e,0x04,0x02,0x18] v_pk_add_f16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x7f,0x04,0x02,0x18] v_pk_add_f16 v5, 0, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x80,0x04,0x02,0x18] v_pk_add_f16 v5, -1, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0xc1,0x04,0x02,0x18] v_pk_add_f16 v5, 0.5, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0xf0,0x04,0x02,0x18] v_pk_add_f16 v5, -4.0, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0xf7,0x04,0x02,0x18] v_pk_add_f16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0xfb,0x04,0x02,0x18] v_pk_add_f16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: 
[0x05,0x40,0x8f,0xd3,0xfc,0x04,0x02,0x18] v_pk_add_f16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0xfd,0x04,0x02,0x18] v_pk_add_f16 v5, src_lds_direct, v2 -// CHECK: [0x05,0x00,0x8f,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0xfe,0x04,0x02,0x18] v_pk_add_f16 v5, v1, v255 -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xff,0x03,0x18] v_pk_add_f16 v5, v1, s2 -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x00,0x18] v_pk_add_f16 v5, v1, s101 -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xcb,0x00,0x18] v_pk_add_f16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xcd,0x00,0x18] v_pk_add_f16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xcf,0x00,0x18] v_pk_add_f16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xd5,0x00,0x18] v_pk_add_f16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xd7,0x00,0x18] v_pk_add_f16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xf7,0x00,0x18] v_pk_add_f16 v5, v1, m0 -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xf9,0x00,0x18] v_pk_add_f16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xfd,0x00,0x18] v_pk_add_f16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xff,0x00,0x18] v_pk_add_f16 v5, v1, 0 -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x01,0x01,0x18] v_pk_add_f16 v5, v1, -1 -// CHECK: 
[0x05,0x00,0x8f,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x83,0x01,0x18] v_pk_add_f16 v5, v1, 0.5 -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xe1,0x01,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xe1,0x01,0x18] v_pk_add_f16 v5, v1, -4.0 -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xef,0x01,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xef,0x01,0x18] v_pk_add_f16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xf7,0x01,0x18] v_pk_add_f16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xf9,0x01,0x18] v_pk_add_f16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0xfb,0x01,0x18] v_pk_add_f16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_add_f16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_add_f16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_add_f16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_add_f16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_add_f16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x00] v_pk_add_f16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x08] v_pk_add_f16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x10] v_pk_add_f16 v5, v1, v2 neg_lo:[1,0] -// CHECK: 
[0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x38] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x38] v_pk_add_f16 v5, v1, v2 neg_lo:[0,1] -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x58] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x58] v_pk_add_f16 v5, v1, v2 neg_lo:[1,1] -// CHECK: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x78] +// CHECK: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x78] v_pk_add_f16 v5, v1, v2 neg_hi:[1,0] -// CHECK: [0x05,0x01,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x41,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_add_f16 v5, v1, v2 neg_hi:[0,1] -// CHECK: [0x05,0x02,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x42,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_add_f16 v5, v1, v2 neg_hi:[1,1] -// CHECK: [0x05,0x03,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x43,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_add_f16 v5, v1, v2 clamp -// CHECK: [0x05,0x80,0x8f,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0xc0,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v5, v1, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v255, v1, v2 -// CHECK: [0xff,0x00,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v5, v255, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0xff,0x05,0x02,0x18] v_pk_mul_f16 v5, s1, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x04,0x02,0x18] v_pk_mul_f16 v5, s101, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x65,0x04,0x02,0x18] v_pk_mul_f16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x66,0x04,0x02,0x18] v_pk_mul_f16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x67,0x04,0x02,0x18] v_pk_mul_f16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: 
[0x05,0x40,0x90,0xd3,0x6a,0x04,0x02,0x18] v_pk_mul_f16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x6b,0x04,0x02,0x18] v_pk_mul_f16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x7b,0x04,0x02,0x18] v_pk_mul_f16 v5, m0, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x7c,0x04,0x02,0x18] v_pk_mul_f16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x7e,0x04,0x02,0x18] v_pk_mul_f16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x7f,0x04,0x02,0x18] v_pk_mul_f16 v5, 0, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x80,0x04,0x02,0x18] v_pk_mul_f16 v5, -1, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0xc1,0x04,0x02,0x18] v_pk_mul_f16 v5, 0.5, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0xf0,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0xf0,0x04,0x02,0x18] v_pk_mul_f16 v5, -4.0, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0xf7,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0xf7,0x04,0x02,0x18] v_pk_mul_f16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0xfb,0x04,0x02,0x18] v_pk_mul_f16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0xfc,0x04,0x02,0x18] v_pk_mul_f16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0xfd,0x04,0x02,0x18] v_pk_mul_f16 v5, src_lds_direct, v2 -// CHECK: [0x05,0x00,0x90,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0xfe,0x04,0x02,0x18] v_pk_mul_f16 v5, v1, v255 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xff,0x03,0x18] v_pk_mul_f16 v5, v1, s2 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x05,0x00,0x18] +// 
CHECK: [0x05,0x40,0x90,0xd3,0x01,0x05,0x00,0x18] v_pk_mul_f16 v5, v1, s101 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xcb,0x00,0x18] v_pk_mul_f16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xcd,0x00,0x18] v_pk_mul_f16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xcf,0x00,0x18] v_pk_mul_f16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xd5,0x00,0x18] v_pk_mul_f16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xd7,0x00,0x18] v_pk_mul_f16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xf7,0x00,0x18] v_pk_mul_f16 v5, v1, m0 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xf9,0x00,0x18] v_pk_mul_f16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xfd,0x00,0x18] v_pk_mul_f16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xff,0x00,0x18] v_pk_mul_f16 v5, v1, 0 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x01,0x01,0x18] v_pk_mul_f16 v5, v1, -1 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x83,0x01,0x18] v_pk_mul_f16 v5, v1, 0.5 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xe1,0x01,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xe1,0x01,0x18] v_pk_mul_f16 v5, v1, -4.0 -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xef,0x01,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xef,0x01,0x18] v_pk_mul_f16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xf7,0x01,0x18] v_pk_mul_f16 v5, v1, src_execz -// CHECK: 
[0x05,0x00,0x90,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xf9,0x01,0x18] v_pk_mul_f16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0xfb,0x01,0x18] v_pk_mul_f16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x00] v_pk_mul_f16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x08] v_pk_mul_f16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x10] v_pk_mul_f16 v5, v1, v2 neg_lo:[1,0] -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x38] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x38] v_pk_mul_f16 v5, v1, v2 neg_lo:[0,1] -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x58] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x58] v_pk_mul_f16 v5, v1, v2 neg_lo:[1,1] -// CHECK: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x78] +// CHECK: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x78] v_pk_mul_f16 v5, v1, v2 neg_hi:[1,0] -// CHECK: [0x05,0x01,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x41,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v5, v1, v2 neg_hi:[0,1] -// 
CHECK: [0x05,0x02,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x42,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v5, v1, v2 neg_hi:[1,1] -// CHECK: [0x05,0x03,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x43,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v5, v1, v2 clamp -// CHECK: [0x05,0x80,0x90,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0xc0,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v5, v1, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v255, v1, v2 -// CHECK: [0xff,0x00,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v5, v255, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0xff,0x05,0x02,0x18] v_pk_min_f16 v5, s1, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x04,0x02,0x18] v_pk_min_f16 v5, s101, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x65,0x04,0x02,0x18] v_pk_min_f16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x66,0x04,0x02,0x18] v_pk_min_f16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x67,0x04,0x02,0x18] v_pk_min_f16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x6a,0x04,0x02,0x18] v_pk_min_f16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x6b,0x04,0x02,0x18] v_pk_min_f16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x7b,0x04,0x02,0x18] v_pk_min_f16 v5, m0, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x7c,0x04,0x02,0x18] v_pk_min_f16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x7e,0x04,0x02,0x18] 
v_pk_min_f16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x7f,0x04,0x02,0x18] v_pk_min_f16 v5, 0, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x80,0x04,0x02,0x18] v_pk_min_f16 v5, -1, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0xc1,0x04,0x02,0x18] v_pk_min_f16 v5, 0.5, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0xf0,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0xf0,0x04,0x02,0x18] v_pk_min_f16 v5, -4.0, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0xf7,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0xf7,0x04,0x02,0x18] v_pk_min_f16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0xfb,0x04,0x02,0x18] v_pk_min_f16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0xfc,0x04,0x02,0x18] v_pk_min_f16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0xfd,0x04,0x02,0x18] v_pk_min_f16 v5, src_lds_direct, v2 -// CHECK: [0x05,0x00,0x91,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0xfe,0x04,0x02,0x18] v_pk_min_f16 v5, v1, v255 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xff,0x03,0x18] v_pk_min_f16 v5, v1, s2 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x05,0x00,0x18] v_pk_min_f16 v5, v1, s101 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xcb,0x00,0x18] v_pk_min_f16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xcd,0x00,0x18] v_pk_min_f16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xcf,0x00,0x18] v_pk_min_f16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: 
[0x05,0x40,0x91,0xd3,0x01,0xd5,0x00,0x18] v_pk_min_f16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xd7,0x00,0x18] v_pk_min_f16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xf7,0x00,0x18] v_pk_min_f16 v5, v1, m0 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xf9,0x00,0x18] v_pk_min_f16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xfd,0x00,0x18] v_pk_min_f16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xff,0x00,0x18] v_pk_min_f16 v5, v1, 0 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x01,0x01,0x18] v_pk_min_f16 v5, v1, -1 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x83,0x01,0x18] v_pk_min_f16 v5, v1, 0.5 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xe1,0x01,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xe1,0x01,0x18] v_pk_min_f16 v5, v1, -4.0 -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xef,0x01,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xef,0x01,0x18] v_pk_min_f16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xf7,0x01,0x18] v_pk_min_f16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xf9,0x01,0x18] v_pk_min_f16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0xfb,0x01,0x18] v_pk_min_f16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v5, v1, v2 op_sel:[0,1] -// CHECK: 
[0x05,0x10,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x00] v_pk_min_f16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x08] v_pk_min_f16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x10] v_pk_min_f16 v5, v1, v2 neg_lo:[1,0] -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x38] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x38] v_pk_min_f16 v5, v1, v2 neg_lo:[0,1] -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x58] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x58] v_pk_min_f16 v5, v1, v2 neg_lo:[1,1] -// CHECK: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x78] +// CHECK: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x78] v_pk_min_f16 v5, v1, v2 neg_hi:[1,0] -// CHECK: [0x05,0x01,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x41,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v5, v1, v2 neg_hi:[0,1] -// CHECK: [0x05,0x02,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x42,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v5, v1, v2 neg_hi:[1,1] -// CHECK: [0x05,0x03,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x43,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 v5, v1, v2 clamp -// CHECK: [0x05,0x80,0x91,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0xc0,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v5, v1, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v255, v1, v2 -// CHECK: 
[0xff,0x00,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0xff,0x40,0x92,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v5, v255, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0xff,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0xff,0x05,0x02,0x18] v_pk_max_f16 v5, s1, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x04,0x02,0x18] v_pk_max_f16 v5, s101, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x65,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x65,0x04,0x02,0x18] v_pk_max_f16 v5, flat_scratch_lo, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x66,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x66,0x04,0x02,0x18] v_pk_max_f16 v5, flat_scratch_hi, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x67,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x67,0x04,0x02,0x18] v_pk_max_f16 v5, vcc_lo, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x6a,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x6a,0x04,0x02,0x18] v_pk_max_f16 v5, vcc_hi, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x6b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x6b,0x04,0x02,0x18] v_pk_max_f16 v5, ttmp15, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x7b,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x7b,0x04,0x02,0x18] v_pk_max_f16 v5, m0, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x7c,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x7c,0x04,0x02,0x18] v_pk_max_f16 v5, exec_lo, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x7e,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x7e,0x04,0x02,0x18] v_pk_max_f16 v5, exec_hi, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x7f,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x7f,0x04,0x02,0x18] v_pk_max_f16 v5, 0, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0x80,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x80,0x04,0x02,0x18] v_pk_max_f16 v5, -1, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0xc1,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0xc1,0x04,0x02,0x18] v_pk_max_f16 v5, 0.5, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0xf0,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0xf0,0x04,0x02,0x18] v_pk_max_f16 v5, -4.0, v2 -// 
CHECK: [0x05,0x00,0x92,0xd3,0xf7,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0xf7,0x04,0x02,0x18] v_pk_max_f16 v5, src_vccz, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0xfb,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0xfb,0x04,0x02,0x18] v_pk_max_f16 v5, src_execz, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0xfc,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0xfc,0x04,0x02,0x18] v_pk_max_f16 v5, src_scc, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0xfd,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0xfd,0x04,0x02,0x18] v_pk_max_f16 v5, src_lds_direct, v2 -// CHECK: [0x05,0x00,0x92,0xd3,0xfe,0x04,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0xfe,0x04,0x02,0x18] v_pk_max_f16 v5, v1, v255 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xff,0x03,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xff,0x03,0x18] v_pk_max_f16 v5, v1, s2 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x05,0x00,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x05,0x00,0x18] v_pk_max_f16 v5, v1, s101 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xcb,0x00,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xcb,0x00,0x18] v_pk_max_f16 v5, v1, flat_scratch_lo -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xcd,0x00,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xcd,0x00,0x18] v_pk_max_f16 v5, v1, flat_scratch_hi -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xcf,0x00,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xcf,0x00,0x18] v_pk_max_f16 v5, v1, vcc_lo -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xd5,0x00,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xd5,0x00,0x18] v_pk_max_f16 v5, v1, vcc_hi -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xd7,0x00,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xd7,0x00,0x18] v_pk_max_f16 v5, v1, ttmp15 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xf7,0x00,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xf7,0x00,0x18] v_pk_max_f16 v5, v1, m0 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xf9,0x00,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xf9,0x00,0x18] v_pk_max_f16 v5, v1, exec_lo -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xfd,0x00,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xfd,0x00,0x18] 
v_pk_max_f16 v5, v1, exec_hi -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xff,0x00,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xff,0x00,0x18] v_pk_max_f16 v5, v1, 0 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x01,0x01,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x01,0x01,0x18] v_pk_max_f16 v5, v1, -1 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x83,0x01,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x83,0x01,0x18] v_pk_max_f16 v5, v1, 0.5 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xe1,0x01,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xe1,0x01,0x18] v_pk_max_f16 v5, v1, -4.0 -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xef,0x01,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xef,0x01,0x18] v_pk_max_f16 v5, v1, src_vccz -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xf7,0x01,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xf7,0x01,0x18] v_pk_max_f16 v5, v1, src_execz -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xf9,0x01,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xf9,0x01,0x18] v_pk_max_f16 v5, v1, src_scc -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0xfb,0x01,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0xfb,0x01,0x18] v_pk_max_f16 v5, v1, v2 op_sel:[0,0] -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v5, v1, v2 op_sel:[1,0] -// CHECK: [0x05,0x08,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x48,0x92,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v5, v1, v2 op_sel:[0,1] -// CHECK: [0x05,0x10,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x50,0x92,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v5, v1, v2 op_sel:[1,1] -// CHECK: [0x05,0x18,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x58,0x92,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v5, v1, v2 op_sel_hi:[1,1] -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v5, v1, v2 op_sel_hi:[0,0] -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x00] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x00] v_pk_max_f16 v5, v1, v2 op_sel_hi:[1,0] -// CHECK: 
[0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x08] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x08] v_pk_max_f16 v5, v1, v2 op_sel_hi:[0,1] -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x10] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x10] v_pk_max_f16 v5, v1, v2 neg_lo:[1,0] -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x38] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x38] v_pk_max_f16 v5, v1, v2 neg_lo:[0,1] -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x58] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x58] v_pk_max_f16 v5, v1, v2 neg_lo:[1,1] -// CHECK: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x78] +// CHECK: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x78] v_pk_max_f16 v5, v1, v2 neg_hi:[1,0] -// CHECK: [0x05,0x01,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x41,0x92,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v5, v1, v2 neg_hi:[0,1] -// CHECK: [0x05,0x02,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x42,0x92,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v5, v1, v2 neg_hi:[1,1] -// CHECK: [0x05,0x03,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0x43,0x92,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v5, v1, v2 clamp -// CHECK: [0x05,0x80,0x92,0xd3,0x01,0x05,0x02,0x18] +// CHECK: [0x05,0xc0,0x92,0xd3,0x01,0x05,0x02,0x18] v_mad_mix_f32 v5, v1, v2, v3 // CHECK: [0x05,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] diff --git a/llvm/test/MC/AMDGPU/lds_direct.s b/llvm/test/MC/AMDGPU/lds_direct.s index e677f59ad748..e8ca18933bf8 100644 --- a/llvm/test/MC/AMDGPU/lds_direct.s +++ b/llvm/test/MC/AMDGPU/lds_direct.s @@ -86,7 +86,7 @@ v_pk_mad_i16 v0, src_lds_direct, v0, v0 // GFX9: v_pk_mad_i16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x40,0x80,0xd3,0xfe,0x00,0x02,0x1c] v_pk_add_i16 v0, src_lds_direct, v0 -// GFX9: v_pk_add_i16 v0, src_lds_direct, v0 ; encoding: [0x00,0x00,0x82,0xd3,0xfe,0x00,0x02,0x18] +// GFX9: v_pk_add_i16 v0, src_lds_direct, v0 ; encoding: [0x00,0x40,0x82,0xd3,0xfe,0x00,0x02,0x18] //---------------------------------------------------------------------------// // VOPC 
diff --git a/llvm/test/MC/AMDGPU/literals.s b/llvm/test/MC/AMDGPU/literals.s index b694bd62e736..678079f03c52 100644 --- a/llvm/test/MC/AMDGPU/literals.s +++ b/llvm/test/MC/AMDGPU/literals.s @@ -597,7 +597,7 @@ v_max_f32 v0, vccz, v0 v_max_f64 v[0:1], scc, v[0:1] // NOSICIVI: error: instruction not supported on this GPU -// GFX9: v_pk_add_f16 v0, src_execz, v0 ; encoding: [0x00,0x00,0x8f,0xd3,0xfc,0x00,0x02,0x18] +// GFX9: v_pk_add_f16 v0, src_execz, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xfc,0x00,0x02,0x18] v_pk_add_f16 v0, execz, v0 // NOSICI: error: instruction not supported on this GPU @@ -737,7 +737,7 @@ v_max_f32 v0, src_shared_base, v0 v_max_f64 v[0:1], src_shared_base, v[0:1] // NOSICIVI: error: instruction not supported on this GPU -// GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x8f,0xd3,0xeb,0x00,0x02,0x18] +// GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xeb,0x00,0x02,0x18] v_pk_add_f16 v0, src_shared_base, v0 // GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20] diff --git a/llvm/test/MC/AMDGPU/literalv216.s b/llvm/test/MC/AMDGPU/literalv216.s index f55216619578..c297371847ce 100644 --- a/llvm/test/MC/AMDGPU/literalv216.s +++ b/llvm/test/MC/AMDGPU/literalv216.s @@ -9,152 +9,152 @@ //===----------------------------------------------------------------------===// v_pk_add_f16 v1, 0, v2 -// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0x80,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x80,0x04,0x02,0x18] v_pk_add_f16 v1, 0.0, v2 -// GFX9: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 0, v2 ; 
encoding: [0x01,0x40,0x8f,0xd3,0x80,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x80,0x04,0x02,0x18] v_pk_add_f16 v1, v2, 0 -// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18] -// GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18] +// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x40,0x8f,0xd3,0x02,0x01,0x01,0x18] +// GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x40,0x0f,0xcc,0x02,0x01,0x01,0x18] v_pk_add_f16 v1, v2, 0.0 -// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x8f,0xd3,0x02,0x01,0x01,0x18] -// GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18] +// GFX9: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x40,0x8f,0xd3,0x02,0x01,0x01,0x18] +// GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x40,0x0f,0xcc,0x02,0x01,0x01,0x18] v_pk_add_f16 v1, 1.0, v2 -// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf2,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf2,0x04,0x02,0x18] v_pk_add_f16 v1, -1.0, v2 -// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf3,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf3,0x04,0x02,0x18] v_pk_add_f16 v1, -0.5, v2 -// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf1,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -0.5, v2 ; 
encoding: [0x01,0x40,0x0f,0xcc,0xf1,0x04,0x02,0x18] v_pk_add_f16 v1, 0.5, v2 -// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf0,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf0,0x04,0x02,0x18] v_pk_add_f16 v1, 2.0, v2 -// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf4,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf4,0x04,0x02,0x18] v_pk_add_f16 v1, -2.0, v2 -// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf5,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf5,0x04,0x02,0x18] v_pk_add_f16 v1, 4.0, v2 -// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf6,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf6,0x04,0x02,0x18] v_pk_add_f16 v1, -4.0, v2 -// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf7,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf7,0x04,0x02,0x18] v_pk_add_f16 v1, 
0.15915494, v2 -// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf8,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf8,0x04,0x02,0x18] v_pk_add_f16 v1, -1, v2 -// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xc1,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc1,0x04,0x02,0x18] v_pk_add_f16 v1, -2, v2 -// GFX9: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc2,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xc2,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc2,0x04,0x02,0x18] v_pk_add_f16 v1, -3, v2 -// GFX9: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc3,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xc3,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc3,0x04,0x02,0x18] v_pk_add_f16 v1, -16, v2 -// GFX9: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xd0,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xd0,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xd0,0x04,0x02,0x18] v_pk_add_f16 v1, 1, v2 -// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: 
[0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0x81,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x81,0x04,0x02,0x18] v_pk_add_f16 v1, 2, v2 -// GFX9: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x82,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0x82,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x82,0x04,0x02,0x18] v_pk_add_f16 v1, 3, v2 -// GFX9: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x83,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0x83,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x83,0x04,0x02,0x18] v_pk_add_f16 v1, 4, v2 -// GFX9: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x84,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0x84,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x84,0x04,0x02,0x18] v_pk_add_f16 v1, 15, v2 -// GFX9: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x8f,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0x8f,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x8f,0x04,0x02,0x18] v_pk_add_f16 v1, 16, v2 -// GFX9: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x90,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 16, v2 ; encoding: 
[0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0x90,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x90,0x04,0x02,0x18] v_pk_add_f16 v1, 63, v2 -// GFX9: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xbf,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xbf,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xbf,0x04,0x02,0x18] v_pk_add_f16 v1, 64, v2 -// GFX9: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc0,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xc0,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc0,0x04,0x02,0x18] v_pk_add_f16 v1, 0x0001, v2 -// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0x81,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0x81,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x81,0x04,0x02,0x18] v_pk_add_f16 v1, 0xffff, v2 -// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xc1,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc1,0x04,0x02,0x18] v_pk_add_f16 v1, 0x3c00, v2 -// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf2,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 1.0, v2 ; encoding: 
[0x01,0x40,0x8f,0xd3,0xf2,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf2,0x04,0x02,0x18] v_pk_add_f16 v1, 0xbc00, v2 -// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf3,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf3,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf3,0x04,0x02,0x18] v_pk_add_f16 v1, 0x3800, v2 -// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf0,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf0,0x04,0x02,0x18] v_pk_add_f16 v1, 0xb800, v2 -// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf1,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf1,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf1,0x04,0x02,0x18] v_pk_add_f16 v1, 0x4000, v2 -// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf4,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf4,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf4,0x04,0x02,0x18] v_pk_add_f16 v1, 0xc000, v2 -// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf5,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf5,0x04,0x02,0x18] +// GFX10: 
v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf5,0x04,0x02,0x18] v_pk_add_f16 v1, 0x4400, v2 -// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf6,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf6,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf6,0x04,0x02,0x18] v_pk_add_f16 v1, 0xc400, v2 -// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf7,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf7,0x04,0x02,0x18] v_pk_add_f16 v1, 0x3118, v2 -// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xf8,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xf8,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf8,0x04,0x02,0x18] v_pk_add_f16 v1, 65535, v2 -// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18] -// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18] +// GFX9: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x8f,0xd3,0xc1,0x04,0x02,0x18] +// GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc1,0x04,0x02,0x18] //===----------------------------------------------------------------------===// // Integer literals @@ -162,35 +162,35 @@ v_pk_add_f16 v1, 65535, v2 v_pk_add_f16 v5, v1, 0x12345678 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_add_f16 v5, v1, 0x12345678 ; encoding: 
[0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] +// GFX10: v_pk_add_f16 v5, v1, 0x12345678 ; encoding: [0x05,0x40,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] v_pk_add_f16 v5, 0x12345678, v2 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_add_f16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] +// GFX10: v_pk_add_f16 v5, 0x12345678, v2 ; encoding: [0x05,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] v_pk_add_f16 v5, -256, v2 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_add_f16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] +// GFX10: v_pk_add_f16 v5, 0xffffff00, v2 ; encoding: [0x05,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] v_pk_add_f16 v5, v1, 256 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_add_f16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] +// GFX10: v_pk_add_f16 v5, v1, 0x100 ; encoding: [0x05,0x40,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] v_pk_add_u16 v5, v1, 0x12345678 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_add_u16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] +// GFX10: v_pk_add_u16 v5, v1, 0x12345678 ; encoding: [0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] v_pk_add_u16 v5, 0x12345678, v2 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_add_u16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] +// GFX10: v_pk_add_u16 v5, 0x12345678, v2 ; encoding: [0x05,0x40,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] v_pk_add_u16 v5, -256, v2 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_add_u16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] +// GFX10: v_pk_add_u16 v5, 0xffffff00, v2 ; 
encoding: [0x05,0x40,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] v_pk_add_u16 v5, v1, 256 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_add_u16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] +// GFX10: v_pk_add_u16 v5, v1, 0x100 ; encoding: [0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] v_pk_add_f16 v5, v1, 0x123456780 // NOGFX9: error: invalid operand for instruction @@ -226,11 +226,11 @@ v_pk_mad_i16 v5, v1, v2, 0xaf123456 v_pk_ashrrev_i16 v5, 0x12345678, v2 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_ashrrev_i16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] +// GFX10: v_pk_ashrrev_i16 v5, 0x12345678, v2 ; encoding: [0x05,0x40,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] v_pk_ashrrev_i16 v5, v1, 0x12345678 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_ashrrev_i16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] +// GFX10: v_pk_ashrrev_i16 v5, v1, 0x12345678 ; encoding: [0x05,0x40,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] //===----------------------------------------------------------------------===// // Floating-point literals (allowed if lossless conversion to f16 is possible) @@ -238,11 +238,11 @@ v_pk_ashrrev_i16 v5, v1, 0x12345678 v_pk_add_f16 v5, v1, 0.1234 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_add_f16 v5, v1, 0x2fe6 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00] +// GFX10: v_pk_add_f16 v5, v1, 0x2fe6 ; encoding: [0x05,0x40,0x0f,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00] v_pk_add_u16 v5, v1, 0.1234 // NOGFX9: error: literal operands are not supported -// GFX10: v_pk_add_u16 v5, v1, 0x2fe6 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00] +// GFX10: v_pk_add_u16 v5, v1, 0x2fe6 ; encoding: 
[0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0xe6,0x2f,0x00,0x00] v_pk_fma_f16 v5, 0.1234, v2, v3 // NOGFX9: error: literal operands are not supported diff --git a/llvm/test/MC/AMDGPU/vop3-literal.s b/llvm/test/MC/AMDGPU/vop3-literal.s index 76bae508656b..bae958b0ca8c 100644 --- a/llvm/test/MC/AMDGPU/vop3-literal.s +++ b/llvm/test/MC/AMDGPU/vop3-literal.s @@ -68,19 +68,19 @@ v_bfm_b32_e64 v0, 0x3039, 0x3038 // GFX9-ERR: error: literal operands are not supported v_pk_add_f16 v1, 25.0, v2 -// GFX10: v_pk_add_f16 v1, 0x4e40, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00] +// GFX10: v_pk_add_f16 v1, 0x4e40, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_pk_add_f16 v1, 123456, v2 -// GFX10: v_pk_add_f16 v1, 0x1e240, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00] +// GFX10: v_pk_add_f16 v1, 0x1e240, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00] // GFX9-ERR: error: literal operands are not supported v_pk_add_f16 v1, -200, v2 -// GFX10: v_pk_add_f16 v1, 0xffffff38, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff] +// GFX10: v_pk_add_f16 v1, 0xffffff38, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff] // GFX9-ERR: error: literal operands are not supported v_pk_add_f16 v1, 25.0, 25.0 -// GFX10: v_pk_add_f16 v1, 0x4e40, 0x4e40 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0xfe,0x01,0x18,0x40,0x4e,0x00,0x00] +// GFX10: v_pk_add_f16 v1, 0x4e40, 0x4e40 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0xfe,0x01,0x18,0x40,0x4e,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_pk_add_f16 v1, 25.0, 25.1 @@ -88,31 +88,31 @@ v_pk_add_f16 v1, 25.0, 25.1 // GFX9-ERR: error: literal operands are not supported v_pk_add_u16 v1, -200, v2 -// GFX10: v_pk_add_u16 v1, 0xffffff38, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff] +// GFX10: 
v_pk_add_u16 v1, 0xffffff38, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff] // GFX9-ERR: error: literal operands are not supported v_pk_add_u16 v1, 64, v2 -// GFX10: v_pk_add_u16 v1, 64, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc0,0x04,0x02,0x18] -// GFX9: v_pk_add_u16 v1, 64, v2 ; encoding: [0x01,0x00,0x8a,0xd3,0xc0,0x04,0x02,0x18] +// GFX10: v_pk_add_u16 v1, 64, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xc0,0x04,0x02,0x18] +// GFX9: v_pk_add_u16 v1, 64, v2 ; encoding: [0x01,0x40,0x8a,0xd3,0xc0,0x04,0x02,0x18] v_pk_add_u16 v1, 65, v2 -// GFX10: v_pk_add_u16 v1, 0x41, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00] +// GFX10: v_pk_add_u16 v1, 0x41, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_pk_add_u16 v1, -1, v2 -// GFX10: v_pk_add_u16 v1, -1, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc1,0x04,0x02,0x18] -// GFX9: v_pk_add_u16 v1, -1, v2 ; encoding: [0x01,0x00,0x8a,0xd3,0xc1,0x04,0x02,0x18] +// GFX10: v_pk_add_u16 v1, -1, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xc1,0x04,0x02,0x18] +// GFX9: v_pk_add_u16 v1, -1, v2 ; encoding: [0x01,0x40,0x8a,0xd3,0xc1,0x04,0x02,0x18] v_pk_add_u16 v1, -5, v2 -// GFX10: v_pk_add_u16 v1, -5, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc5,0x04,0x02,0x18] -// GFX9: v_pk_add_u16 v1, -5, v2 ; encoding: [0x01,0x00,0x8a,0xd3,0xc5,0x04,0x02,0x18] +// GFX10: v_pk_add_u16 v1, -5, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xc5,0x04,0x02,0x18] +// GFX9: v_pk_add_u16 v1, -5, v2 ; encoding: [0x01,0x40,0x8a,0xd3,0xc5,0x04,0x02,0x18] v_pk_add_u16 v1, -100, v2 -// GFX10: v_pk_add_u16 v1, 0xffffff9c, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff] +// GFX10: v_pk_add_u16 v1, 0xffffff9c, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff] // GFX9-ERR: error: literal operands are not supported v_pk_add_u16 v1, -100, -100 -// GFX10: v_pk_add_u16 v1, 0xffffff9c, 0xffffff9c ; 
encoding: [0x01,0x00,0x0a,0xcc,0xff,0xfe,0x01,0x18,0x9c,0xff,0xff,0xff] +// GFX10: v_pk_add_u16 v1, 0xffffff9c, 0xffffff9c ; encoding: [0x01,0x40,0x0a,0xcc,0xff,0xfe,0x01,0x18,0x9c,0xff,0xff,0xff] // GFX9-ERR: error: literal operands are not supported v_add_f32_e64 v1, neg(abs(0x123)), v3 @@ -340,27 +340,27 @@ v_fma_mix_f32 v5, 0x1c8a, 0x1c8a, 0x1c8a // GFX9-ERR: error: instruction not supported on this GPU v_pk_add_f16 v5, 0xaf123456, v2 -// GFX10: v_pk_add_f16 v5, 0xaf123456, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x56,0x34,0x12,0xaf] +// GFX10: v_pk_add_f16 v5, 0xaf123456, v2 ; encoding: [0x05,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x56,0x34,0x12,0xaf] // GFX9-ERR: error: literal operands are not supported v_pk_add_f16 v5, v1, 0x3f717273 -// GFX10: v_pk_add_f16 v5, v1, 0x3f717273 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x73,0x72,0x71,0x3f] +// GFX10: v_pk_add_f16 v5, v1, 0x3f717273 ; encoding: [0x05,0x40,0x0f,0xcc,0x01,0xff,0x01,0x18,0x73,0x72,0x71,0x3f] // GFX9-ERR: error: literal operands are not supported v_pk_add_f16 v5, 0x3f717273, 0x3f717273 -// GFX10: v_pk_add_f16 v5, 0x3f717273, 0x3f717273 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0xfe,0x01,0x18,0x73,0x72,0x71,0x3f] +// GFX10: v_pk_add_f16 v5, 0x3f717273, 0x3f717273 ; encoding: [0x05,0x40,0x0f,0xcc,0xff,0xfe,0x01,0x18,0x73,0x72,0x71,0x3f] // GFX9-ERR: error: literal operands are not supported v_pk_add_i16 v5, 0x7b, v2 -// GFX10: v_pk_add_i16 v5, 0x7b, v2 ; encoding: [0x05,0x00,0x02,0xcc,0xff,0x04,0x02,0x18,0x7b,0x00,0x00,0x00] +// GFX10: v_pk_add_i16 v5, 0x7b, v2 ; encoding: [0x05,0x40,0x02,0xcc,0xff,0x04,0x02,0x18,0x7b,0x00,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_pk_add_i16 v5, v1, 0x7b -// GFX10: v_pk_add_i16 v5, v1, 0x7b ; encoding: [0x05,0x00,0x02,0xcc,0x01,0xff,0x01,0x18,0x7b,0x00,0x00,0x00] +// GFX10: v_pk_add_i16 v5, v1, 0x7b ; encoding: [0x05,0x40,0x02,0xcc,0x01,0xff,0x01,0x18,0x7b,0x00,0x00,0x00] // GFX9-ERR: error: literal operands are not 
supported v_pk_add_i16 v5, 0xab7b, 0xab7b -// GFX10: v_pk_add_i16 v5, 0xab7b, 0xab7b ; encoding: [0x05,0x00,0x02,0xcc,0xff,0xfe,0x01,0x18,0x7b,0xab,0x00,0x00] +// GFX10: v_pk_add_i16 v5, 0xab7b, 0xab7b ; encoding: [0x05,0x40,0x02,0xcc,0xff,0xfe,0x01,0x18,0x7b,0xab,0x00,0x00] // GFX9-ERR: error: literal operands are not supported v_pk_add_i16 v5, 0xab7b, 0xab7a diff --git a/llvm/test/MC/AMDGPU/vop3p.s b/llvm/test/MC/AMDGPU/vop3p.s index 1a0741247cc0..197da25734be 100644 --- a/llvm/test/MC/AMDGPU/vop3p.s +++ b/llvm/test/MC/AMDGPU/vop3p.s @@ -4,52 +4,52 @@ // v_pk_add_u16 v1, v2, v3 -// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18] +// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x18] v_pk_add_u16 v1, v2, v3 op_sel:[0,0] -// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18] +// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x18] v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,1] -// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18] +// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x18] v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[1,1] -// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x18] +// GFX9: v_pk_add_u16 v1, v2, v3 ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x18] v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x00] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x00] v_pk_add_u16 v1, v2, v3 op_sel:[0,0] op_sel_hi:[0,0] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x00] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,0] ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x00] v_pk_add_u16 v1, v2, v3 op_sel:[1,0] -// GFX9: v_pk_add_u16 v1, v2, 
v3 op_sel:[1,0] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x18] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] ; encoding: [0x01,0x48,0x8a,0xd3,0x02,0x07,0x02,0x18] v_pk_add_u16 v1, v2, v3 op_sel:[0,1] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x18] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] ; encoding: [0x01,0x50,0x8a,0xd3,0x02,0x07,0x02,0x18] v_pk_add_u16 v1, v2, v3 op_sel:[1,1] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x18,0x8a,0xd3,0x02,0x07,0x02,0x18] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x58,0x8a,0xd3,0x02,0x07,0x02,0x18] v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x10] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[0,1] ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x10] v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0] ; encoding: [0x01,0x00,0x8a,0xd3,0x02,0x07,0x02,0x08] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel_hi:[1,0] ; encoding: [0x01,0x40,0x8a,0xd3,0x02,0x07,0x02,0x08] v_pk_add_u16 v1, v2, v3 op_sel:[1,1] op_sel_hi:[1,1] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x18,0x8a,0xd3,0x02,0x07,0x02,0x18] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,1] ; encoding: [0x01,0x58,0x8a,0xd3,0x02,0x07,0x02,0x18] v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x08] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[1,0] ; encoding: [0x01,0x48,0x8a,0xd3,0x02,0x07,0x02,0x08] v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x10] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[0,1] ; encoding: [0x01,0x50,0x8a,0xd3,0x02,0x07,0x02,0x10] v_pk_add_u16 v1, v2, v3 
op_sel:[1,0] op_sel_hi:[0,1] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x01,0x08,0x8a,0xd3,0x02,0x07,0x02,0x10] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[1,0] op_sel_hi:[0,1] ; encoding: [0x01,0x48,0x8a,0xd3,0x02,0x07,0x02,0x10] v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] -// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x01,0x10,0x8a,0xd3,0x02,0x07,0x02,0x08] +// GFX9: v_pk_add_u16 v1, v2, v3 op_sel:[0,1] op_sel_hi:[1,0] ; encoding: [0x01,0x50,0x8a,0xd3,0x02,0x07,0x02,0x08] // // Test src2 op_sel/op_sel_hi @@ -110,59 +110,59 @@ v_pk_fma_f16 v8, v0, s0, v1 clamp // GFX9: v_pk_fma_f16 v8, v0, s0, v1 clamp ; encoding: [0x08,0xc0,0x8e,0xd3,0x00,0x01,0x04,0x1c] v_pk_add_u16 v1, v2, v3 clamp -// GFX9: v_pk_add_u16 v1, v2, v3 clamp ; encoding: [0x01,0x80,0x8a,0xd3,0x02,0x07,0x02,0x18] +// GFX9: v_pk_add_u16 v1, v2, v3 clamp ; encoding: [0x01,0xc0,0x8a,0xd3,0x02,0x07,0x02,0x18] v_pk_min_i16 v0, v1, v2 clamp -// GFX9: v_pk_min_i16 v0, v1, v2 clamp ; encoding: [0x00,0x80,0x88,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_min_i16 v0, v1, v2 clamp ; encoding: [0x00,0xc0,0x88,0xd3,0x01,0x05,0x02,0x18] // // Instruction tests: // v_pk_mul_lo_u16 v0, v1, v2 -// GFX9: v_pk_mul_lo_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x81,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_mul_lo_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x81,0xd3,0x01,0x05,0x02,0x18] v_pk_add_i16 v0, v1, v2 -// GFX9: v_pk_add_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x82,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_add_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x82,0xd3,0x01,0x05,0x02,0x18] v_pk_sub_i16 v0, v1, v2 -// GFX9: v_pk_sub_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x83,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_sub_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x83,0xd3,0x01,0x05,0x02,0x18] v_pk_lshlrev_b16 v0, v1, v2 -// GFX9: v_pk_lshlrev_b16 v0, v1, v2 ; encoding: [0x00,0x00,0x84,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_lshlrev_b16 v0, v1, v2 ; encoding: 
[0x00,0x40,0x84,0xd3,0x01,0x05,0x02,0x18] v_pk_lshrrev_b16 v0, v1, v2 -// GFX9: v_pk_lshrrev_b16 v0, v1, v2 ; encoding: [0x00,0x00,0x85,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_lshrrev_b16 v0, v1, v2 ; encoding: [0x00,0x40,0x85,0xd3,0x01,0x05,0x02,0x18] v_pk_ashrrev_i16 v0, v1, v2 -// GFX9: v_pk_ashrrev_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x86,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_ashrrev_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x86,0xd3,0x01,0x05,0x02,0x18] v_pk_max_i16 v0, v1, v2 -// GFX9: v_pk_max_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x87,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_max_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x87,0xd3,0x01,0x05,0x02,0x18] v_pk_min_i16 v0, v1, v2 -// GFX9: v_pk_min_i16 v0, v1, v2 ; encoding: [0x00,0x00,0x88,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_min_i16 v0, v1, v2 ; encoding: [0x00,0x40,0x88,0xd3,0x01,0x05,0x02,0x18] v_pk_add_u16 v0, v1, v2 -// GFX9: v_pk_add_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_add_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x8a,0xd3,0x01,0x05,0x02,0x18] v_pk_max_u16 v0, v1, v2 -// GFX9: v_pk_max_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_max_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x8c,0xd3,0x01,0x05,0x02,0x18] v_pk_min_u16 v0, v1, v2 -// GFX9: v_pk_min_u16 v0, v1, v2 ; encoding: [0x00,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_min_u16 v0, v1, v2 ; encoding: [0x00,0x40,0x8d,0xd3,0x01,0x05,0x02,0x18] v_pk_fma_f16 v0, v1, v2, v3 // GFX9: v_pk_fma_f16 v0, v1, v2, v3 ; encoding: [0x00,0x40,0x8e,0xd3,0x01,0x05,0x0e,0x1c] v_pk_add_f16 v0, v1, v2 -// GFX9: v_pk_add_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_add_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18] v_pk_mul_f16 v0, v1, v2 -// GFX9: v_pk_mul_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x90,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_mul_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x90,0xd3,0x01,0x05,0x02,0x18] v_pk_min_f16 
v0, v1, v2 -// GFX9: v_pk_min_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x91,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_min_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x91,0xd3,0x01,0x05,0x02,0x18] v_pk_max_f16 v0, v1, v2 -// GFX9: v_pk_max_f16 v0, v1, v2 ; encoding: [0x00,0x00,0x92,0xd3,0x01,0x05,0x02,0x18] +// GFX9: v_pk_max_f16 v0, v1, v2 ; encoding: [0x00,0x40,0x92,0xd3,0x01,0x05,0x02,0x18] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt index 80922b576b56..0fe794137826 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx90a_dasm_features.txt @@ -52,193 +52,193 @@ # GFX90A: v_pk_fma_f32 v[0:1], v[4:5], v[8:9], v[16:17] ; encoding: [0x00,0x40,0xb0,0xd3,0x04,0x11,0x42,0x1c] 0x00,0x40,0xb0,0xd3,0x04,0x11,0x42,0x1c -# GFX90A: v_pk_mul_f32 v[254:255], v[8:9], v[16:17] ; encoding: [0xfe,0x00,0xb1,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[254:255], v[8:9], v[16:17] ; encoding: [0xfe,0x40,0xb1,0xd3,0x08,0x21,0x02,0x18] 0xfe,0x00,0xb1,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[254:255], v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0xfe,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[254:255], v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0xfe,0x21,0x02,0x18] 0x04,0x00,0xb1,0xd3,0xfe,0x21,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], s[2:3], v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x02,0x20,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], s[2:3], v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x02,0x20,0x02,0x18] 0x04,0x00,0xb1,0xd3,0x02,0x20,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], s[100:101], v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x64,0x20,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], s[100:101], v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x64,0x20,0x02,0x18] 0x04,0x00,0xb1,0xd3,0x64,0x20,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], flat_scratch, v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x66,0x20,0x02,0x18] +# GFX90A: v_pk_mul_f32 
v[4:5], flat_scratch, v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x66,0x20,0x02,0x18] 0x04,0x00,0xb1,0xd3,0x66,0x20,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], vcc, v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x6a,0x20,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], vcc, v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x6a,0x20,0x02,0x18] 0x04,0x00,0xb1,0xd3,0x6a,0x20,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], exec, v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x7e,0x20,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], exec, v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x7e,0x20,0x02,0x18] 0x04,0x00,0xb1,0xd3,0x7e,0x20,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[254:255] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0xfd,0x03,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[254:255] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0xfd,0x03,0x18] 0x04,0x00,0xb1,0xd3,0x08,0xfd,0x03,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], s[2:3] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x05,0x00,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], s[2:3] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x05,0x00,0x18] 0x04,0x00,0xb1,0xd3,0x08,0x05,0x00,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], s[100:101] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0xc9,0x00,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], s[100:101] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0xc9,0x00,0x18] 0x04,0x00,0xb1,0xd3,0x08,0xc9,0x00,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], flat_scratch ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0xcd,0x00,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], flat_scratch ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0xcd,0x00,0x18] 0x04,0x00,0xb1,0xd3,0x08,0xcd,0x00,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], vcc ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0xd5,0x00,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], vcc ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0xd5,0x00,0x18] 0x04,0x00,0xb1,0xd3,0x08,0xd5,0x00,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], exec ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0xfd,0x00,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], exec ; encoding: 
[0x04,0x40,0xb1,0xd3,0x08,0xfd,0x00,0x18] 0x04,0x00,0xb1,0xd3,0x08,0xfd,0x00,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x18] 0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,0] ; encoding: [0x04,0x08,0xb1,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,0] ; encoding: [0x04,0x48,0xb1,0xd3,0x08,0x21,0x02,0x18] 0x04,0x08,0xb1,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[0,1] ; encoding: [0x04,0x10,0xb1,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[0,1] ; encoding: [0x04,0x50,0xb1,0xd3,0x08,0x21,0x02,0x18] 0x04,0x10,0xb1,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,1] ; encoding: [0x04,0x18,0xb1,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,1] ; encoding: [0x04,0x58,0xb1,0xd3,0x08,0x21,0x02,0x18] 0x04,0x18,0xb1,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x18] 0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,0] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x00] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,0] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x00] 0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x00 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[1,0] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x08] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[1,0] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x08] 0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x08 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] 
op_sel_hi:[0,1] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x10] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,1] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x10] 0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x10 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,0] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x38] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,0] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x38] 0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x38 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[0,1] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x58] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[0,1] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x58] 0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x58 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,1] ; encoding: [0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x78] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,1] ; encoding: [0x04,0x40,0xb1,0xd3,0x08,0x21,0x02,0x78] 0x04,0x00,0xb1,0xd3,0x08,0x21,0x02,0x78 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,0] ; encoding: [0x04,0x01,0xb1,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,0] ; encoding: [0x04,0x41,0xb1,0xd3,0x08,0x21,0x02,0x18] 0x04,0x01,0xb1,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[0,1] ; encoding: [0x04,0x02,0xb1,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[0,1] ; encoding: [0x04,0x42,0xb1,0xd3,0x08,0x21,0x02,0x18] 0x04,0x02,0xb1,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,1] ; encoding: [0x04,0x03,0xb1,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,1] ; encoding: [0x04,0x43,0xb1,0xd3,0x08,0x21,0x02,0x18] 0x04,0x03,0xb1,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_mul_f32 v[4:5], v[8:9], v[16:17] clamp ; encoding: [0x04,0x80,0xb1,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_mul_f32 
v[4:5], v[8:9], v[16:17] clamp ; encoding: [0x04,0xc0,0xb1,0xd3,0x08,0x21,0x02,0x18] 0x04,0x80,0xb1,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[254:255], v[8:9], v[16:17] ; encoding: [0xfe,0x00,0xb2,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_add_f32 v[254:255], v[8:9], v[16:17] ; encoding: [0xfe,0x40,0xb2,0xd3,0x08,0x21,0x02,0x18] 0xfe,0x00,0xb2,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[254:255], v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0xfe,0x21,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[254:255], v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0xfe,0x21,0x02,0x18] 0x04,0x00,0xb2,0xd3,0xfe,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], s[2:3], v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x02,0x20,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], s[2:3], v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x02,0x20,0x02,0x18] 0x04,0x00,0xb2,0xd3,0x02,0x20,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], s[100:101], v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x64,0x20,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], s[100:101], v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x64,0x20,0x02,0x18] 0x04,0x00,0xb2,0xd3,0x64,0x20,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], flat_scratch, v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x66,0x20,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], flat_scratch, v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x66,0x20,0x02,0x18] 0x04,0x00,0xb2,0xd3,0x66,0x20,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], vcc, v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x6a,0x20,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], vcc, v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x6a,0x20,0x02,0x18] 0x04,0x00,0xb2,0xd3,0x6a,0x20,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], exec, v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x7e,0x20,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], exec, v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x7e,0x20,0x02,0x18] 0x04,0x00,0xb2,0xd3,0x7e,0x20,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[254:255] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0xfd,0x03,0x18] +# GFX90A: 
v_pk_add_f32 v[4:5], v[8:9], v[254:255] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0xfd,0x03,0x18] 0x04,0x00,0xb2,0xd3,0x08,0xfd,0x03,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], s[2:3] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x05,0x00,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], s[2:3] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x05,0x00,0x18] 0x04,0x00,0xb2,0xd3,0x08,0x05,0x00,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], s[100:101] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0xc9,0x00,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], s[100:101] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0xc9,0x00,0x18] 0x04,0x00,0xb2,0xd3,0x08,0xc9,0x00,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], flat_scratch ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0xcd,0x00,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], flat_scratch ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0xcd,0x00,0x18] 0x04,0x00,0xb2,0xd3,0x08,0xcd,0x00,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], vcc ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0xd5,0x00,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], vcc ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0xd5,0x00,0x18] 0x04,0x00,0xb2,0xd3,0x08,0xd5,0x00,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], exec ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0xfd,0x00,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], exec ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0xfd,0x00,0x18] 0x04,0x00,0xb2,0xd3,0x08,0xfd,0x00,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x18] 0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,0] ; encoding: [0x04,0x08,0xb2,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,0] ; encoding: [0x04,0x48,0xb2,0xd3,0x08,0x21,0x02,0x18] 0x04,0x08,0xb2,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[0,1] ; encoding: [0x04,0x10,0xb2,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: 
v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[0,1] ; encoding: [0x04,0x50,0xb2,0xd3,0x08,0x21,0x02,0x18] 0x04,0x10,0xb2,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,1] ; encoding: [0x04,0x18,0xb2,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel:[1,1] ; encoding: [0x04,0x58,0xb2,0xd3,0x08,0x21,0x02,0x18] 0x04,0x18,0xb2,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x18] 0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,0] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x00] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,0] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x00] 0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x00 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[1,0] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x08] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[1,0] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x08] 0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x08 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,1] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x10] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] op_sel_hi:[0,1] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x10] 0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x10 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,0] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x38] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,0] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x38] 0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x38 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[0,1] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x58] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[0,1] ; encoding: 
[0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x58] 0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x58 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,1] ; encoding: [0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x78] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_lo:[1,1] ; encoding: [0x04,0x40,0xb2,0xd3,0x08,0x21,0x02,0x78] 0x04,0x00,0xb2,0xd3,0x08,0x21,0x02,0x78 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,0] ; encoding: [0x04,0x01,0xb2,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,0] ; encoding: [0x04,0x41,0xb2,0xd3,0x08,0x21,0x02,0x18] 0x04,0x01,0xb2,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[0,1] ; encoding: [0x04,0x02,0xb2,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[0,1] ; encoding: [0x04,0x42,0xb2,0xd3,0x08,0x21,0x02,0x18] 0x04,0x02,0xb2,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,1] ; encoding: [0x04,0x03,0xb2,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] neg_hi:[1,1] ; encoding: [0x04,0x43,0xb2,0xd3,0x08,0x21,0x02,0x18] 0x04,0x03,0xb2,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] clamp ; encoding: [0x04,0x80,0xb2,0xd3,0x08,0x21,0x02,0x18] +# GFX90A: v_pk_add_f32 v[4:5], v[8:9], v[16:17] clamp ; encoding: [0x04,0xc0,0xb2,0xd3,0x08,0x21,0x02,0x18] 0x04,0x80,0xb2,0xd3,0x08,0x21,0x02,0x18 -# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] ; encoding: [0x00,0x00,0xb3,0xd3,0x02,0x09,0x02,0x18] +# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] ; encoding: [0x00,0x40,0xb3,0xd3,0x02,0x09,0x02,0x18] 0x00,0x00,0xb3,0xd3,0x02,0x09,0x02,0x18 -# GFX90A: v_pk_mov_b32 v[0:1], flat_scratch, v[4:5] ; encoding: [0x00,0x00,0xb3,0xd3,0x66,0x08,0x02,0x18] +# GFX90A: v_pk_mov_b32 v[0:1], flat_scratch, v[4:5] ; encoding: [0x00,0x40,0xb3,0xd3,0x66,0x08,0x02,0x18] 0x00,0x00,0xb3,0xd3,0x66,0x08,0x02,0x18 -# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], vcc ; encoding: 
[0x00,0x00,0xb3,0xd3,0x02,0xd5,0x00,0x18] +# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], vcc ; encoding: [0x00,0x40,0xb3,0xd3,0x02,0xd5,0x00,0x18] 0x00,0x00,0xb3,0xd3,0x02,0xd5,0x00,0x18 -# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], s[0:1] ; encoding: [0x00,0x00,0xb3,0xd3,0x02,0x01,0x00,0x18] +# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], s[0:1] ; encoding: [0x00,0x40,0xb3,0xd3,0x02,0x01,0x00,0x18] 0x00,0x00,0xb3,0xd3,0x02,0x01,0x00,0x18 -# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel_hi:[0,1] ; encoding: [0x00,0x00,0xb3,0xd3,0x02,0x09,0x02,0x10] +# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel_hi:[0,1] ; encoding: [0x00,0x40,0xb3,0xd3,0x02,0x09,0x02,0x10] 0x00,0x00,0xb3,0xd3,0x02,0x09,0x02,0x10 -# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,0] ; encoding: [0x00,0x08,0xb3,0xd3,0x02,0x09,0x02,0x18] +# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,0] ; encoding: [0x00,0x48,0xb3,0xd3,0x02,0x09,0x02,0x18] 0x00,0x08,0xb3,0xd3,0x02,0x09,0x02,0x18 -# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,1] ; encoding: [0x00,0x18,0xb3,0xd3,0x02,0x09,0x02,0x18] +# GFX90A: v_pk_mov_b32 v[0:1], v[2:3], v[4:5] op_sel:[1,1] ; encoding: [0x00,0x58,0xb3,0xd3,0x02,0x09,0x02,0x18] 0x00,0x18,0xb3,0xd3,0x02,0x09,0x02,0x18 # GFX908: tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 ; encoding: [0x00,0x80,0x09,0xe8,0x00,0x04,0x20,0x80] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt index 22da396cab8c..eee589fff175 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_dasm_all.txt @@ -49608,874 +49608,874 @@ # CHECK: v_pk_mad_i16 v5, v1, v2, v3 clamp ; encoding: [0x05,0xc0,0x80,0xd3,0x01,0x05,0x0e,0x1c] 0x05,0xc0,0x80,0xd3,0x01,0x05,0x0e,0x1c -# CHECK: v_pk_mul_lo_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0x05,0x02,0x18] 
0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x81,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v255, v1, v2 ; encoding: [0xff,0x40,0x81,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x81,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x81,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v255, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x81,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, s1, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, s101, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: 
v_pk_mul_lo_u16 v5, m0, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x81,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, 0, v2 ; encoding: [0x05,0x40,0x81,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x81,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x81,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x81,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x81,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x81,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x81,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x81,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x81,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, s2 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x81,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, s101 ; encoding: 
[0x05,0x00,0x81,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, s101 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x81,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x81,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x81,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x81,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x81,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x81,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x81,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x81,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x81,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x81,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, m0 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x81,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x81,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x81,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x81,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x81,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x81,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, 0 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0x01,0x01,0x18] 
0x05,0x00,0x81,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x81,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x81,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x81,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x81,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x81,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x81,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x81,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x81,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x81,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x81,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x81,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x81,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x81,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x81,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x81,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x81,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x81,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x81,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_mul_lo_u16 
v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_mul_lo_u16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x81,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x81,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_add_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_i16 v5, v1, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x82,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_i16 v255, v1, v2 ; encoding: [0xff,0x40,0x82,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x82,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x82,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_add_i16 v5, v255, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x82,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_add_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, s1, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, s101, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x6a,0x04,0x02,0x18] 
0x05,0x00,0x82,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, m0, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x82,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, 0, v2 ; encoding: [0x05,0x40,0x82,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x82,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x82,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x82,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_add_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x82,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x82,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_add_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_add_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xff,0x03,0x18] 
0x05,0x00,0x82,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_add_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x82,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_add_i16 v5, v1, s2 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x82,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_add_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_add_i16 v5, v1, s101 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x82,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_add_i16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_add_i16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x82,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_add_i16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_add_i16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x82,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_add_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_add_i16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x82,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_add_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_add_i16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x82,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_add_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_add_i16 v5, v1, m0 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x82,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_add_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_add_i16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x82,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_add_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_add_i16 v5, v1, exec_hi ; encoding: 
[0x05,0x40,0x82,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x82,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_add_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x82,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_add_i16 v5, v1, 0 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x82,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_add_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x82,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_add_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x82,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_add_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_add_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x82,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_add_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x82,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_add_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x82,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x82,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_add_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x82,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x82,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x82,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_i16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x82,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_i16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x82,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x82,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_i16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x82,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_i16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x82,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x82,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_i16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_add_i16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x82,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_add_i16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: 
[0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_add_i16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x82,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_add_i16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_add_i16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x82,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x82,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_add_i16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x82,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_i16 v5, v1, v2 clamp ; encoding: [0x05,0xc0,0x82,0xd3,0x01,0x05,0x02,0x18] 0x05,0x80,0x82,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, v1, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x83,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_i16 v255, v1, v2 ; encoding: [0xff,0x40,0x83,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x83,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x83,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, v255, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x83,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, s1, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, s101, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0x66,0x04,0x02,0x18 -# 
CHECK: v_pk_sub_i16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, m0, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x83,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, 0, v2 ; encoding: [0x05,0x40,0x83,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x83,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x83,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xff,0x04,0x02,0x18] 
0x05,0x00,0x83,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x83,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x83,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x83,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_sub_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x83,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_sub_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x83,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_sub_i16 v5, v1, s2 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x83,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_sub_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_sub_i16 v5, v1, s101 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x83,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_sub_i16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_sub_i16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x83,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_sub_i16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_sub_i16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x83,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_sub_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_sub_i16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x83,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_sub_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_sub_i16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x83,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_sub_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_sub_i16 v5, v1, m0 ; encoding: 
[0x05,0x40,0x83,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x83,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_sub_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_sub_i16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x83,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_sub_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_sub_i16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x83,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_sub_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x83,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_sub_i16 v5, v1, 0 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x83,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_sub_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x83,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_sub_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x83,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_sub_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_sub_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x83,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_sub_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x83,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_sub_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x83,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x83,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x83,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x83,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x83,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x83,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x83,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x83,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x83,0xd3,0x01,0x05,0x02,0x18] +# 
CHECK: v_pk_sub_i16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x83,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x83,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x83,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x83,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_sub_i16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x83,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x83,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_sub_i16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x83,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_i16 v5, v1, v2 clamp ; encoding: [0x05,0xc0,0x83,0xd3,0x01,0x05,0x02,0x18] 0x05,0x80,0x83,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v255, v1, v2 ; encoding: [0xff,0x00,0x84,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v255, v1, v2 ; encoding: [0xff,0x40,0x84,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x84,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v255, v2 ; encoding: [0x05,0x00,0x84,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v255, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x84,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, s1, v2 ; encoding: [0x05,0x00,0x84,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, s1, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0x04,0x02,0x18] 
0x05,0x00,0x84,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, s101, v2 ; encoding: [0x05,0x00,0x84,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, s101, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x84,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x84,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x84,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x84,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, m0, v2 ; encoding: [0x05,0x00,0x84,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, m0, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x84,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x84,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, 0, v2 ; encoding: 
[0x05,0x00,0x84,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, 0, v2 ; encoding: [0x05,0x40,0x84,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, -1, v2 ; encoding: [0x05,0x00,0x84,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, -1, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x84,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x84,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x84,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x84,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, v255 ; encoding: [0x05,0x00,0x84,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, v255 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x84,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, s2 ; encoding: [0x05,0x00,0x84,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, s2 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x84,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, s101 ; encoding: [0x05,0x00,0x84,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, s101 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x84,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x84,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x84,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x84,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, flat_scratch_hi ; encoding: 
[0x05,0x40,0x84,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x84,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x84,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x84,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x84,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x84,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, m0 ; encoding: [0x05,0x00,0x84,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, m0 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x84,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x84,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x84,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x84,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x84,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, 0 ; encoding: [0x05,0x00,0x84,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, 0 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x84,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, -1 ; encoding: [0x05,0x00,0x84,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, -1 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x84,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x84,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x84,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, 0xc400 ; encoding: 
[0x05,0x00,0x84,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x84,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x84,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x84,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x84,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x84,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x84,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x84,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x84,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x84,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x84,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x84,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x84,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x84,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_lshlrev_b16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x84,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x84,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_lshrrev_b16 v5, v1, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, v2 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v255, v1, v2 ; encoding: 
[0xff,0x00,0x85,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v255, v1, v2 ; encoding: [0xff,0x40,0x85,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x85,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v255, v2 ; encoding: [0x05,0x00,0x85,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v255, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x85,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, s1, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, s1, v2 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, s101, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, s101, v2 ; encoding: [0x05,0x40,0x85,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x85,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x85,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x85,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x85,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, m0, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, m0, v2 ; encoding: 
[0x05,0x40,0x85,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x85,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x85,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, 0, v2 ; encoding: [0x05,0x00,0x85,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, 0, v2 ; encoding: [0x05,0x40,0x85,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, -1, v2 ; encoding: [0x05,0x00,0x85,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, -1, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x85,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x85,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x85,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x85,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, v255 ; encoding: [0x05,0x00,0x85,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, v255 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x85,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, s2 ; encoding: [0x05,0x00,0x85,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, s2 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x85,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, s101 ; encoding: 
[0x05,0x00,0x85,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, s101 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x85,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x85,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x85,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x85,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x85,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x85,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x85,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x85,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x85,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, m0 ; encoding: [0x05,0x00,0x85,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, m0 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x85,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x85,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x85,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x85,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x85,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, 0 ; encoding: [0x05,0x00,0x85,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, 0 ; encoding: 
[0x05,0x40,0x85,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x85,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, -1 ; encoding: [0x05,0x00,0x85,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, -1 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x85,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x85,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x85,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x85,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x85,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x85,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x85,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x85,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x85,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x85,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x85,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x85,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x85,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x85,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x85,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x85,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x85,0xd3,0x01,0x05,0x02,0x08] 
0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_lshrrev_b16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x85,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x85,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_ashrrev_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x86,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v255, v1, v2 ; encoding: [0xff,0x40,0x86,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x86,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x86,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v255, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x86,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x86,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, s1, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x86,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, s101, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x86,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x86,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, vcc_lo, v2 ; encoding: 
[0x05,0x00,0x86,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x86,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x86,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, m0, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x86,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x86,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x86,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, 0, v2 ; encoding: [0x05,0x40,0x86,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x86,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x86,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x86,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x86,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x86,0xd3,0xff,0x04,0x02,0x18] 
0x05,0x00,0x86,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x86,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x86,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x86,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, s2 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x86,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x86,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, s101 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x86,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x86,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x86,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x86,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x86,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x86,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x86,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x86,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x86,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x86,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, m0 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x86,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, exec_lo ; encoding: 
[0x05,0x00,0x86,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x86,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x86,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x86,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x86,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, 0 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x86,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x86,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x86,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x86,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x86,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x86,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x86,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x86,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x86,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x86,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x86,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x86,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x86,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x86,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x86,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel:[1,1] ; encoding: 
[0x05,0x58,0x86,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x86,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x86,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x86,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_ashrrev_i16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x86,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x86,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_max_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_i16 v5, v1, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x87,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_i16 v255, v1, v2 ; encoding: [0xff,0x40,0x87,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x87,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x87,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_max_i16 v5, v255, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x87,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_max_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x87,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, s1, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, s101, v2 ; encoding: [0x05,0x00,0x87,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, s101, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, flat_scratch_lo, v2 ; 
encoding: [0x05,0x00,0x87,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x87,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x87,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x87,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x87,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, m0, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x87,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x87,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x87,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, 0, v2 ; encoding: [0x05,0x40,0x87,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x87,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: 
v_pk_max_i16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x87,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x87,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_max_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x87,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x87,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_max_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_max_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x87,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_max_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x87,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_max_i16 v5, v1, s2 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x87,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_max_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_max_i16 v5, v1, s101 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x87,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_max_i16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_max_i16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x87,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_max_i16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_max_i16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x87,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_max_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_max_i16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x87,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_max_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_max_i16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xd7,0x00,0x18] 
0x05,0x00,0x87,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_max_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_max_i16 v5, v1, m0 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x87,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_max_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_max_i16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x87,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_max_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_max_i16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x87,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_max_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x87,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_max_i16 v5, v1, 0 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x87,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_max_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x87,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_max_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x87,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_max_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_max_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x87,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_max_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x87,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_max_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x87,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x87,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_max_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x87,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x87,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x87,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_i16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x87,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_i16 v5, v1, v2 op_sel:[0,1] ; encoding: 
[0x05,0x50,0x87,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x87,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_i16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x87,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_i16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x87,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x87,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_i16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_max_i16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x87,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_max_i16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_max_i16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x87,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_max_i16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_max_i16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x87,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x87,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_min_i16 v5, v1, v2 ; encoding: [0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_i16 v5, v1, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_i16 v255, v1, v2 ; encoding: [0xff,0x00,0x88,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_i16 v255, v1, v2 ; encoding: [0xff,0x40,0x88,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x88,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_i16 v5, v255, v2 ; encoding: [0x05,0x00,0x88,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_min_i16 v5, v255, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x88,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_min_i16 v5, s1, v2 ; encoding: [0x05,0x00,0x88,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, s1, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, s101, v2 ; encoding: 
[0x05,0x00,0x88,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, s101, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x88,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x88,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x88,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x88,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, m0, v2 ; encoding: [0x05,0x00,0x88,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, m0, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x88,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x88,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, 0, v2 ; encoding: [0x05,0x00,0x88,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, 0, v2 ; encoding: [0x05,0x40,0x88,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0x80,0x04,0x02,0x18 -# CHECK: 
v_pk_min_i16 v5, -1, v2 ; encoding: [0x05,0x00,0x88,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, -1, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x88,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x88,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_min_i16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x88,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x88,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_min_i16 v5, v1, v255 ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_min_i16 v5, v1, v255 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x88,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_min_i16 v5, v1, s2 ; encoding: [0x05,0x00,0x88,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_min_i16 v5, v1, s2 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x88,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_min_i16 v5, v1, s101 ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_min_i16 v5, v1, s101 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x88,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_min_i16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_min_i16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x88,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_min_i16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_min_i16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x88,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_min_i16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_min_i16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xd5,0x00,0x18] 
0x05,0x00,0x88,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_min_i16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_min_i16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x88,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_min_i16 v5, v1, m0 ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_min_i16 v5, v1, m0 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x88,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_min_i16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_min_i16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x88,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_min_i16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_min_i16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x88,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_min_i16 v5, v1, 0 ; encoding: [0x05,0x00,0x88,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_min_i16 v5, v1, 0 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x88,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_min_i16 v5, v1, -1 ; encoding: [0x05,0x00,0x88,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_min_i16 v5, v1, -1 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x88,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_min_i16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_min_i16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x88,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_min_i16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x88,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_min_i16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x88,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x88,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_min_i16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x88,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_i16 v5, v1, v2 op_sel:[1,0] ; encoding: 
[0x05,0x48,0x88,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x88,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_i16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x88,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_i16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x88,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x88,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_i16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x88,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_i16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x88,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x88,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_i16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_min_i16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x88,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_min_i16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_min_i16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x88,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_min_i16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_min_i16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x88,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x88,0xd3,0x01,0x05,0x02,0x10 # CHECK: v_pk_mad_u16 v5, v1, v2, v3 ; encoding: [0x05,0x40,0x89,0xd3,0x01,0x05,0x0e,0x1c] @@ -50637,442 +50637,442 @@ # CHECK: v_pk_mad_u16 v5, v1, v2, v3 clamp ; encoding: [0x05,0xc0,0x89,0xd3,0x01,0x05,0x0e,0x1c] 0x05,0xc0,0x89,0xd3,0x01,0x05,0x0e,0x1c -# CHECK: v_pk_add_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_u16 v5, v1, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_u16 v255, v1, v2 ; encoding: [0xff,0x40,0x8a,0xd3,0x01,0x05,0x02,0x18] 
0xff,0x00,0x8a,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_add_u16 v5, v255, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x8a,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_add_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, s1, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, s101, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, m0, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, exec_lo, v2 ; encoding: 
[0x05,0x40,0x8a,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, 0, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x8a,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_add_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8a,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x8a,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_add_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_add_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_add_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_add_u16 v5, v1, s2 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x8a,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_add_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_add_u16 v5, v1, s101 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_add_u16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_add_u16 v5, v1, flat_scratch_lo ; 
encoding: [0x05,0x40,0x8a,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_add_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_add_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_add_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_add_u16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_add_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_add_u16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_add_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_add_u16 v5, v1, m0 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_add_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_add_u16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_add_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_add_u16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_add_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_add_u16 v5, v1, 0 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x8a,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_add_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_add_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x8a,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_add_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_add_u16 v5, 
v1, 0x3800 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_add_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_add_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x8a,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_add_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x8a,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8a,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x8a,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x8a,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x8a,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x8a,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_u16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x8a,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_u16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x8a,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x8a,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_u16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_add_u16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_add_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_add_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_add_u16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_add_u16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x8a,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x8a,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_add_u16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x8a,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_u16 v5, v1, v2 clamp ; encoding: [0x05,0xc0,0x8a,0xd3,0x01,0x05,0x02,0x18] 
0x05,0x80,0x8a,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, v1, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_u16 v255, v1, v2 ; encoding: [0xff,0x40,0x8b,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x8b,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, v255, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x8b,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, s1, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, s101, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, vcc_hi, v2 ; encoding: 
[0x05,0x40,0x8b,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, m0, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, 0, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x8b,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8b,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x8b,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_sub_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_sub_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_sub_u16 v5, v1, s2 ; encoding: 
[0x05,0x40,0x8b,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x8b,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_sub_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_sub_u16 v5, v1, s101 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_sub_u16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_sub_u16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_sub_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_sub_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_sub_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_sub_u16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_sub_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_sub_u16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_sub_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_sub_u16 v5, v1, m0 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_sub_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_sub_u16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_sub_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_sub_u16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_sub_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x01,0x01,0x18] +# CHECK: 
v_pk_sub_u16 v5, v1, 0 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x8b,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_sub_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_sub_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x8b,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_sub_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_sub_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_sub_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_sub_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x8b,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x8b,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8b,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x8b,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x8b,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x8b,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x8b,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x8b,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x8b,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x8b,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x02,0x08] 
0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_sub_u16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x8b,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x8b,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_sub_u16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x8b,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_sub_u16 v5, v1, v2 clamp ; encoding: [0x05,0xc0,0x8b,0xd3,0x01,0x05,0x02,0x18] 0x05,0x80,0x8b,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_u16 v5, v1, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_u16 v255, v1, v2 ; encoding: [0xff,0x40,0x8c,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x8c,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_max_u16 v5, v255, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x8c,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_max_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, s1, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, s101, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, flat_scratch_hi, v2 ; 
encoding: [0x05,0x40,0x8c,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, m0, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, 0, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, 0x3800, v2 ; encoding: [0x05,0x40,0x8c,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x8c,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_max_u16 v5, 0xc400, v2 ; 
encoding: [0x05,0x40,0x8c,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x8c,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_max_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_max_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_max_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_max_u16 v5, v1, s2 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x8c,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_max_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_max_u16 v5, v1, s101 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_max_u16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_max_u16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_max_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_max_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_max_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_max_u16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_max_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_max_u16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_max_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_max_u16 v5, v1, m0 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_max_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: 
v_pk_max_u16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_max_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_max_u16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_max_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_max_u16 v5, v1, 0 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x8c,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_max_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_max_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x8c,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_max_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_max_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_max_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_max_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x8c,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_max_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x8c,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8c,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x8c,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x8c,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x8c,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x8c,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_u16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x8c,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_u16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x8c,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x8c,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_u16 v5, v1, v2 
op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_max_u16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_max_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_max_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_max_u16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_max_u16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x8c,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x8c,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_min_u16 v5, v1, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_u16 v5, v1, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_u16 v255, v1, v2 ; encoding: [0xff,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_u16 v255, v1, v2 ; encoding: [0xff,0x40,0x8d,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x8d,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_u16 v5, v255, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_min_u16 v5, v255, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x8d,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_min_u16 v5, s1, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, s1, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, s101, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, s101, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, flat_scratch_lo, v2 ; encoding: 
[0x05,0x40,0x8d,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, m0, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, m0, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, 0, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, 0, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, -1, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, -1, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, 0x3800, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, 0x3800, v2 
; encoding: [0x05,0x40,0x8d,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, 0xc400, v2 ; encoding: [0x05,0x00,0x8d,0xd3,0xff,0x04,0x02,0x18] +# CHECK: v_pk_min_u16 v5, 0xc400, v2 ; encoding: [0x05,0x40,0x8d,0xd3,0xff,0x04,0x02,0x18] 0x05,0x00,0x8d,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_min_u16 v5, v1, v255 ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_min_u16 v5, v1, v255 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_min_u16 v5, v1, s2 ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_min_u16 v5, v1, s2 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x8d,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_min_u16 v5, v1, s101 ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_min_u16 v5, v1, s101 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_min_u16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_min_u16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_min_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_min_u16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_min_u16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_min_u16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_min_u16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_min_u16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_min_u16 v5, v1, m0 ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xf9,0x00,0x18] +# 
CHECK: v_pk_min_u16 v5, v1, m0 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_min_u16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_min_u16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_min_u16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_min_u16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_min_u16 v5, v1, 0 ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_min_u16 v5, v1, 0 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x8d,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_min_u16 v5, v1, -1 ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_min_u16 v5, v1, -1 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x8d,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_min_u16 v5, v1, 0x3800 ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_min_u16 v5, v1, 0x3800 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_min_u16 v5, v1, 0xc400 ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0xff,0x01,0x18] +# CHECK: v_pk_min_u16 v5, v1, 0xc400 ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0xff,0x01,0x18] 0x05,0x00,0x8d,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_min_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x8d,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_u16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8d,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x8d,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x8d,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_u16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x8d,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x8d,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_u16 v5, v1, v2 op_sel:[1,1] ; encoding: 
[0x05,0x18,0x8d,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_u16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x8d,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x8d,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_u16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_min_u16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_min_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_min_u16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_min_u16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_min_u16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x8d,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x8d,0xd3,0x01,0x05,0x02,0x10 # CHECK: v_pk_fma_f16 v5, v1, v2, v3 ; encoding: [0x05,0x40,0x8e,0xd3,0x01,0x05,0x0e,0x1c] @@ -51258,520 +51258,520 @@ # CHECK: v_pk_fma_f16 v5, v1, v2, v3 clamp ; encoding: [0x05,0xc0,0x8e,0xd3,0x01,0x05,0x0e,0x1c] 0x05,0xc0,0x8e,0xd3,0x01,0x05,0x0e,0x1c -# CHECK: v_pk_add_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_f16 v5, v1, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_f16 v255, v1, v2 ; encoding: [0xff,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_f16 v255, v1, v2 ; encoding: [0xff,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_f16 v5, v255, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_add_f16 v5, v255, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x8f,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_add_f16 v5, s1, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, s1, v2 ; encoding: 
[0x05,0x40,0x8f,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, s101, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, s101, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, m0, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, m0, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, 0, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18] +# CHECK: 
v_pk_add_f16 v5, 0, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, -1, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, -1, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, 0.5, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0xf0,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, -4.0, v2 ; encoding: [0x05,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18] +# CHECK: v_pk_add_f16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0xf7,0x04,0x02,0x18] 0x05,0x00,0x8f,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_add_f16 v5, v1, v255 ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_add_f16 v5, v1, v255 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_add_f16 v5, v1, s2 ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_add_f16 v5, v1, s2 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x8f,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_add_f16 v5, v1, s101 ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_add_f16 v5, v1, s101 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_add_f16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_add_f16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_add_f16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_add_f16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_add_f16 v5, v1, vcc_lo ; encoding: 
[0x05,0x00,0x8f,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_add_f16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_add_f16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_add_f16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_add_f16 v5, v1, m0 ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_add_f16 v5, v1, m0 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_add_f16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_add_f16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_add_f16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_add_f16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_add_f16 v5, v1, 0 ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_add_f16 v5, v1, 0 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x8f,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_add_f16 v5, v1, -1 ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_add_f16 v5, v1, -1 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x8f,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_add_f16 v5, v1, 0.5 ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0xe1,0x01,0x18] +# CHECK: v_pk_add_f16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xe1,0x01,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_add_f16 v5, v1, -4.0 ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0xef,0x01,0x18] +# CHECK: v_pk_add_f16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0xef,0x01,0x18] 0x05,0x00,0x8f,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_add_f16 v5, v1, v2 op_sel:[1,0] ; encoding: 
[0x05,0x08,0x8f,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_f16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x8f,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x8f,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_f16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x8f,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_f16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x8f,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x8f,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_f16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x8f,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_f16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x8f,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x8f,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_f16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_add_f16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_add_f16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_add_f16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_add_f16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_add_f16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_add_f16 v5, v1, v2 neg_lo:[1,0] ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x38] +# CHECK: v_pk_add_f16 v5, v1, v2 neg_lo:[1,0] ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x38] 0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x38 -# CHECK: v_pk_add_f16 v5, v1, v2 neg_lo:[0,1] ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x58] +# CHECK: v_pk_add_f16 v5, v1, v2 neg_lo:[0,1] ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x58] 0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x58 -# CHECK: v_pk_add_f16 v5, v1, v2 neg_lo:[1,1] ; encoding: [0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x78] +# 
CHECK: v_pk_add_f16 v5, v1, v2 neg_lo:[1,1] ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x78] 0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x78 -# CHECK: v_pk_add_f16 v5, v1, v2 neg_hi:[1,0] ; encoding: [0x05,0x01,0x8f,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_f16 v5, v1, v2 neg_hi:[1,0] ; encoding: [0x05,0x41,0x8f,0xd3,0x01,0x05,0x02,0x18] 0x05,0x01,0x8f,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_f16 v5, v1, v2 neg_hi:[0,1] ; encoding: [0x05,0x02,0x8f,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_f16 v5, v1, v2 neg_hi:[0,1] ; encoding: [0x05,0x42,0x8f,0xd3,0x01,0x05,0x02,0x18] 0x05,0x02,0x8f,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_f16 v5, v1, v2 neg_hi:[1,1] ; encoding: [0x05,0x03,0x8f,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_f16 v5, v1, v2 neg_hi:[1,1] ; encoding: [0x05,0x43,0x8f,0xd3,0x01,0x05,0x02,0x18] 0x05,0x03,0x8f,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_add_f16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x8f,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_add_f16 v5, v1, v2 clamp ; encoding: [0x05,0xc0,0x8f,0xd3,0x01,0x05,0x02,0x18] 0x05,0x80,0x8f,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, v1, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_f16 v255, v1, v2 ; encoding: [0xff,0x00,0x90,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_f16 v255, v1, v2 ; encoding: [0xff,0x40,0x90,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x90,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, v255, v2 ; encoding: [0x05,0x00,0x90,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, v255, v2 ; encoding: [0x05,0x40,0x90,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x90,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, s1, v2 ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, s1, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, s101, v2 
; encoding: [0x05,0x00,0x90,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, s101, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x90,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x90,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x90,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x90,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, m0, v2 ; encoding: [0x05,0x00,0x90,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, m0, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x90,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x90,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, 0, v2 ; encoding: [0x05,0x00,0x90,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, 0, v2 ; encoding: [0x05,0x40,0x90,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0x80,0x04,0x02,0x18 
-# CHECK: v_pk_mul_f16 v5, -1, v2 ; encoding: [0x05,0x00,0x90,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, -1, v2 ; encoding: [0x05,0x40,0x90,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, 0.5, v2 ; encoding: [0x05,0x00,0x90,0xd3,0xf0,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x90,0xd3,0xf0,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, -4.0, v2 ; encoding: [0x05,0x00,0x90,0xd3,0xf7,0x04,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x90,0xd3,0xf7,0x04,0x02,0x18] 0x05,0x00,0x90,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, v1, v255 ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_mul_f16 v5, v1, v255 ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x90,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_mul_f16 v5, v1, s2 ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_mul_f16 v5, v1, s2 ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x90,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_mul_f16 v5, v1, s101 ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_mul_f16 v5, v1, s101 ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x90,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_mul_f16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_mul_f16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x90,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_mul_f16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_mul_f16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x90,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_mul_f16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_mul_f16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xd5,0x00,0x18] 
0x05,0x00,0x90,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_mul_f16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_mul_f16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x90,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_mul_f16 v5, v1, m0 ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_mul_f16 v5, v1, m0 ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x90,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_mul_f16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_mul_f16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x90,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_mul_f16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_mul_f16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x90,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_mul_f16 v5, v1, 0 ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_mul_f16 v5, v1, 0 ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x90,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_mul_f16 v5, v1, -1 ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_mul_f16 v5, v1, -1 ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x90,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_mul_f16 v5, v1, 0.5 ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xe1,0x01,0x18] +# CHECK: v_pk_mul_f16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xe1,0x01,0x18] 0x05,0x00,0x90,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_mul_f16 v5, v1, -4.0 ; encoding: [0x05,0x00,0x90,0xd3,0x01,0xef,0x01,0x18] +# CHECK: v_pk_mul_f16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x90,0xd3,0x01,0xef,0x01,0x18] 0x05,0x00,0x90,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x90,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel:[1,0] ; encoding: 
[0x05,0x48,0x90,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x90,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x90,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x90,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x90,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x90,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x90,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x90,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_mul_f16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_mul_f16 v5, v1, v2 neg_lo:[1,0] ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x38] +# CHECK: v_pk_mul_f16 v5, v1, v2 neg_lo:[1,0] ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x38] 0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x38 -# CHECK: v_pk_mul_f16 v5, v1, v2 neg_lo:[0,1] ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x58] +# CHECK: v_pk_mul_f16 v5, v1, v2 neg_lo:[0,1] ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x58] 0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x58 -# CHECK: v_pk_mul_f16 v5, v1, v2 neg_lo:[1,1] ; encoding: [0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x78] +# CHECK: v_pk_mul_f16 v5, v1, v2 neg_lo:[1,1] ; encoding: [0x05,0x40,0x90,0xd3,0x01,0x05,0x02,0x78] 
0x05,0x00,0x90,0xd3,0x01,0x05,0x02,0x78 -# CHECK: v_pk_mul_f16 v5, v1, v2 neg_hi:[1,0] ; encoding: [0x05,0x01,0x90,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, v1, v2 neg_hi:[1,0] ; encoding: [0x05,0x41,0x90,0xd3,0x01,0x05,0x02,0x18] 0x05,0x01,0x90,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, v1, v2 neg_hi:[0,1] ; encoding: [0x05,0x02,0x90,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, v1, v2 neg_hi:[0,1] ; encoding: [0x05,0x42,0x90,0xd3,0x01,0x05,0x02,0x18] 0x05,0x02,0x90,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, v1, v2 neg_hi:[1,1] ; encoding: [0x05,0x03,0x90,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, v1, v2 neg_hi:[1,1] ; encoding: [0x05,0x43,0x90,0xd3,0x01,0x05,0x02,0x18] 0x05,0x03,0x90,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_mul_f16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x90,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_mul_f16 v5, v1, v2 clamp ; encoding: [0x05,0xc0,0x90,0xd3,0x01,0x05,0x02,0x18] 0x05,0x80,0x90,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_f16 v5, v1, v2 ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_f16 v255, v1, v2 ; encoding: [0xff,0x00,0x91,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_f16 v255, v1, v2 ; encoding: [0xff,0x40,0x91,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x91,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_f16 v5, v255, v2 ; encoding: [0x05,0x00,0x91,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_min_f16 v5, v255, v2 ; encoding: [0x05,0x40,0x91,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x91,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_min_f16 v5, s1, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, s1, v2 ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, s101, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, s101, v2 ; 
encoding: [0x05,0x40,0x91,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0x65,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x91,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x91,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x91,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x91,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, m0, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, m0, v2 ; encoding: [0x05,0x40,0x91,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x91,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x91,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, 0, v2 ; encoding: [0x05,0x00,0x91,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, 0, v2 ; encoding: [0x05,0x40,0x91,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, -1, v2 ; encoding: [0x05,0x00,0x91,0xd3,0xc1,0x04,0x02,0x18] +# 
CHECK: v_pk_min_f16 v5, -1, v2 ; encoding: [0x05,0x40,0x91,0xd3,0xc1,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, 0.5, v2 ; encoding: [0x05,0x00,0x91,0xd3,0xf0,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x91,0xd3,0xf0,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, -4.0, v2 ; encoding: [0x05,0x00,0x91,0xd3,0xf7,0x04,0x02,0x18] +# CHECK: v_pk_min_f16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x91,0xd3,0xf7,0x04,0x02,0x18] 0x05,0x00,0x91,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_min_f16 v5, v1, v255 ; encoding: [0x05,0x00,0x91,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_min_f16 v5, v1, v255 ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x91,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_min_f16 v5, v1, s2 ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_min_f16 v5, v1, s2 ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x91,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_min_f16 v5, v1, s101 ; encoding: [0x05,0x00,0x91,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_min_f16 v5, v1, s101 ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x91,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_min_f16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x91,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_min_f16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x91,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_min_f16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x91,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_min_f16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x91,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_min_f16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x91,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_min_f16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x91,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_min_f16 v5, v1, vcc_hi ; encoding: 
[0x05,0x00,0x91,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_min_f16 v5, v1, vcc_hi ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x91,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_min_f16 v5, v1, m0 ; encoding: [0x05,0x00,0x91,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_min_f16 v5, v1, m0 ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x91,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_min_f16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x91,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_min_f16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x91,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_min_f16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x91,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_min_f16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x91,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_min_f16 v5, v1, 0 ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_min_f16 v5, v1, 0 ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x91,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_min_f16 v5, v1, -1 ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_min_f16 v5, v1, -1 ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x91,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_min_f16 v5, v1, 0.5 ; encoding: [0x05,0x00,0x91,0xd3,0x01,0xe1,0x01,0x18] +# CHECK: v_pk_min_f16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xe1,0x01,0x18] 0x05,0x00,0x91,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_min_f16 v5, v1, -4.0 ; encoding: [0x05,0x00,0x91,0xd3,0x01,0xef,0x01,0x18] +# CHECK: v_pk_min_f16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x91,0xd3,0x01,0xef,0x01,0x18] 0x05,0x00,0x91,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_min_f16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x91,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_f16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x91,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x91,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_f16 v5, v1, v2 op_sel:[0,1] 
; encoding: [0x05,0x10,0x91,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_f16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x50,0x91,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x91,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_f16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x91,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_f16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x91,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x91,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_f16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_min_f16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_min_f16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_min_f16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_min_f16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_min_f16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_min_f16 v5, v1, v2 neg_lo:[1,0] ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x38] +# CHECK: v_pk_min_f16 v5, v1, v2 neg_lo:[1,0] ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x38] 0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x38 -# CHECK: v_pk_min_f16 v5, v1, v2 neg_lo:[0,1] ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x58] +# CHECK: v_pk_min_f16 v5, v1, v2 neg_lo:[0,1] ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x58] 0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x58 -# CHECK: v_pk_min_f16 v5, v1, v2 neg_lo:[1,1] ; encoding: [0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x78] +# CHECK: v_pk_min_f16 v5, v1, v2 neg_lo:[1,1] ; encoding: [0x05,0x40,0x91,0xd3,0x01,0x05,0x02,0x78] 0x05,0x00,0x91,0xd3,0x01,0x05,0x02,0x78 -# CHECK: v_pk_min_f16 v5, v1, v2 neg_hi:[1,0] ; encoding: 
[0x05,0x01,0x91,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_f16 v5, v1, v2 neg_hi:[1,0] ; encoding: [0x05,0x41,0x91,0xd3,0x01,0x05,0x02,0x18] 0x05,0x01,0x91,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_f16 v5, v1, v2 neg_hi:[0,1] ; encoding: [0x05,0x02,0x91,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_f16 v5, v1, v2 neg_hi:[0,1] ; encoding: [0x05,0x42,0x91,0xd3,0x01,0x05,0x02,0x18] 0x05,0x02,0x91,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_f16 v5, v1, v2 neg_hi:[1,1] ; encoding: [0x05,0x03,0x91,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_f16 v5, v1, v2 neg_hi:[1,1] ; encoding: [0x05,0x43,0x91,0xd3,0x01,0x05,0x02,0x18] 0x05,0x03,0x91,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_min_f16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x91,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_min_f16 v5, v1, v2 clamp ; encoding: [0x05,0xc0,0x91,0xd3,0x01,0x05,0x02,0x18] 0x05,0x80,0x91,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_f16 v5, v1, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x18] 0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_f16 v255, v1, v2 ; encoding: [0xff,0x00,0x92,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_f16 v255, v1, v2 ; encoding: [0xff,0x40,0x92,0xd3,0x01,0x05,0x02,0x18] 0xff,0x00,0x92,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_f16 v5, v255, v2 ; encoding: [0x05,0x00,0x92,0xd3,0xff,0x05,0x02,0x18] +# CHECK: v_pk_max_f16 v5, v255, v2 ; encoding: [0x05,0x40,0x92,0xd3,0xff,0x05,0x02,0x18] 0x05,0x00,0x92,0xd3,0xff,0x05,0x02,0x18 -# CHECK: v_pk_max_f16 v5, s1, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, s1, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0x01,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, s101, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x65,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, s101, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x65,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0x65,0x04,0x02,0x18 -# CHECK: 
v_pk_max_f16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x66,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, flat_scratch_lo, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x66,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0x66,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x67,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, flat_scratch_hi, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x67,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0x67,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x6a,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, vcc_lo, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x6a,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0x6a,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x6b,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, vcc_hi, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x6b,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0x6b,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, m0, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x7c,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, m0, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x7c,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0x7c,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, exec_lo, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x7e,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, exec_lo, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x7e,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0x7e,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, exec_hi, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x7f,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, exec_hi, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x7f,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0x7f,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, 0, v2 ; encoding: [0x05,0x00,0x92,0xd3,0x80,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, 0, v2 ; encoding: [0x05,0x40,0x92,0xd3,0x80,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0x80,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, -1, v2 ; encoding: [0x05,0x00,0x92,0xd3,0xc1,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, -1, v2 ; encoding: [0x05,0x40,0x92,0xd3,0xc1,0x04,0x02,0x18] 
0x05,0x00,0x92,0xd3,0xc1,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, 0.5, v2 ; encoding: [0x05,0x00,0x92,0xd3,0xf0,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, 0.5, v2 ; encoding: [0x05,0x40,0x92,0xd3,0xf0,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0xf0,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, -4.0, v2 ; encoding: [0x05,0x00,0x92,0xd3,0xf7,0x04,0x02,0x18] +# CHECK: v_pk_max_f16 v5, -4.0, v2 ; encoding: [0x05,0x40,0x92,0xd3,0xf7,0x04,0x02,0x18] 0x05,0x00,0x92,0xd3,0xf7,0x04,0x02,0x18 -# CHECK: v_pk_max_f16 v5, v1, v255 ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xff,0x03,0x18] +# CHECK: v_pk_max_f16 v5, v1, v255 ; encoding: [0x05,0x40,0x92,0xd3,0x01,0xff,0x03,0x18] 0x05,0x00,0x92,0xd3,0x01,0xff,0x03,0x18 -# CHECK: v_pk_max_f16 v5, v1, s2 ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x05,0x00,0x18] +# CHECK: v_pk_max_f16 v5, v1, s2 ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x05,0x00,0x18] 0x05,0x00,0x92,0xd3,0x01,0x05,0x00,0x18 -# CHECK: v_pk_max_f16 v5, v1, s101 ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xcb,0x00,0x18] +# CHECK: v_pk_max_f16 v5, v1, s101 ; encoding: [0x05,0x40,0x92,0xd3,0x01,0xcb,0x00,0x18] 0x05,0x00,0x92,0xd3,0x01,0xcb,0x00,0x18 -# CHECK: v_pk_max_f16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xcd,0x00,0x18] +# CHECK: v_pk_max_f16 v5, v1, flat_scratch_lo ; encoding: [0x05,0x40,0x92,0xd3,0x01,0xcd,0x00,0x18] 0x05,0x00,0x92,0xd3,0x01,0xcd,0x00,0x18 -# CHECK: v_pk_max_f16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xcf,0x00,0x18] +# CHECK: v_pk_max_f16 v5, v1, flat_scratch_hi ; encoding: [0x05,0x40,0x92,0xd3,0x01,0xcf,0x00,0x18] 0x05,0x00,0x92,0xd3,0x01,0xcf,0x00,0x18 -# CHECK: v_pk_max_f16 v5, v1, vcc_lo ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xd5,0x00,0x18] +# CHECK: v_pk_max_f16 v5, v1, vcc_lo ; encoding: [0x05,0x40,0x92,0xd3,0x01,0xd5,0x00,0x18] 0x05,0x00,0x92,0xd3,0x01,0xd5,0x00,0x18 -# CHECK: v_pk_max_f16 v5, v1, vcc_hi ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xd7,0x00,0x18] +# CHECK: v_pk_max_f16 v5, v1, vcc_hi ; encoding: 
[0x05,0x40,0x92,0xd3,0x01,0xd7,0x00,0x18] 0x05,0x00,0x92,0xd3,0x01,0xd7,0x00,0x18 -# CHECK: v_pk_max_f16 v5, v1, m0 ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xf9,0x00,0x18] +# CHECK: v_pk_max_f16 v5, v1, m0 ; encoding: [0x05,0x40,0x92,0xd3,0x01,0xf9,0x00,0x18] 0x05,0x00,0x92,0xd3,0x01,0xf9,0x00,0x18 -# CHECK: v_pk_max_f16 v5, v1, exec_lo ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xfd,0x00,0x18] +# CHECK: v_pk_max_f16 v5, v1, exec_lo ; encoding: [0x05,0x40,0x92,0xd3,0x01,0xfd,0x00,0x18] 0x05,0x00,0x92,0xd3,0x01,0xfd,0x00,0x18 -# CHECK: v_pk_max_f16 v5, v1, exec_hi ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xff,0x00,0x18] +# CHECK: v_pk_max_f16 v5, v1, exec_hi ; encoding: [0x05,0x40,0x92,0xd3,0x01,0xff,0x00,0x18] 0x05,0x00,0x92,0xd3,0x01,0xff,0x00,0x18 -# CHECK: v_pk_max_f16 v5, v1, 0 ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x01,0x01,0x18] +# CHECK: v_pk_max_f16 v5, v1, 0 ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x01,0x01,0x18] 0x05,0x00,0x92,0xd3,0x01,0x01,0x01,0x18 -# CHECK: v_pk_max_f16 v5, v1, -1 ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x83,0x01,0x18] +# CHECK: v_pk_max_f16 v5, v1, -1 ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x83,0x01,0x18] 0x05,0x00,0x92,0xd3,0x01,0x83,0x01,0x18 -# CHECK: v_pk_max_f16 v5, v1, 0.5 ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xe1,0x01,0x18] +# CHECK: v_pk_max_f16 v5, v1, 0.5 ; encoding: [0x05,0x40,0x92,0xd3,0x01,0xe1,0x01,0x18] 0x05,0x00,0x92,0xd3,0x01,0xe1,0x01,0x18 -# CHECK: v_pk_max_f16 v5, v1, -4.0 ; encoding: [0x05,0x00,0x92,0xd3,0x01,0xef,0x01,0x18] +# CHECK: v_pk_max_f16 v5, v1, -4.0 ; encoding: [0x05,0x40,0x92,0xd3,0x01,0xef,0x01,0x18] 0x05,0x00,0x92,0xd3,0x01,0xef,0x01,0x18 -# CHECK: v_pk_max_f16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x08,0x92,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_f16 v5, v1, v2 op_sel:[1,0] ; encoding: [0x05,0x48,0x92,0xd3,0x01,0x05,0x02,0x18] 0x05,0x08,0x92,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_f16 v5, v1, v2 op_sel:[0,1] ; encoding: [0x05,0x10,0x92,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_f16 v5, v1, v2 
op_sel:[0,1] ; encoding: [0x05,0x50,0x92,0xd3,0x01,0x05,0x02,0x18] 0x05,0x10,0x92,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_f16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x18,0x92,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_f16 v5, v1, v2 op_sel:[1,1] ; encoding: [0x05,0x58,0x92,0xd3,0x01,0x05,0x02,0x18] 0x05,0x18,0x92,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_f16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x00] +# CHECK: v_pk_max_f16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x00] 0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x00 -# CHECK: v_pk_max_f16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x08] +# CHECK: v_pk_max_f16 v5, v1, v2 op_sel_hi:[1,0] ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x08] 0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x08 -# CHECK: v_pk_max_f16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x10] +# CHECK: v_pk_max_f16 v5, v1, v2 op_sel_hi:[0,1] ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x10] 0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x10 -# CHECK: v_pk_max_f16 v5, v1, v2 neg_lo:[1,0] ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x38] +# CHECK: v_pk_max_f16 v5, v1, v2 neg_lo:[1,0] ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x38] 0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x38 -# CHECK: v_pk_max_f16 v5, v1, v2 neg_lo:[0,1] ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x58] +# CHECK: v_pk_max_f16 v5, v1, v2 neg_lo:[0,1] ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x58] 0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x58 -# CHECK: v_pk_max_f16 v5, v1, v2 neg_lo:[1,1] ; encoding: [0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x78] +# CHECK: v_pk_max_f16 v5, v1, v2 neg_lo:[1,1] ; encoding: [0x05,0x40,0x92,0xd3,0x01,0x05,0x02,0x78] 0x05,0x00,0x92,0xd3,0x01,0x05,0x02,0x78 -# CHECK: v_pk_max_f16 v5, v1, v2 neg_hi:[1,0] ; encoding: [0x05,0x01,0x92,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_f16 v5, v1, v2 neg_hi:[1,0] ; encoding: 
[0x05,0x41,0x92,0xd3,0x01,0x05,0x02,0x18] 0x05,0x01,0x92,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_f16 v5, v1, v2 neg_hi:[0,1] ; encoding: [0x05,0x02,0x92,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_f16 v5, v1, v2 neg_hi:[0,1] ; encoding: [0x05,0x42,0x92,0xd3,0x01,0x05,0x02,0x18] 0x05,0x02,0x92,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_f16 v5, v1, v2 neg_hi:[1,1] ; encoding: [0x05,0x03,0x92,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_f16 v5, v1, v2 neg_hi:[1,1] ; encoding: [0x05,0x43,0x92,0xd3,0x01,0x05,0x02,0x18] 0x05,0x03,0x92,0xd3,0x01,0x05,0x02,0x18 -# CHECK: v_pk_max_f16 v5, v1, v2 clamp ; encoding: [0x05,0x80,0x92,0xd3,0x01,0x05,0x02,0x18] +# CHECK: v_pk_max_f16 v5, v1, v2 clamp ; encoding: [0x05,0xc0,0x92,0xd3,0x01,0x05,0x02,0x18] 0x05,0x80,0x92,0xd3,0x01,0x05,0x02,0x18 # CHECK: v_mad_mix_f32 v5, v1, v2, v3 ; encoding: [0x05,0x00,0xa0,0xd3,0x01,0x05,0x0e,0x04] diff --git a/llvm/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt index e29c4d2f62b4..7ab114578abe 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/lds_direct_gfx9.txt @@ -12,7 +12,7 @@ # GFX9: v_pk_mad_i16 v0, src_lds_direct, v0, v0 ; encoding: [0x00,0x40,0x80,0xd3,0xfe,0x00,0x02,0x1c] 0x00,0x40,0x80,0xd3,0xfe,0x00,0x02,0x1c -# GFX9: v_pk_mul_lo_u16 v0, src_lds_direct, v0 ; encoding: [0x00,0x00,0x81,0xd3,0xfe,0x00,0x02,0x18] +# GFX9: v_pk_mul_lo_u16 v0, src_lds_direct, v0 ; encoding: [0x00,0x40,0x81,0xd3,0xfe,0x00,0x02,0x18] 0x00,0x00,0x81,0xd3,0xfe,0x00,0x02,0x18 # GFX9: v_cmpx_le_i32_e32 vcc, src_lds_direct, v0 ; encoding: [0xfe,0x00,0xa6,0x7d] diff --git a/llvm/test/MC/Disassembler/AMDGPU/literal_gfx9.txt b/llvm/test/MC/Disassembler/AMDGPU/literal_gfx9.txt index 97c86c81e4d6..e058fd13a48e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/literal_gfx9.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/literal_gfx9.txt @@ -45,7 +45,7 @@ # GFX9: v_max_f64 v[0:1], src_shared_base, v[0:1] ; encoding: 
[0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00] 0x00,0x00,0x83,0xd2,0xeb,0x00,0x02,0x00 -# GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x00,0x8f,0xd3,0xeb,0x00,0x02,0x18] +# GFX9: v_pk_add_f16 v0, src_shared_base, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xeb,0x00,0x02,0x18] 0x00,0x00,0x8f,0xd3,0xeb,0x00,0x02,0x18 # GFX9: v_ceil_f16_e64 v0, -src_shared_base ; encoding: [0x00,0x00,0x85,0xd1,0xeb,0x00,0x00,0x20] @@ -126,7 +126,7 @@ # GFX9: v_max_f64 v[0:1], src_scc, v[0:1] ; encoding: [0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00] 0x00,0x00,0x83,0xd2,0xfd,0x00,0x02,0x00 -# GFX9: v_pk_add_f16 v0, src_execz, v0 ; encoding: [0x00,0x00,0x8f,0xd3,0xfc,0x00,0x02,0x18] +# GFX9: v_pk_add_f16 v0, src_execz, v0 ; encoding: [0x00,0x40,0x8f,0xd3,0xfc,0x00,0x02,0x18] 0x00,0x00,0x8f,0xd3,0xfc,0x00,0x02,0x18 # GFX9: v_ceil_f16_e64 v0, -src_vccz ; encoding: [0x00,0x00,0x85,0xd1,0xfb,0x00,0x00,0x20] diff --git a/llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt b/llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt index 693c869dfac7..fb6c6a407575 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/literalv216_gfx10.txt @@ -4,73 +4,73 @@ # Inline constants #===----------------------------------------------------------------------===// -# GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x80,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0x80,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18] +# GFX10: v_pk_add_f16 v1, v2, 0 ; encoding: [0x01,0x40,0x0f,0xcc,0x02,0x01,0x01,0x18] 0x01,0x00,0x0f,0xcc,0x02,0x01,0x01,0x18 -# GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 1.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf2,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xf2,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: 
[0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, -1.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf3,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xf3,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, -0.5, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf1,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xf1,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 0.5, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf0,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xf0,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 2.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf4,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xf4,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, -2.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf5,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xf5,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 4.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf6,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xf6,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, -4.0, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf7,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xf7,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 0.15915494, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xf8,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xf8,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, -1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc1,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xc1,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, -2, v2 ; encoding: 
[0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, -2, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc2,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xc2,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, -3, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc3,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xc3,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, -16, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xd0,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xd0,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 1, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x81,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0x81,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 2, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x82,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0x82,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 3, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x83,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0x83,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 4, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x84,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0x84,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 15, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x8f,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0x8f,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 16, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0x90,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0x90,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18] +# GFX10: 
v_pk_add_f16 v1, 63, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xbf,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xbf,0x04,0x02,0x18 -# GFX10: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18] +# GFX10: v_pk_add_f16 v1, 64, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xc0,0x04,0x02,0x18] 0x01,0x00,0x0f,0xcc,0xc0,0x04,0x02,0x18 # GFX10: v_pk_fma_f16 v5, 1.0, 2.0, 4.0 ; encoding: [0x05,0x40,0x0e,0xcc,0xf2,0xe8,0xd9,0x1b] @@ -85,35 +85,35 @@ # GFX10: v_pk_mad_u16 v5, -1, -2, -3 ; encoding: [0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b] 0x05,0x40,0x09,0xcc,0xc1,0x84,0x0d,0x1b -# GFX10: v_pk_ashrrev_i16 v5, 1, 16 ; encoding: [0x05,0x00,0x06,0xcc,0x81,0x20,0x01,0x18] +# GFX10: v_pk_ashrrev_i16 v5, 1, 16 ; encoding: [0x05,0x40,0x06,0xcc,0x81,0x20,0x01,0x18] 0x05,0x00,0x06,0xcc,0x81,0x20,0x01,0x18 #===----------------------------------------------------------------------===// # 32-bit literals #===----------------------------------------------------------------------===// -# GFX10: v_pk_add_f16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] +# GFX10: v_pk_add_f16 v5, v1, 0x12345678 ; encoding: [0x05,0x40,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] 0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12 -# GFX10: v_pk_add_f16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] +# GFX10: v_pk_add_f16 v5, 0x12345678, v2 ; encoding: [0x05,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] 0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12 -# GFX10: v_pk_add_f16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] +# GFX10: v_pk_add_f16 v5, 0xffffff00, v2 ; encoding: [0x05,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] 0x05,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff -# GFX10: v_pk_add_f16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] +# GFX10: v_pk_add_f16 v5, v1, 
0x100 ; encoding: [0x05,0x40,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] 0x05,0x00,0x0f,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00 -# GFX10: v_pk_add_u16 v5, v1, 0x12345678 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] +# GFX10: v_pk_add_u16 v5, v1, 0x12345678 ; encoding: [0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] 0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12 -# GFX10: v_pk_add_u16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] +# GFX10: v_pk_add_u16 v5, 0x12345678, v2 ; encoding: [0x05,0x40,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] 0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12 -# GFX10: v_pk_add_u16 v5, 0xffffff00, v2 ; encoding: [0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] +# GFX10: v_pk_add_u16 v5, 0xffffff00, v2 ; encoding: [0x05,0x40,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff] 0x05,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x00,0xff,0xff,0xff -# GFX10: v_pk_add_u16 v5, v1, 0x100 ; encoding: [0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] +# GFX10: v_pk_add_u16 v5, v1, 0x100 ; encoding: [0x05,0x40,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00] 0x05,0x00,0x0a,0xcc,0x01,0xff,0x01,0x18,0x00,0x01,0x00,0x00 # GFX10: v_pk_fma_f16 v5, 0xaf123456, v2, v3 ; encoding: [0x05,0x40,0x0e,0xcc,0xff,0x04,0x0e,0x1c,0x56,0x34,0x12,0xaf] @@ -134,10 +134,10 @@ # GFX10: v_pk_mad_i16 v5, v1, v2, 0xaf123456 ; encoding: [0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf] 0x05,0x40,0x00,0xcc,0x01,0x05,0xfe,0x1b,0x56,0x34,0x12,0xaf -# GFX10: v_pk_ashrrev_i16 v5, 0x12345678, v2 ; encoding: [0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] +# GFX10: v_pk_ashrrev_i16 v5, 0x12345678, v2 ; encoding: [0x05,0x40,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12] 0x05,0x00,0x06,0xcc,0xff,0x04,0x02,0x18,0x78,0x56,0x34,0x12 -# GFX10: v_pk_ashrrev_i16 v5, v1, 0x12345678 ; encoding: 
[0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] +# GFX10: v_pk_ashrrev_i16 v5, v1, 0x12345678 ; encoding: [0x05,0x40,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12] 0x05,0x00,0x06,0xcc,0x01,0xff,0x01,0x18,0x78,0x56,0x34,0x12 #===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt b/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt index d9d8972b88d4..8780bd7372d2 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/vop3-literal.txt @@ -21,31 +21,31 @@ # GFX10: v_bfm_b32_e64 v0, 0x3039, v1 ; encoding: [0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00] 0x00,0x00,0x63,0xd7,0xff,0x02,0x02,0x00,0x39,0x30,0x00,0x00 -# GFX10: v_pk_add_f16 v1, 0x4e40, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00] +# GFX10: v_pk_add_f16 v1, 0x4e40, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00] 0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0x4e,0x00,0x00 -# GFX10: v_pk_add_f16 v1, 0x1e240, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00] +# GFX10: v_pk_add_f16 v1, 0x1e240, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00] 0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x40,0xe2,0x01,0x00 -# GFX10: v_pk_add_f16 v1, 0xffffff38, v2 ; encoding: [0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff] +# GFX10: v_pk_add_f16 v1, 0xffffff38, v2 ; encoding: [0x01,0x40,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff] 0x01,0x00,0x0f,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff -# GFX10: v_pk_add_u16 v1, 0xffffff38, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff] +# GFX10: v_pk_add_u16 v1, 0xffffff38, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff] 0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x38,0xff,0xff,0xff -# GFX10: v_pk_add_u16 v1, 64, v2 ; encoding: 
[0x01,0x00,0x0a,0xcc,0xc0,0x04,0x02,0x18] +# GFX10: v_pk_add_u16 v1, 64, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xc0,0x04,0x02,0x18] 0x01,0x00,0x0a,0xcc,0xc0,0x04,0x02,0x18 -# GFX10: v_pk_add_u16 v1, 0x41, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00] +# GFX10: v_pk_add_u16 v1, 0x41, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00] 0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x41,0x00,0x00,0x00 -# GFX10: v_pk_add_u16 v1, -1, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc1,0x04,0x02,0x18] +# GFX10: v_pk_add_u16 v1, -1, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xc1,0x04,0x02,0x18] 0x01,0x00,0x0a,0xcc,0xc1,0x04,0x02,0x18 -# GFX10: v_pk_add_u16 v1, -5, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xc5,0x04,0x02,0x18] +# GFX10: v_pk_add_u16 v1, -5, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xc5,0x04,0x02,0x18] 0x01,0x00,0x0a,0xcc,0xc5,0x04,0x02,0x18 -# GFX10: v_pk_add_u16 v1, 0xffffff9c, v2 ; encoding: [0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff] +# GFX10: v_pk_add_u16 v1, 0xffffff9c, v2 ; encoding: [0x01,0x40,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff] 0x01,0x00,0x0a,0xcc,0xff,0x04,0x02,0x18,0x9c,0xff,0xff,0xff # GFX10: v_add_nc_i16 v5, v1, 0xcdab ; encoding: [0x05,0x00,0x0d,0xd7,0x01,0xff,0x01,0x00,0xab,0xcd,0xff,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/vop3p_opsel.txt b/llvm/test/MC/Disassembler/AMDGPU/vop3p_opsel.txt new file mode 100644 index 000000000000..2269bdd814c3 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/vop3p_opsel.txt @@ -0,0 +1,15 @@ +# RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -disassemble -show-encoding < %s | FileCheck %s + +# Check that we can disassemble opcodes w/o src2 with any op_sel_hi value for src2 + +# CHECK: v_pk_add_f16 v5, v1, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18] +0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x18 + +# CHECK: v_pk_add_f16 v5, v1, v2 ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18] +0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x18 + +# CHECK: 
v_pk_add_f16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x00] +0x05,0x00,0x8f,0xd3,0x01,0x05,0x02,0x00 + +# CHECK: v_pk_add_f16 v5, v1, v2 op_sel_hi:[0,0] ; encoding: [0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x00] +0x05,0x40,0x8f,0xd3,0x01,0x05,0x02,0x00 From 10c256ccaf520eed766f594f897b5bafdc8061ae Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 2 Mar 2021 11:21:54 +0100 Subject: [PATCH 031/784] Revert "[X86] Fold shuffle(not(x),undef) -> not(shuffle(x,undef))" This reverts commit 925093d88ae74560a8e94cf66f95d60ea3ffa2d3. Causes an infinite loop when compiling some shuffles: $ cat bugpoint-reduced-simplified.ll target triple = "x86_64-unknown-linux-gnu" define void @foo() { entry: %0 = load i8, i8* undef, align 1 %broadcast.splatinsert = insertelement <16 x i8> poison, i8 %0, i32 0 %1 = icmp ne <16 x i8> %broadcast.splatinsert, zeroinitializer %2 = shufflevector <16 x i1> %1, <16 x i1> undef, <16 x i32> zeroinitializer %wide.load = load <16 x i8>, <16 x i8>* undef, align 1 %3 = icmp ne <16 x i8> %wide.load, zeroinitializer %4 = and <16 x i1> %3, %2 %5 = zext <16 x i1> %4 to <16 x i8> store <16 x i8> %5, <16 x i8>* undef, align 1 ret void } $ llc < bugpoint-reduced-simplified.ll --- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 ---------- llvm/test/CodeGen/X86/combine-bitselect.ll | 30 +++++++++++++++++----- llvm/test/CodeGen/X86/promote-cmp.ll | 11 ++++---- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2e9022205801..86052fad5721 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38003,19 +38003,6 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, if (SDValue HAddSub = foldShuffleOfHorizOp(N, DAG)) return HAddSub; - - // Fold shuffle(not(x),undef) -> not(shuffle(x,undef)). 
- if (N->getOpcode() == ISD::VECTOR_SHUFFLE && - N->getOperand(0).getOpcode() == ISD::XOR && - N->getOperand(1).isUndef() && - N->isOnlyUserOf(N->getOperand(0).getNode())) { - if (SDValue Not = IsNOT(N->getOperand(0), DAG, true)) { - SDValue NewShuffle = DAG.getVectorShuffle( - VT, dl, DAG.getBitcast(VT, Not), DAG.getUNDEF(VT), - cast(N)->getMask()); - return DAG.getNOT(dl, NewShuffle, VT); - } - } } // Attempt to combine into a vector load/broadcast. diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll index 5c8b8f691fb8..d57bd877500c 100644 --- a/llvm/test/CodeGen/X86/combine-bitselect.ll +++ b/llvm/test/CodeGen/X86/combine-bitselect.ll @@ -505,18 +505,26 @@ define <4 x i64> @bitselect_v4i64_broadcast_rrr(<4 x i64> %a0, <4 x i64> %a1, i6 ; XOP-LABEL: bitselect_v4i64_broadcast_rrr: ; XOP: # %bb.0: ; XOP-NEXT: vmovq %rdi, %xmm2 +; XOP-NEXT: vmovq %rdi, %xmm3 ; XOP-NEXT: vmovddup {{.*#+}} xmm2 = xmm2[0,0] ; XOP-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; XOP-NEXT: vpcmov %ymm2, %ymm1, %ymm0, %ymm0 +; XOP-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1] +; XOP-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3 +; XOP-NEXT: vandps %ymm2, %ymm0, %ymm0 +; XOP-NEXT: vandnps %ymm1, %ymm3, %ymm1 +; XOP-NEXT: vorps %ymm1, %ymm0, %ymm0 ; XOP-NEXT: retq ; ; AVX1-LABEL: bitselect_v4i64_broadcast_rrr: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovq %rdi, %xmm2 +; AVX1-NEXT: vmovq %rdi, %xmm3 ; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = xmm2[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 +; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1] +; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3 ; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 -; AVX1-NEXT: vandnps %ymm1, %ymm2, %ymm1 +; AVX1-NEXT: vandnps %ymm1, %ymm3, %ymm1 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -873,22 +881,32 @@ define <8 x i64> @bitselect_v8i64_broadcast_rrr(<8 x i64> %a0, <8 x i64> %a1, i6 ; XOP-LABEL: bitselect_v8i64_broadcast_rrr: ; XOP: # %bb.0: ; XOP-NEXT: vmovq %rdi, %xmm4 
+; XOP-NEXT: vmovq %rdi, %xmm5 ; XOP-NEXT: vmovddup {{.*#+}} xmm4 = xmm4[0,0] ; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4 -; XOP-NEXT: vpcmov %ymm4, %ymm2, %ymm0, %ymm0 -; XOP-NEXT: vpcmov %ymm4, %ymm3, %ymm1, %ymm1 +; XOP-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[0,1,0,1] +; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm5, %ymm5 +; XOP-NEXT: vandps %ymm4, %ymm1, %ymm1 +; XOP-NEXT: vandps %ymm4, %ymm0, %ymm0 +; XOP-NEXT: vandnps %ymm3, %ymm5, %ymm3 +; XOP-NEXT: vorps %ymm3, %ymm1, %ymm1 +; XOP-NEXT: vandnps %ymm2, %ymm5, %ymm2 +; XOP-NEXT: vorps %ymm2, %ymm0, %ymm0 ; XOP-NEXT: retq ; ; AVX1-LABEL: bitselect_v8i64_broadcast_rrr: ; AVX1: # %bb.0: ; AVX1-NEXT: vmovq %rdi, %xmm4 +; AVX1-NEXT: vmovq %rdi, %xmm5 ; AVX1-NEXT: vmovddup {{.*#+}} xmm4 = xmm4[0,0] ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4 +; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm5[0,1,0,1] +; AVX1-NEXT: vinsertf128 $1, %xmm5, %ymm5, %ymm5 ; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1 ; AVX1-NEXT: vandps %ymm4, %ymm0, %ymm0 -; AVX1-NEXT: vandnps %ymm3, %ymm4, %ymm3 +; AVX1-NEXT: vandnps %ymm3, %ymm5, %ymm3 ; AVX1-NEXT: vorps %ymm3, %ymm1, %ymm1 -; AVX1-NEXT: vandnps %ymm2, %ymm4, %ymm2 +; AVX1-NEXT: vandnps %ymm2, %ymm5, %ymm2 ; AVX1-NEXT: vorps %ymm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll index 1350b2b9e065..c59f808a3029 100644 --- a/llvm/test/CodeGen/X86/promote-cmp.ll +++ b/llvm/test/CodeGen/X86/promote-cmp.ll @@ -47,16 +47,17 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) { ; SSE4-LABEL: PR45808: ; SSE4: # %bb.0: ; SSE4-NEXT: movdqa %xmm0, %xmm4 -; SSE4-NEXT: movdqa %xmm0, %xmm5 -; SSE4-NEXT: pcmpgtq %xmm2, %xmm5 ; SSE4-NEXT: movdqa %xmm1, %xmm0 ; SSE4-NEXT: pcmpgtq %xmm3, %xmm0 +; SSE4-NEXT: movdqa %xmm4, %xmm5 +; SSE4-NEXT: pcmpgtq %xmm2, %xmm5 +; SSE4-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3] ; SSE4-NEXT: pcmpeqd %xmm6, %xmm6 -; SSE4-NEXT: pxor %xmm6, %xmm5 +; SSE4-NEXT: pxor %xmm5, %xmm6 ; SSE4-NEXT: psllq $63, %xmm0 ; 
SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm3 -; SSE4-NEXT: psllq $63, %xmm5 -; SSE4-NEXT: movdqa %xmm5, %xmm0 +; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm6[0],zero,xmm6[1],zero +; SSE4-NEXT: psllq $63, %xmm0 ; SSE4-NEXT: blendvpd %xmm0, %xmm4, %xmm2 ; SSE4-NEXT: movapd %xmm2, %xmm0 ; SSE4-NEXT: movapd %xmm3, %xmm1 From 5ba568f21b51825522f3a60a4ba656cf37a144c9 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Tue, 2 Mar 2021 11:32:03 +0100 Subject: [PATCH 032/784] [clang][docs] Fix code blocks rendering Some code blocks that render fine locally don't appear on the llvm.org website. Attempting to fix this by specifying the `text` type. --- clang/docs/InternalsManual.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst index ec018755f491..bc574f4ae2d9 100644 --- a/clang/docs/InternalsManual.rst +++ b/clang/docs/InternalsManual.rst @@ -770,7 +770,7 @@ uses key paths, which are declared in two steps. First, a tablegen definition for the ``CompilerInvocation`` member is created by inheriting from ``KeyPathAndMacro``: -.. code-block:: +.. code-block:: text // Options.td @@ -861,7 +861,7 @@ information required for parsing or generating the command line argument. The key path defaults to ``false`` and is set to ``true`` when the flag is present on command line. -.. code-block:: +.. code-block:: text def fignore_exceptions : Flag<["-"], "fignore-exceptions">, Flags<[CC1Option]>, MarshallingInfoFlag>; @@ -871,7 +871,7 @@ present on command line. The key path defaults to ``true`` and is set to ``false`` when the flag is present on command line. -.. code-block:: +.. code-block:: text def fno_verbose_asm : Flag<["-"], "fno-verbose-asm">, Flags<[CC1Option]>, MarshallingInfoNegativeFlag>; @@ -883,7 +883,7 @@ boolean value that's statically unknown in the tablegen file). Then, the key path is set to the value associated with the flag that appears last on command line. -.. 
code-block:: +.. code-block:: text defm legacy_pass_manager : BoolOption<"f", "legacy-pass-manager", CodeGenOpts<"LegacyPassManager">, DefaultFalse, @@ -911,7 +911,7 @@ the positive and negative flag and their common help text suffix. The key path defaults to the specified string, or an empty one, if omitted. When the option appears on the command line, the argument value is simply copied. -.. code-block:: +.. code-block:: text def isysroot : JoinedOrSeparate<["-"], "isysroot">, Flags<[CC1Option]>, MarshallingInfoString, [{"/"}]>; @@ -922,7 +922,7 @@ The key path defaults to an empty ``std::vector``. Values specified with each appearance of the option on the command line are appended to the vector. -.. code-block:: +.. code-block:: text def frewrite_map_file : Separate<["-"], "frewrite-map-file">, Flags<[CC1Option]>, MarshallingInfoStringVector>; @@ -933,7 +933,7 @@ The key path defaults to the specified integer value, or ``0`` if omitted. When the option appears on the command line, its value gets parsed by ``llvm::APInt`` and the result is assigned to the key path on success. -.. code-block:: +.. code-block:: text def mstack_probe_size : Joined<["-"], "mstack-probe-size=">, Flags<[CC1Option]>, MarshallingInfoStringInt, "4096">; @@ -950,7 +950,7 @@ same index is assigned to the key path (also correctly scoped). The number of comma-separated string values and elements of the array within ``NormalizedValues`` must match. -.. code-block:: +.. code-block:: text def mthread_model : Separate<["-"], "mthread-model">, Flags<[CC1Option]>, Values<"posix,single">, NormalizedValues<["POSIX", "Single"]>, @@ -970,7 +970,7 @@ annotation. Then, if any of the elements of ``ImpliedByAnyOf`` evaluate to true, the key path value is changed to the specified value or ``true`` if missing. Finally, the command line is parsed according to the primary annotation. -.. code-block:: +.. 
code-block:: text def fms_extensions : Flag<["-"], "fms-extensions">, Flags<[CC1Option]>, MarshallingInfoFlag>, @@ -981,7 +981,7 @@ Finally, the command line is parsed according to the primary annotation. The option is parsed only if the expression in ``ShouldParseIf`` evaluates to true. -.. code-block:: +.. code-block:: text def fopenmp_enable_irbuilder : Flag<["-"], "fopenmp-enable-irbuilder">, Flags<[CC1Option]>, MarshallingInfoFlag>, From 3b021fbdc04b627b8bc1f53835dc2f6aefddd7c2 Mon Sep 17 00:00:00 2001 From: KareemErgawy-TomTom Date: Tue, 2 Mar 2021 11:16:28 +0100 Subject: [PATCH 033/784] [MLIR][LinAlg] Detensorize internal function control flow. This patch continues detensorizing implementation by detensoring internal control flow in functions. In order to detensorize functions, all the non-entry block's arguments are detensored and branches between such blocks are properly updated to reflect the detensored types as well. Function entry block (signature) is left intact. This continues work towards handling github/google/iree#1159.
Reviewed By: silvas Differential Revision: https://reviews.llvm.org/D97148 --- mlir/include/mlir/Dialect/Linalg/Passes.td | 8 +- .../StandardOps/Transforms/FuncConversions.h | 37 ++++++- .../mlir/Transforms/DialectConversion.h | 6 ++ .../Dialect/Linalg/Transforms/Detensorize.cpp | 98 ++++++++++++++----- .../StandardOps/Transforms/FuncBufferize.cpp | 38 ++----- .../Transforms/FuncConversions.cpp | 58 ++++++++++- .../Transforms/Utils/DialectConversion.cpp | 25 ++++- .../Dialect/Linalg/detensorized_while.mlir | 53 ++++++++++ 8 files changed, 255 insertions(+), 68 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/detensorized_while.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index e51d08d3770d..ec54e93c988d 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -151,12 +151,16 @@ def LinalgDetensorize : FunctionPass<"linalg-detensorize"> { linalg-on-tensor op is checked to see whether *all* its operands can be detensored. If so, those operands are converted to their primitive counterparts and the linalg op is replaced by an equivalent op that takes - those new primitive values as operands. Therefore, the detensoring process - can be divided into 2 main logical phases: + those new primitive values as operands. Therefore, detensoring an op can be + divided into 2 main logical phases: 1. Detect/match an op that can be detensored. 2. Detensor the operands of the op and replace it with a primitive equivalent. + + In addition to detensoring individual ops, this pass detensors internal + control flow inside a function. All blocks except for the entry block are + detensored by converting their arguments whenever possible. 
}]; } diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/FuncConversions.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/FuncConversions.h index 55da3af88758..1a0308d96259 100644 --- a/mlir/include/mlir/Dialect/StandardOps/Transforms/FuncConversions.h +++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/FuncConversions.h @@ -16,7 +16,9 @@ namespace mlir { // Forward declarations. +class ConversionTarget; class MLIRContext; +class Operation; class OwningRewritePatternList; class TypeConverter; @@ -26,13 +28,38 @@ void populateCallOpTypeConversionPattern(OwningRewritePatternList &patterns, MLIRContext *ctx, TypeConverter &converter); -/// Add a pattern to the given pattern list to rewrite branch operations and -/// `return` to use operands that have been legalized by the conversion -/// framework. This can only be done if the branch operation implements the -/// BranchOpInterface. Only needed for partial conversions. -void populateBranchOpInterfaceAndReturnOpTypeConversionPattern( +/// Add a pattern to the given pattern list to rewrite branch operations to use +/// operands that have been legalized by the conversion framework. This can only +/// be done if the branch operation implements the BranchOpInterface. Only +/// needed for partial conversions. +void populateBranchOpInterfaceTypeConversionPattern( OwningRewritePatternList &patterns, MLIRContext *ctx, TypeConverter &converter); + +/// Return true if op is a BranchOpInterface op whose operands are all legal +/// according to converter. +bool isLegalForBranchOpInterfaceTypeConversionPattern(Operation *op, + TypeConverter &converter); + +/// Add a pattern to the given pattern list to rewrite `return` ops to use +/// operands that have been legalized by the conversion framework. +void populateReturnOpTypeConversionPattern(OwningRewritePatternList &patterns, + MLIRContext *ctx, + TypeConverter &converter); + +/// For ReturnLike ops (except `return`), return True. 
If op is a `return` && +/// returnOpAlwaysLegal is false, legalize op according to converter. Otherwise, +/// return false. +bool isLegalForReturnOpTypeConversionPattern(Operation *op, + TypeConverter &converter, + bool returnOpAlwaysLegal = false); + +/// Return true if op is neither BranchOpInterface nor ReturnLike. +/// +/// TODO Try to get rid of this function and invert the meaning of +/// `isLegalForBranchOpInterfaceTypeConversionPattern` and +/// `isLegalForReturnOpTypeConversionPattern`. +bool isNotBranchOpInterfaceOrReturnLikeOp(Operation *op); } // end namespace mlir #endif // MLIR_DIALECT_STANDARDOPS_TRANSFORMS_FUNCCONVERSIONS_H_ diff --git a/mlir/include/mlir/Transforms/DialectConversion.h b/mlir/include/mlir/Transforms/DialectConversion.h index 0c1161b9945c..fa1752d96268 100644 --- a/mlir/include/mlir/Transforms/DialectConversion.h +++ b/mlir/include/mlir/Transforms/DialectConversion.h @@ -474,6 +474,12 @@ class ConversionPatternRewriter final : public PatternRewriter { Region *region, TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion = nullptr); + /// Convert the types of block arguments within the given region except for + /// the entry region. This replaces each non-entry block with a new block + /// containing the updated signature. + LogicalResult convertNonEntryRegionTypes(Region *region, + TypeConverter &converter); + /// Replace all the uses of the block argument `from` with value `to`. 
void replaceUsesOfBlockArgument(BlockArgument from, Value to); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp index 2e2e3b94a34a..2d34468dae72 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp @@ -21,6 +21,20 @@ using namespace mlir; using namespace mlir::linalg; +static Value sourceMaterializationCallback(OpBuilder &builder, Type type, + ValueRange inputs, Location loc) { + assert(inputs.size() == 1); + // A detensored value is converted back by creating a new tensor from its + // element(s). + auto createNewTensorOp = builder.create( + loc, inputs[0].getType(), inputs[0]); + + // FromElementsOp results in a tensor<1xdtype>, we need to reshape that to + // a tensor instead. + return builder.create( + loc, type, createNewTensorOp, ArrayRef{}); +} + namespace { /// Defines the criteria a TensorType must follow in order to be considered /// "detensorable". @@ -64,6 +78,29 @@ class DetensorizeGenericOp : public OpConversionPattern { } }; +/// A conversion pattern for detensoring internal (non-entry) blocks within a +/// function. 
+struct FunctionNonEntryBlockConversion : public ConversionPattern { + FunctionNonEntryBlockConversion(StringRef functionLikeOpName, + MLIRContext *ctx, TypeConverter &converter) + : ConversionPattern(functionLikeOpName, /*benefit=*/1, converter, ctx) {} + + LogicalResult + matchAndRewrite(Operation *op, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override { + rewriter.startRootUpdate(op); + + if (failed(rewriter.convertNonEntryRegionTypes( + &mlir::impl::getFunctionBody(op), *typeConverter))) { + rewriter.cancelRootUpdate(op); + return failure(); + } + + rewriter.finalizeRootUpdate(op); + return success(); + } +}; + class DetensorizeTypeConverter : public TypeConverter { public: DetensorizeTypeConverter() { @@ -84,18 +121,8 @@ class DetensorizeTypeConverter : public TypeConverter { return builder.create(loc, inputs[0], ValueRange{}); }); - // A detensored value is converted back by creating a new tensor from its - // element(s). - addSourceMaterialization([](OpBuilder &builder, Type type, - ValueRange inputs, Location loc) -> Value { - auto createNewTensorOp = builder.create( - loc, inputs[0].getType(), inputs[0]); - - // FromElementsOp results in a tensor<1xdtype>, we need to reshape that to - // a tensor instead. - return builder.create( - loc, type, createNewTensorOp, ArrayRef{}); - }); + addSourceMaterialization(sourceMaterializationCallback); + addArgumentMaterialization(sourceMaterializationCallback); } }; @@ -139,22 +166,43 @@ struct LinalgDetensorize : public LinalgDetensorizeBase { OwningRewritePatternList patterns; ConversionTarget target(*context); - target.markUnknownOpDynamicallyLegal([](Operation *op) { return true; }); - target.addLegalDialect(); target.addDynamicallyLegalOp([&](GenericOp op) { - // If any of the operands or results cannot be detensored, the op is - // considered legal and won't be detensored. 
- return llvm::any_of( - op.getShapedOperandTypes(), [](ShapedType shapedType) { - assert(shapedType.isa()); - return !canBeDetensored(shapedType.cast()); - }); + // If any of the operands or results cannot be detensored (i.e. they are + // all legal according the DetensorizeTypeConverter), the op is considered + // legal and won't be detensored. + return llvm::any_of(op.getShapedOperandTypes(), + [&](ShapedType shapedType) { + return typeConverter.isLegal(shapedType); + }); }); - patterns.insert(typeConverter, context); + target.addDynamicallyLegalOp([&](FuncOp op) { + // A function is legal if all of its non-entry blocks are legal. We don't + // legalize the entry block (i.e. the function's signature) since + // detensoring can't happen along external calling convention boundaries, + // which we conservatively approximate as all function signatures. + return llvm::all_of(llvm::drop_begin(op.getBody(), 1), [&](Block &block) { + return typeConverter.isLegal(block.getArgumentTypes()); + }); + }); + + target.markUnknownOpDynamicallyLegal([&](Operation *op) { + return isNotBranchOpInterfaceOrReturnLikeOp(op) || + isLegalForBranchOpInterfaceTypeConversionPattern(op, + typeConverter) || + isLegalForReturnOpTypeConversionPattern( + op, typeConverter, /*returnOpAlwaysLegal*/ true); + }); - if (failed( - applyPartialConversion(getFunction(), target, std::move(patterns)))) + patterns.insert(typeConverter, context); + patterns.insert(FuncOp::getOperationName(), + context, typeConverter); + // Since non-entry block arguments get detensorized, we also need to update + // the control flow inside the function to reflect the correct types. 
+ populateBranchOpInterfaceTypeConversionPattern(patterns, context, + typeConverter); + + if (failed(applyFullConversion(getFunction(), target, std::move(patterns)))) signalPassFailure(); OwningRewritePatternList canonPatterns; @@ -162,8 +210,6 @@ struct LinalgDetensorize : public LinalgDetensorizeBase { if (failed(applyPatternsAndFoldGreedily(getFunction(), std::move(canonPatterns)))) signalPassFailure(); - - // TODO Properly handle control flow within function boundaries. } }; } // namespace diff --git a/mlir/lib/Dialect/StandardOps/Transforms/FuncBufferize.cpp b/mlir/lib/Dialect/StandardOps/Transforms/FuncBufferize.cpp index c63150f3ab87..4b5a2d632670 100644 --- a/mlir/lib/Dialect/StandardOps/Transforms/FuncBufferize.cpp +++ b/mlir/lib/Dialect/StandardOps/Transforms/FuncBufferize.cpp @@ -40,39 +40,17 @@ struct FuncBufferizePass : public FuncBufferizeBase { target.addDynamicallyLegalOp( [&](CallOp op) { return typeConverter.isLegal(op); }); - populateBranchOpInterfaceAndReturnOpTypeConversionPattern(patterns, context, - typeConverter); + populateBranchOpInterfaceTypeConversionPattern(patterns, context, + typeConverter); + populateReturnOpTypeConversionPattern(patterns, context, typeConverter); target.addLegalOp(); - target.addDynamicallyLegalOp( - [&](ReturnOp op) { return typeConverter.isLegal(op); }); - // Mark terminators as legal if they have the ReturnLike trait or - // implement the BranchOpInterface and have valid types. If they do not - // implement the trait or interface, mark them as illegal no matter what. + target.markUnknownOpDynamicallyLegal([&](Operation *op) { - // If it is not a terminator, ignore it. - if (!op->mightHaveTrait()) - return true; - // If it is not the last operation in the block, also ignore it. We do - // this to handle unknown operations, as well. - Block *block = op->getBlock(); - if (!block || &block->back() != op) - return true; - // ReturnLike operations have to be legalized with their parent. 
For - // return this is handled, for other ops they remain as is. - if (op->hasTrait()) - return true; - // All successor operands of branch like operations must be rewritten. - if (auto branchOp = dyn_cast(op)) { - for (int p = 0, e = op->getBlock()->getNumSuccessors(); p < e; ++p) { - auto successorOperands = branchOp.getSuccessorOperands(p); - if (successorOperands.hasValue() && - !typeConverter.isLegal(successorOperands.getValue().getTypes())) - return false; - } - return true; - } - return false; + return isNotBranchOpInterfaceOrReturnLikeOp(op) || + isLegalForBranchOpInterfaceTypeConversionPattern(op, + typeConverter) || + isLegalForReturnOpTypeConversionPattern(op, typeConverter); }); if (failed(applyFullConversion(module, target, std::move(patterns)))) diff --git a/mlir/lib/Dialect/StandardOps/Transforms/FuncConversions.cpp b/mlir/lib/Dialect/StandardOps/Transforms/FuncConversions.cpp index 07d7c59e192b..4ba2069817a3 100644 --- a/mlir/lib/Dialect/StandardOps/Transforms/FuncConversions.cpp +++ b/mlir/lib/Dialect/StandardOps/Transforms/FuncConversions.cpp @@ -102,9 +102,61 @@ class ReturnOpTypeConversion : public OpConversionPattern { }; } // end anonymous namespace -void mlir::populateBranchOpInterfaceAndReturnOpTypeConversionPattern( +void mlir::populateBranchOpInterfaceTypeConversionPattern( OwningRewritePatternList &patterns, MLIRContext *ctx, TypeConverter &typeConverter) { - patterns.insert( - typeConverter, ctx); + patterns.insert(typeConverter, ctx); +} + +bool mlir::isLegalForBranchOpInterfaceTypeConversionPattern( + Operation *op, TypeConverter &converter) { + // All successor operands of branch like operations must be rewritten. 
+ if (auto branchOp = dyn_cast(op)) { + for (int p = 0, e = op->getBlock()->getNumSuccessors(); p < e; ++p) { + auto successorOperands = branchOp.getSuccessorOperands(p); + if (successorOperands.hasValue() && + !converter.isLegal(successorOperands.getValue().getTypes())) + return false; + } + return true; + } + + return false; +} + +void mlir::populateReturnOpTypeConversionPattern( + OwningRewritePatternList &patterns, MLIRContext *ctx, + TypeConverter &typeConverter) { + patterns.insert(typeConverter, ctx); +} + +bool mlir::isLegalForReturnOpTypeConversionPattern(Operation *op, + TypeConverter &converter, + bool returnOpAlwaysLegal) { + // If this is a `return` and the user pass wants to convert/transform across + // function boundaries, then `converter` is invoked to check whether the + // `return` op is legal. + if (dyn_cast(op) && !returnOpAlwaysLegal) + return converter.isLegal(op); + + // ReturnLike operations have to be legalized with their parent. For + // return this is handled, for other ops they remain as is. + if (op->hasTrait()) + return true; + + return false; +} + +bool mlir::isNotBranchOpInterfaceOrReturnLikeOp(Operation *op) { + // If it is not a terminator, ignore it. + if (!op->mightHaveTrait()) + return true; + + // If it is not the last operation in the block, also ignore it. We do + // this to handle unknown operations, as well. + Block *block = op->getBlock(); + if (!block || &block->back() != op) + return true; + + return false; +} diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 002843c27c6f..ae5b566f32d1 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -749,6 +749,10 @@ struct ConversionPatternRewriterImpl { convertRegionTypes(Region *region, TypeConverter &converter, TypeConverter::SignatureConversion *entryConversion); + /// Convert the types of non-entry block arguments within the given region.
+ LogicalResult convertNonEntryRegionTypes(Region *region, + TypeConverter &converter); + //===--------------------------------------------------------------------===// // Rewriter Notification Hooks //===--------------------------------------------------------------------===// @@ -1150,13 +1154,25 @@ FailureOr ConversionPatternRewriterImpl::convertRegionTypes( if (region->empty()) return nullptr; - // Convert the arguments of each block within the region. + if (failed(convertNonEntryRegionTypes(region, converter))) + return failure(); + FailureOr newEntry = convertBlockSignature(®ion->front(), converter, entryConversion); + return newEntry; +} + +LogicalResult ConversionPatternRewriterImpl::convertNonEntryRegionTypes( + Region *region, TypeConverter &converter) { + argConverter.setConverter(region, &converter); + if (region->empty()) + return success(); + + // Convert the arguments of each block within the region. for (Block &block : llvm::make_early_inc_range(llvm::drop_begin(*region, 1))) if (failed(convertBlockSignature(&block, converter))) return failure(); - return newEntry; + return success(); } //===----------------------------------------------------------------------===// @@ -1323,6 +1339,11 @@ FailureOr ConversionPatternRewriter::convertRegionTypes( return impl->convertRegionTypes(region, converter, entryConversion); } +LogicalResult ConversionPatternRewriter::convertNonEntryRegionTypes( + Region *region, TypeConverter &converter) { + return impl->convertNonEntryRegionTypes(region, converter); +} + void ConversionPatternRewriter::replaceUsesOfBlockArgument(BlockArgument from, Value to) { LLVM_DEBUG({ diff --git a/mlir/test/Dialect/Linalg/detensorized_while.mlir b/mlir/test/Dialect/Linalg/detensorized_while.mlir new file mode 100644 index 000000000000..a227e753006c --- /dev/null +++ b/mlir/test/Dialect/Linalg/detensorized_while.mlir @@ -0,0 +1,53 @@ +// RUN: mlir-opt %s -linalg-detensorize | FileCheck %s + +#map0 = affine_map<() -> ()> + +#attrs = { + 
indexing_maps = [#map0, #map0, #map0], + iterator_types = [] +} + +func @main(%farg0: tensor, %farg1: tensor) -> tensor attributes {} { + br ^bb1(%farg0 : tensor) + +^bb1(%0: tensor): // 2 preds: ^bb0, ^bb2 + %1 = linalg.init_tensor [] : tensor + %2 = linalg.generic #attrs + ins(%0, %farg1 : tensor, tensor) + outs(%1 : tensor) { + ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): // no predecessors + %8 = cmpi slt, %arg0, %arg1 : i32 + linalg.yield %8 : i1 + } -> tensor + %3 = tensor.extract %2[] : tensor + cond_br %3, ^bb2(%0 : tensor), ^bb3(%0 : tensor) + +^bb2(%4: tensor): // pred: ^bb1 + %5 = linalg.init_tensor [] : tensor + %6 = linalg.generic #attrs + ins(%4, %4 : tensor, tensor) + outs(%5 : tensor) { + ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): // no predecessors + %8 = addi %arg0, %arg1 : i32 + linalg.yield %8 : i32 + } -> tensor + br ^bb1(%6 : tensor) + +^bb3(%7: tensor): // pred: ^bb1 + return %7 : tensor +} + +// CHECK-LABEL: func @main +// CHECK-SAME: (%{{.*}}: tensor, %{{.*}}: tensor) +// CHECK: tensor.extract {{.*}} +// CHECK: br ^[[bb1:.*]](%{{.*}} : i32) +// CHECK: ^[[bb1]](%{{.*}}: i32) +// CHECK: cmpi slt, {{.*}} +// CHECK: cond_br {{.*}}, ^[[bb2:.*]](%{{.*}} : i32), ^[[bb3:.*]](%{{.*}} : i32) +// CHECK: ^[[bb2]](%{{.*}}: i32) +// CHECK: addi {{.*}} +// CHECK: br ^[[bb1]](%{{.*}} : i32) +// CHECK: ^[[bb3]](%{{.*}}: i32) +// CHECK: tensor.from_elements {{.*}} +// CHECK: linalg.tensor_reshape {{.*}} +// CHECK: return %{{.*}} : tensor From 0cb9d8acbccb0f3ef3659d0efa1845845f1fc982 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 2 Mar 2021 10:43:33 +0000 Subject: [PATCH 034/784] [LV] Add test cases that require a larger number of RT checks. Precommit tests cases for D75981. 
--- .../runtime-check-size-based-threshold.ll | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll new file mode 100644 index 000000000000..ccb57dee6cbf --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll @@ -0,0 +1,111 @@ +; RUN: opt -loop-vectorize -mtriple=arm64-apple-iphoneos -S %s | FileCheck %s + +; Tests for loops with large numbers of runtime checks. Check that loops are +; vectorized, if the loop trip counts are large and the impact of the runtime +; checks is very small compared to the expected loop runtimes. + + +; The trip count in the loop in this function is too small to warrant large runtime checks. +; CHECK-LABEL: define {{.*}} @test_tc_too_small +; CHECK-NOT: vector.memcheck +; CHECK-NOT: vector.body +define void @test_tc_too_small(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) { +entry: + br label %loop + +loop: ; preds = %bb54, %bb37 + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv + %lv.1 = load i16, i16* %gep.1, align 2 + %ext.1 = sext i16 %lv.1 to i32 + %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv + %lv.2 = load i16, i16* %gep.2, align 2 + %ext.2 = sext i16 %lv.2 to i32 + %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1 + %lv.3 = load i16, i16* %gep.off.1, align 2 + %ext.3 = sext i16 %lv.3 to i32 + %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2 + %lv.4 = load i16, i16* %gep.off.2, align 2 + %ext.4 = sext i16 %lv.4 to i32 + %tmp62 = mul nsw i32 %ext.2, 11 + %tmp66 = mul nsw i32 %ext.3, -4 + %tmp70 = add nsw i32 %tmp62, 4 + %tmp71 = add nsw i32 %tmp70, %tmp66 + %tmp72 = add nsw i32 %tmp71,
%ext.4 + %tmp73 = lshr i32 %tmp72, 3 + %tmp74 = add nsw i32 %tmp73, %ext.1 + %tmp75 = lshr i32 %tmp74, 1 + %tmp76 = mul nsw i32 %ext.2, 5 + %tmp77 = shl nsw i32 %ext.3, 2 + %tmp78 = add nsw i32 %tmp76, 4 + %tmp79 = add nsw i32 %tmp78, %tmp77 + %tmp80 = sub nsw i32 %tmp79, %ext.4 + %tmp81 = lshr i32 %tmp80, 3 + %tmp82 = sub nsw i32 %tmp81, %ext.1 + %tmp83 = lshr i32 %tmp82, 1 + %trunc.1 = trunc i32 %tmp75 to i16 + %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv + store i16 %trunc.1, i16* %gep.3, align 2 + %trunc.2 = trunc i32 %tmp83 to i16 + %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv + store i16 %trunc.2, i16* %gep.4, align 2 + %iv.next = add nuw nsw i64 %iv, 1 + %cmp = icmp ult i64 %iv, 50 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + +; FIXME +; The trip count in the loop in this function is high enough to warrant large runtime checks. +; CHECK-LABEL: define {{.*}} @test_tc_big_enough +; CHECK-NOT: vector.memcheck +; CHECK-NOT: vector.body +define void @test_tc_big_enough(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) { +entry: + br label %loop + +loop: ; preds = %bb54, %bb37 + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %gep.1 = getelementptr inbounds i16, i16* %ptr.1, i64 %iv + %lv.1 = load i16, i16* %gep.1, align 2 + %ext.1 = sext i16 %lv.1 to i32 + %gep.2 = getelementptr inbounds i16, i16* %ptr.2, i64 %iv + %lv.2 = load i16, i16* %gep.2, align 2 + %ext.2 = sext i16 %lv.2 to i32 + %gep.off.1 = getelementptr inbounds i16, i16* %gep.2, i64 %off.1 + %lv.3 = load i16, i16* %gep.off.1, align 2 + %ext.3 = sext i16 %lv.3 to i32 + %gep.off.2 = getelementptr inbounds i16, i16* %gep.2, i64 %off.2 + %lv.4 = load i16, i16* %gep.off.2, align 2 + %ext.4 = sext i16 %lv.4 to i32 + %tmp62 = mul nsw i32 %ext.2, 11 + %tmp66 = mul nsw i32 %ext.3, -4 + %tmp70 = add nsw i32 %tmp62, 4 + %tmp71 = add nsw i32 %tmp70, %tmp66 + %tmp72 = add nsw i32 %tmp71, %ext.4 + %tmp73 = lshr i32 %tmp72, 3 + %tmp74 = add nsw
i32 %tmp73, %ext.1 + %tmp75 = lshr i32 %tmp74, 1 + %tmp76 = mul nsw i32 %ext.2, 5 + %tmp77 = shl nsw i32 %ext.3, 2 + %tmp78 = add nsw i32 %tmp76, 4 + %tmp79 = add nsw i32 %tmp78, %tmp77 + %tmp80 = sub nsw i32 %tmp79, %ext.4 + %tmp81 = lshr i32 %tmp80, 3 + %tmp82 = sub nsw i32 %tmp81, %ext.1 + %tmp83 = lshr i32 %tmp82, 1 + %trunc.1 = trunc i32 %tmp75 to i16 + %gep.3 = getelementptr inbounds i16, i16* %ptr.3, i64 %iv + store i16 %trunc.1, i16* %gep.3, align 2 + %trunc.2 = trunc i32 %tmp83 to i16 + %gep.4 = getelementptr inbounds i16, i16* %ptr.4, i64 %iv + store i16 %trunc.2, i16* %gep.4, align 2 + %iv.next = add nuw nsw i64 %iv, 1 + %cmp = icmp ult i64 %iv, 500 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} From 4545813b17e3079af424764a7aa44b629e04b7c1 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Tue, 2 Mar 2021 11:53:40 +0100 Subject: [PATCH 035/784] [clang][cli] NFC: Rename marshalling multiclass The new name drops `String` from `MarshallingInfoStringInt`, which follows the naming convention of other marshalling multiclasses. --- clang/docs/InternalsManual.rst | 2 +- clang/include/clang/Driver/Options.td | 94 +++++++++++++-------------- llvm/include/llvm/Option/OptParser.td | 2 +- 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst index bc574f4ae2d9..4bae84295ec8 100644 --- a/clang/docs/InternalsManual.rst +++ b/clang/docs/InternalsManual.rst @@ -936,7 +936,7 @@ and the result is assigned to the key path on success. .. 
code-block:: text def mstack_probe_size : Joined<["-"], "mstack-probe-size=">, Flags<[CC1Option]>, - MarshallingInfoStringInt, "4096">; + MarshallingInfoInt, "4096">; **Enumeration** diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 20dcac5d08ad..72213ed51ce2 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -934,7 +934,7 @@ defm gpu_exclude_wrong_side_overloads : BoolFOption<"gpu-exclude-wrong-side-over def gpu_max_threads_per_block_EQ : Joined<["--"], "gpu-max-threads-per-block=">, Flags<[CC1Option]>, HelpText<"Default max threads per block for kernel launch bounds for HIP">, - MarshallingInfoStringInt, "1024">, + MarshallingInfoInt, "1024">, ShouldParseIf; def gpu_instrument_lib_EQ : Joined<["--"], "gpu-instrument-lib=">, HelpText<"Instrument device library for HIP, which is a LLVM bitcode containing " @@ -1008,7 +1008,7 @@ def exported__symbols__list : Separate<["-"], "exported_symbols_list">; def e : JoinedOrSeparate<["-"], "e">, Flags<[LinkerInput]>, Group; def fmax_tokens_EQ : Joined<["-"], "fmax-tokens=">, Group, Flags<[CC1Option]>, HelpText<"Max total number of preprocessed tokens for -Wmax-tokens.">, - MarshallingInfoStringInt>; + MarshallingInfoInt>; def fPIC : Flag<["-"], "fPIC">, Group; def fno_PIC : Flag<["-"], "fno-PIC">, Group; def fPIE : Flag<["-"], "fPIE">, Group; @@ -1480,7 +1480,7 @@ def : Joined<["-"], "fsanitize-coverage-blacklist=">, def fsanitize_memory_track_origins_EQ : Joined<["-"], "fsanitize-memory-track-origins=">, Group, HelpText<"Enable origins tracking in MemorySanitizer">, - MarshallingInfoStringInt>; + MarshallingInfoInt>; def fsanitize_memory_track_origins : Flag<["-"], "fsanitize-memory-track-origins">, Group, HelpText<"Enable origins tracking in MemorySanitizer">; @@ -1496,7 +1496,7 @@ defm sanitize_memory_use_after_dtor : BoolOption<"f", "sanitize-memory-use-after def fsanitize_address_field_padding : Joined<["-"], 
"fsanitize-address-field-padding=">, Group, HelpText<"Level of field padding for AddressSanitizer">, - MarshallingInfoStringInt>; + MarshallingInfoInt>; defm sanitize_address_use_after_scope : BoolOption<"f", "sanitize-address-use-after-scope", CodeGenOpts<"SanitizeAddressUseAfterScope">, DefaultFalse, PosFlag, NegFlag, @@ -1621,7 +1621,7 @@ def fsanitize_undefined_strip_path_components_EQ : Joined<["-"], "fsanitize-unde Group, MetaVarName<"">, HelpText<"Strip (or keep only, if negative) a given number of path components " "when emitting check metadata.">, - MarshallingInfoStringInt, "0", "int">; + MarshallingInfoInt, "0", "int">; } // end -f[no-]sanitize* flags @@ -1775,7 +1775,7 @@ def fxray_instruction_threshold_EQ : JoinedOrSeparate<["-"], "fxray-instruction-threshold=">, Group, Flags<[CC1Option]>, HelpText<"Sets the minimum function size to instrument with XRay">, - MarshallingInfoStringInt, "200">; + MarshallingInfoInt, "200">; def fxray_instruction_threshold_ : JoinedOrSeparate<["-"], "fxray-instruction-threshold">, Group, Flags<[CC1Option]>; @@ -1839,13 +1839,13 @@ def fxray_function_groups : Joined<["-"], "fxray-function-groups=">, Group, Flags<[CC1Option]>, HelpText<"Only instrument 1 of N groups">, - MarshallingInfoStringInt, "1">; + MarshallingInfoInt, "1">; def fxray_selected_function_group : Joined<["-"], "fxray-selected-function-group=">, Group, Flags<[CC1Option]>, HelpText<"When using -fxray-function-groups, select which group of functions to instrument. 
Valid range is 0 to fxray-function-groups - 1">, - MarshallingInfoStringInt, "0">; + MarshallingInfoInt, "0">; defm fine_grained_bitfield_accesses : BoolOption<"f", "fine-grained-bitfield-accesses", @@ -1900,7 +1900,7 @@ defm merge_all_constants : BoolFOption<"merge-all-constants", BothFlags<[], " merging of constants">>; def fmessage_length_EQ : Joined<["-"], "fmessage-length=">, Group, Flags<[CC1Option]>, HelpText<"Format message diagnostics so that they fit within N columns">, - MarshallingInfoStringInt>; + MarshallingInfoInt>; def fms_compatibility : Flag<["-"], "fms-compatibility">, Group, Flags<[CC1Option, CoreOption]>, HelpText<"Enable full Microsoft Visual C++ compatibility">, MarshallingInfoFlag>; @@ -1962,15 +1962,15 @@ defm prebuilt_implicit_modules : BoolFOption<"prebuilt-implicit-modules", def fmodules_prune_interval : Joined<["-"], "fmodules-prune-interval=">, Group, Flags<[CC1Option]>, MetaVarName<"">, HelpText<"Specify the interval (in seconds) between attempts to prune the module cache">, - MarshallingInfoStringInt, "7 * 24 * 60 * 60">; + MarshallingInfoInt, "7 * 24 * 60 * 60">; def fmodules_prune_after : Joined<["-"], "fmodules-prune-after=">, Group, Flags<[CC1Option]>, MetaVarName<"">, HelpText<"Specify the interval (in seconds) after which a module file will be considered unused">, - MarshallingInfoStringInt, "31 * 24 * 60 * 60">; + MarshallingInfoInt, "31 * 24 * 60 * 60">; def fbuild_session_timestamp : Joined<["-"], "fbuild-session-timestamp=">, Group, Flags<[CC1Option]>, MetaVarName<"