Skip to content

[X86] Support EGPR for inline assembly. #92338

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -6280,6 +6280,8 @@ def mno_apx_features_EQ : CommaJoined<["-"], "mno-apx-features=">, Group<m_x86_F
// For stability, we only add a feature to -mapxf after it passes the validation of llvm-test-suite && cpu2017 on Intel SDE.
def mapxf : Flag<["-"], "mapxf">, Alias<mapx_features_EQ>, AliasArgs<["egpr","push2pop2","ppx","ndd","ccmp","nf"]>;
def mno_apxf : Flag<["-"], "mno-apxf">, Alias<mno_apx_features_EQ>, AliasArgs<["egpr","push2pop2","ppx","ndd","ccmp","nf"]>;
// Forwarded by the driver as the target feature "+inline-asm-use-gpr32"
// (see x86::getX86TargetFeatures in Arch/X86.cpp); together with the egpr
// feature it allows inline-asm register constraints to use the APX GPR32 set.
def mapx_inline_asm_use_gpr32 : Flag<["-"], "mapx-inline-asm-use-gpr32">, Group<m_Group>,
HelpText<"Enable use of GPR32 in inline assembly for APX">;
} // let Flags = [TargetSpecific]

// VE feature flags
Expand Down
30 changes: 30 additions & 0 deletions clang/lib/Basic/Targets/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasFullBFloat16 = true;
} else if (Feature == "+egpr") {
HasEGPR = true;
} else if (Feature == "+inline-asm-use-gpr32") {
HasInlineAsmUseGPR32 = true;
} else if (Feature == "+push2pop2") {
HasPush2Pop2 = true;
} else if (Feature == "+ppx") {
Expand Down Expand Up @@ -963,6 +965,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
// Condition here is aligned with the feature set of mapxf in Options.td
if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD && HasCCMP && HasNF)
Builder.defineMacro("__APX_F__");
if (HasEGPR && HasInlineAsmUseGPR32)
Builder.defineMacro("__APX_INLINE_ASM_USE_GPR32__");

// Each case falls through to the previous one here.
switch (SSELevel) {
Expand Down Expand Up @@ -1478,6 +1482,18 @@ bool X86TargetInfo::validateAsmConstraint(
case 'C': // SSE floating point constant.
case 'G': // x87 floating point constant.
return true;
case 'j':
Name++;
switch (*Name) {
default:
return false;
case 'r':
Info.setAllowsRegister();
return true;
case 'R':
Info.setAllowsRegister();
return true;
}
case '@':
// CC condition changes.
if (auto Len = matchAsmCCConstraint(Name)) {
Expand Down Expand Up @@ -1749,6 +1765,20 @@ std::string X86TargetInfo::convertConstraint(const char *&Constraint) const {
// to the next constraint.
return std::string("^") + std::string(Constraint++, 2);
}
case 'j':
switch (Constraint[1]) {
default:
// Break from inner switch and fall through (copy single char),
// continue parsing after copying the current constraint into
// the return string.
break;
case 'r':
case 'R':
// "^" hints llvm that this is a 2 letter constraint.
// "Constraint++" is used to promote the string iterator
// to the next constraint.
return std::string("^") + std::string(Constraint++, 2);
}
[[fallthrough]];
default:
return std::string(1, *Constraint);
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Basic/Targets/X86.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasCCMP = false;
bool HasNF = false;
bool HasCF = false;
bool HasInlineAsmUseGPR32 = false;

protected:
llvm::X86::CPUKind CPU = llvm::X86::CK_None;
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Arch/X86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,4 +310,6 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
Features.push_back("+prefer-no-gather");
if (Args.hasArg(options::OPT_mno_scatter))
Features.push_back("+prefer-no-scatter");
if (Args.hasArg(options::OPT_mapx_inline_asm_use_gpr32))
Features.push_back("+inline-asm-use-gpr32");
}
3 changes: 3 additions & 0 deletions clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/// Tests -mapx-inline-asm-use-gpr32
// RUN: %clang -target x86_64-unknown-linux-gnu -c -mapx-inline-asm-use-gpr32 -### %s 2>&1 | FileCheck --check-prefix=GPR32 %s
// GPR32: "-target-feature" "+inline-asm-use-gpr32"
5 changes: 5 additions & 0 deletions clang/test/Preprocessor/x86_target_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -763,3 +763,8 @@
// NF: #define __NF__ 1
// PPX: #define __PPX__ 1
// PUSH2POP2: #define __PUSH2POP2__ 1

// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=NOUSEGPR32 %s
// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=egpr -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=USEGPR32 %s
// NOUSEGPR32-NOT: #define __APX_INLINE_ASM_USE_GPR32__ 1
// USEGPR32: #define __APX_INLINE_ASM_USE_GPR32__ 1
10 changes: 8 additions & 2 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5428,10 +5428,12 @@ X86:
- ``Z``: An immediate 32-bit unsigned integer.
- ``q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit
``l`` integer register. On X86-32, this is the ``a``, ``b``, ``c``, and ``d``
registers, and on X86-64, it is all of the integer registers.
registers, and on X86-64, it is all of the integer registers. When the
`egpr` and `inline-asm-use-gpr32` features are both on, it is extended to gpr32.
- ``Q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit
``h`` integer register. This is the ``a``, ``b``, ``c``, and ``d`` registers.
- ``r`` or ``l``: An 8, 16, 32, or 64-bit integer register.
- ``r`` or ``l``: An 8, 16, 32, or 64-bit integer register. When the
`egpr` and `inline-asm-use-gpr32` features are both on, it is extended to gpr32.
- ``R``: An 8, 16, 32, or 64-bit "legacy" integer register -- one which has
existed since i386, and can be accessed without the REX prefix.
- ``f``: A 32, 64, or 80-bit '387 FPU stack pseudo-register.
Expand All @@ -5452,6 +5454,10 @@ X86:
operand will get allocated only to RAX -- if two 32-bit operands are needed,
you're better off splitting it yourself, before passing it to the asm
statement.
- ``jr``: An 8, 16, 32, or 64-bit integer register in gpr16. It is never
extended to gpr32, even when `egpr` or `inline-asm-use-gpr32` is on.
- ``jR``: An 8, 16, 32, or 64-bit integer register in gpr32 when the `egpr`
feature is on. Otherwise, same as ``r``.

XCore:

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/X86/X86.td
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,9 @@ def FeatureNF : SubtargetFeature<"nf", "HasNF", "true",
"Support status flags update suppression">;
def FeatureCF : SubtargetFeature<"cf", "HasCF", "true",
"Support conditional faulting">;
// Backend feature behind -mapx-inline-asm-use-gpr32. Only effective together
// with the egpr feature: both must be set before inline-asm register
// constraints are widened to GPR32 (see useEGPRInlineAsm in
// X86ISelLowering.cpp).
def FeatureUseGPR32InInlineAsm
: SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
"Enable use of GPR32 in inline assembly for APX">;

// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
Expand Down
83 changes: 75 additions & 8 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57840,6 +57840,15 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
case '2':
return C_RegisterClass;
}
break;
case 'j':
switch (Constraint[1]) {
default:
break;
case 'r':
case 'R':
return C_RegisterClass;
}
}
} else if (parseConstraintCode(Constraint) != X86::COND_INVALID)
return C_Other;
Expand Down Expand Up @@ -57919,6 +57928,19 @@ X86TargetLowering::getSingleConstraintMatchWeight(
break;
}
break;
case 'j':
if (StringRef(Constraint).size() != 2)
break;
switch (Constraint[1]) {
default:
return CW_Invalid;
case 'r':
case 'R':
if (CallOperandVal->getType()->isIntegerTy())
Wt = CW_SpecificReg;
break;
}
break;
case 'v':
if ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
Wt = CW_Register;
Expand Down Expand Up @@ -58218,6 +58240,10 @@ static bool isVKClass(const TargetRegisterClass &RC) {
RC.hasSuperClassEq(&X86::VK64RegClass);
}

// Whether inline asm may be allocated the extended GPRs: requires both the
// egpr subtarget feature and the explicit inline-asm-use-gpr32 opt-in.
// Callers use this to widen the *_NOREX2 register classes to the full GPR
// classes when satisfying 'r'/'q'/'l' constraints.
static bool useEGPRInlineAsm(const X86Subtarget &Subtarget) {
  // Either bit alone is not enough; stay on the legacy (NOREX2) classes.
  if (!Subtarget.hasEGPR())
    return false;
  return Subtarget.useInlineAsmGPR32();
}

std::pair<unsigned, const TargetRegisterClass *>
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
Expand Down Expand Up @@ -58258,13 +58284,21 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
? &X86::GR8RegClass
: &X86::GR8_NOREX2RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
? &X86::GR16RegClass
: &X86::GR16_NOREX2RegClass);
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
? &X86::GR32RegClass
: &X86::GR32_NOREX2RegClass);
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
? &X86::GR64RegClass
: &X86::GR64_NOREX2RegClass);
break;
}
[[fallthrough]];
Expand All @@ -58283,14 +58317,22 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
? &X86::GR8RegClass
: &X86::GR8_NOREX2RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
? &X86::GR16RegClass
: &X86::GR16_NOREX2RegClass);
if (VT == MVT::i32 || VT == MVT::f32 ||
(!VT.isVector() && !Subtarget.is64Bit()))
return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
? &X86::GR32RegClass
: &X86::GR32_NOREX2RegClass);
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
? &X86::GR64RegClass
: &X86::GR64_NOREX2RegClass);
break;
case 'R': // LEGACY_REGS
if (VT == MVT::i8 || VT == MVT::i1)
Expand Down Expand Up @@ -58514,6 +58556,31 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
break;
}
} else if (Constraint.size() == 2 && Constraint[0] == 'j') {
switch (Constraint[1]) {
default:
break;
case 'r':
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
break;
case 'R':
if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, &X86::GR8RegClass);
if (VT == MVT::i16)
return std::make_pair(0U, &X86::GR16RegClass);
if (VT == MVT::i32 || VT == MVT::f32)
return std::make_pair(0U, &X86::GR32RegClass);
if (VT != MVT::f80 && !VT.isVector())
return std::make_pair(0U, &X86::GR64RegClass);
break;
}
}

if (parseConstraintCode(Constraint) != X86::COND_INVALID)
Expand Down
17 changes: 17 additions & 0 deletions llvm/test/CodeGen/X86/apx/asm-constraint-jR.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: not llc -mtriple=x86_64 %s 2>&1 | FileCheck %s --check-prefix=ERR
; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s
; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
; RUN: not llc -mtriple=x86_64 -mattr=+inline-asm-use-gpr32 %s 2>&1 | FileCheck %s --check-prefix=ERR

; ERR: error: inline assembly requires more registers than available

; "jR" requests a register from the full GPR set when egpr is available.
; The asm clobbers every legacy GPR (rax..r15), so the operand must land in
; an extended register (r16 in the CHECK line below) -- or the ERR diagnostic
; fires when egpr is absent (see the RUN lines above).
define void @constraint_jR_test() nounwind {
; CHECK-LABEL: constraint_jR_test:
; CHECK: addq %r16, %rax
entry:
%reg = alloca i64, align 8
%0 = load i64, ptr %reg, align 8
; "^jR" marks this as a two-letter constraint for the backend parser.
call void asm sideeffect "add $0, %rax", "^jR,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
ret void
}
28 changes: 28 additions & 0 deletions llvm/test/CodeGen/X86/apx/asm-constraint-jr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: not llc -mtriple=x86_64 < %s >%t1 2>%t2
; RUN: FileCheck %s <%t1
; RUN: FileCheck %s <%t2 --check-prefix=ERR
; RUN: not llc -mattr=+egpr -mtriple=x86_64 < %s >%t1 2>%t2
; RUN: FileCheck %s <%t1
; RUN: FileCheck %s <%t2 --check-prefix=ERR
; RUN: not llc -mattr=+egpr,+inline-asm-use-gpr32 -mtriple=x86_64 < %s >%t1 2>%t2
; RUN: FileCheck %s <%t1
; RUN: FileCheck %s <%t2 --check-prefix=ERR

; "jr" keeps the operand in the legacy gpr16 set under every RUN config; with
; rax..rsi clobbered, the allocator picks r8 regardless of egpr.
; CHECK: addq %r8, %rax
define void @constraint_jr_test() nounwind {
entry:
%reg = alloca i64, align 8
%0 = load i64, ptr %reg, align 8
; "^jr" marks this as a two-letter constraint for the backend parser.
call void asm sideeffect "add $0, %rax", "^jr,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{dirflag},~{fpsr},~{flags}"(i64 %0)
ret void
}

; With every legacy GPR (rax..r15) clobbered, "jr" has nothing left to
; allocate -- it is never widened to r16-r31 -- so all three RUN lines
; diagnose the same failure.
; ERR: error: inline assembly requires more registers than available
define void @constraint_jr_test_err() nounwind {
entry:
%reg = alloca i64, align 8
%0 = load i64, ptr %reg, align 8
call void asm sideeffect "add $0, %rax", "^jr,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
ret void
}
19 changes: 12 additions & 7 deletions llvm/test/CodeGen/X86/apx/asm-constraint.ll
Original file line number Diff line number Diff line change
@@ -1,21 +1,26 @@
; Check r16-r31 can not be used with 'q','r','l' constraint for backward compatibility.
; RUN: not llc < %s -mtriple=x86_64-unknown-unknown -mattr=+egpr 2>&1 | FileCheck %s
; RUN: not llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefix=ERR
; RUN: not llc -mtriple=x86_64 -mattr=+egpr < %s 2>&1 | FileCheck %s --check-prefix=ERR
; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s

define void @q() {
; CHECK: error: inline assembly requires more registers than available
%a = call i32 asm sideeffect "movq %rax, $0", "=q,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; ERR: error: inline assembly requires more registers than available
; CHECK: movq %rax, %r16
%a = call i64 asm sideeffect "movq %rax, $0", "=q,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
ret void
}

define void @r() {
; CHECK: error: inline assembly requires more registers than available
%a = call i32 asm sideeffect "movq %rax, $0", "=r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; ERR: error: inline assembly requires more registers than available
; CHECK: movq %rax, %r16
%a = call i64 asm sideeffect "movq %rax, $0", "=r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
ret void
}

define void @l() {
; CHECK: error: inline assembly requires more registers than available
%a = call i32 asm sideeffect "movq %rax, $0", "=l,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
; ERR: error: inline assembly requires more registers than available
; CHECK: movq %rax, %r16
%a = call i64 asm sideeffect "movq %rax, $0", "=l,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
ret void
}

Loading