Skip to content

Commit 73f4c25

Browse files
authored
[X86] Support EGPR for inline assembly. (#92338)
"jR": explicitly enables EGPR. "r", "l", "q": enable/disable EGPR with/without -mapx-inline-asm-use-gpr32. "jr": explicitly enables GPR with -mapx-inline-asm-use-gpr32. -mapx-inline-asm-use-gpr32 will also define a new macro: `__APX_INLINE_ASM_USE_GPR32__`. GCC patches: https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631183.html https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631186.html [[PATCH v2] x86: Define __APX_INLINE_ASM_USE_GPR32__ (gnu.org)](https://gcc.gnu.org/pipermail/gcc-patches/2024-April/649003.html) Reference: https://gcc.godbolt.org/z/nPPvbY6r4
1 parent 1ac592c commit 73f4c25

File tree

12 files changed

+186
-17
lines changed

12 files changed

+186
-17
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6280,6 +6280,8 @@ def mno_apx_features_EQ : CommaJoined<["-"], "mno-apx-features=">, Group<m_x86_F
62806280
// For stability, we only add a feature to -mapxf after it passes the validation of llvm-test-suite && cpu2017 on Intel SDE.
62816281
def mapxf : Flag<["-"], "mapxf">, Alias<mapx_features_EQ>, AliasArgs<["egpr","push2pop2","ppx","ndd","ccmp","nf"]>;
62826282
def mno_apxf : Flag<["-"], "mno-apxf">, Alias<mno_apx_features_EQ>, AliasArgs<["egpr","push2pop2","ppx","ndd","ccmp","nf"]>;
6283+
def mapx_inline_asm_use_gpr32 : Flag<["-"], "mapx-inline-asm-use-gpr32">, Group<m_Group>,
6284+
HelpText<"Enable use of GPR32 in inline assembly for APX">;
62836285
} // let Flags = [TargetSpecific]
62846286

62856287
// VE feature flags

clang/lib/Basic/Targets/X86.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
441441
HasFullBFloat16 = true;
442442
} else if (Feature == "+egpr") {
443443
HasEGPR = true;
444+
} else if (Feature == "+inline-asm-use-gpr32") {
445+
HasInlineAsmUseGPR32 = true;
444446
} else if (Feature == "+push2pop2") {
445447
HasPush2Pop2 = true;
446448
} else if (Feature == "+ppx") {
@@ -963,6 +965,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
963965
// Condition here is aligned with the feature set of mapxf in Options.td
964966
if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD && HasCCMP && HasNF)
965967
Builder.defineMacro("__APX_F__");
968+
if (HasEGPR && HasInlineAsmUseGPR32)
969+
Builder.defineMacro("__APX_INLINE_ASM_USE_GPR32__");
966970

967971
// Each case falls through to the previous one here.
968972
switch (SSELevel) {
@@ -1478,6 +1482,18 @@ bool X86TargetInfo::validateAsmConstraint(
14781482
case 'C': // SSE floating point constant.
14791483
case 'G': // x87 floating point constant.
14801484
return true;
1485+
case 'j':
1486+
Name++;
1487+
switch (*Name) {
1488+
default:
1489+
return false;
1490+
case 'r':
1491+
Info.setAllowsRegister();
1492+
return true;
1493+
case 'R':
1494+
Info.setAllowsRegister();
1495+
return true;
1496+
}
14811497
case '@':
14821498
// CC condition changes.
14831499
if (auto Len = matchAsmCCConstraint(Name)) {
@@ -1749,6 +1765,20 @@ std::string X86TargetInfo::convertConstraint(const char *&Constraint) const {
17491765
// to the next constraint.
17501766
return std::string("^") + std::string(Constraint++, 2);
17511767
}
1768+
case 'j':
1769+
switch (Constraint[1]) {
1770+
default:
1771+
// Break from inner switch and fall through (copy single char),
1772+
// continue parsing after copying the current constraint into
1773+
// the return string.
1774+
break;
1775+
case 'r':
1776+
case 'R':
1777+
// "^" hints llvm that this is a 2 letter constraint.
1778+
// "Constraint++" is used to promote the string iterator
1779+
// to the next constraint.
1780+
return std::string("^") + std::string(Constraint++, 2);
1781+
}
17521782
[[fallthrough]];
17531783
default:
17541784
return std::string(1, *Constraint);

clang/lib/Basic/Targets/X86.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
172172
bool HasCCMP = false;
173173
bool HasNF = false;
174174
bool HasCF = false;
175+
bool HasInlineAsmUseGPR32 = false;
175176

176177
protected:
177178
llvm::X86::CPUKind CPU = llvm::X86::CK_None;

clang/lib/Driver/ToolChains/Arch/X86.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,4 +310,6 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple,
310310
Features.push_back("+prefer-no-gather");
311311
if (Args.hasArg(options::OPT_mno_scatter))
312312
Features.push_back("+prefer-no-scatter");
313+
if (Args.hasArg(options::OPT_mapx_inline_asm_use_gpr32))
314+
Features.push_back("+inline-asm-use-gpr32");
313315
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
/// Tests -mapx-inline-asm-use-gpr32
2+
// RUN: %clang -target x86_64-unknown-linux-gnu -c -mapx-inline-asm-use-gpr32 -### %s 2>&1 | FileCheck --check-prefix=GPR32 %s
3+
// GPR32: "-target-feature" "+inline-asm-use-gpr32"

clang/test/Preprocessor/x86_target_features.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,3 +763,8 @@
763763
// NF: #define __NF__ 1
764764
// PPX: #define __PPX__ 1
765765
// PUSH2POP2: #define __PUSH2POP2__ 1
766+
767+
// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=NOUSEGPR32 %s
768+
// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=egpr -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=USEGPR32 %s
769+
// NOUSEGPR32-NOT: #define __APX_INLINE_ASM_USE_GPR32__ 1
770+
// USEGPR32: #define __APX_INLINE_ASM_USE_GPR32__ 1

llvm/docs/LangRef.rst

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5428,10 +5428,12 @@ X86:
54285428
- ``Z``: An immediate 32-bit unsigned integer.
54295429
- ``q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit
54305430
``l`` integer register. On X86-32, this is the ``a``, ``b``, ``c``, and ``d``
5431-
registers, and on X86-64, it is all of the integer registers.
5431+
registers, and on X86-64, it is all of the integer registers. When features
5432+
`egpr` and `inline-asm-use-gpr32` are both on, it will be extended to gpr32.
54325433
- ``Q``: An 8, 16, 32, or 64-bit register which can be accessed as an 8-bit
54335434
``h`` integer register. This is the ``a``, ``b``, ``c``, and ``d`` registers.
5434-
- ``r`` or ``l``: An 8, 16, 32, or 64-bit integer register.
5435+
- ``r`` or ``l``: An 8, 16, 32, or 64-bit integer register. When features
5436+
`egpr` and `inline-asm-use-gpr32` are both on, it will be extended to gpr32.
54355437
- ``R``: An 8, 16, 32, or 64-bit "legacy" integer register -- one which has
54365438
existed since i386, and can be accessed without the REX prefix.
54375439
- ``f``: A 32, 64, or 80-bit '387 FPU stack pseudo-register.
@@ -5452,6 +5454,10 @@ X86:
54525454
operand will get allocated only to RAX -- if two 32-bit operands are needed,
54535455
you're better off splitting it yourself, before passing it to the asm
54545456
statement.
5457+
- ``jr``: An 8, 16, 32, or 64-bit integer gpr16. It won't be extended to gpr32
5458+
even when feature `egpr` or `inline-asm-use-gpr32` is on.
5459+
- ``jR``: An 8, 16, 32, or 64-bit integer gpr32 when feature `egpr` is on.
5460+
Otherwise, same as ``r``.
54555461

54565462
XCore:
54575463

llvm/lib/Target/X86/X86.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,9 @@ def FeatureNF : SubtargetFeature<"nf", "HasNF", "true",
346346
"Support status flags update suppression">;
347347
def FeatureCF : SubtargetFeature<"cf", "HasCF", "true",
348348
"Support conditional faulting">;
349+
def FeatureUseGPR32InInlineAsm
350+
: SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
351+
"Enable use of GPR32 in inline assembly for APX">;
349352

350353
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
351354
// "string operations"). See "REP String Enhancement" in the Intel Software

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -57840,6 +57840,15 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
5784057840
case '2':
5784157841
return C_RegisterClass;
5784257842
}
57843+
break;
57844+
case 'j':
57845+
switch (Constraint[1]) {
57846+
default:
57847+
break;
57848+
case 'r':
57849+
case 'R':
57850+
return C_RegisterClass;
57851+
}
5784357852
}
5784457853
} else if (parseConstraintCode(Constraint) != X86::COND_INVALID)
5784557854
return C_Other;
@@ -57919,6 +57928,19 @@ X86TargetLowering::getSingleConstraintMatchWeight(
5791957928
break;
5792057929
}
5792157930
break;
57931+
case 'j':
57932+
if (StringRef(Constraint).size() != 2)
57933+
break;
57934+
switch (Constraint[1]) {
57935+
default:
57936+
return CW_Invalid;
57937+
case 'r':
57938+
case 'R':
57939+
if (CallOperandVal->getType()->isIntegerTy())
57940+
Wt = CW_SpecificReg;
57941+
break;
57942+
}
57943+
break;
5792257944
case 'v':
5792357945
if ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
5792457946
Wt = CW_Register;
@@ -58218,6 +58240,10 @@ static bool isVKClass(const TargetRegisterClass &RC) {
5821858240
RC.hasSuperClassEq(&X86::VK64RegClass);
5821958241
}
5822058242

58243+
static bool useEGPRInlineAsm(const X86Subtarget &Subtarget) {
58244+
return Subtarget.hasEGPR() && Subtarget.useInlineAsmGPR32();
58245+
}
58246+
5822158247
std::pair<unsigned, const TargetRegisterClass *>
5822258248
X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
5822358249
StringRef Constraint,
@@ -58258,13 +58284,21 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
5825858284
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
5825958285
if (Subtarget.is64Bit()) {
5826058286
if (VT == MVT::i8 || VT == MVT::i1)
58261-
return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
58287+
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
58288+
? &X86::GR8RegClass
58289+
: &X86::GR8_NOREX2RegClass);
5826258290
if (VT == MVT::i16)
58263-
return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
58291+
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
58292+
? &X86::GR16RegClass
58293+
: &X86::GR16_NOREX2RegClass);
5826458294
if (VT == MVT::i32 || VT == MVT::f32)
58265-
return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
58295+
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
58296+
? &X86::GR32RegClass
58297+
: &X86::GR32_NOREX2RegClass);
5826658298
if (VT != MVT::f80 && !VT.isVector())
58267-
return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
58299+
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
58300+
? &X86::GR64RegClass
58301+
: &X86::GR64_NOREX2RegClass);
5826858302
break;
5826958303
}
5827058304
[[fallthrough]];
@@ -58283,14 +58317,22 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
5828358317
case 'r': // GENERAL_REGS
5828458318
case 'l': // INDEX_REGS
5828558319
if (VT == MVT::i8 || VT == MVT::i1)
58286-
return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
58320+
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
58321+
? &X86::GR8RegClass
58322+
: &X86::GR8_NOREX2RegClass);
5828758323
if (VT == MVT::i16)
58288-
return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
58324+
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
58325+
? &X86::GR16RegClass
58326+
: &X86::GR16_NOREX2RegClass);
5828958327
if (VT == MVT::i32 || VT == MVT::f32 ||
5829058328
(!VT.isVector() && !Subtarget.is64Bit()))
58291-
return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
58329+
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
58330+
? &X86::GR32RegClass
58331+
: &X86::GR32_NOREX2RegClass);
5829258332
if (VT != MVT::f80 && !VT.isVector())
58293-
return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
58333+
return std::make_pair(0U, useEGPRInlineAsm(Subtarget)
58334+
? &X86::GR64RegClass
58335+
: &X86::GR64_NOREX2RegClass);
5829458336
break;
5829558337
case 'R': // LEGACY_REGS
5829658338
if (VT == MVT::i8 || VT == MVT::i1)
@@ -58514,6 +58556,31 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
5851458556
}
5851558557
break;
5851658558
}
58559+
} else if (Constraint.size() == 2 && Constraint[0] == 'j') {
58560+
switch (Constraint[1]) {
58561+
default:
58562+
break;
58563+
case 'r':
58564+
if (VT == MVT::i8 || VT == MVT::i1)
58565+
return std::make_pair(0U, &X86::GR8_NOREX2RegClass);
58566+
if (VT == MVT::i16)
58567+
return std::make_pair(0U, &X86::GR16_NOREX2RegClass);
58568+
if (VT == MVT::i32 || VT == MVT::f32)
58569+
return std::make_pair(0U, &X86::GR32_NOREX2RegClass);
58570+
if (VT != MVT::f80 && !VT.isVector())
58571+
return std::make_pair(0U, &X86::GR64_NOREX2RegClass);
58572+
break;
58573+
case 'R':
58574+
if (VT == MVT::i8 || VT == MVT::i1)
58575+
return std::make_pair(0U, &X86::GR8RegClass);
58576+
if (VT == MVT::i16)
58577+
return std::make_pair(0U, &X86::GR16RegClass);
58578+
if (VT == MVT::i32 || VT == MVT::f32)
58579+
return std::make_pair(0U, &X86::GR32RegClass);
58580+
if (VT != MVT::f80 && !VT.isVector())
58581+
return std::make_pair(0U, &X86::GR64RegClass);
58582+
break;
58583+
}
5851758584
}
5851858585

5851958586
if (parseConstraintCode(Constraint) != X86::COND_INVALID)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: not llc -mtriple=x86_64 %s 2>&1 | FileCheck %s --check-prefix=ERR
3+
; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s
4+
; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
5+
; RUN: not llc -mtriple=x86_64 -mattr=+inline-asm-use-gpr32 %s 2>&1 | FileCheck %s --check-prefix=ERR
6+
7+
; ERR: error: inline assembly requires more registers than available
8+
9+
define void @constraint_jR_test() nounwind {
10+
; CHECK-LABEL: constraint_jR_test:
11+
; CHECK: addq %r16, %rax
12+
entry:
13+
%reg = alloca i64, align 8
14+
%0 = load i64, ptr %reg, align 8
15+
call void asm sideeffect "add $0, %rax", "^jR,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
16+
ret void
17+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: not llc -mtriple=x86_64 < %s >%t1 2>%t2
3+
; RUN: FileCheck %s <%t1
4+
; RUN: FileCheck %s <%t2 --check-prefix=ERR
5+
; RUN: not llc -mattr=+egpr -mtriple=x86_64 < %s >%t1 2>%t2
6+
; RUN: FileCheck %s <%t1
7+
; RUN: FileCheck %s <%t2 --check-prefix=ERR
8+
; RUN: not llc -mattr=+egpr,+inline-asm-use-gpr32 -mtriple=x86_64 < %s >%t1 2>%t2
9+
; RUN: FileCheck %s <%t1
10+
; RUN: FileCheck %s <%t2 --check-prefix=ERR
11+
12+
; CHECK: addq %r8, %rax
13+
define void @constraint_jr_test() nounwind {
14+
entry:
15+
%reg = alloca i64, align 8
16+
%0 = load i64, ptr %reg, align 8
17+
call void asm sideeffect "add $0, %rax", "^jr,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{dirflag},~{fpsr},~{flags}"(i64 %0)
18+
ret void
19+
}
20+
21+
; ERR: error: inline assembly requires more registers than available
22+
define void @constraint_jr_test_err() nounwind {
23+
entry:
24+
%reg = alloca i64, align 8
25+
%0 = load i64, ptr %reg, align 8
26+
call void asm sideeffect "add $0, %rax", "^jr,~{rax},~{rbx},~{rbp},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0)
27+
ret void
28+
}
Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,26 @@
11
; Check r16-r31 can not be used with 'q','r','l' constraints for backward compatibility, unless +inline-asm-use-gpr32 is enabled together with +egpr.
2-
; RUN: not llc < %s -mtriple=x86_64-unknown-unknown -mattr=+egpr 2>&1 | FileCheck %s
2+
; RUN: not llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefix=ERR
3+
; RUN: not llc -mtriple=x86_64 -mattr=+egpr < %s 2>&1 | FileCheck %s --check-prefix=ERR
4+
; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s
35

46
define void @q() {
5-
; CHECK: error: inline assembly requires more registers than available
6-
%a = call i32 asm sideeffect "movq %rax, $0", "=q,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
7+
; ERR: error: inline assembly requires more registers than available
8+
; CHECK: movq %rax, %r16
9+
%a = call i64 asm sideeffect "movq %rax, $0", "=q,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
710
ret void
811
}
912

1013
define void @r() {
11-
; CHECK: error: inline assembly requires more registers than available
12-
%a = call i32 asm sideeffect "movq %rax, $0", "=r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
14+
; ERR: error: inline assembly requires more registers than available
15+
; CHECK: movq %rax, %r16
16+
%a = call i64 asm sideeffect "movq %rax, $0", "=r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
1317
ret void
1418
}
1519

1620
define void @l() {
17-
; CHECK: error: inline assembly requires more registers than available
18-
%a = call i32 asm sideeffect "movq %rax, $0", "=l,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
21+
; ERR: error: inline assembly requires more registers than available
22+
; CHECK: movq %rax, %r16
23+
%a = call i64 asm sideeffect "movq %rax, $0", "=l,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{rbp},~{rsp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
1924
ret void
2025
}
2126

0 commit comments

Comments
 (0)