Skip to content

Commit bd84d66

Browse files
authored
[lld][LoongArch] Convert TLS IE to LE in the normal or medium code model (#123680)
Original code sequence: * pcalau12i $a0, %ie_pc_hi20(sym) * ld.d $a0, $a0, %ie_pc_lo12(sym) The code sequence converted is as follows: * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP * ori $a0, src, %le_lo12(sym) # le_hi20 != 0, src = $a0, # otherwise, src = $zero TODO: When relaxation is enabled, the redundant NOP can be removed. This will be implemented in a future patch. Note: In the normal or medium code model, the instructions of the original code sequence (with their relocations) are allowed to be interleaved with other instructions, because the converted code sequence calculates an absolute offset. However, in the extreme code model, the first four instructions with relocations must appear consecutively so that the current code model can be identified.
1 parent c9280ba commit bd84d66

File tree

4 files changed

+183
-19
lines changed

4 files changed

+183
-19
lines changed

lld/ELF/Arch/LoongArch.cpp

+85
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ class LoongArch final : public TargetInfo {
3939
void relocate(uint8_t *loc, const Relocation &rel,
4040
uint64_t val) const override;
4141
bool relaxOnce(int pass) const override;
42+
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
4243
void finalizeRelax(int passes) const override;
4344
};
4445
} // end anonymous namespace
@@ -53,6 +54,8 @@ enum Op {
5354
ADDI_W = 0x02800000,
5455
ADDI_D = 0x02c00000,
5556
ANDI = 0x03400000,
57+
ORI = 0x03800000,
58+
LU12I_W = 0x14000000,
5659
PCADDI = 0x18000000,
5760
PCADDU12I = 0x1c000000,
5861
LD_W = 0x28800000,
@@ -1002,6 +1005,88 @@ static bool relax(Ctx &ctx, InputSection &sec) {
10021005
return changed;
10031006
}
10041007

1008+
// Convert TLS IE to LE in the normal or medium code model.
1009+
// Original code sequence:
1010+
// * pcalau12i $a0, %ie_pc_hi20(sym)
1011+
// * ld.d $a0, $a0, %ie_pc_lo12(sym)
1012+
//
1013+
// The code sequence converted is as follows:
1014+
// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP
1015+
// * ori $a0, src, %le_lo12(sym) # le_hi20 != 0, src = $a0,
1016+
// # otherwise, src = $zero
1017+
//
1018+
// When relaxation is enabled, redundant NOPs can be removed.
1019+
static void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
1020+
assert(isInt<32>(val) &&
1021+
"val exceeds the range of medium code model in tlsIeToLe");
1022+
1023+
bool isUInt12 = isUInt<12>(val);
1024+
const uint32_t currInsn = read32le(loc);
1025+
switch (rel.type) {
1026+
case R_LARCH_TLS_IE_PC_HI20:
1027+
if (isUInt12)
1028+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
1029+
else
1030+
write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12),
1031+
0)); // lu12i.w $a0, %le_hi20
1032+
break;
1033+
case R_LARCH_TLS_IE_PC_LO12:
1034+
if (isUInt12)
1035+
write32le(loc, insn(ORI, getD5(currInsn), R_ZERO,
1036+
val)); // ori $a0, $zero, %le_lo12
1037+
else
1038+
write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn),
1039+
lo12(val))); // ori $a0, $a0, %le_lo12
1040+
break;
1041+
}
1042+
}
1043+
1044+
void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
1045+
const unsigned bits = ctx.arg.is64 ? 64 : 32;
1046+
uint64_t secAddr = sec.getOutputSection()->addr;
1047+
if (auto *s = dyn_cast<InputSection>(&sec))
1048+
secAddr += s->outSecOff;
1049+
else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
1050+
secAddr += ehIn->getParent()->outSecOff;
1051+
bool isExtreme = false;
1052+
const MutableArrayRef<Relocation> relocs = sec.relocs();
1053+
for (size_t i = 0, size = relocs.size(); i != size; ++i) {
1054+
Relocation &rel = relocs[i];
1055+
uint8_t *loc = buf + rel.offset;
1056+
uint64_t val = SignExtend64(
1057+
sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits);
1058+
1059+
switch (rel.expr) {
1060+
case R_RELAX_HINT:
1061+
continue;
1062+
case R_RELAX_TLS_IE_TO_LE:
1063+
if (rel.type == R_LARCH_TLS_IE_PC_HI20) {
1064+
// LoongArch does not support IE to LE optimization in the extreme code
1065+
// model. In this case, the relocs are as follows:
1066+
//
1067+
// * i -- R_LARCH_TLS_IE_PC_HI20
1068+
// * i+1 -- R_LARCH_TLS_IE_PC_LO12
1069+
// * i+2 -- R_LARCH_TLS_IE64_PC_LO20
1070+
// * i+3 -- R_LARCH_TLS_IE64_PC_HI12
1071+
isExtreme =
1072+
(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20);
1073+
}
1074+
if (isExtreme) {
1075+
rel.expr = getRelExpr(rel.type, *rel.sym, loc);
1076+
val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset),
1077+
bits);
1078+
relocateNoSym(loc, rel.type, val);
1079+
} else {
1080+
tlsIeToLe(loc, rel, val);
1081+
}
1082+
continue;
1083+
default:
1084+
break;
1085+
}
1086+
relocate(loc, rel, val);
1087+
}
1088+
}
1089+
10051090
// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
10061091
// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
10071092
// shrinkage may reduce displacement and make more relocations eligible for

lld/ELF/Relocations.cpp

+16-1
Original file line numberDiff line numberDiff line change
@@ -1376,14 +1376,20 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
13761376
return 1;
13771377
}
13781378

1379+
// LoongArch supports IE to LE optimization in non-extreme code model.
1380+
bool execOptimizeInLoongArch =
1381+
ctx.arg.emachine == EM_LOONGARCH &&
1382+
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12);
1383+
13791384
// ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
13801385
// optimizations.
13811386
// RISC-V supports TLSDESC to IE/LE optimizations.
13821387
// For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
13831388
// optimization as well.
13841389
bool execOptimize =
13851390
!ctx.arg.shared && ctx.arg.emachine != EM_ARM &&
1386-
ctx.arg.emachine != EM_HEXAGON && ctx.arg.emachine != EM_LOONGARCH &&
1391+
ctx.arg.emachine != EM_HEXAGON &&
1392+
(ctx.arg.emachine != EM_LOONGARCH || execOptimizeInLoongArch) &&
13871393
!(isRISCV && expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) &&
13881394
!sec->file->ppc64DisableTLSRelax;
13891395

@@ -1477,6 +1483,15 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
14771483
return 1;
14781484
}
14791485

1486+
// LoongArch TLS GD/LD relocs reuse the RE_LOONGARCH_GOT, in which
1487+
// NEEDS_TLSIE shouldn't be set. So we check independently.
1488+
if (ctx.arg.emachine == EM_LOONGARCH && expr == RE_LOONGARCH_GOT &&
1489+
execOptimize && isLocalInExecutable) {
1490+
ctx.hasTlsIe.store(true, std::memory_order_relaxed);
1491+
sec->addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
1492+
return 1;
1493+
}
1494+
14801495
return 0;
14811496
}
14821497

lld/test/ELF/loongarch-relax-tls-ie.s

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# REQUIRES: loongarch
2+
## Test LA64 IE -> LE in various cases.
3+
4+
# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t.o
5+
6+
## FIXME: IE relaxation has not yet been implemented.
7+
## --relax/--no-relax has the same result. Also check --emit-relocs.
8+
# RUN: ld.lld --emit-relocs %t.o -o %t
9+
# RUN: llvm-readelf -x .got %t 2>&1 | FileCheck --check-prefix=LE-GOT %s
10+
# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LE %s
11+
12+
# RUN: ld.lld --emit-relocs --no-relax %t.o -o %t.norelax
13+
# RUN: llvm-readelf -x .got %t.norelax 2>&1 | FileCheck --check-prefix=LE-GOT %s
14+
# RUN: llvm-objdump -dr --no-show-raw-insn %t.norelax | FileCheck --check-prefixes=LE %s
15+
16+
# LE-GOT: could not find section '.got'
17+
18+
# a@tprel = st_value(a) = 0xfff
19+
# b@tprel = st_value(b) = 0x1000
20+
# LE: 20158: nop
21+
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 a
22+
# LE-NEXT: R_LARCH_RELAX *ABS*
23+
# LE-NEXT: ori $a0, $zero, 4095
24+
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 a
25+
# LE-NEXT: R_LARCH_RELAX *ABS*
26+
# LE-NEXT: add.d $a0, $a0, $tp
27+
# LE-NEXT: 20164: lu12i.w $a1, 1
28+
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 b
29+
# LE-NEXT: ori $a1, $a1, 0
30+
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 b
31+
# LE-NEXT: add.d $a1, $a1, $tp
32+
# LE-NEXT: 20170: nop
33+
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 a
34+
# LE-NEXT: R_LARCH_RELAX *ABS*
35+
# LE-NEXT: lu12i.w $a3, 1
36+
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 b
37+
# LE-NEXT: R_LARCH_RELAX *ABS*
38+
# LE-NEXT: ori $a2, $zero, 4095
39+
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 a
40+
# LE-NEXT: ori $a3, $a3, 0
41+
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 b
42+
# LE-NEXT: add.d $a2, $a2, $tp
43+
# LE-NEXT: add.d $a3, $a3, $tp
44+
45+
la.tls.ie $a0, a # relax
46+
add.d $a0, $a0, $tp
47+
48+
# PCALAU12I does not have R_LARCH_RELAX. No relaxation.
49+
pcalau12i $a1, %ie_pc_hi20(b)
50+
ld.d $a1, $a1, %ie_pc_lo12(b)
51+
add.d $a1, $a1, $tp
52+
53+
# Test instructions are interleaved.
54+
# PCALAU12I has an R_LARCH_RELAX. We perform relaxation.
55+
pcalau12i $a2, %ie_pc_hi20(a)
56+
.reloc .-4, R_LARCH_RELAX, 0
57+
pcalau12i $a3, %ie_pc_hi20(b)
58+
.reloc .-4, R_LARCH_RELAX, 0
59+
ld.d $a2, $a2, %ie_pc_lo12(a)
60+
ld.d $a3, $a3, %ie_pc_lo12(b)
61+
add.d $a2, $a2, $tp
62+
add.d $a3, $a3, $tp
63+
64+
.section .tbss,"awT",@nobits
65+
.globl a
66+
.zero 0xfff ## Place a at 0xfff, LE needs only one ins.
67+
a:
68+
.zero 1 ## Place b at 0x1000, LE needs two ins.
69+
b:
70+
.zero 4

lld/test/ELF/loongarch-tls-ie.s

+12-18
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
## LA32 IE -> LE
1313
# RUN: ld.lld %t/32.o -o %t/32
1414
# RUN: llvm-readelf -r %t/32 | FileCheck --check-prefix=NOREL %s
15-
# RUN: llvm-readelf -x .got %t/32 | FileCheck --check-prefix=LE32-GOT %s
15+
# RUN: llvm-readelf -x .got %t/32 2>&1 | FileCheck --check-prefix=LE32-GOT %s
1616
# RUN: llvm-objdump -d --no-show-raw-insn %t/32 | FileCheck --check-prefixes=LE32 %s
1717

1818
## LA64 IE
@@ -23,7 +23,7 @@
2323
## LA64 IE -> LE
2424
# RUN: ld.lld %t/64.o -o %t/64
2525
# RUN: llvm-readelf -r %t/64 | FileCheck --check-prefix=NOREL %s
26-
# RUN: llvm-readelf -x .got %t/64 | FileCheck --check-prefix=LE64-GOT %s
26+
# RUN: llvm-readelf -x .got %t/64 2>&1 | FileCheck --check-prefix=LE64-GOT %s
2727
# RUN: llvm-objdump -d --no-show-raw-insn %t/64 | FileCheck --check-prefixes=LE64 %s
2828

2929
# IE32-REL: FLAGS STATIC_TLS
@@ -62,29 +62,23 @@
6262

6363
# a@tprel = st_value(a) = 0x8
6464
# b@tprel = st_value(b) = 0xc
65-
# LE32-GOT: section '.got':
66-
# LE32-GOT-NEXT: 0x0003012c 08000000 0c000000
67-
# LE64-GOT: section '.got':
68-
# LE64-GOT-NEXT: 0x000301e0 08000000 00000000 0c000000 00000000
65+
# LE32-GOT: could not find section '.got'
66+
# LE64-GOT: could not find section '.got'
6967

7068
## LA32:
71-
## &.got[0] - . = 0x3012c - 0x20114: 0x10 pages, page offset 0x12c
72-
## &.got[1] - . = 0x30130 - 0x20120: 0x10 pages, page offset 0x130
73-
# LE32: 20114: pcalau12i $a4, 16
74-
# LE32-NEXT: ld.w $a4, $a4, 300
69+
# LE32: 200d4: nop
70+
# LE32-NEXT: ori $a4, $zero, 8
7571
# LE32-NEXT: add.w $a4, $a4, $tp
76-
# LE32-NEXT: 20120: pcalau12i $a5, 16
77-
# LE32-NEXT: ld.w $a5, $a5, 304
72+
# LE32-NEXT: 200e0: nop
73+
# LE32-NEXT: ori $a5, $zero, 12
7874
# LE32-NEXT: add.w $a5, $a5, $tp
7975

8076
## LA64:
81-
## &.got[0] - . = 0x301e0 - 0x201c8: 0x10 pages, page offset 0x1e0
82-
## &.got[1] - . = 0x301e8 - 0x201d4: 0x10 pages, page offset 0x1e8
83-
# LE64: 201c8: pcalau12i $a4, 16
84-
# LE64-NEXT: ld.d $a4, $a4, 480
77+
# LE64: 20158: nop
78+
# LE64-NEXT: ori $a4, $zero, 8
8579
# LE64-NEXT: add.d $a4, $a4, $tp
86-
# LE64-NEXT: 201d4: pcalau12i $a5, 16
87-
# LE64-NEXT: ld.d $a5, $a5, 488
80+
# LE64-NEXT: 20164: nop
81+
# LE64-NEXT: ori $a5, $zero, 12
8882
# LE64-NEXT: add.d $a5, $a5, $tp
8983

9084
#--- 32.s

0 commit comments

Comments
 (0)