From 5cdd4ba70d9fca571372c341d8f091e378c08781 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 25 Jan 2024 15:33:06 +0000 Subject: [PATCH 1/2] [TableGen] Fix computeRegUnitLaneMasks for ad hoc aliasing With ad hoc aliasing (using the Aliases field in register definitions) we can have subregisters with disjoint lane masks that nevertheless share a regunit. In this case we need to take the union of their lane masks, not the intersection. This was inadvertently broken by https://reviews.llvm.org/D157864 --- llvm/utils/TableGen/CodeGenRegisters.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp index d1abdb74ea4a9..b9d404aaee5fd 100644 --- a/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -2144,7 +2144,7 @@ void CodeGenRegBank::computeRegUnitLaneMasks() { // Create an initial lane mask for all register units. const auto &RegUnits = Register.getRegUnits(); CodeGenRegister::RegUnitLaneMaskList RegUnitLaneMasks( - RegUnits.count(), LaneBitmask::getAll()); + RegUnits.count(), LaneBitmask::getNone()); // Iterate through SubRegisters. typedef CodeGenRegister::SubRegMap SubRegMap; const SubRegMap &SubRegs = Register.getSubRegs(); @@ -2163,7 +2163,7 @@ void CodeGenRegBank::computeRegUnitLaneMasks() { unsigned u = 0; for (unsigned RU : RegUnits) { if (SUI == RU) { - RegUnitLaneMasks[u] &= LaneMask; + RegUnitLaneMasks[u] |= LaneMask; assert(!Found); Found = true; } @@ -2173,6 +2173,10 @@ void CodeGenRegBank::computeRegUnitLaneMasks() { assert(Found); } } + for (auto &Mask : RegUnitLaneMasks) { + if (Mask.none()) + Mask = LaneBitmask::getAll(); + } Register.setRegUnitLaneMasks(RegUnitLaneMasks); } } From ae64aeddd82cccc8c3a3eab55f14ea5477a97829 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Sat, 27 Jan 2024 21:51:40 +0900 Subject: [PATCH 2/2] Add test case for computeRegUnitLaneMasks fix This test is based on VE implementation. VE has two subregisters. Both are disjointed. However, many instructions contaminate other register very often. Therefore, VE defines them as aliased to avoid register allocator to allocate both registers simultaneously. This trick conflicts wht recent computeRegUnitLaneMasks optimization. The optimization is reverted now. And I'm adding test case for that. --- llvm/test/TableGen/DisjointAliasSubregs.td | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 llvm/test/TableGen/DisjointAliasSubregs.td diff --git a/llvm/test/TableGen/DisjointAliasSubregs.td b/llvm/test/TableGen/DisjointAliasSubregs.td new file mode 100644 index 0000000000000..813e27feb1efb --- /dev/null +++ b/llvm/test/TableGen/DisjointAliasSubregs.td @@ -0,0 +1,48 @@ +// RUN: llvm-tblgen -gen-register-info -I %p/../../include %s | FileCheck %s +// Checks that tablegen calculation of register unit's lanemask. +// Detail is described just after tests. + +include "llvm/Target/Target.td" + +class MyReg subregs = [], list aliases = []> + : Register { + let Namespace = "Test"; + let SubRegs = subregs; + let Aliases = aliases; + let CoveredBySubRegs = 1; +} +class MyClass types, dag registers> + : RegisterClass<"Test", types, size, registers> { + let Size = size; +} + +// Register Example (VE): +// SX0 -- SW0 (sub_i32) +// \- SF0 (sub_f32) + +def sub_i32 : SubRegIndex<32, 32>; // Low 32 bit (32..63) +def sub_f32 : SubRegIndex<32>; // High 32 bit (0..31) + +def SW0 : MyReg<"sw0", []>; +def SF0 : MyReg<"sf0", [], [!cast("SW0")]>; + +let SubRegIndices = [sub_i32, sub_f32] in { +def SX0 : MyReg<"s0", [SW0, SF0]>; +} + +def I64 : MyClass<1, [i64], (sequence "SX%u", 0, 0)>; + +def TestTarget : Target; + +// CHECK: extern const LaneBitmask TestTargetLaneMaskLists[] = { +// CHECK-NEXT: /* 0 */ LaneBitmask(0x0000000000000003), LaneBitmask::getAll(), +// CHECK-NEXT: /* 2 */ LaneBitmask(0xFFFFFFFFFFFFFFFF), LaneBitmask::getAll(), + +// A sub_f32 has 0x0001 as a lane bitmask. A sub_i32 has 0x0002. +// Both are disjointed, but VE defines those are aliased registers since +// many instructions contaminate other part of register values. So, VE +// doesn't want to allocate SF0 and SW0 simultaneously. TableGen's +// computeRegUnitLaneMasks is optimizable using intersection algorithm, +// but it calculates 0x0000 as a intersection of 0x0001 and 0x0002. +// This causes wrong live-in calculation later. Union mechanism can +// calculate correct lanemaks as 0x0003.