Skip to content

Reland "[NVPTX] Emit prmt selection value in hex" #115952

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,12 @@ void NVPTXInstPrinter::printOffseti32imm(const MCInst *MI, int OpNum,
}
}

/// Print the immediate operand at \p OpNum as an unsigned 32-bit hexadecimal
/// literal followed by a "U" suffix (for example `0x3340U`).
///
/// \param MI       Instruction whose operand is printed.
/// \param OpNum    Index of the immediate operand within \p MI.
/// \param O        Stream that receives the formatted text.
/// \param Modifier Not used by this printer.
void NVPTXInstPrinter::printHexu32imm(const MCInst *MI, int OpNum,
                                      raw_ostream &O, const char *Modifier) {
  // getImm() hands back a signed 64-bit value. Keep only the low 32 bits and
  // pass an unsigned 64-bit value to formatHex so that an immediate with the
  // sign bit set is never rendered with a leading '-' in front of the
  // "U"-suffixed literal. Values already in [0, 2^32) print exactly as before.
  const uint64_t Bits = static_cast<uint32_t>(MI->getOperand(OpNum).getImm());
  O << formatHex(Bits) << "U";
}

void NVPTXInstPrinter::printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier) {
const MCOperand &Op = MI->getOperand(OpNum);
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ class NVPTXInstPrinter : public MCInstPrinter {
raw_ostream &O, const char *Modifier = nullptr);
void printOffseti32imm(const MCInst *MI, int OpNum, raw_ostream &O,
const char *Modifier = nullptr);
/// Prints the immediate operand at index \p OpNum of \p MI to \p O as a
/// hexadecimal literal followed by a "U" suffix. \p Modifier is accepted for
/// signature parity with the other operand printers and is not used.
void printHexu32imm(const MCInst *MI, int OpNum, raw_ostream &O,
const char *Modifier = nullptr);
void printProtoIdent(const MCInst *MI, int OpNum,
raw_ostream &O, const char *Modifier = nullptr);
void printPrmtMode(const MCInst *MI, int OpNum, raw_ostream &O,
Expand Down
8 changes: 6 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1740,6 +1740,10 @@ multiclass BFI<string Instr, ValueType T, RegisterClass RC, Operand ImmCls> {
[(set (T RC:$f), (bfi (T imm:$a), (T RC:$b), (i32 imm:$c), (i32 imm:$d)))]>;
}

// i32 operand whose assembly text is produced by the instruction printer's
// printHexu32imm method (hexadecimal with a "U" suffix). Used for the $c
// selector operand of the PRMT rri/rii variants.
def Hexu32imm : Operand<i32> {
let PrintMethod = "printHexu32imm";
}

multiclass PRMT<ValueType T, RegisterClass RC> {
def rrr
: NVPTXInst<(outs RC:$d),
Expand All @@ -1748,12 +1752,12 @@ multiclass PRMT<ValueType T, RegisterClass RC> {
[(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 Int32Regs:$c), imm:$mode))]>;
def rri
: NVPTXInst<(outs RC:$d),
(ins RC:$a, Int32Regs:$b, i32imm:$c, PrmtMode:$mode),
(ins RC:$a, Int32Regs:$b, Hexu32imm:$c, PrmtMode:$mode),
!strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"),
[(set (T RC:$d), (prmt (T RC:$a), (T RC:$b), (i32 imm:$c), imm:$mode))]>;
def rii
: NVPTXInst<(outs RC:$d),
(ins RC:$a, i32imm:$b, i32imm:$c, PrmtMode:$mode),
(ins RC:$a, i32imm:$b, Hexu32imm:$c, PrmtMode:$mode),
!strconcat("prmt.b32${mode}", " \t$d, $a, $b, $c;"),
[(set (T RC:$d), (prmt (T RC:$a), (T imm:$b), (i32 imm:$c), imm:$mode))]>;
}
Expand Down
148 changes: 74 additions & 74 deletions llvm/test/CodeGen/NVPTX/i8x4-instructions.ll

Large diffs are not rendered by default.

48 changes: 24 additions & 24 deletions llvm/test/CodeGen/NVPTX/load-store.ll
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ define void @generic_4xi8(ptr %a) {
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
Expand All @@ -184,8 +184,8 @@ define void @generic_4xi8(ptr %a) {
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr %a
Expand Down Expand Up @@ -519,7 +519,7 @@ define void @generic_volatile_4xi8(ptr %a) {
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
Expand All @@ -528,8 +528,8 @@ define void @generic_volatile_4xi8(ptr %a) {
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.volatile.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr %a
Expand Down Expand Up @@ -1424,7 +1424,7 @@ define void @global_4xi8(ptr addrspace(1) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
Expand All @@ -1433,8 +1433,8 @@ define void @global_4xi8(ptr addrspace(1) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.global.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr addrspace(1) %a
Expand Down Expand Up @@ -1749,7 +1749,7 @@ define void @global_volatile_4xi8(ptr addrspace(1) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
Expand All @@ -1758,8 +1758,8 @@ define void @global_volatile_4xi8(ptr addrspace(1) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.volatile.global.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr addrspace(1) %a
Expand Down Expand Up @@ -2796,7 +2796,7 @@ define void @shared_4xi8(ptr addrspace(3) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
Expand All @@ -2805,8 +2805,8 @@ define void @shared_4xi8(ptr addrspace(3) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.shared.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr addrspace(3) %a
Expand Down Expand Up @@ -3121,7 +3121,7 @@ define void @shared_volatile_4xi8(ptr addrspace(3) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
Expand All @@ -3130,8 +3130,8 @@ define void @shared_volatile_4xi8(ptr addrspace(3) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr addrspace(3) %a
Expand Down Expand Up @@ -4026,7 +4026,7 @@ define void @local_4xi8(ptr addrspace(5) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
Expand All @@ -4035,8 +4035,8 @@ define void @local_4xi8(ptr addrspace(5) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.local.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load <4 x i8>, ptr addrspace(5) %a
Expand Down Expand Up @@ -4351,7 +4351,7 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 13120;
; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x3340U;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
Expand All @@ -4360,8 +4360,8 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) {
; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 0x5410U;
; CHECK-NEXT: st.local.u32 [%rd1], %r12;
; CHECK-NEXT: ret;
%a.load = load volatile <4 x i8>, ptr addrspace(5) %a
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/NVPTX/sext-setcc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,11 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
; CHECK-NEXT: setp.eq.s16 %p4, %rs8, 0;
; CHECK-NEXT: selp.s32 %r6, -1, 0, %p4;
; CHECK-NEXT: selp.s32 %r7, -1, 0, %p3;
; CHECK-NEXT: prmt.b32 %r8, %r7, %r6, 13120;
; CHECK-NEXT: prmt.b32 %r8, %r7, %r6, 0x3340U;
; CHECK-NEXT: selp.s32 %r9, -1, 0, %p2;
; CHECK-NEXT: selp.s32 %r10, -1, 0, %p1;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r9, 13120;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r8, 21520;
; CHECK-NEXT: prmt.b32 %r11, %r10, %r9, 0x3340U;
; CHECK-NEXT: prmt.b32 %r12, %r11, %r8, 0x5410U;
; CHECK-NEXT: st.param.b32 [func_retval0], %r12;
; CHECK-NEXT: ret;
entry:
Expand Down
31 changes: 20 additions & 11 deletions llvm/test/CodeGen/NVPTX/shuffle-vec-undef-init.ll
Original file line number Diff line number Diff line change
@@ -1,18 +1,27 @@
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-FOUND
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx -mcpu=sm_20 -verify-machineinstrs | FileCheck %s

target triple = "nvptx64-unknown-unknown"

define void @kernel_func(ptr %in.vec, ptr %out.vec0) nounwind {
entry:
; CHECK-LABEL: kernel_func(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<10>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [kernel_func_param_0];
; CHECK-NEXT: ld.u32 %r2, [%r1+8];
; CHECK-NEXT: ld.u32 %r3, [%r1];
; CHECK-NEXT: ld.u32 %r4, [%r1+24];
; CHECK-NEXT: ld.u32 %r5, [%r1+16];
; CHECK-NEXT: ld.param.u32 %r6, [kernel_func_param_1];
; CHECK-NEXT: prmt.b32 %r7, %r5, %r4, 0x4000U;
; CHECK-NEXT: prmt.b32 %r8, %r3, %r2, 0x40U;
; CHECK-NEXT: prmt.b32 %r9, %r8, %r7, 0x7610U;
; CHECK-NEXT: st.u32 [%r6], %r9;
; CHECK-NEXT: ret;
%wide.vec = load <32 x i8>, ptr %in.vec, align 64
%vec0 = shufflevector <32 x i8> %wide.vec, <32 x i8> undef, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
store <4 x i8> %vec0, ptr %out.vec0, align 64
ret void

; CHECK-FOUND: prmt.b32 {{.*}} 16384;
; CHECK-FOUND: prmt.b32 {{.*}} 64;
; CHECK-FOUND: prmt.b32 {{.*}} 30224;

; CHECK: @kernel_func
; CHECK-NOT: prmt.b32 {{.*}} -1;
; CHECK: -- End function
}
Loading