# Patch summary (descriptive header; it precedes the first "diff --git" line
# and is not part of any hunk).
#
# llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
#   * NVPTXTargetLowering constructor: inside the loop over
#     MVT::integer_valuetypes(), ISD::EXTLOAD from MVT::i1 is now marked
#     Promote, next to the existing SEXTLOAD and ZEXTLOAD entries.
#   * LowerLOADi1: the replacement load is now built with DAG.getExtLoad as an
#     ISD::ZEXTLOAD that produces MVT::i16 from memory type MVT::i8, instead
#     of a plain MVT::i16 DAG.getLoad. The chain, base pointer, pointer info,
#     alignment and memory-operand flags are forwarded as before, and the
#     result is still truncated to MVT::i1.
#
# New tests
#   * llvm/test/CodeGen/NVPTX/i1-ext-load.ll (sm_50): a function annotated as
#     a kernel loads an i1, zero-extends it to i32, adds 1, masks with 1 and
#     stores the result. The CHECK lines expect ld.global.nc.u8 followed by
#     cvt.u32.u8.
#   * llvm/test/CodeGen/NVPTX/i1-load-lower.ll (sm_20): branches on an i1
#     loaded from an addrspace(1) global. The CHECK lines expect ld.global.u8,
#     then and.b16 with 1, then setp.eq.b16.
#
# NOTE(review): both tests declare target triple "nvptx-nvidia-cuda" while
# their RUN lines pass -march=nvptx64 -- presumably intentional; confirm.
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index a2181b478c269..bc23998455a68 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -629,6 +629,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
   for (MVT VT : MVT::integer_valuetypes()) {
     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
+    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
     setTruncStoreAction(VT, MVT::i1, Expand);
   }
 
@@ -2920,9 +2921,10 @@ SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
   assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
   assert(Node->getValueType(0) == MVT::i1 &&
          "Custom lowering for i1 load only");
-  SDValue newLD = DAG.getLoad(MVT::i16, dl, LD->getChain(), LD->getBasePtr(),
-                              LD->getPointerInfo(), LD->getAlign(),
-                              LD->getMemOperand()->getFlags());
+  SDValue newLD = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i16, LD->getChain(),
+                                 LD->getBasePtr(), LD->getPointerInfo(),
+                                 MVT::i8, LD->getAlign(),
+                                 LD->getMemOperand()->getFlags());
   SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
   // The legalizer (the caller) is expecting two values from the legalized
   // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
diff --git a/llvm/test/CodeGen/NVPTX/i1-ext-load.ll b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
new file mode 100644
index 0000000000000..b775e40470047
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-ext-load.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function foo --extra_scrub --default-march nvptx64 --filter-out ".*//.*" --filter-out "[\{\}\(\)]" --version 5
+
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_50 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_50 | %ptxas-verify %}
+
+target triple = "nvptx-nvidia-cuda"
+
+define void @foo(ptr noalias readonly %ptr, ptr noalias %retval) {
+; CHECK-LABEL: foo(
+; CHECK: .reg .b16 %rs<2>;
+; CHECK: .reg .b32 %r<4>;
+; CHECK: .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK: ld.param.u64 %rd1, [foo_param_0];
+; CHECK: ld.param.u64 %rd2, [foo_param_1];
+; CHECK: cvta.to.global.u64 %rd3, %rd2;
+; CHECK: cvta.to.global.u64 %rd4, %rd1;
+; CHECK: ld.global.nc.u8 %rs1, [%rd4];
+; CHECK: cvt.u32.u8 %r1, %rs1;
+; CHECK: add.s32 %r2, %r1, 1;
+; CHECK: and.b32 %r3, %r2, 1;
+; CHECK: st.global.u32 [%rd3], %r3;
+; CHECK: ret;
+  %ld = load i1, ptr %ptr, align 1
+  %zext = zext i1 %ld to i32
+  %add = add i32 %zext, 1
+  %and = and i32 %add, 1
+  store i32 %and, ptr %retval
+  ret void
+}
+
+!nvvm.annotations = !{!0}
+
+!0 = !{ptr @foo, !"kernel", i32 1}
diff --git a/llvm/test/CodeGen/NVPTX/i1-load-lower.ll b/llvm/test/CodeGen/NVPTX/i1-load-lower.ll
new file mode 100644
index 0000000000000..d1f99b5724de8
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/i1-load-lower.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function foo --extra_scrub --default-march nvptx64 --filter-out ".*//.*" --filter-out "[\(\)\{\}]" --version 5
+
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
+
+target triple = "nvptx-nvidia-cuda"
+
+@i1g = addrspace(1) global i1 false, align 2
+
+define void @foo() {
+; CHECK-LABEL: foo(
+; CHECK: .reg .pred %p<2>;
+; CHECK: .reg .b16 %rs<4>;
+; CHECK-EMPTY:
+; CHECK: ld.global.u8 %rs1, [i1g];
+; CHECK: and.b16 %rs2, %rs1, 1;
+; CHECK: setp.eq.b16 %p1, %rs2, 1;
+; CHECK: @%p1 bra $L__BB0_2;
+; CHECK: mov.u16 %rs3, 1;
+; CHECK: st.global.u8 [i1g], %rs3;
+; CHECK: ret;
+  %tmp = load i1, ptr addrspace(1) @i1g, align 2
+  br i1 %tmp, label %if.end, label %if.then
+
+if.then:
+  store i1 true, ptr addrspace(1) @i1g, align 2
+  br label %if.end
+
+if.end:
+  ret void
+}