Skip to content
This repository was archived by the owner on Sep 2, 2018. It is now read-only.

Commit fea2d4d

Browse files
author
Chad Rosier
committed
[AArch64] Improve getUsefulBitsForUse for narrow stores.
For narrow stores (e.g., strb, strh) we know the upper bits of the register are unused/not useful. In some cases we can use this information to eliminate unnecessary instructions. For example, without this patch we generate (from the 2nd test case):

  ldr w8, [x0]
  and w8, w8, #0xfff0
  bfxil w8, w2, #16, #4
  strh w8, [x1]

and after the patch the 'and' is removed:

  ldr w8, [x0]
  bfxil w8, w2, #16, #4
  strh w8, [x1]
  ret

During the lowering of the bitfield insert instruction the 'and' is eliminated because we know the upper 16 bits that are masked off are unused and the lower 4 bits that are masked off are overwritten by the insert itself. Therefore, the 'and' is unnecessary.

Differential Revision: http://reviews.llvm.org/D20175

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@269226 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 678ba5a commit fea2d4d

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -1849,6 +1849,20 @@ static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
18491849
case AArch64::BFMWri:
18501850
case AArch64::BFMXri:
18511851
return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
1852+
1853+
case AArch64::STRBui:
1854+
case AArch64::STRBBui:
1855+
if (UserNode->getOperand(0) != Orig)
1856+
return;
1857+
UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
1858+
return;
1859+
1860+
case AArch64::STRHui:
1861+
case AArch64::STRHHui:
1862+
if (UserNode->getOperand(0) != Orig)
1863+
return;
1864+
UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
1865+
return;
18521866
}
18531867
}
18541868

test/CodeGen/AArch64/bitfield-insert.ll

+36
Original file line numberDiff line numberDiff line change
@@ -237,3 +237,39 @@ define i32 @test_nouseful_bits(i8 %a, i32 %b) {
237237
%shl.4 = shl i32 %or.3, 8 ; A A A 0
238238
ret i32 %shl.4
239239
}
240+
241+
define void @test_nouseful_strb(i32* %ptr32, i8* %ptr8, i32 %x) {
242+
entry:
243+
; CHECK-LABEL: @test_nouseful_strb
244+
; CHECK: ldr [[REG1:w[0-9]+]],
245+
; CHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xf8
246+
; CHECK-NEXT: bfxil [[REG1]], w2, #16, #3
247+
; CHECK-NEXT: strb [[REG1]],
248+
; CHECK-NEXT: ret
249+
%0 = load i32, i32* %ptr32, align 8
250+
%and = and i32 %0, -8
251+
%shr = lshr i32 %x, 16
252+
%and1 = and i32 %shr, 7
253+
%or = or i32 %and, %and1
254+
%trunc = trunc i32 %or to i8
255+
store i8 %trunc, i8* %ptr8
256+
ret void
257+
}
258+
259+
define void @test_nouseful_strh(i32* %ptr32, i16* %ptr16, i32 %x) {
260+
entry:
261+
; CHECK-LABEL: @test_nouseful_strh
262+
; CHECK: ldr [[REG1:w[0-9]+]],
263+
; CHECK-NOT: and {{w[0-9]+}}, {{w[0-9]+}}, #0xfff0
264+
; CHECK-NEXT: bfxil [[REG1]], w2, #16, #4
265+
; CHECK-NEXT: strh [[REG1]],
266+
; CHECK-NEXT: ret
267+
%0 = load i32, i32* %ptr32, align 8
268+
%and = and i32 %0, -16
269+
%shr = lshr i32 %x, 16
270+
%and1 = and i32 %shr, 15
271+
%or = or i32 %and, %and1
272+
%trunc = trunc i32 %or to i16
273+
store i16 %trunc, i16* %ptr16
274+
ret void
275+
}

0 commit comments

Comments
 (0)