From 378d2128100b1bfce9ef9f032ac399abd08929d8 Mon Sep 17 00:00:00 2001 From: Guy David Date: Tue, 22 Apr 2025 17:39:24 +0300 Subject: [PATCH] [AArch64] Support merging of narrow zero stores --- .../AArch64/AArch64LoadStoreOptimizer.cpp | 8 +- .../CodeGen/AArch64/str-narrow-zero-merge.mir | 98 +++++++++++++++++-- 2 files changed, 95 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index ba3ffc2f6eb1f..dc866486fb953 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1676,10 +1676,12 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI, if (!PairIsValidLdStrOpc) return false; - // FIXME: We don't support merging narrow stores with mixed scaled/unscaled - // offsets. + // Narrow stores do not have matching pair opcodes, so constrain their + // merging to zero stores. if (isNarrowStore(OpcA) || isNarrowStore(OpcB)) - return false; + return getLdStRegOp(FirstMI).getReg() == AArch64::WZR && + getLdStRegOp(MI).getReg() == AArch64::WZR && + TII->getMemScale(FirstMI) == TII->getMemScale(MI); // The STRpre - STRui and // LDRpre-LDRui diff --git a/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir b/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir index e995c402c50a8..e5a2ef8d65bcb 100644 --- a/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir +++ b/llvm/test/CodeGen/AArch64/str-narrow-zero-merge.mir @@ -29,8 +29,7 @@ name: merge_scaled_str_with_unscaled_8 body: | bb.0.entry: ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_8 - ; CHECK: STRBBui $wzr, $x0, 4 :: (store (s8)) - ; CHECK-NEXT: STURBBi $wzr, $x0, 5 :: (store (s8)) + ; CHECK: STRHHui $wzr, $x0, 2 :: (store (s8)) ; CHECK-NEXT: RET undef $lr STRBBui $wzr, $x0, 4 :: (store (s8)) STURBBi $wzr, $x0, 5 :: (store (s8)) @@ -41,14 +40,56 @@ name: merge_unscaled_str_with_scaled_8 body: | bb.0.entry: ; 
CHECK-LABEL: name: merge_unscaled_str_with_scaled_8 - ; CHECK: STURBBi $wzr, $x0, 4 :: (store (s8)) - ; CHECK-NEXT: STRBBui $wzr, $x0, 5 :: (store (s8)) + ; CHECK: STURHHi $wzr, $x0, 4 :: (store (s8)) ; CHECK-NEXT: RET undef $lr STURBBi $wzr, $x0, 4 :: (store (s8)) STRBBui $wzr, $x0, 5 :: (store (s8)) RET undef $lr ... --- +name: merge_unscaled_str_with_scaled_8_lower_address_second +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_8_lower_address_second + ; CHECK: STURHHi $wzr, $x0, 0 :: (store (s8)) + ; CHECK-NEXT: RET undef $lr + STURBBi $wzr, $x0, 1 :: (store (s8)) + STRBBui $wzr, $x0, 0 :: (store (s8)) + RET undef $lr +... +--- +name: merge_scaled_str_with_unscaled_8_lower_address_second +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_8_lower_address_second + ; CHECK: STRHHui $wzr, $x0, 0 :: (store (s8)) + ; CHECK-NEXT: RET undef $lr + STRBBui $wzr, $x0, 1 :: (store (s8)) + STURBBi $wzr, $x0, 0 :: (store (s8)) + RET undef $lr +... 
+--- +name: merge_unscaled_str_with_scaled_8_limits +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_8_limits + ; CHECK: STURHHi $wzr, $x0, 255 :: (store (s8)) + ; CHECK-NEXT: RET undef $lr + STURBBi $wzr, $x0, 255 :: (store (s8)) + STRBBui $wzr, $x0, 256 :: (store (s8)) + RET undef $lr +--- +--- +name: merge_scaled_str_with_unscaled_8_limits +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_8_limits + ; CHECK: STRHHui $wzr, $x0, 127 :: (store (s8)) + ; CHECK-NEXT: RET undef $lr + STRBBui $wzr, $x0, 254 :: (store (s8)) + STURBBi $wzr, $x0, 255 :: (store (s8)) + RET undef $lr +--- name: merge_unscaled_str_with_unscaled_str_16 body: | bb.0: @@ -75,8 +116,7 @@ name: merge_scaled_str_with_unscaled_16 body: | bb.0.entry: ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_16 - ; CHECK: STRHHui $wzr, $x0, 2 :: (store (s16)) - ; CHECK-NEXT: STURHHi $wzr, $x0, 6 :: (store (s16)) + ; CHECK: STRWui $wzr, $x0, 1 :: (store (s16)) ; CHECK-NEXT: RET undef $lr STRHHui $wzr, $x0, 2 :: (store (s16)) STURHHi $wzr, $x0, 6 :: (store (s16)) @@ -87,14 +127,56 @@ name: merge_unscaled_str_with_scaled_16 body: | bb.0.entry: ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_16 - ; CHECK: STURHHi $wzr, $x0, 4 :: (store (s16)) - ; CHECK-NEXT: STRHHui $wzr, $x0, 3 :: (store (s16)) + ; CHECK: STURWi $wzr, $x0, 4 :: (store (s16)) ; CHECK-NEXT: RET undef $lr STURHHi $wzr, $x0, 4 :: (store (s16)) STRHHui $wzr, $x0, 3 :: (store (s16)) RET undef $lr ... --- +name: merge_unscaled_str_with_scaled_16_lower_address_second +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_16_lower_address_second + ; CHECK: STURWi $wzr, $x0, 2 :: (store (s16)) + ; CHECK-NEXT: RET undef $lr + STURHHi $wzr, $x0, 4 :: (store (s16)) + STRHHui $wzr, $x0, 1 :: (store (s16)) + RET undef $lr +... 
+--- +name: merge_scaled_str_with_unscaled_16_lower_address_second +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_16_lower_address_second + ; CHECK: STRWui $wzr, $x0, 0 :: (store (s16)) + ; CHECK-NEXT: RET undef $lr + STRHHui $wzr, $x0, 1 :: (store (s16)) + STURHHi $wzr, $x0, 0 :: (store (s16)) + RET undef $lr +... +--- +name: merge_unscaled_str_with_scaled_16_limits +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_unscaled_str_with_scaled_16_limits + ; CHECK: STURWi $wzr, $x0, 254 :: (store (s16)) + ; CHECK-NEXT: RET undef $lr + STURHHi $wzr, $x0, 254 :: (store (s16)) + STRHHui $wzr, $x0, 128 :: (store (s16)) + RET undef $lr +--- +--- +name: merge_scaled_str_with_unscaled_16_limits +body: | + bb.0.entry: + ; CHECK-LABEL: name: merge_scaled_str_with_unscaled_16_limits + ; CHECK: STRWui $wzr, $x0, 63 :: (store (s16)) + ; CHECK-NEXT: RET undef $lr + STRHHui $wzr, $x0, 126 :: (store (s16)) + STURHHi $wzr, $x0, 254 :: (store (s16)) + RET undef $lr +--- name: merge_unscaled_str_with_unscaled_32 body: | bb.0.entry: