diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 9a7cc283b5c15..d66a8a896bae4 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -570,6 +570,18 @@ def FeatureD128 : SubtargetFeature<"d128", "HasD128",
     "and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)",
     [FeatureLSE128]>;
 
+def FeatureDisableLdp : SubtargetFeature<"disable-ldp", "HasDisableLdp",
+                                         "true", "Do not emit ldp">;
+
+def FeatureDisableStp : SubtargetFeature<"disable-stp", "HasDisableStp",
+                                         "true", "Do not emit stp">;
+
+def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedOnly",
+    "true", "Emit ldp only when the load is aligned to at least 2 * element_size">;
+
+def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly",
+    "true", "Emit stp only when the store is aligned to at least 2 * element_size">;
+
 //===----------------------------------------------------------------------===//
 // Architectures.
 //===----------------------------------------------------------------------===//
@@ -1239,7 +1251,9 @@ def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
                                    FeatureArithmeticBccFusion,
                                    FeatureCmpBccFusion,
                                    FeatureFuseAddress,
-                                   FeatureFuseLiterals]>;
+                                   FeatureFuseLiterals,
+                                   FeatureLdpAlignedOnly,
+                                   FeatureStpAlignedOnly]>;
 
 def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A",
                                     "Ampere Computing Ampere-1A processors", [
@@ -1252,7 +1266,8 @@ def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A",
                                     FeatureCmpBccFusion,
                                     FeatureFuseAddress,
                                     FeatureFuseLiterals,
-                                    FeatureFuseLiterals]>;
+                                    FeatureLdpAlignedOnly,
+                                    FeatureStpAlignedOnly]>;
 
 def ProcessorFeatures {
   list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 41af5522d967d..dc2965178bc28 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -2136,6 +2136,14 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   if (!TII->isCandidateToMergeOrPair(MI))
     return false;
 
+  // If the disable-ldp feature is set, do not emit ldp.
+  if (MI.mayLoad() && Subtarget->hasDisableLdp())
+    return false;
+
+  // If the disable-stp feature is set, do not emit stp.
+  if (MI.mayStore() && Subtarget->hasDisableStp())
+    return false;
+
   // Early exit if the offset is not possible to match. (6 bits of positive
   // range, plus allow an extra one in case we find a later insn that matches
   // with Offset-1)
@@ -2159,6 +2167,31 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
   // Keeping the iterator straight is a pain, so we let the merge routine tell
   // us what the next instruction is after it's done mucking about.
   auto Prev = std::prev(MBBI);
+
+  // Fetch the memoperand of the load/store that is a candidate for
+  // combination.
+  MachineMemOperand *MemOp =
+      MI.memoperands_empty() ? nullptr : MI.memoperands().front();
+
+  // Get the alignments to check when the ldp-aligned-only or
+  // stp-aligned-only features are set.
+  uint64_t MemAlignment = MemOp ? MemOp->getAlign().value() : -1;
+  uint64_t TypeAlignment = MemOp ? Align(MemOp->getSize()).value() : -1;
+
+  // If this is a load and the ldp-aligned-only feature is set, only form an
+  // ldp when the alignment of the source pointer is at least twice the
+  // access size.
+  if (MI.mayLoad() && Subtarget->hasLdpAlignedOnly() && MemOp &&
+      MemAlignment < 2 * TypeAlignment)
+    return false;
+
+  // If this is a store and the stp-aligned-only feature is set, only form an
+  // stp when the alignment of the destination pointer is at least twice the
+  // access size.
+  if (MI.mayStore() && Subtarget->hasStpAlignedOnly() && MemOp &&
+      MemAlignment < 2 * TypeAlignment)
+    return false;
+
   MBBI = mergePairedInsns(MBBI, Paired, Flags);
   // Collect liveness info for instructions between Prev and the new position
   // MBBI.
diff --git a/llvm/test/CodeGen/AArch64/ldp-stp-control-features.ll b/llvm/test/CodeGen/AArch64/ldp-stp-control-features.ll
new file mode 100644
index 0000000000000..838df340b402e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ldp-stp-control-features.ll
@@ -0,0 +1,389 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1a | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -O2 -mtriple=aarch64 | FileCheck %s --check-prefixes=CHECK-DEFAULT
+; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1 -mattr=+disable-ldp | FileCheck %s --check-prefixes=CHECK-DISABLE-LDP
+; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1 -mattr=+disable-stp | FileCheck %s --check-prefixes=CHECK-DISABLE-STP
+; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1a -mattr=+disable-ldp | FileCheck %s --check-prefixes=CHECK-DISABLE-LDP
+; RUN: llc < %s -O2 -mtriple=aarch64 -mcpu=ampere1a -mattr=+disable-stp | FileCheck %s --check-prefixes=CHECK-DISABLE-STP
+
+define i32 @ldp_aligned_int32_t(ptr %0) #0 {
+; CHECK-LABEL: ldp_aligned_int32_t:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xffffffffffffffc0
+; CHECK-NEXT:    ldp w9, w8, [x8]
+; CHECK-NEXT:    add w0, w8, w9
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: ldp_aligned_int32_t:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffffc0
+; CHECK-DEFAULT-NEXT:    ldp w9, w8, [x8]
+; CHECK-DEFAULT-NEXT:    add w0, w8, w9
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-LDP-LABEL: ldp_aligned_int32_t:
+; CHECK-DISABLE-LDP:       // %bb.0:
+; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffffc0
+; CHECK-DISABLE-LDP-NEXT:    ldr w9, [x8]
+; CHECK-DISABLE-LDP-NEXT:    ldr w8, [x8, #4]
+; CHECK-DISABLE-LDP-NEXT:    add w0, w8, w9
+; CHECK-DISABLE-LDP-NEXT:    ret
+  %2 = ptrtoint ptr %0 to i64
+  %3 = and i64 %2, -64
+  %4 = inttoptr i64 %3 to ptr
+  %5 = load i32, ptr %4, align 64
+  %6 = getelementptr inbounds i32, ptr %4, i64 1
+  %7 = load i32, ptr %6, align 4
+  %8 = add nsw i32 %7, %5
+  ret i32 %8
+}
+
+define i64 @ldp_aligned_int64_t(ptr %0) #0 {
+; CHECK-LABEL: ldp_aligned_int64_t:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xffffffffffffff80
+; CHECK-NEXT:    ldp x9, x8, [x8]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: ldp_aligned_int64_t:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff80
+; CHECK-DEFAULT-NEXT:    ldp x9, x8, [x8]
+; CHECK-DEFAULT-NEXT:    add x0, x8, x9
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-LDP-LABEL: ldp_aligned_int64_t:
+; CHECK-DISABLE-LDP:       // %bb.0:
+; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffff80
+; CHECK-DISABLE-LDP-NEXT:    ldr x9, [x8]
+; CHECK-DISABLE-LDP-NEXT:    ldr x8, [x8, #8]
+; CHECK-DISABLE-LDP-NEXT:    add x0, x8, x9
+; CHECK-DISABLE-LDP-NEXT:    ret
+  %2 = ptrtoint ptr %0 to i64
+  %3 = and i64 %2, -128
+  %4 = inttoptr i64 %3 to ptr
+  %5 = load i64, ptr %4, align 128
+  %6 = getelementptr inbounds i64, ptr %4, i64 1
+  %7 = load i64, ptr %6, align 8
+  %8 = add nsw i64 %7, %5
+  ret i64 %8
+}
+
+define <4 x i32> @ldp_aligned_v4si(ptr %0) #0 {
+; CHECK-LABEL: ldp_aligned_v4si:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xffffffffffffff00
+; CHECK-NEXT:    ldp q0, q1, [x8]
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: ldp_aligned_v4si:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff00
+; CHECK-DEFAULT-NEXT:    ldp q0, q1, [x8]
+; CHECK-DEFAULT-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-LDP-LABEL: ldp_aligned_v4si:
+; CHECK-DISABLE-LDP:       // %bb.0:
+; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffff00
+; CHECK-DISABLE-LDP-NEXT:    ldr q0, [x8]
+; CHECK-DISABLE-LDP-NEXT:    ldr q1, [x8, #16]
+; CHECK-DISABLE-LDP-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-DISABLE-LDP-NEXT:    ret
+  %2 = ptrtoint ptr %0 to i64
+  %3 = and i64 %2, -256
+  %4 = inttoptr i64 %3 to ptr
+  %5 = load <4 x i32>, ptr %4, align 256
+  %6 = getelementptr inbounds <4 x i32>, ptr %4, i64 1
+  %7 = load <4 x i32>, ptr %6, align 16
+  %8 = add <4 x i32> %7, %5
+  ret <4 x i32> %8
+}
+
+define i32 @ldp_unaligned_int32_t(ptr %0) #0 {
+; CHECK-LABEL: ldp_unaligned_int32_t:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xffffffffffffffc0
+; CHECK-NEXT:    ldr w9, [x8, #4]
+; CHECK-NEXT:    ldr w8, [x8, #8]
+; CHECK-NEXT:    add w0, w8, w9
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: ldp_unaligned_int32_t:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffffc0
+; CHECK-DEFAULT-NEXT:    ldp w9, w8, [x8, #4]
+; CHECK-DEFAULT-NEXT:    add w0, w8, w9
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-LDP-LABEL: ldp_unaligned_int32_t:
+; CHECK-DISABLE-LDP:       // %bb.0:
+; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffffc0
+; CHECK-DISABLE-LDP-NEXT:    ldr w9, [x8, #4]
+; CHECK-DISABLE-LDP-NEXT:    ldr w8, [x8, #8]
+; CHECK-DISABLE-LDP-NEXT:    add w0, w8, w9
+; CHECK-DISABLE-LDP-NEXT:    ret
+  %2 = ptrtoint ptr %0 to i64
+  %3 = and i64 %2, -64
+  %4 = inttoptr i64 %3 to ptr
+  %5 = getelementptr inbounds i32, ptr %4, i64 1
+  %6 = load i32, ptr %5, align 4
+  %7 = getelementptr inbounds i32, ptr %4, i64 2
+  %8 = load i32, ptr %7, align 8
+  %9 = add nsw i32 %8, %6
+  ret i32 %9
+}
+
+define i64 @ldp_unaligned_int64_t(ptr %0) #0 {
+; CHECK-LABEL: ldp_unaligned_int64_t:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xffffffffffffff80
+; CHECK-NEXT:    ldr x9, [x8, #8]
+; CHECK-NEXT:    ldr x8, [x8, #16]
+; CHECK-NEXT:    add x0, x8, x9
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: ldp_unaligned_int64_t:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff80
+; CHECK-DEFAULT-NEXT:    ldp x9, x8, [x8, #8]
+; CHECK-DEFAULT-NEXT:    add x0, x8, x9
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-LDP-LABEL: ldp_unaligned_int64_t:
+; CHECK-DISABLE-LDP:       // %bb.0:
+; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffff80
+; CHECK-DISABLE-LDP-NEXT:    ldr x9, [x8, #8]
+; CHECK-DISABLE-LDP-NEXT:    ldr x8, [x8, #16]
+; CHECK-DISABLE-LDP-NEXT:    add x0, x8, x9
+; CHECK-DISABLE-LDP-NEXT:    ret
+  %2 = ptrtoint ptr %0 to i64
+  %3 = and i64 %2, -128
+  %4 = inttoptr i64 %3 to ptr
+  %5 = getelementptr inbounds i64, ptr %4, i64 1
+  %6 = load i64, ptr %5, align 8
+  %7 = getelementptr inbounds i64, ptr %4, i64 2
+  %8 = load i64, ptr %7, align 16
+  %9 = add nsw i64 %8, %6
+  ret i64 %9
+}
+
+define <4 x i32> @ldp_unaligned_v4si(ptr %0) #0 {
+; CHECK-LABEL: ldp_unaligned_v4si:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xffffffffffffff00
+; CHECK-NEXT:    ldr q0, [x8, #16]
+; CHECK-NEXT:    ldr q1, [x8, #32]
+; CHECK-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: ldp_unaligned_v4si:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff00
+; CHECK-DEFAULT-NEXT:    ldp q0, q1, [x8, #16]
+; CHECK-DEFAULT-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-LDP-LABEL: ldp_unaligned_v4si:
+; CHECK-DISABLE-LDP:       // %bb.0:
+; CHECK-DISABLE-LDP-NEXT:    and x8, x0, #0xffffffffffffff00
+; CHECK-DISABLE-LDP-NEXT:    ldr q0, [x8, #16]
+; CHECK-DISABLE-LDP-NEXT:    ldr q1, [x8, #32]
+; CHECK-DISABLE-LDP-NEXT:    add v0.4s, v1.4s, v0.4s
+; CHECK-DISABLE-LDP-NEXT:    ret
+  %2 = ptrtoint ptr %0 to i64
+  %3 = and i64 %2, -256
+  %4 = inttoptr i64 %3 to ptr
+  %5 = getelementptr inbounds <4 x i32>, ptr %4, i64 1
+  %6 = load <4 x i32>, ptr %5, align 16
+  %7 = getelementptr inbounds <4 x i32>, ptr %4, i64 2
+  %8 = load <4 x i32>, ptr %7, align 32
+  %9 = add <4 x i32> %8, %6
+  ret <4 x i32> %9
+}
+
+define ptr @stp_aligned_int32_t(ptr %0, i32 %1) #0 {
+; CHECK-LABEL: stp_aligned_int32_t:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x0, x0, #0xffffffffffffffc0
+; CHECK-NEXT:    stp w1, w1, [x0]
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: stp_aligned_int32_t:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x0, x0, #0xffffffffffffffc0
+; CHECK-DEFAULT-NEXT:    stp w1, w1, [x0]
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-STP-LABEL: stp_aligned_int32_t:
+; CHECK-DISABLE-STP:       // %bb.0:
+; CHECK-DISABLE-STP-NEXT:    and x0, x0, #0xffffffffffffffc0
+; CHECK-DISABLE-STP-NEXT:    str w1, [x0]
+; CHECK-DISABLE-STP-NEXT:    str w1, [x0, #4]
+; CHECK-DISABLE-STP-NEXT:    ret
+  %3 = ptrtoint ptr %0 to i64
+  %4 = and i64 %3, -64
+  %5 = inttoptr i64 %4 to ptr
+  store i32 %1, ptr %5, align 64
+  %6 = getelementptr inbounds i32, ptr %5, i64 1
+  store i32 %1, ptr %6, align 4
+  ret ptr %5
+}
+
+define dso_local ptr @stp_aligned_int64_t(ptr %0, i64 %1) #0 {
+; CHECK-LABEL: stp_aligned_int64_t:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x0, x0, #0xffffffffffffff80
+; CHECK-NEXT:    stp x1, x1, [x0]
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: stp_aligned_int64_t:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x0, x0, #0xffffffffffffff80
+; CHECK-DEFAULT-NEXT:    stp x1, x1, [x0]
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-STP-LABEL: stp_aligned_int64_t:
+; CHECK-DISABLE-STP:       // %bb.0:
+; CHECK-DISABLE-STP-NEXT:    and x0, x0, #0xffffffffffffff80
+; CHECK-DISABLE-STP-NEXT:    str x1, [x0]
+; CHECK-DISABLE-STP-NEXT:    str x1, [x0, #8]
+; CHECK-DISABLE-STP-NEXT:    ret
+  %3 = ptrtoint ptr %0 to i64
+  %4 = and i64 %3, -128
+  %5 = inttoptr i64 %4 to ptr
+  store i64 %1, ptr %5, align 128
+  %6 = getelementptr inbounds i64, ptr %5, i64 1
+  store i64 %1, ptr %6, align 8
+  ret ptr %5
+}
+
+define ptr @stp_aligned_v4si(ptr %0, <4 x i32> %1) #0 {
+; CHECK-LABEL: stp_aligned_v4si:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x0, x0, #0xffffffffffffff00
+; CHECK-NEXT:    stp q0, q0, [x0]
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: stp_aligned_v4si:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x0, x0, #0xffffffffffffff00
+; CHECK-DEFAULT-NEXT:    stp q0, q0, [x0]
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-STP-LABEL: stp_aligned_v4si:
+; CHECK-DISABLE-STP:       // %bb.0:
+; CHECK-DISABLE-STP-NEXT:    and x0, x0, #0xffffffffffffff00
+; CHECK-DISABLE-STP-NEXT:    str q0, [x0]
+; CHECK-DISABLE-STP-NEXT:    str q0, [x0, #16]
+; CHECK-DISABLE-STP-NEXT:    ret
+  %3 = ptrtoint ptr %0 to i64
+  %4 = and i64 %3, -256
+  %5 = inttoptr i64 %4 to ptr
+  store <4 x i32> %1, ptr %5, align 256
+  %6 = getelementptr inbounds <4 x i32>, ptr %5, i64 1
+  store <4 x i32> %1, ptr %6, align 16
+  ret ptr %5
+}
+
+define ptr @stp_unaligned_int32_t(ptr %0, i32 %1) #0 {
+; CHECK-LABEL: stp_unaligned_int32_t:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xffffffffffffffc0
+; CHECK-NEXT:    orr x0, x8, #0x4
+; CHECK-NEXT:    str w1, [x8, #4]
+; CHECK-NEXT:    str w1, [x8, #8]
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: stp_unaligned_int32_t:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffffc0
+; CHECK-DEFAULT-NEXT:    orr x0, x8, #0x4
+; CHECK-DEFAULT-NEXT:    stp w1, w1, [x8, #4]
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-STP-LABEL: stp_unaligned_int32_t:
+; CHECK-DISABLE-STP:       // %bb.0:
+; CHECK-DISABLE-STP-NEXT:    and x8, x0, #0xffffffffffffffc0
+; CHECK-DISABLE-STP-NEXT:    orr x0, x8, #0x4
+; CHECK-DISABLE-STP-NEXT:    str w1, [x8, #4]
+; CHECK-DISABLE-STP-NEXT:    str w1, [x8, #8]
+; CHECK-DISABLE-STP-NEXT:    ret
+  %3 = ptrtoint ptr %0 to i64
+  %4 = and i64 %3, -64
+  %5 = inttoptr i64 %4 to ptr
+  %6 = getelementptr inbounds i32, ptr %5, i64 1
+  store i32 %1, ptr %6, align 4
+  %7 = getelementptr inbounds i32, ptr %5, i64 2
+  store i32 %1, ptr %7, align 8
+  ret ptr %6
+}
+
+define ptr @stp_unaligned_int64_t(ptr %0, i64 %1) #0 {
+; CHECK-LABEL: stp_unaligned_int64_t:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xffffffffffffff80
+; CHECK-NEXT:    orr x0, x8, #0x8
+; CHECK-NEXT:    str x1, [x8, #8]
+; CHECK-NEXT:    str x1, [x8, #16]
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: stp_unaligned_int64_t:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff80
+; CHECK-DEFAULT-NEXT:    orr x0, x8, #0x8
+; CHECK-DEFAULT-NEXT:    stp x1, x1, [x8, #8]
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-STP-LABEL: stp_unaligned_int64_t:
+; CHECK-DISABLE-STP:       // %bb.0:
+; CHECK-DISABLE-STP-NEXT:    and x8, x0, #0xffffffffffffff80
+; CHECK-DISABLE-STP-NEXT:    orr x0, x8, #0x8
+; CHECK-DISABLE-STP-NEXT:    str x1, [x8, #8]
+; CHECK-DISABLE-STP-NEXT:    str x1, [x8, #16]
+; CHECK-DISABLE-STP-NEXT:    ret
+  %3 = ptrtoint ptr %0 to i64
+  %4 = and i64 %3, -128
+  %5 = inttoptr i64 %4 to ptr
+  %6 = getelementptr inbounds i64, ptr %5, i64 1
+  store i64 %1, ptr %6, align 8
+  %7 = getelementptr inbounds i64, ptr %5, i64 2
+  store i64 %1, ptr %7, align 16
+  ret ptr %6
+}
+
+define ptr @stp_unaligned_v4si(ptr %0, <4 x i32> %1) #0 {
+; CHECK-LABEL: stp_unaligned_v4si:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and x8, x0, #0xffffffffffffff00
+; CHECK-NEXT:    orr x0, x8, #0x10
+; CHECK-NEXT:    str q0, [x8, #16]
+; CHECK-NEXT:    str q0, [x8, #32]
+; CHECK-NEXT:    ret
+;
+; CHECK-DEFAULT-LABEL: stp_unaligned_v4si:
+; CHECK-DEFAULT:       // %bb.0:
+; CHECK-DEFAULT-NEXT:    and x8, x0, #0xffffffffffffff00
+; CHECK-DEFAULT-NEXT:    orr x0, x8, #0x10
+; CHECK-DEFAULT-NEXT:    stp q0, q0, [x8, #16]
+; CHECK-DEFAULT-NEXT:    ret
+;
+; CHECK-DISABLE-STP-LABEL: stp_unaligned_v4si:
+; CHECK-DISABLE-STP:       // %bb.0:
+; CHECK-DISABLE-STP-NEXT:    and x8, x0, #0xffffffffffffff00
+; CHECK-DISABLE-STP-NEXT:    orr x0, x8, #0x10
+; CHECK-DISABLE-STP-NEXT:    str q0, [x8, #16]
+; CHECK-DISABLE-STP-NEXT:    str q0, [x8, #32]
+; CHECK-DISABLE-STP-NEXT:    ret
+  %3 = ptrtoint ptr %0 to i64
+  %4 = and i64 %3, -256
+  %5 = inttoptr i64 %4 to ptr
+  %6 = getelementptr inbounds <4 x i32>, ptr %5, i64 1
+  store <4 x i32> %1, ptr %6, align 16
+  %7 = getelementptr inbounds <4 x i32>, ptr %5, i64 2
+  store <4 x i32> %1, ptr %7, align 32
+  ret ptr %6
+}
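
Reviewer note: the four feature bits added above reduce to a single predicate over the candidate's first memoperand. Below is a minimal standalone C++ sketch of that predicate for experimenting with the policy outside the pass; mayPair, PairingFeatures, and the main driver are hypothetical names introduced for illustration, and plain byte counts stand in for llvm::Align and MachineMemOperand::getSize().

// Standalone sketch (not part of the patch) of the pairing gate that
// tryToPairLdStInst applies with the new subtarget features.
#include <cstdint>
#include <cstdio>

struct PairingFeatures {
  bool DisableLdp = false;     // +disable-ldp
  bool DisableStp = false;     // +disable-stp
  bool LdpAlignedOnly = false; // +ldp-aligned-only
  bool StpAlignedOnly = false; // +stp-aligned-only
};

// Returns true when a load/store of SizeBytes at AlignBytes alignment may
// still be merged into an ldp/stp under the given feature set.
bool mayPair(bool IsLoad, uint64_t AlignBytes, uint64_t SizeBytes,
             const PairingFeatures &F) {
  if (IsLoad && F.DisableLdp)
    return false;
  if (!IsLoad && F.DisableStp)
    return false;
  // The *-aligned-only features require the pointer to be aligned to at
  // least twice the access size (mirrors MemAlignment < 2 * TypeAlignment).
  if (IsLoad && F.LdpAlignedOnly && AlignBytes < 2 * SizeBytes)
    return false;
  if (!IsLoad && F.StpAlignedOnly && AlignBytes < 2 * SizeBytes)
    return false;
  return true;
}

int main() {
  PairingFeatures Ampere1; // the ampere1/ampere1a tuning in this patch
  Ampere1.LdpAlignedOnly = Ampere1.StpAlignedOnly = true;
  // A 4-byte load from a 64-byte-aligned pointer may pair (64 >= 2 * 4)...
  std::printf("%d\n", mayPair(true, 64, 4, Ampere1)); // prints 1
  // ...but a 4-byte load from a 4-byte-aligned pointer may not.
  std::printf("%d\n", mayPair(true, 4, 4, Ampere1)); // prints 0
}

With the ampere1/ampere1a tuning in this patch only the two aligned-only bits are set, so ldp/stp still form whenever the pointer is known to be aligned to twice the access width, which is exactly the split the ldp_aligned_*/ldp_unaligned_* and stp_aligned_*/stp_unaligned_* tests exercise.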