diff --git a/cachelib/allocator/CacheAllocator-inl.h b/cachelib/allocator/CacheAllocator-inl.h index 268f318e4b..c491a056bc 100644 --- a/cachelib/allocator/CacheAllocator-inl.h +++ b/cachelib/allocator/CacheAllocator-inl.h @@ -529,6 +529,18 @@ template typename CacheAllocator::WriteHandle CacheAllocator::allocateChainedItemInternal( const ReadHandle& parent, uint32_t size) { + // TODO: consult an admission policy to choose the starting tier + for (TierId tid = 0; tid < getNumTiers(); ++tid) { + auto handle = allocateChainedItemInternalTier(*parent, size, tid); + if (handle) return handle; + } + return {}; +} + +template +typename CacheAllocator::WriteHandle +CacheAllocator::allocateChainedItemInternalTier( + const Item& parent, uint32_t size, TierId tid) { util::LatencyTracker tracker{stats().allocateLatency_}; SCOPE_FAIL { stats_.invalidAllocs.inc(); }; @@ -536,11 +548,9 @@ CacheAllocator::allocateChainedItemInternal( // number of bytes required for this item const auto requiredSize = ChainedItem::getRequiredSize(size); - // TODO: is this correct? - auto tid = getTierId(*parent); - - const auto pid = allocator_[tid]->getAllocInfo(parent->getMemory()).poolId; - const auto cid = allocator_[tid]->getAllocationClassId(pid, requiredSize); + const auto ptid = getTierId(parent); // using the parent's tier id is okay because pools/classes are duplicated across the tiers + const auto pid = allocator_[ptid]->getAllocInfo(parent.getMemory()).poolId; + const auto cid = allocator_[ptid]->getAllocationClassId(pid, requiredSize); util::RollingLatencyTracker rollTracker{ (*stats_.classAllocLatency)[tid][pid][cid]}; @@ -559,7 +569,7 @@ CacheAllocator::allocateChainedItemInternal( SCOPE_FAIL { allocator_[tid]->free(memory); }; auto child = acquire( - new (memory) ChainedItem(compressor_.compress(parent.getInternal()), size, + new (memory) ChainedItem(compressor_.compress(&parent), size, util::getCurrentTimeSec())); if (child) { @@ -603,7 +613,7 @@ void CacheAllocator::addChainedItem(WriteHandle& parent, // Increment refcount since this chained item is now owned by the parent // Parent will decrement the refcount upon release. Since this is an // internal refcount, we dont include it in active handle tracking. - auto ret = child->incRef(true); + auto ret = child->incRef(); XDCHECK(ret == RefcountWithFlags::incResult::incOk); XDCHECK_EQ(2u, child->getRefCount()); @@ -672,22 +682,20 @@ CacheAllocator::getParentKey(const Item& chainedItem) { } template -void CacheAllocator::transferChainLocked(WriteHandle& parent, +void CacheAllocator::transferChainLocked(Item& parent, WriteHandle& newParent) { // parent must be in a state to not have concurrent readers. Eviction code - // paths rely on holding the last item handle. Since we hold on to an item - // handle here, the chain will not be touched by any eviction code path. - XDCHECK(parent); + // paths rely on holding the last item handle. XDCHECK(newParent); - XDCHECK_EQ(parent->getKey(), newParent->getKey()); - XDCHECK(parent->hasChainedItem()); + XDCHECK_EQ(parent.getKey(), newParent->getKey()); + XDCHECK(parent.hasChainedItem()); if (newParent->hasChainedItem()) { throw std::invalid_argument(folly::sformat( "New Parent {} has invalid state", newParent->toString())); } - auto headHandle = findChainedItem(*parent); + auto headHandle = findChainedItem(parent); XDCHECK(headHandle); // remove from the access container since we are changing the key @@ -699,6 +707,7 @@ void CacheAllocator::transferChainLocked(WriteHandle& parent, while (curr) { XDCHECK_EQ(curr == headHandle.get() ? 
2u : 1u, curr->getRefCount()); XDCHECK(curr->isInMMContainer()); + XDCHECK(!newParent->isMoving()); curr->changeKey(newParentPtr); curr = curr->getNext(compressor_); } @@ -710,7 +719,7 @@ void CacheAllocator::transferChainLocked(WriteHandle& parent, folly::sformat("Did not expect to find an existing chain for {}", newParent->toString(), oldHead->toString())); } - parent->unmarkHasChainedItem(); + parent.unmarkHasChainedItem(); } template @@ -721,7 +730,7 @@ void CacheAllocator::transferChainAndReplace( } { // scope for chained item lock auto l = chainedItemLocks_.lockExclusive(parent->getKey()); - transferChainLocked(parent, newParent); + transferChainLocked(*parent, newParent); } if (replaceIfAccessible(*parent, *newParent)) { @@ -788,33 +797,10 @@ CacheAllocator::replaceChainedItem(Item& oldItem, } template -typename CacheAllocator::WriteHandle -CacheAllocator::replaceChainedItemLocked(Item& oldItem, - WriteHandle newItemHdl, - const Item& parent) { - XDCHECK(newItemHdl != nullptr); - XDCHECK_GE(1u, oldItem.getRefCount()); - - // grab the handle to the old item so that we can return this. Also, we need - // to drop the refcount the parent holds on oldItem by manually calling - // decRef. To do that safely we need to have a proper outstanding handle. - auto oldItemHdl = acquire(&oldItem); - - // Replace the old chained item with new item in the MMContainer before we - // actually replace the old item in the chain - - if (!replaceChainedItemInMMContainer(oldItem, *newItemHdl)) { - // This should never happen since we currently hold an valid - // parent handle. None of its chained items can be removed - throw std::runtime_error(folly::sformat( - "chained item cannot be replaced in MM container, oldItem={}, " - "newItem={}, parent={}", - oldItem.toString(), newItemHdl->toString(), parent.toString())); - } - - XDCHECK(!oldItem.isInMMContainer()); - XDCHECK(newItemHdl->isInMMContainer()); - +void CacheAllocator::replaceInChainLocked(Item& oldItem, + WriteHandle& newItemHdl, + const Item& parent, + bool fromMove) { auto head = findChainedItem(parent); XDCHECK(head != nullptr); XDCHECK_EQ(reinterpret_cast( @@ -843,17 +829,62 @@ CacheAllocator::replaceChainedItemLocked(Item& oldItem, oldItem.asChainedItem().getNext(compressor_), compressor_); oldItem.asChainedItem().setNext(nullptr, compressor_); - // this should not result in 0 refcount. We are bumping down the internal - // refcount. If it did, we would leak an item. - oldItem.decRef(); - XDCHECK_LT(0u, oldItem.getRefCount()) << oldItem.toString(); + // If called from moveChainedItem the ref count will be zero; otherwise it + // is greater than 0. + if (fromMove) { + // If this is the head chained item, release the handle now + // while refCount > 1 so that the destructor does not + // call releaseBackToAllocator, since we want to recycle oldItem. + if (head) { + head.reset(); + XDCHECK_EQ(1u, oldItem.getRefCount()); + } + oldItem.decRef(); + XDCHECK_EQ(0u, oldItem.getRefCount()) << oldItem.toString(); + } else { + oldItem.decRef(); + XDCHECK_LT(0u, oldItem.getRefCount()) << oldItem.toString(); + } // increment refcount to indicate parent owns this similar to addChainedItem // Since this is an internal refcount, we dont include it in active handle // tracking. 
- auto ret = newItemHdl->incRef(true); + auto ret = newItemHdl->incRef(); XDCHECK(ret == RefcountWithFlags::incResult::incOk); +} + +template +typename CacheAllocator::WriteHandle +CacheAllocator::replaceChainedItemLocked(Item& oldItem, + WriteHandle newItemHdl, + const Item& parent) { + XDCHECK(newItemHdl != nullptr); + XDCHECK_GE(1u, oldItem.getRefCount()); + + // grab the handle to the old item so that we can return this. Also, we need + // to drop the refcount the parent holds on oldItem by manually calling + // decRef. To do that safely we need to have a proper outstanding handle. + auto oldItemHdl = acquire(&oldItem); + XDCHECK_GE(2u, oldItem.getRefCount()); + + // Replace the old chained item with new item in the MMContainer before we + // actually replace the old item in the chain + + if (!replaceChainedItemInMMContainer(oldItem, *newItemHdl)) { + // This should never happen since we currently hold an valid + // parent handle. None of its chained items can be removed + throw std::runtime_error(folly::sformat( + "chained item cannot be replaced in MM container, oldItem={}, " + "newItem={}, parent={}", + oldItem.toString(), newItemHdl->toString(), parent.toString())); + } + + XDCHECK(!oldItem.isInMMContainer()); + XDCHECK(newItemHdl->isInMMContainer()); + + replaceInChainLocked(oldItem, newItemHdl, parent, false); + return oldItemHdl; } @@ -886,7 +917,10 @@ CacheAllocator::releaseBackToAllocator(Item& it, // memory for a chained item but has decided not to insert the chained item // to a parent item and instead drop the chained item handle. In this case, // we free the chained item directly without calling remove callback. - if (it.isChainedItem()) { + // + // Except if we are moving a chained item between tiers - + // then it == toRecycle and we will want the normal recycle path + if (it.isChainedItem() && &it != toRecycle) { if (toRecycle) { throw std::runtime_error( folly::sformat("Can not recycle a chained item {}, toRecyle", @@ -959,10 +993,10 @@ CacheAllocator::releaseBackToAllocator(Item& it, while (head) { auto next = head->getNext(compressor_); - + const auto ctid = getTierId(head); const auto childInfo = - allocator_[tid]->getAllocInfo(static_cast(head)); - (*stats_.fragmentationSize)[tid][childInfo.poolId][childInfo.classId].sub( + allocator_[ctid]->getAllocInfo(static_cast(head)); + (*stats_.fragmentationSize)[ctid][childInfo.poolId][childInfo.classId].sub( util::getFragmentation(*this, *head)); removeFromMMContainer(*head); @@ -983,22 +1017,21 @@ CacheAllocator::releaseBackToAllocator(Item& it, // If the item is already moving and we already decremented the // refcount, we don't need to free this item. We'll let the slab // release thread take care of that - if (!wasMoving) { - if (childRef != 0) { - throw std::runtime_error(folly::sformat( - "chained item refcount is not zero. We cannot proceed! " - "Ref: {}, Chained Item: {}", - childRef, head->toString())); - } + XDCHECK(!wasMoving); + if (childRef != 0) { + throw std::runtime_error(folly::sformat( + "chained item refcount is not zero. We cannot proceed! 
" Ref: {}, Chained Item: {}", + childRef, head->toString())); + } - // Item is not moving and refcount is 0, we can proceed to - // free it or recylce the memory - if (head == toRecycle) { - XDCHECK(ReleaseRes::kReleased != res); - res = ReleaseRes::kRecycled; - } else { - allocator_[tid]->free(head); - } + // Item is not moving and refcount is 0, we can proceed to + // free it or recycle the memory + if (head == toRecycle) { + XDCHECK(ReleaseRes::kReleased != res); + res = ReleaseRes::kRecycled; + } else { + allocator_[ctid]->free(head); } stats_.numChainedChildItems.dec(); @@ -1008,6 +1041,7 @@ CacheAllocator::releaseBackToAllocator(Item& it, } if (&it == toRecycle) { + XDCHECK_EQ(it.getRefCount(), 0u); XDCHECK(ReleaseRes::kReleased != res); res = ReleaseRes::kRecycled; } else { @@ -1019,8 +1053,8 @@ CacheAllocator::releaseBackToAllocator(Item& it, } template -RefcountWithFlags::incResult CacheAllocator::incRef(Item& it, bool failIfMoving) { - auto ret = it.incRef(failIfMoving); +RefcountWithFlags::incResult CacheAllocator::incRef(Item& it) { + auto ret = it.incRef(); if (ret == RefcountWithFlags::incResult::incOk) { ++handleCount_.tlStats(); } @@ -1044,11 +1078,8 @@ CacheAllocator::acquire(Item* it) { SCOPE_FAIL { stats_.numRefcountOverflow.inc(); }; - // TODO: do not block incRef for child items to avoid deadlock - const auto failIfMoving = getNumTiers() > 1 && !it->isChainedItem(); - while (true) { - auto incRes = incRef(*it, failIfMoving); + auto incRes = incRef(*it); if (LIKELY(incRes == RefcountWithFlags::incResult::incOk)) { return WriteHandle{it, *this}; } else if (incRes == RefcountWithFlags::incResult::incFailedEviction){ @@ -1220,7 +1251,6 @@ CacheAllocator::insertOrReplace(const WriteHandle& handle) { : std::unique_lock(); replaced = accessContainer_->insertOrReplace(*(handle.getInternal())); - if (replaced && replaced->isNvmClean() && !replaced->isNvmEvicted()) { // item is to be replaced and the destructor will be executed // upon memory released, mark it in nvm to avoid destructor @@ -1269,7 +1299,7 @@ CacheAllocator::insertOrReplace(const WriteHandle& handle) { /* Next two methods are used to asynchronously move Item between memory tiers. * * The thread, which moves Item, allocates new Item in the tier we are moving to - * and calls moveRegularItemWithSync() method. This method does the following: + * and calls moveRegularItem() method. This method does the following: * 1. Update the access container with the new item from the tier we are * moving to. This Item has moving flag set. * 2. Copy data from the old Item to the new one. @@ -1325,21 +1355,8 @@ size_t CacheAllocator::wakeUpWaitersLocked(folly::StringPiece key, } template -bool CacheAllocator::moveRegularItemWithSync( +bool CacheAllocator::moveRegularItem( Item& oldItem, WriteHandle& newItemHdl) { - //on function exit - the new item handle is no longer moving - //and other threads may access it - but in case where - //we failed to replace in access container we can give the - //new item back to the allocator - auto guard = folly::makeGuard([&]() { - auto ref = newItemHdl->unmarkMoving(); - if (UNLIKELY(ref == 0)) { - const auto res = - releaseBackToAllocator(*newItemHdl, RemoveContext::kNormal, false); - XDCHECK(res == ReleaseRes::kReleased); - } - }); - XDCHECK(oldItem.isMoving()); XDCHECK(!oldItem.isExpired()); // TODO: should we introduce new latency tracker. E.g. 
evictRegularLatency_ @@ -1354,38 +1371,6 @@ bool CacheAllocator::moveRegularItemWithSync( newItemHdl->markNvmClean(); } - // mark new item as moving to block readers until the data is copied - // (moveCb is called). Mark item in MMContainer temporarily (TODO: should - // we remove markMoving requirement for the item to be linked?) - newItemHdl->markInMMContainer(); - auto marked = newItemHdl->markMoving(false /* there is already a handle */); - newItemHdl->unmarkInMMContainer(); - XDCHECK(marked); - - auto predicate = [&](const Item& item){ - // we rely on moving flag being set (it should block all readers) - XDCHECK(item.getRefCount() == 0); - return true; - }; - - auto replaced = accessContainer_->replaceIf(oldItem, *newItemHdl, - predicate); - // another thread may have called insertOrReplace which could have - // marked this item as unaccessible causing the replaceIf - // in the access container to fail - in this case we want - // to abort the move since the item is no longer valid - if (!replaced) { - return false; - } - // what if another thread calls insertOrReplace now when - // the item is moving and already replaced in the hash table? - // 1. it succeeds in updating the hash table - so there is - // no guarentee that isAccessible() is true - // 2. it will then try to remove from MM container - // - this operation will wait for newItemHdl to - // be unmarkedMoving via the waitContext - // 3. replaced handle is returned and eventually drops - // ref to 0 and the item is recycled back to allocator. if (config_.moveCb) { // Execute the move callback. We cannot make any guarantees about the @@ -1406,17 +1391,24 @@ bool CacheAllocator::moveRegularItemWithSync( auto mmContainerAdded = newContainer.add(*newItemHdl); XDCHECK(mmContainerAdded); - // no one can add or remove chained items at this point + + auto predicate = [&](const Item& item){ + // we rely on moving flag being set (it should block all readers) + XDCHECK_EQ(item.getRefCount(),0); + XDCHECK(item.isMoving()); + return item.isMoving(); + }; + if (oldItem.hasChainedItem()) { - // safe to acquire handle for a moving Item - auto incRes = incRef(oldItem, false); - XDCHECK(incRes == RefcountWithFlags::incResult::incOk); - auto oldHandle = WriteHandle{&oldItem,*this}; - XDCHECK_EQ(1u, oldHandle->getRefCount()) << oldHandle->toString(); XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString(); try { - auto l = chainedItemLocks_.lockExclusive(oldItem.getKey()); - transferChainLocked(oldHandle, newItemHdl); + auto l = chainedItemLocks_.tryLockExclusive(oldItem.getKey()); + if (l) { + transferChainLocked(oldItem, newItemHdl); + } else { + newContainer.remove(*newItemHdl); + return false; + } } catch (const std::exception& e) { // this should never happen because we drained all the handles. XLOGF(DFATAL, "{}", e.what()); @@ -1426,146 +1418,75 @@ bool CacheAllocator::moveRegularItemWithSync( XDCHECK(!oldItem.hasChainedItem()); XDCHECK(newItemHdl->hasChainedItem()); } - newItemHdl.unmarkNascent(); - return true; -} - -template -bool CacheAllocator::moveRegularItem(Item& oldItem, - WriteHandle& newItemHdl) { - XDCHECK(config_.moveCb); - util::LatencyTracker tracker{stats_.moveRegularLatency_}; - - if (!oldItem.isAccessible() || oldItem.isExpired()) { - return false; - } - - XDCHECK_EQ(newItemHdl->getSize(), oldItem.getSize()); - XDCHECK_EQ(reinterpret_cast(&getMMContainer(oldItem)), - reinterpret_cast(&getMMContainer(*newItemHdl))); - - // take care of the flags before we expose the item to be accessed. 
this - // will ensure that when another thread removes the item from RAM, we issue - // a delete accordingly. See D7859775 for an example - if (oldItem.isNvmClean()) { - newItemHdl->markNvmClean(); - } - - // Execute the move callback. We cannot make any guarantees about the - // consistency of the old item beyond this point, because the callback can - // do more than a simple memcpy() e.g. update external references. If there - // are any remaining handles to the old item, it is the caller's - // responsibility to invalidate them. The move can only fail after this - // statement if the old item has been removed or replaced, in which case it - // should be fine for it to be left in an inconsistent state. - config_.moveCb(oldItem, *newItemHdl, nullptr); - // Inside the access container's lock, this checks if the old item is - // accessible and its refcount is one. If the item is not accessible, - // there is no point to replace it since it had already been removed - // or in the process of being removed. If the item is in cache but the - // refcount is non-one, it means user could be attempting to remove - // this item through an API such as remove(itemHandle). In this case, - // it is unsafe to replace the old item with a new one, so we should - // also abort. - if (!accessContainer_->replaceIf(oldItem, *newItemHdl, - itemSlabMovePredicate)) { + if (!accessContainer_->replaceIf(oldItem, *newItemHdl, predicate)) { + newContainer.remove(*newItemHdl); return false; } - // Inside the MM container's lock, this checks if the old item exists to - // make sure that no other thread removed it, and only then replaces it. - if (!replaceInMMContainer(oldItem, *newItemHdl)) { - accessContainer_->remove(*newItemHdl); - return false; - } - - // Replacing into the MM container was successful, but someone could have - // called insertOrReplace() or remove() before or after the - // replaceInMMContainer() operation, which would invalidate newItemHdl. - if (!newItemHdl->isAccessible()) { - removeFromMMContainer(*newItemHdl); - return false; - } - - // no one can add or remove chained items at this point - if (oldItem.hasChainedItem()) { - auto oldItemHdl = acquire(&oldItem); - XDCHECK_EQ(1u, oldItemHdl->getRefCount()) << oldItemHdl->toString(); - XDCHECK(!newItemHdl->hasChainedItem()) << newItemHdl->toString(); - try { - auto l = chainedItemLocks_.lockExclusive(oldItem.getKey()); - transferChainLocked(oldItemHdl, newItemHdl); - } catch (const std::exception& e) { - // this should never happen because we drained all the handles. 
- XLOGF(DFATAL, "{}", e.what()); - throw; - } - - XDCHECK(!oldItem.hasChainedItem()); - XDCHECK(newItemHdl->hasChainedItem()); - } newItemHdl.unmarkNascent(); return true; } template bool CacheAllocator::moveChainedItem(ChainedItem& oldItem, - WriteHandle& newItemHdl) { - XDCHECK(config_.moveCb); + WriteHandle& newItemHdl, + Item& parentItem) { + XDCHECK(parentItem.isMoving()); util::LatencyTracker tracker{stats_.moveChainedLatency_}; - // This item has been unlinked from its parent and we're the only - // owner of it, so we're done here - if (!oldItem.isInMMContainer() || oldItem.isOnlyMoving()) { - return false; - } - auto& expectedParent = oldItem.getParentItem(compressor_); const auto parentKey = expectedParent.getKey(); - auto l = chainedItemLocks_.lockExclusive(parentKey); - - // verify old item under the lock - auto parentHandle = - validateAndGetParentHandleForChainedMoveLocked(oldItem, parentKey); - if (!parentHandle || &expectedParent != parentHandle.get()) { - return false; + auto l = chainedItemLocks_.tryLockExclusive(parentKey); + if (!l) { + return false; } + XDCHECK_EQ(&expectedParent,&parentItem); - // once we have the moving sync and valid parent for the old item, check if + // check if // the original allocation was made correctly. If not, we destroy the // allocation to indicate a retry to moving logic above. if (reinterpret_cast( &newItemHdl->asChainedItem().getParentItem(compressor_)) != - reinterpret_cast(&parentHandle->asChainedItem())) { - newItemHdl.reset(); + reinterpret_cast(&parentItem.asChainedItem())) { + XDCHECK(false); return false; } XDCHECK_EQ(reinterpret_cast( &newItemHdl->asChainedItem().getParentItem(compressor_)), - reinterpret_cast(&parentHandle->asChainedItem())); + reinterpret_cast(&parentItem.asChainedItem())); - // In case someone else had removed this chained item from its parent by now - // So we check again to see if the it has been unlinked from its parent - if (!oldItem.isInMMContainer() || oldItem.isOnlyMoving()) { - return false; - } - - auto parentPtr = parentHandle.getInternal(); + auto parentPtr = &parentItem; XDCHECK_EQ(reinterpret_cast(parentPtr), reinterpret_cast(&oldItem.getParentItem(compressor_))); - // Invoke the move callback to fix up any user data related to the chain - config_.moveCb(oldItem, *newItemHdl, parentPtr); + if (config_.moveCb) { + // Execute the move callback. We cannot make any guarantees about the + // consistency of the old item beyond this point, because the callback can + // do more than a simple memcpy() e.g. update external references. If there + // are any remaining handles to the old item, it is the caller's + // responsibility to invalidate them. The move can only fail after this + // statement if the old item has been removed or replaced, in which case it + // should be fine for it to be left in an inconsistent state. + config_.moveCb(oldItem, *newItemHdl, parentPtr); + } else { + std::memcpy(newItemHdl->getMemory(), oldItem.getMemory(), + oldItem.getSize()); + } // Replace the new item in the position of the old one before both in the // parent's chain and the MMContainer. 
- auto oldItemHandle = - replaceChainedItemLocked(oldItem, std::move(newItemHdl), *parentHandle); - XDCHECK(oldItemHandle->isMoving()); - XDCHECK(!oldItemHandle->isInMMContainer()); + XDCHECK_EQ(parentItem.getRefCount(),0); + XDCHECK(parentItem.isMoving()); + XDCHECK(l); + + auto& newContainer = getMMContainer(*newItemHdl); + auto mmContainerAdded = newContainer.add(*newItemHdl); + XDCHECK(mmContainerAdded); + + replaceInChainLocked(oldItem, newItemHdl, parentItem, true); return true; } @@ -1603,11 +1524,16 @@ CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { config_.evictionSearchTries > searchTries)) { Item* toRecycle = nullptr; + Item* toRecycleParent = nullptr; Item* candidate = nullptr; bool isExpired = false; + Item* syncItem = nullptr; typename NvmCacheT::PutToken token; + bool chainedItem = false; - mmContainer.withEvictionIterator([this, tid, pid, cid, &candidate, &toRecycle, + mmContainer.withEvictionIterator([this, tid, pid, cid, &candidate, + &toRecycle, &toRecycleParent, &syncItem, + &chainedItem, &searchTries, &mmContainer, &lastTier, &isExpired, &token](auto&& itr) { if (!itr) { @@ -1621,13 +1547,37 @@ CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { itr) { ++searchTries; (*stats_.evictionAttempts)[tid][pid][cid].inc(); - - auto* toRecycle_ = itr.get(); - auto* candidate_ = - toRecycle_->isChainedItem() + Item* toRecycle_ = itr.get(); + bool chainedItem_ = toRecycle_->isChainedItem(); + Item* toRecycleParent_ = chainedItem_ ? &toRecycle_->asChainedItem().getParentItem(compressor_) - : toRecycle_; - + : nullptr; + // in order to safely check if the expected parent (toRecycleParent_) matches + // the current parent on the chained item, we need to take the chained + // item lock so we are sure that nobody else will be editing the chain + auto l_ = chainedItem_ + ? 
chainedItemLocks_.tryLockExclusive(toRecycleParent_->getKey()) + : decltype(chainedItemLocks_.tryLockExclusive(toRecycle_->getKey()))(); + + if (chainedItem_ && + ( !l_ || &toRecycle_->asChainedItem().getParentItem(compressor_) + != toRecycleParent_) ) { + ++itr; + continue; + } + Item* candidate_; + Item* syncItem_; + //sync on the parent item for chained items to move to next tier + if (!lastTier && chainedItem_) { + syncItem_ = toRecycleParent_; + candidate_ = toRecycle_; + } else if (lastTier && chainedItem_) { + candidate_ = toRecycleParent_; + syncItem_ = toRecycleParent_; + } else { + candidate_ = toRecycle_; + syncItem_ = toRecycle_; + } // if it's last tier, the item will be evicted // need to create put token before marking it exclusive const bool evictToNvmCache = lastTier && shouldWriteToNvmCache(*candidate_); @@ -1638,10 +1588,12 @@ CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { if (evictToNvmCache && !token_.isValid()) { stats_.evictFailConcurrentFill.inc(); - } else if ( ((lastTier || candidate_->isExpired()) && - candidate_->markForEviction()) || - (!lastTier && candidate_->markMoving(true)) ) { - XDCHECK(candidate_->isMoving() || candidate_->isMarkedForEviction()); + } else if ( ((lastTier || candidate_->isExpired()) && + syncItem_->markForEviction()) || + (!lastTier && syncItem_->markMoving()) ) { + XDCHECK(syncItem_->isMoving() || syncItem_->isMarkedForEviction()); + toRecycleParent = toRecycleParent_; + chainedItem = chainedItem_; // markForEviction to make sure no other thead is evicting the item // nor holding a handle to that item if this is last tier // since we won't be moving the item to the next tier @@ -1649,16 +1601,11 @@ CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { candidate = candidate_; isExpired = candidate_->isExpired(); token = std::move(token_); - - // Check if parent changed for chained items - if yes, we cannot - // remove the child from the mmContainer as we will not be evicting - // it. We could abort right here, but we need to cleanup in case - // unmarkForEviction() returns 0 - so just go through normal path. - if (!toRecycle_->isChainedItem() || - &toRecycle->asChainedItem().getParentItem(compressor_) == - candidate) { - mmContainer.remove(itr); + if (chainedItem) { + XDCHECK(l_); + XDCHECK_EQ(toRecycleParent,&toRecycle_->asChainedItem().getParentItem(compressor_)); } + mmContainer.remove(itr); return; } else { if (candidate_->hasChainedItem()) { @@ -1666,11 +1613,11 @@ CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { } else { stats_.evictFailAC.inc(); } - } - ++itr; - XDCHECK(toRecycle == nullptr); - XDCHECK(candidate == nullptr); + ++itr; + XDCHECK_EQ(toRecycle,nullptr); + XDCHECK_EQ(candidate,nullptr); + } } }); @@ -1684,6 +1631,14 @@ CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { auto evictedToNext = (lastTier || isExpired) ? 
nullptr : tryEvictToNextMemoryTier(*candidate, false); if (!evictedToNext) { + // We failed to move a chained item, so evict the entire chain. + if (candidate->isChainedItem()) { + // candidate should become the parent now + XDCHECK(toRecycleParent->isMoving()); + XDCHECK_EQ(candidate, toRecycle); + candidate = toRecycleParent; // we now evict the chain and in + // doing so recycle the child + } //if insertOrReplace was called during move //then candidate will not be accessible (failed replace during tryEvict) // - therefore this was why we failed to @@ -1728,7 +1683,35 @@ CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { XDCHECK(candidate->getKey() == evictedToNext->getKey()); (*stats_.numWritebacks)[tid][pid][cid].inc(); - wakeUpWaiters(*candidate, std::move(evictedToNext)); + if (chainedItem) { + XDCHECK(toRecycleParent->isMoving()); + XDCHECK_EQ(evictedToNext->getRefCount(), 2u); + (*stats_.chainedItemEvictions)[tid][pid][cid].inc(); + // check if by releasing the item we intend to, we actually + // recycle the candidate. + auto ret = releaseBackToAllocator(*candidate, RemoveContext::kEviction, + /* isNascent */ false, toRecycle); + XDCHECK_EQ(ret, ReleaseRes::kRecycled); + evictedToNext.reset(); // once we unmark moving, other threads will try to + // alloc; drop the handle now so the refcount drops to 1 + auto ref = toRecycleParent->unmarkMoving(); + if (UNLIKELY(ref == 0)) { + wakeUpWaiters(*toRecycleParent, {}); + const auto res = + releaseBackToAllocator(*toRecycleParent, RemoveContext::kNormal, false); + XDCHECK(res == ReleaseRes::kReleased); + } else { + auto parentHandle = acquire(toRecycleParent); + if (parentHandle) { + wakeUpWaiters(*toRecycleParent, std::move(parentHandle)); + } // if the parent handle is null, some other thread has already + // called wakeUpWaiters with a null handle and released the + // parent back to the allocator + } + return toRecycle; + } else { + wakeUpWaiters(*candidate, std::move(evictedToNext)); + } } XDCHECK(!candidate->isMarkedForEviction() && !candidate->isMoving()); @@ -1748,6 +1731,7 @@ CacheAllocator::findEviction(TierId tid, PoolId pid, ClassId cid) { candidate->getConfiguredTTL().count()); } + XDCHECK(!candidate->isChainedItem()); // check if by releasing the item we intend to, we actually // recycle the candidate. 
auto ret = releaseBackToAllocator(*candidate, RemoveContext::kEviction, @@ -1811,9 +1795,6 @@ template typename CacheAllocator::WriteHandle CacheAllocator::tryEvictToNextMemoryTier( TierId tid, PoolId pid, Item& item, bool fromBgThread) { - XDCHECK(item.isMoving()); - XDCHECK(item.getRefCount() == 0); - if(item.hasChainedItem()) return WriteHandle{}; // TODO: We do not support ChainedItem yet TierId nextTier = tid; // TODO - calculate this based on some admission policy while (++nextTier < getNumTiers()) { // try to evict down to the next memory tiers @@ -1821,21 +1802,44 @@ CacheAllocator::tryEvictToNextMemoryTier( bool evict = true; // allocateInternal might trigger another eviction - auto newItemHdl = allocateInternalTier(nextTier, pid, + WriteHandle newItemHdl{}; + Item* parentItem = nullptr; + bool chainedItem = false; + if (item.isChainedItem()) { + chainedItem = true; + parentItem = &item.asChainedItem().getParentItem(compressor_); + XDCHECK(parentItem->isMoving()); + XDCHECK(item.isChainedItem() && item.getRefCount() == 1); + XDCHECK_EQ(0, parentItem->getRefCount()); + newItemHdl = allocateChainedItemInternalTier(*parentItem, + item.getSize(), + nextTier); + } else { + // these asserts can fail if the parent changed + XDCHECK(item.isMoving()); + XDCHECK(item.getRefCount() == 0); + newItemHdl = allocateInternalTier(nextTier, pid, item.getKey(), item.getSize(), item.getCreationTime(), item.getExpiryTime(), fromBgThread, evict); + } if (newItemHdl) { XDCHECK_EQ(newItemHdl->getSize(), item.getSize()); - if (!moveRegularItemWithSync(item, newItemHdl)) { - return WriteHandle{}; + bool moveSuccess = chainedItem + ? moveChainedItem(item.asChainedItem(), + newItemHdl, *parentItem) + : moveRegularItem(item, newItemHdl); + if (!moveSuccess) { + return WriteHandle{}; + } + if (!chainedItem) { + XDCHECK_EQ(newItemHdl->getKey(), item.getKey()); + item.unmarkMoving(); } - XDCHECK_EQ(newItemHdl->getKey(),item.getKey()); - item.unmarkMoving(); return newItemHdl; } else { return WriteHandle{}; @@ -1877,7 +1881,7 @@ CacheAllocator::tryPromoteToNextMemoryTier( if (newItemHdl) { XDCHECK_EQ(newItemHdl->getSize(), item.getSize()); - if (!moveRegularItemWithSync(item, newItemHdl)) { + if (!moveRegularItem(item, newItemHdl)) { return WriteHandle{}; } item.unmarkMoving(); @@ -3066,6 +3070,8 @@ void CacheAllocator::releaseSlabImpl(TierId tid, // 4. 
Move on to the next item if current item is freed for (auto alloc : releaseContext.getActiveAllocations()) { auto startTimeSec = util::getCurrentTimeSec(); + Item& item = *static_cast(alloc); + // Need to mark an item for release before proceeding // If we can't mark as moving, it means the item is already freed const bool isAlreadyFreed = @@ -3074,8 +3080,6 @@ void CacheAllocator::releaseSlabImpl(TierId tid, continue; } - Item& item = *static_cast(alloc); - // Try to move this item and make sure we can free the memory const bool isMoved = moveForSlabRelease(releaseContext, item, throttler); @@ -3108,21 +3112,22 @@ typename RefcountWithFlags::Value CacheAllocator::unmarkMovingAndWak template bool CacheAllocator::moveForSlabRelease( const SlabReleaseContext& ctx, Item& oldItem, util::Throttler& throttler) { - if (!config_.moveCb) { - return false; - } bool isMoved = false; auto startTime = util::getCurrentTimeSec(); WriteHandle newItemHdl{}; + Item* parentItem = nullptr; + bool chainedItem = oldItem.isChainedItem(); for (unsigned int itemMovingAttempts = 0; itemMovingAttempts < config_.movingTries; ++itemMovingAttempts) { stats_.numMoveAttempts.inc(); - // Nothing to move and the key is likely also bogus for chained items. + // Nothing to move - in the case that tryMoving failed + // for chained items we would have already evicted the entire chain. if (oldItem.isOnlyMoving()) { + XDCHECK(!oldItem.isChainedItem()); auto ret = unmarkMovingAndWakeUpWaiters(oldItem, {}); XDCHECK(ret == 0); const auto res = @@ -3140,7 +3145,17 @@ bool CacheAllocator::moveForSlabRelease( }); if (!newItemHdl) { - newItemHdl = allocateNewItemForOldItem(oldItem); + if (chainedItem) { + parentItem = &oldItem.asChainedItem().getParentItem(compressor_); + XDCHECK(parentItem->isMoving()); + XDCHECK(oldItem.isChainedItem() && oldItem.getRefCount() == 1); + XDCHECK_EQ(0, parentItem->getRefCount()); + newItemHdl = + allocateChainedItemInternalTier(*parentItem, oldItem.getSize(), getTierId(oldItem)); + } else { + XDCHECK(oldItem.isMoving()); + newItemHdl = allocateNewItemForOldItem(oldItem); + } } // if we have a valid handle, try to move, if not, we retry. 
startTime = util::getCurrentTimeSec(); - while (!oldItem.isOnlyMoving()) { + while (!chainedItem && !oldItem.isOnlyMoving()) { throttleWith(throttler, [&] { XLOGF(WARN, "Spent {} seconds, slab release still waiting for refcount to " @@ -3172,16 +3187,33 @@ bool CacheAllocator::moveForSlabRelease( }); } auto tid = getTierId(oldItem); - auto ref = unmarkMovingAndWakeUpWaiters(oldItem, std::move(newItemHdl)); - XDCHECK(ref == 0); - const auto allocInfo = allocator_[tid]->getAllocInfo(oldItem.getMemory()); + if (chainedItem) { + newItemHdl.reset(); + auto ref = parentItem->unmarkMoving(); + if (UNLIKELY(ref == 0)) { + wakeUpWaiters(*parentItem, {}); + const auto res = + releaseBackToAllocator(*parentItem, RemoveContext::kNormal, false); + XDCHECK(res == ReleaseRes::kReleased); + return true; + } else { + XDCHECK_NE(ref, 0); + auto parentHdl = acquire(parentItem); + if (parentHdl) { + wakeUpWaiters(*parentItem, std::move(parentHdl)); + } + } + } else { + auto ref = unmarkMovingAndWakeUpWaiters(oldItem, std::move(newItemHdl)); + XDCHECK(ref == 0); + } allocator_[tid]->free(&oldItem); - (*stats_.fragmentationSize)[tid][allocInfo.poolId][allocInfo.classId].sub( - util::getFragmentation(*this, oldItem)); stats_.numMoveSuccesses.inc(); return true; + } template @@ -3207,7 +3239,6 @@ CacheAllocator::validateAndGetParentHandleForChainedMoveLocked( template typename CacheAllocator::WriteHandle CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { - XDCHECK(oldItem.isMoving()); if (oldItem.isChainedItem()) { const auto& oldChainedItem = oldItem.asChainedItem(); const auto parentKey = oldChainedItem.getParentItem(compressor_).getKey(); @@ -3267,75 +3298,38 @@ CacheAllocator::allocateNewItemForOldItem(const Item& oldItem) { template bool CacheAllocator::tryMovingForSlabRelease( Item& oldItem, WriteHandle& newItemHdl) { - // By holding onto a user-level synchronization object, we ensure moving - // a regular item or chained item is synchronized with any potential - // user-side mutation. - std::unique_ptr syncObj; - if (config_.movingSync && getNumTiers() == 1) { - // TODO: use moving-bit synchronization for single tier as well - if (!oldItem.isChainedItem()) { - syncObj = config_.movingSync(oldItem.getKey()); - } else { - // Copy the key so we have a valid key to work with if the chained - // item is still valid. - const std::string parentKey = - oldItem.asChainedItem().getParentItem(compressor_).getKey().str(); - if (oldItem.isOnlyMoving()) { - // If chained item no longer has a refcount, its parent is already - // being released, so we abort this try to moving. - return false; + std::unique_ptr syncObj; + if (config_.movingSync) { + if (!oldItem.isChainedItem()) { + syncObj = config_.movingSync(oldItem.getKey()); + } else { + // Copy the key so we have a valid key to work with if the chained + // item is still valid. + const std::string parentKey = + oldItem.asChainedItem().getParentItem(compressor_).getKey().str(); + syncObj = config_.movingSync(parentKey); + } + if (syncObj && !syncObj->isValid()) { + return false; } - syncObj = config_.movingSync(parentKey); - } - - // We need to differentiate between the following three scenarios: - // 1. nullptr indicates no move sync required for this particular item - // 2. moveSync.isValid() == true meaning we've obtained the sync - // 3. 
moveSync.isValid() == false meaning we need to abort and retry - if (syncObj && !syncObj->isValid()) { - return false; - } - } - // TODO: we can unify move*Item and move*ItemWithSync by always - // using the moving bit to block readers. - if (getNumTiers() == 1) { - return oldItem.isChainedItem() - ? moveChainedItem(oldItem.asChainedItem(), newItemHdl) + } + //move can fail if another thread calls insertOrReplace + //in this case oldItem is no longer valid (not accessible, + //it gets removed from MMContainer and evictForSlabRelease + //will send it back to the allocator + bool ret = oldItem.isChainedItem() + ? moveChainedItem(oldItem.asChainedItem(), newItemHdl, + oldItem.asChainedItem().getParentItem(compressor_)) : moveRegularItem(oldItem, newItemHdl); - } else { - if (oldItem.isChainedItem() || oldItem.hasChainedItem()) { - // TODO: add support for chained items - return false; - } else { - //move can fail if another thread calls insertOrReplace - //in this case oldItem is no longer valid (not accessible, - //it gets removed from MMContainer and evictForSlabRelease - //will send it back to the allocator - bool ret = moveRegularItemWithSync(oldItem, newItemHdl); - if (!ret) { - //we failed to move - newItemHdl was released back to allocator - //by the moveRegularItemWithSync but oldItem is not accessible - //and no longer valid - we need to clean it up here - XDCHECK(!oldItem.isAccessible()); - oldItem.markForEvictionWhenMoving(); - unlinkItemForEviction(oldItem); - wakeUpWaiters(oldItem, {}); - } else { - removeFromMMContainer(oldItem); - } - return ret; - } - } + removeFromMMContainer(oldItem); + return ret; } template void CacheAllocator::wakeUpWaiters(Item& item, WriteHandle handle) { - // readers do not block on 'moving' items in case there is only one tier - if (getNumTiers() > 1) { - wakeUpWaitersLocked(item.getKey(), std::move(handle)); - } + wakeUpWaitersLocked(item.getKey(), std::move(handle)); } template @@ -3344,16 +3338,19 @@ void CacheAllocator::evictForSlabRelease( auto startTime = util::getCurrentTimeSec(); while (true) { - XDCHECK(item.isMoving()); + //we can't rely on an item being marked moving because + //it may have previously been a chained item stats_.numEvictionAttempts.inc(); if (shutDownInProgress_) { - auto ref = unmarkMovingAndWakeUpWaiters(item, {}); - allocator_[getTierId(item)]->abortSlabRelease(ctx); - throw exception::SlabReleaseAborted( - folly::sformat("Slab Release aborted while trying to evict" - " Item: {} Pool: {}, Class: {}.", - item.toString(), ctx.getPoolId(), ctx.getClassId())); + if (item.isMoving()) { + auto ref = unmarkMovingAndWakeUpWaiters(item, {}); + allocator_[getTierId(item)]->abortSlabRelease(ctx); + throw exception::SlabReleaseAborted( + folly::sformat("Slab Release aborted while trying to evict" + " Item: {} Pool: {}, Class: {}.", + item.toString(), ctx.getPoolId(), ctx.getClassId())); + } } throttleWith(throttler, [&] { XLOGF(WARN, @@ -3381,80 +3378,37 @@ void CacheAllocator::evictForSlabRelease( } typename NvmCacheT::PutToken token; + bool isChainedItem = item.isChainedItem(); Item* evicted; - if (item.isChainedItem()) { - auto& expectedParent = item.asChainedItem().getParentItem(compressor_); - - if (getNumTiers() == 1) { - // TODO: unify this with multi-tier implementation - // right now, taking a chained item lock here would lead to deadlock - const std::string parentKey = expectedParent.getKey().str(); - auto l = chainedItemLocks_.lockExclusive(parentKey); - - // check if the child is still in mmContainer and the expected 
parent is - // valid under the chained item lock. - if (expectedParent.getKey() != parentKey || !item.isInMMContainer() || - item.isOnlyMoving() || - &expectedParent != &item.asChainedItem().getParentItem(compressor_) || - !expectedParent.isAccessible() || !expectedParent.hasChainedItem()) { - continue; - } - - // search if the child is present in the chain - { - auto parentHandle = findInternal(parentKey); - if (!parentHandle || parentHandle != &expectedParent) { - continue; - } - - ChainedItem* head = nullptr; - { // scope for the handle - auto headHandle = findChainedItem(expectedParent); - head = headHandle ? &headHandle->asChainedItem() : nullptr; - } - - bool found = false; - while (head) { - if (head == &item) { - found = true; - break; - } - head = head->getNext(compressor_); - } - - if (!found) { - continue; - } - } - } - - evicted = &expectedParent; - token = createPutToken(*evicted); - if (evicted->markForEviction()) { - // unmark the child so it will be freed - item.unmarkMoving(); - unlinkItemForEviction(*evicted); - // wake up any readers that wait for the move to complete - // it's safe to do now, as we have the item marked exclusive and - // no other reader can be added to the waiters list - wakeUpWaiters(*evicted, {}); - } else { - // TODO: potential deadlock with markUseful for parent item - // for now, we do not block any reader on child items but this - // should probably be fixed - continue; + Item *expectedParent = isChainedItem + ? &item.asChainedItem().getParentItem(compressor_) + : nullptr; + if (isChainedItem) { + XDCHECK(expectedParent->isMoving()); + XDCHECK_EQ(expectedParent,&item.asChainedItem().getParentItem(compressor_)); + if (expectedParent != &item.asChainedItem().getParentItem(compressor_)) { + XDCHECK_EQ(expectedParent,&item.asChainedItem().getParentItem(compressor_)); + throw std::runtime_error(folly::sformat( + "Slab release aborted while evicting " + "item {}", item.toString())); } + evicted = expectedParent; } else { evicted = &item; - - token = createPutToken(*evicted); - if (evicted->markForEvictionWhenMoving()) { - unlinkItemForEviction(*evicted); - wakeUpWaiters(*evicted, {}); - } else { - continue; - } } + XDCHECK(evicted->isMoving()); + token = createPutToken(*evicted); + auto ret = evicted->markForEvictionWhenMoving(); + XDCHECK(ret); + // unmark the child so it will be freed + // TODO entire chain just gets evicted since moveForSlabRelease + // returns false + XDCHECK(!item.isMoving()); + unlinkItemForEviction(*evicted); + // wake up any readers that wait for the move to complete + // it's safe to do now, as we have the item marked exclusive and + // no other reader can be added to the waiters list + wakeUpWaiters(*evicted, {}); if (token.isValid() && shouldWriteToNvmCacheExclusive(*evicted)) { nvmCache_->put(*evicted, std::move(token)); @@ -3475,14 +3429,10 @@ void CacheAllocator::evictForSlabRelease( const auto res = releaseBackToAllocator(*evicted, RemoveContext::kEviction, false); - if (getNumTiers() == 1) { - XDCHECK(res == ReleaseRes::kReleased); - } else { - const bool isAlreadyFreed = - !markMovingForSlabRelease(ctx, &item, throttler); - if (!isAlreadyFreed) { - continue; - } + const bool isAlreadyFreed = + !markMovingForSlabRelease(ctx, &item, throttler); + if (!isAlreadyFreed) { + continue; } return; @@ -3530,19 +3480,52 @@ bool CacheAllocator::markMovingForSlabRelease( // At first, we assume this item was already freed bool itemFreed = true; + Item *syncItem = nullptr; bool markedMoving = false; TierId tid = getTierId(alloc); - 
auto numTiers = getNumTiers(); - const auto fn = [&markedMoving, &itemFreed, numTiers](void* memory) { + const auto fn = [this, tid, &syncItem, &markedMoving, &itemFreed](void* memory) { // Since this callback is executed, the item is not yet freed itemFreed = false; Item* item = static_cast(memory); - // TODO: for chained items, moving bit is only used to avoid - // freeing the item prematurely - auto failIfRefNotZero = numTiers > 1 && !item->isChainedItem(); - if (item->markMoving(failIfRefNotZero)) { - markedMoving = true; - } + auto allocInfo = allocator_[tid]->getAllocInfo(memory); + auto pid = allocInfo.poolId; + auto cid = allocInfo.classId; + auto& mmContainer = getMMContainer(tid, pid, cid); + mmContainer.withContainerLock([this, &mmContainer, + &syncItem, &item, &markedMoving]() { + //we rely on the mmContainer lock to safely check that the item is + //currently in the mmContainer (no other threads are currently allocating + //this item). This is needed to sync on the case where a chained item + //is being released back to allocator and it's parent ref could be + //invalid. We need a valid parent ref in order to mark a chained item + //as moving since we sync on the parent by marking it as moving. + if (!item->isInMMContainer()) { + return; + } + bool chainedItem_ = item->isChainedItem(); + XDCHECK_EQ(&getMMContainer(*item),&mmContainer); + XDCHECK_EQ(item->isChainedItem(),chainedItem_); + Item* syncItem_ = chainedItem_ + ? &item->asChainedItem().getParentItem(compressor_) + : item; + // in order to safely check if the expected parent (syncItem_) matches + // the current parent on the chained item, we need to take the chained + // item lock so we are sure that nobody else will be editing the chain + auto l_ = chainedItem_ + ? chainedItemLocks_.tryLockExclusive(syncItem_->getKey()) + : decltype(chainedItemLocks_.tryLockExclusive(syncItem_->getKey()))(); + + XDCHECK_EQ(item->isChainedItem(),chainedItem_); + if (chainedItem_ && + ( !l_ || &item->asChainedItem().getParentItem(compressor_) != syncItem_) ) { + markedMoving = false; + return; + } + if (syncItem_->markMoving()) { + markedMoving = true; + syncItem = syncItem_; + } + }); }; auto startTime = util::getCurrentTimeSec(); @@ -3554,6 +3537,24 @@ bool CacheAllocator::markMovingForSlabRelease( if (itemFreed) { return false; } else if (markedMoving) { + Item* item = static_cast(alloc); + XDCHECK(syncItem->isMoving()); + XDCHECK(item->isChainedItem() + ? item->asChainedItem().getParentItem(compressor_).isMoving() + : item->isMoving()) << item->toString() << "\n" << syncItem->toString(); + if ( ( item->isChainedItem() && + !item->asChainedItem().getParentItem(compressor_).isMoving() ) + || (!item->isChainedItem() && !item->isMoving()) ) { + throw std::runtime_error( + folly::sformat("Slab Release aborted - failed to mark" + " as moving for Item: {}. Pool: {}, Class: {}. Parent is {}", + item->toString(), ctx.getPoolId(), + ctx.getClassId(), + item->isChainedItem() + ? 
item->asChainedItem().getParentItem(compressor_).toString() : "none")); + + } return true; } @@ -4132,7 +4133,7 @@ bool CacheAllocator::startNewPoolRebalancer( std::chrono::milliseconds interval, std::shared_ptr strategy, unsigned int freeAllocThreshold) { - if (!startNewWorker("PoolRebalancer", poolRebalancer_, interval, strategy, + if (!startNewWorker("PoolRebalancer", poolRebalancer_, std::chrono::milliseconds(1), strategy, freeAllocThreshold)) { return false; } diff --git a/cachelib/allocator/CacheAllocator.h b/cachelib/allocator/CacheAllocator.h index 28768ce563..4861dd3459 100644 --- a/cachelib/allocator/CacheAllocator.h +++ b/cachelib/allocator/CacheAllocator.h @@ -1387,7 +1387,7 @@ class CacheAllocator : public CacheBase { private: // wrapper around Item's refcount and active handle tracking - FOLLY_ALWAYS_INLINE RefcountWithFlags::incResult incRef(Item& it, bool failIfMoving); + FOLLY_ALWAYS_INLINE RefcountWithFlags::incResult incRef(Item& it); FOLLY_ALWAYS_INLINE RefcountWithFlags::Value decRef(Item& it); // drops the refcount and if needed, frees the allocation back to the memory @@ -1551,6 +1551,26 @@ class CacheAllocator : public CacheBase { WriteHandle allocateChainedItemInternal(const ReadHandle& parent, uint32_t size); + // Allocate a chained item to a specific tier + // + // The resulting chained item does not have a parent item yet + // and if we fail to link it to the chain for any reason + // the chained item will be freed once the handle is dropped. + // + // The parent item parameter here is mainly used to find the + // correct pool to allocate memory for this chained item + // + // @param parent parent item + // @param size the size for the chained allocation + // @param tid the tier to allocate on + // + // @return handle to the chained allocation + // @throw std::invalid_argument if the size requested is invalid or + // if the item is invalid + WriteHandle allocateChainedItemInternalTier(const Item& parent, + uint32_t size, + TierId tid); + // Given an item and its parentKey, validate that the parentKey // corresponds to an item that's the parent of the supplied item. // @@ -1626,19 +1646,17 @@ class CacheAllocator : public CacheBase { // // @return true If the move was completed, and the containers were updated // successfully. - bool moveRegularItemWithSync(Item& oldItem, WriteHandle& newItemHdl); + bool moveRegularItem(Item& oldItem, WriteHandle& newItemHdl); - // Moves a regular item to a different slab. This should only be used during - // slab release after the item's exclusive bit has been set. The user supplied - // callback is responsible for copying the contents and fixing the semantics - // of chained item. + // Moves a chained item to a different memory tier. // - // @param oldItem item being moved + // @param oldItem Reference to the item being moved // @param newItemHdl Reference to the handle of the new item being moved into + // @param parentItem Reference to the parent item // // @return true If the move was completed, and the containers were updated // successfully. - bool moveRegularItem(Item& oldItem, WriteHandle& newItemHdl); + bool moveChainedItem(ChainedItem& oldItem, WriteHandle& newItemHdl, Item& parentItem); // template class for viewAsChainedAllocs that takes either ReadHandle or // WriteHandle @@ -1651,29 +1669,12 @@ class CacheAllocator : public CacheBase { template folly::IOBuf convertToIOBufT(Handle& handle); - // Moves a chained item to a different slab. 
This should only be used during - // slab release after the item's exclusive bit has been set. The user supplied - // callback is responsible for copying the contents and fixing the semantics - // of chained item. - // - // Note: If we have successfully moved the old item into the new, the - // newItemHdl is reset and no longer usable by the caller. - // - // @param oldItem Reference to the item being moved - // @param newItemHdl Reference to the handle of the new item being - // moved into - // - // @return true If the move was completed, and the containers were updated - // successfully. - bool moveChainedItem(ChainedItem& oldItem, WriteHandle& newItemHdl); - // Transfers the chain ownership from parent to newParent. Parent // will be unmarked as having chained allocations. Parent will not be null // after calling this API. // - // Parent and NewParent must be valid handles to items with same key and - // parent must have chained items and parent handle must be the only - // outstanding handle for parent. New parent must be without any chained item + // NewParent must be a valid handle to an item with the same key as Parent, + // and Parent must have chained items. New parent must be without any chained item // handles. // // Chained item lock for the parent's key needs to be held in exclusive mode. // // @param parent the parent item // @param newParent the new parent for the chain // // @throw if any of the conditions for parent or newParent are not met. - void transferChainLocked(WriteHandle& parent, WriteHandle& newParent); + void transferChainLocked(Item& parent, WriteHandle& newParent); // replace a chained item in the existing chain. This needs to be called // with the chained item lock held exclusive // @param oldItem the item to be replaced // @param newItem the replacement item // // @return handle to the oldItem WriteHandle replaceChainedItemLocked(Item& oldItem, WriteHandle newItemHdl, const Item& parent); + // + // Performs the actual in-place replacement - it is called from + // moveChainedItem and replaceChainedItemLocked. + // The chained item lock must be held in exclusive mode. + // + // @param oldItem the item we are replacing in the chain + // @param newItem the item we are replacing it with + // @param parent the parent for the chain + // @param fromMove used to determine if the replace was called from + // moveChainedItem - we avoid the handle destructor + // in this case. + void replaceInChainLocked(Item& oldItem, + WriteHandle& newItemHdl, + const Item& parent, + bool fromMove); + // Insert an item into MM container. The caller must hold a valid handle for // the item. // @@ -1986,7 +2005,7 @@ auto& mmContainer = getMMContainer(tid, pid, cid); throw std::runtime_error("Not supported for chained items"); } - if (candidate->markMoving(true)) { + if (candidate->markMoving()) { mmContainer.remove(itr); candidates.push_back(candidate); } else { @@ -2059,7 +2078,7 @@ auto& mmContainer = getMMContainer(tid, pid, cid); // TODO: only allow it for read-only items? 
// or implement mvcc - if (candidate->markMoving(true)) { + if (candidate->markMoving()) { // promotions should rarely fail since we already marked moving mmContainer.remove(itr); candidates.push_back(candidate); diff --git a/cachelib/allocator/CacheItem-inl.h b/cachelib/allocator/CacheItem-inl.h index b33c1ea28a..0028e2776a 100644 --- a/cachelib/allocator/CacheItem-inl.h +++ b/cachelib/allocator/CacheItem-inl.h @@ -238,8 +238,8 @@ bool CacheItem::markForEvictionWhenMoving() { } template -bool CacheItem::markMoving(bool failIfRefNotZero) { - return ref_.markMoving(failIfRefNotZero); +bool CacheItem::markMoving() { + return ref_.markMoving(); } template diff --git a/cachelib/allocator/CacheItem.h b/cachelib/allocator/CacheItem.h index 6728b654eb..b7ae24fe6b 100644 --- a/cachelib/allocator/CacheItem.h +++ b/cachelib/allocator/CacheItem.h @@ -312,9 +312,9 @@ class CACHELIB_PACKED_ATTR CacheItem { // // @return true on success, failure if item is marked as exclusive // @throw exception::RefcountOverflow on ref count overflow - FOLLY_ALWAYS_INLINE RefcountWithFlags::incResult incRef(bool failIfMoving) { + FOLLY_ALWAYS_INLINE RefcountWithFlags::incResult incRef() { try { - return ref_.incRef(failIfMoving); + return ref_.incRef(); } catch (exception::RefcountOverflow& e) { throw exception::RefcountOverflow( folly::sformat("{} item: {}", e.what(), toString())); @@ -381,7 +381,7 @@ class CACHELIB_PACKED_ATTR CacheItem { * Unmarking moving will also return the refcount at the moment of * unmarking. */ - bool markMoving(bool failIfRefNotZero); + bool markMoving(); RefcountWithFlags::Value unmarkMoving() noexcept; bool isMoving() const noexcept; bool isOnlyMoving() const noexcept; diff --git a/cachelib/allocator/MM2Q-inl.h b/cachelib/allocator/MM2Q-inl.h index 07aae775f7..7f00b96131 100644 --- a/cachelib/allocator/MM2Q-inl.h +++ b/cachelib/allocator/MM2Q-inl.h @@ -247,6 +247,12 @@ MM2Q::Container::getEvictionIterator() const noexcept { return LockedIterator{std::move(l), lru_.rbegin()}; } +template T::*HookPtr> +template +void MM2Q::Container::withContainerLock(F&& fun) { + lruMutex_->lock_combine([this, &fun]() { fun(); }); +} + template T::*HookPtr> template void MM2Q::Container::withEvictionIterator(F&& fun) { diff --git a/cachelib/allocator/MM2Q.h b/cachelib/allocator/MM2Q.h index 62b644f9c6..292a612cde 100644 --- a/cachelib/allocator/MM2Q.h +++ b/cachelib/allocator/MM2Q.h @@ -503,6 +503,10 @@ class MM2Q { template void withEvictionIterator(F&& f); + // Execute provided function under container lock. + template + void withContainerLock(F&& f); + // Execute provided function under container lock. Function gets // iterator passed as parameter. template diff --git a/cachelib/allocator/MMLru-inl.h b/cachelib/allocator/MMLru-inl.h index 751bcca5c1..427753f853 100644 --- a/cachelib/allocator/MMLru-inl.h +++ b/cachelib/allocator/MMLru-inl.h @@ -218,6 +218,12 @@ MMLru::Container::getEvictionIterator() const noexcept { return LockedIterator{std::move(l), lru_.rbegin()}; } +template T::*HookPtr> +template +void MMLru::Container::withContainerLock(F&& fun) { + lruMutex_->lock_combine([this, &fun]() { fun(); }); +} + template T::*HookPtr> template void MMLru::Container::withEvictionIterator(F&& fun) { diff --git a/cachelib/allocator/MMLru.h b/cachelib/allocator/MMLru.h index cf3253349a..ad6c4b784b 100644 --- a/cachelib/allocator/MMLru.h +++ b/cachelib/allocator/MMLru.h @@ -377,6 +377,10 @@ class MMLru { template void withEvictionIterator(F&& f); + // Execute provided function under container lock. 
+    template <typename F>
+    void withContainerLock(F&& f);
+
     template <typename F>
     void withPromotionIterator(F&& f);
diff --git a/cachelib/allocator/MMTinyLFU-inl.h b/cachelib/allocator/MMTinyLFU-inl.h
index 9203a54dd6..46a760909d 100644
--- a/cachelib/allocator/MMTinyLFU-inl.h
+++ b/cachelib/allocator/MMTinyLFU-inl.h
@@ -220,6 +220,13 @@ MMTinyLFU::Container<T, HookPtr>::getEvictionIterator() const noexcept {
   return LockedIterator{std::move(l), *this};
 }

+template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
+template <typename F>
+void MMTinyLFU::Container<T, HookPtr>::withContainerLock(F&& fun) {
+  LockHolder l(lruMutex_);
+  fun();
+}
+
 template <typename T, MMTinyLFU::Hook<T> T::*HookPtr>
 template <typename F>
 void MMTinyLFU::Container<T, HookPtr>::withEvictionIterator(F&& fun) {
diff --git a/cachelib/allocator/MMTinyLFU.h b/cachelib/allocator/MMTinyLFU.h
index eb45cefd22..a0d4386b83 100644
--- a/cachelib/allocator/MMTinyLFU.h
+++ b/cachelib/allocator/MMTinyLFU.h
@@ -497,6 +497,10 @@ class MMTinyLFU {
     template <typename F>
     void withEvictionIterator(F&& f);

+    // Execute provided function under container lock.
+    template <typename F>
+    void withContainerLock(F&& f);
+
     template <typename F>
     void withPromotionIterator(F&& f);
diff --git a/cachelib/allocator/Refcount.h b/cachelib/allocator/Refcount.h
index 8251ef15ba..a3ae9b7a17 100644
--- a/cachelib/allocator/Refcount.h
+++ b/cachelib/allocator/Refcount.h
@@ -140,9 +140,9 @@ class FOLLY_PACK_ATTR RefcountWithFlags {
   // @return true if refcount is bumped. false otherwise (if item is exclusive)
   // @throw exception::RefcountOverflow if new count would be greater than
   //        maxCount
-  FOLLY_ALWAYS_INLINE incResult incRef(bool failIfMoving) {
+  FOLLY_ALWAYS_INLINE incResult incRef() {
     incResult res = incOk;
-    auto predicate = [failIfMoving, &res](const Value curValue) {
+    auto predicate = [&res](const Value curValue) {
       Value bitMask = getAdminRef<kExclusive>();
       const bool exlusiveBitIsSet = curValue & bitMask;
@@ -151,7 +151,7 @@ class FOLLY_PACK_ATTR RefcountWithFlags {
       } else if (exlusiveBitIsSet && (curValue & kAccessRefMask) == 0) {
         res = incFailedEviction;
         return false;
-      } else if (exlusiveBitIsSet && failIfMoving) {
+      } else if (exlusiveBitIsSet) {
         res = incFailedMoving;
         return false;
       }
@@ -320,12 +320,15 @@ class FOLLY_PACK_ATTR RefcountWithFlags {
    *
    * Unmarking moving does not depend on `isInMMContainer`
    */
-  bool markMoving(bool failIfRefNotZero) {
-    auto predicate = [failIfRefNotZero](const Value curValue) {
+  bool markMoving() {
+    auto predicate = [](const Value curValue) {
       Value conditionBitMask = getAdminRef<kLinked>();
       const bool flagSet = curValue & conditionBitMask;
       const bool alreadyExclusive = curValue & getAdminRef<kExclusive>();
-      if (failIfRefNotZero && (curValue & kAccessRefMask) != 0) {
+      const bool isChained = curValue & getFlag<kIsChainedItem>();
+
+      // a chained item can have ref count == 1; that is just the internal reference its parent holds
+      if ((curValue & kAccessRefMask) > (isChained ? 1 : 0)) {
        return false;
      }
      if (!flagSet || alreadyExclusive) {
diff --git a/cachelib/allocator/tests/AllocatorTypeTest.cpp b/cachelib/allocator/tests/AllocatorTypeTest.cpp
index a572485e8d..ad38588bcb 100644
--- a/cachelib/allocator/tests/AllocatorTypeTest.cpp
+++ b/cachelib/allocator/tests/AllocatorTypeTest.cpp
@@ -288,8 +288,8 @@ TYPED_TEST(BaseAllocatorTest, AddChainedItemMultiThreadWithMovingAndSync) {
   this->testAddChainedItemMultithreadWithMovingAndSync();
 }

-TYPED_TEST(BaseAllocatorTest, TransferChainWhileMoving) {
-  this->testTransferChainWhileMoving();
+TYPED_TEST(BaseAllocatorTest, TransferChainAfterMoving) {
+  this->testTransferChainAfterMoving();
 }

 TYPED_TEST(BaseAllocatorTest, AddAndPopChainedItemMultithread) {
diff --git a/cachelib/allocator/tests/BaseAllocatorTest.h b/cachelib/allocator/tests/BaseAllocatorTest.h
index 5733798e98..f9e90dd76b 100644
--- a/cachelib/allocator/tests/BaseAllocatorTest.h
+++ b/cachelib/allocator/tests/BaseAllocatorTest.h
@@ -4852,7 +4852,7 @@ class BaseAllocatorTest : public AllocatorTest {
       }

       /* sleep override */
-      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+      std::this_thread::sleep_for(std::chrono::milliseconds(1000));
     }
   };

@@ -4860,7 +4860,7 @@ class BaseAllocatorTest : public AllocatorTest {
   auto releaseFn = [&] {
     for (unsigned int i = 0; i < 5;) {
       /* sleep override */
-      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+      std::this_thread::sleep_for(std::chrono::milliseconds(1000));

       ClassId cid = static_cast<ClassId>(i);
       alloc.releaseSlab(pid, cid, SlabReleaseMode::kRebalance);
@@ -5077,7 +5077,7 @@ class BaseAllocatorTest : public AllocatorTest {
   auto releaseFn = [&] {
     for (unsigned int i = 0; i < 5;) {
       /* sleep override */
-      std::this_thread::sleep_for(std::chrono::milliseconds(100));
+      std::this_thread::sleep_for(std::chrono::milliseconds(1000));

       ClassId cid = static_cast<ClassId>(i);
       alloc.releaseSlab(pid, cid, SlabReleaseMode::kRebalance);
@@ -5136,9 +5136,10 @@ class BaseAllocatorTest : public AllocatorTest {
     lookupFn("yolo");
   }

-  // while a chained item could be moved, try to transfer its parent and
-  // validate that move succeeds correctly.
-  void testTransferChainWhileMoving() {
+  // While a chained item could be moved - moving is synchronized on the
+  // parent's moving bit - try to transfer its parent after the move has
+  // finished and validate that the transfer succeeds correctly.
+  void testTransferChainAfterMoving() {
    // create an allocator worth 10 slabs.
    typename AllocatorT::Config config;
    config.configureChainedItems();
@@ -5159,15 +5160,13 @@ class BaseAllocatorTest : public AllocatorTest {
     struct TestSyncObj : public AllocatorT::SyncObj {
       TestSyncObj(std::mutex& m,
                   std::atomic<bool>& firstTime,
-                  folly::Baton<>& startedMoving,
-                  folly::Baton<>& changedParent)
+                  folly::Baton<>& startedMoving)
           : l(m) {
         if (!firstTime) {
           return;
         }
         firstTime = false;
         startedMoving.post();
-        changedParent.wait();
       }

       std::lock_guard<std::mutex> l;
@@ -5180,9 +5179,6 @@ class BaseAllocatorTest : public AllocatorTest {
     // baton to indicate that the move process has started so that we can
     // switch the parent
     folly::Baton<> startedMoving;
-    // baton to indicate that the parent has been switched so that the move
-    // process can proceed
-    folly::Baton<> changedParent;

     const size_t numMovingAttempts = 100;
     std::atomic<unsigned> numMoves{0};
@@ -5194,11 +5190,10 @@ class BaseAllocatorTest : public AllocatorTest {
                  oldItem.getSize());
          ++numMoves;
        },
-        [&m, &startedMoving, &changedParent,
-         &firstTimeMovingSync](typename Item::Key key) {
+        [&m, &startedMoving, &firstTimeMovingSync](typename Item::Key key) {
          XLOG(ERR) << "Moving" << key;
          return std::make_unique<TestSyncObj>(m, firstTimeMovingSync,
-                                              startedMoving, changedParent);
+                                              startedMoving);
        },
        numMovingAttempts);
@@ -5228,24 +5223,19 @@ class BaseAllocatorTest : public AllocatorTest {
     auto slabRelease = std::async(releaseFn);
     startedMoving.wait();
+    // wait for slab release to complete.
+    slabRelease.wait();

     // we know moving sync is held now.
     {
       auto newParent = alloc.allocate(pid, movingKey, 600);
-      auto parent = alloc.findToWrite(movingKey);
+      auto parent = alloc.findToWrite(movingKey); // parent is marked moving while the move is in flight; once it finishes we get a valid handle
       alloc.transferChainAndReplace(parent, newParent);
     }

-    // indicate that we changed the parent. This should abort the current
-    // moving attempt, re-allocate the item and eventually succeed in moving.
-    changedParent.post();
-
-    // wait for slab release to complete.
-    slabRelease.wait();
-
     EXPECT_EQ(numMoves, 1);
     auto slabReleaseStats = alloc.getSlabReleaseStats();
-    EXPECT_EQ(slabReleaseStats.numMoveAttempts, 2);
+    EXPECT_EQ(slabReleaseStats.numMoveAttempts, 1);
     EXPECT_EQ(slabReleaseStats.numMoveSuccesses, 1);

     auto handle = alloc.find(movingKey);
@@ -5959,7 +5949,6 @@ class BaseAllocatorTest : public AllocatorTest {
     EXPECT_EQ(nullptr,
               util::allocateAccessible(alloc, poolId, "large", largeSize));
-    std::this_thread::sleep_for(std::chrono::seconds{1});

     // trigger the slab rebalance
     EXPECT_EQ(nullptr,
               util::allocateAccessible(alloc, poolId, "large", largeSize));
diff --git a/cachelib/allocator/tests/ItemTest.cpp b/cachelib/allocator/tests/ItemTest.cpp
index 2f25cc07f0..54bac1945a 100644
--- a/cachelib/allocator/tests/ItemTest.cpp
+++ b/cachelib/allocator/tests/ItemTest.cpp
@@ -85,7 +85,7 @@ TEST(ItemTest, ExpiryTime) {
   // So that exclusive bit will be set
   item->markAccessible();
   // Test that writes fail while the item is moving
-  result = item->markMoving(true);
+  result = item->markMoving();
   EXPECT_TRUE(result);
   result = item->updateExpiryTime(0);
   EXPECT_FALSE(result);
diff --git a/cachelib/allocator/tests/RefCountTest.cpp b/cachelib/allocator/tests/RefCountTest.cpp
index 862271b03d..8388d8b45d 100644
--- a/cachelib/allocator/tests/RefCountTest.cpp
+++ b/cachelib/allocator/tests/RefCountTest.cpp
@@ -52,7 +52,7 @@ void RefCountTest::testMultiThreaded() {
         nLocalRef--;
         ref.markAccessible();
       } else {
-        ref.incRef(true);
+        ref.incRef();
         nLocalRef++;
         ref.unmarkAccessible();
       }
@@ -101,12 +101,12 @@ void RefCountTest::testBasic() {
   ASSERT_FALSE(ref.template isFlagSet());

   for (uint32_t i = 0; i < RefcountWithFlags::kAccessRefMask; i++) {
-    ASSERT_EQ(ref.incRef(true),RefcountWithFlags::incOk);
+    ASSERT_EQ(ref.incRef(), RefcountWithFlags::incOk);
   }

   // Incrementing past the max will fail
   auto rawRef = ref.getRaw();
-  ASSERT_THROW(ref.incRef(true), std::overflow_error);
+  ASSERT_THROW(ref.incRef(), std::overflow_error);
   ASSERT_EQ(rawRef, ref.getRaw());

   // Bumping up access ref shouldn't affect admin ref and flags
@@ -152,11 +152,11 @@ void RefCountTest::testBasic() {
   ASSERT_FALSE(ref.template isFlagSet());

   // conditionally set flags
-  ASSERT_FALSE(ref.markMoving(true));
+  ASSERT_FALSE(ref.markMoving());
   ref.markInMMContainer();
   // only first one succeeds
-  ASSERT_TRUE(ref.markMoving(true));
-  ASSERT_FALSE(ref.markMoving(true));
+  ASSERT_TRUE(ref.markMoving());
+  ASSERT_FALSE(ref.markMoving());
   ref.unmarkInMMContainer();

   ref.template setFlag();
@@ -202,7 +202,7 @@ void RefCountTest::testMarkForEvictionAndMoving() {
     ref.markInMMContainer();
     ref.markAccessible();

-    ASSERT_TRUE(ref.markMoving(true));
+    ASSERT_TRUE(ref.markMoving());
     ASSERT_FALSE(ref.markForEviction());

     ref.unmarkInMMContainer();
@@ -218,7 +218,7 @@ void RefCountTest::testMarkForEvictionAndMoving() {
     ref.markAccessible();

     ASSERT_TRUE(ref.markForEviction());
-    ASSERT_FALSE(ref.markMoving(true));
+    ASSERT_FALSE(ref.markMoving());

     ref.unmarkInMMContainer();
     ref.unmarkAccessible();
@@ -226,29 +226,13 @@ void RefCountTest::testMarkForEvictionAndMoving() {
     ASSERT_EQ(ret, 0);
   }

-  {
-    // can mark moving when ref count > 0
-    RefcountWithFlags ref;
-    ref.markInMMContainer();
-    ref.markAccessible();
-
-    ref.incRef(true);
-
-    ASSERT_TRUE(ref.markMoving(false));
-
-    ref.unmarkInMMContainer();
-    ref.unmarkAccessible();
-    auto ret = ref.unmarkMoving();
-    ASSERT_EQ(ret, 1);
-  }
-
   {
     // cannot mark for eviction when ref count > 0
     RefcountWithFlags ref;
     ref.markInMMContainer();
     ref.markAccessible();

-    ref.incRef(true);
+    ref.incRef();
     ASSERT_FALSE(ref.markForEviction());
   }
 }
diff --git a/cachelib/cachebench/runner/CacheStressor.h b/cachelib/cachebench/runner/CacheStressor.h
index 4ea54b1f14..8690602400 100644
--- a/cachelib/cachebench/runner/CacheStressor.h
+++ b/cachelib/cachebench/runner/CacheStressor.h
@@ -77,7 +77,7 @@ class CacheStressor : public Stressor {
     std::unique_lock<folly::SharedMutex> lock;

     CacheStressSyncObj(CacheStressor& s, std::string itemKey)
-        : lock{s.chainedItemAcquireUniqueLock(itemKey)} {}
+        : lock{s.chainedItemTryAcquireUniqueLock(itemKey)} {}
   };
   movingSync = [this](typename CacheT::Item::Key key) {
     return std::make_unique<CacheStressSyncObj>(*this, key.str());
   };
@@ -241,6 +241,10 @@ class CacheStressor : public Stressor {
     using Lock = std::unique_lock<folly::SharedMutex>;
     return lockEnabled_ ? Lock{getLock(key)} : Lock{};
   }
+  auto chainedItemTryAcquireUniqueLock(Key key) {
+    using Lock = std::unique_lock<folly::SharedMutex>;
+    return lockEnabled_ ? Lock{getLock(key), std::try_to_lock} : Lock{};
+  }

   // populate the input item handle according to the stress setup.
   void populateItem(WriteHandle& handle, const std::string& itemValue = "") {
diff --git a/cachelib/cachebench/test_configs/small_moving_bg.json b/cachelib/cachebench/test_configs/small_moving_bg.json
new file mode 100644
index 0000000000..c4838f42b5
--- /dev/null
+++ b/cachelib/cachebench/test_configs/small_moving_bg.json
@@ -0,0 +1,35 @@
+// @nolint like default.json, but moves items during slab release instead of evicting them.
+{
+  "cache_config" : {
+    "cacheSizeMB" : 2248,
+    "cacheDir": "/tmp/mem-tier5",
+    "memoryTiers" : [
+      {
+        "ratio": 1,
+        "memBindNodes": 0
+      }, {
+        "ratio": 1,
+        "memBindNodes": 0
+      }
+    ],
+    "poolRebalanceIntervalSec" : 1,
+    "moveOnSlabRelease" : true,
+    "rebalanceMinSlabs" : 2,
+    "evictorThreads": 2,
+    "promoterThreads": 2
+  },
+  "test_config" :
+  {
+    "preallocateCache" : true,
+    "numOps" : 20000000,
+    "numThreads" : 32,
+    "numKeys" : 250000,
+    "generator": "online",
+    "keySizeRange" : [1, 8, 32, 64, 128, 256, 512],
+    "keySizeRangeProbability" : [0.1, 0.1, 0.2, 0.2, 0.3, 0.1],
+    "valSizeRange" : [1, 128, 512, 1024, 4096, 10240, 20480, 40960, 60000],
+    "valSizeRangeProbability" : [0.1, 0.1, 0.1, 0.2, 0.2, 0.1, 0.1, 0.1],
+    "getRatio" : 0.70,
+    "setRatio" : 0.30
+  }
+}
diff --git a/cachelib/common/Mutex.h b/cachelib/common/Mutex.h
index 1d6e5898f1..15b440d406 100644
--- a/cachelib/common/Mutex.h
+++ b/cachelib/common/Mutex.h
@@ -341,6 +341,7 @@ class RWBucketLocks : public BaseBucketLocks {
   using Lock = LockType;
   using ReadLockHolder = ReadLockHolderType;
   using WriteLockHolder = WriteLockHolderType;
+  using LockHolder = std::unique_lock<Lock>;

   RWBucketLocks(uint32_t locksPower, std::shared_ptr<Hash> hasher)
       : Base::BaseBucketLocks(locksPower, std::move(hasher)) {}
@@ -357,6 +358,11 @@ class RWBucketLocks : public BaseBucketLocks {
     return WriteLockHolder{Base::getLock(args...)};
   }

+  template <typename... Args>
+  LockHolder tryLockExclusive(Args... args) noexcept {
+    return LockHolder(Base::getLock(args...), std::try_to_lock);
+  }
+
   // try to grab the reader lock for a limit _timeout_ duration
   template <typename... Args>
   ReadLockHolder lockShared(const std::chrono::microseconds& timeout,
diff --git a/run_tests.sh b/run_tests.sh
index e575dbc62a..6ff2ac65ed 100755
--- a/run_tests.sh
+++ b/run_tests.sh
@@ -13,3 +13,4 @@ fi

 ../bin/cachebench --json_test_config ../test_configs/consistency/navy.json
 ../bin/cachebench --json_test_config ../test_configs/consistency/navy-multi-tier.json
+../bin/cachebench --json_test_config ../test_configs/small_moving_bg.json
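
Usage sketches for the primitives added above. These snippets are illustrative only and are not part of the patch; `mmContainer`, `repairChain`, `bucketLocks`, and `key` are assumed names.

The withContainerLock() member added to MM2Q, MMLru and MMTinyLFU runs a caller-supplied function while holding the same mutex that guards eviction iteration, without materializing an iterator. A minimal caller might look like:

    // Run a short critical section under the MM container lock so that no
    // eviction path can observe shared state mid-update. The callback runs
    // inside lruMutex_->lock_combine() (MM2Q/MMLru) or under a LockHolder
    // (MMTinyLFU), so keep it brief and do not re-enter the container.
    mmContainer.withContainerLock([&]() {
      repairChain(parent); // hypothetical helper, for illustration only
    });

With the failIfRefNotZero/failIfMoving flags removed, markMoving() now succeeds only when the access ref count is 0, or exactly 1 for a chained item, because a chained item's single internal reference is the one its parent holds. A sketch of the intended semantics in the style of RefCountTest, assuming the kIsChainedItem enumerator name reconstructed in Refcount.h above:

    RefcountWithFlags ref;
    ref.markInMMContainer();         // marking moving requires linked state
    ASSERT_TRUE(ref.markMoving());   // ref count == 0: allowed
    ref.unmarkMoving();

    ref.incRef();                    // one outstanding reference
    ASSERT_FALSE(ref.markMoving());  // regular item: refused

    ref.setFlag<RefcountWithFlags::kIsChainedItem>();
    ASSERT_TRUE(ref.markMoving());   // ref count == 1 on a chained item is
                                     // just the parent's internal reference

Both chainedItemTryAcquireUniqueLock() and RWBucketLocks::tryLockExclusive() hand back a std::unique_lock constructed with std::try_to_lock, so the caller must test ownership instead of assuming the lock is held:

    // Back off instead of blocking when the lock is already held, e.g. by a
    // concurrent slab-release move that synchronizes on the same key.
    auto holder = bucketLocks.tryLockExclusive(key);
    if (!holder.owns_lock()) {
      return false; // caller retries later rather than risking a deadlock
    }
    // ... critical section ...

This is presumably why CacheStressor's moving sync switched to the try-lock variant: while the mover holds the parent's moving bit, blocking unconditionally on the chained-item lock in the sync object could deadlock against a stressor thread holding that same lock.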