Skip to content

Commit ce5a17b

Browse files
fix: copy offload cross engine dependency handling
Related-To: HSD-18043670900
Signed-off-by: Bartosz Dunajski <[email protected]>
1 parent 0840c26 commit ce5a17b

File tree

5 files changed

+34
-2
lines changed

5 files changed

+34
-2
lines changed

level_zero/core/source/cmdlist/cmdlist_hw.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -484,6 +484,7 @@ struct CommandListCoreFamily : public CommandListImp {
484484
bool implicitSynchronizedDispatchForCooperativeKernelsAllowed = false;
485485
bool useAdditionalBlitProperties = false;
486486
bool isPostImageWriteFlushRequired = false;
487+
bool latestFlushIsDualCopyOffload = false;
487488
};
488489

489490
template <PRODUCT_FAMILY gfxProductFamily>

level_zero/core/source/cmdlist/cmdlist_hw.inl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3166,7 +3166,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
31663166
isQwordInOrderCounter(), copyOnlyWait);
31673167

31683168
} else {
3169-
auto resolveDependenciesViaPipeControls = !copyOnlyWait && implicitDependency && (this->dcFlushSupport || (!this->heaplessModeEnabled && this->latestOperationHasOptimizedCbEvent));
3169+
bool crossEngineDependency = (latestFlushIsDualCopyOffload != dualStreamCopyOffloadOperation);
3170+
auto resolveDependenciesViaPipeControls = !crossEngineDependency && !copyOnlyWait && implicitDependency && (this->dcFlushSupport || (!this->heaplessModeEnabled && this->latestOperationHasOptimizedCbEvent));
31703171

31713172
if (NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get() != -1) {
31723173
resolveDependenciesViaPipeControls = NEO::debugManager.flags.ResolveDependenciesViaPipeControls.get();

level_zero/core/source/cmdlist/cmdlist_hw_immediate.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -268,7 +268,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
268268
uint64_t relaxedOrderingCounter = 0;
269269
std::atomic<bool> dependenciesPresent{false};
270270
bool latestFlushIsHostVisible = false;
271-
bool latestFlushIsDualCopyOffload = false;
272271
bool keepRelaxedOrderingEnabled = false;
273272
};
274273

level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
#include "shared/source/gmm_helper/gmm_helper.h"
1010
#include "shared/source/helpers/compiler_product_helper.h"
1111
#include "shared/source/memory_manager/internal_allocation_storage.h"
12+
#include "shared/test/common/cmd_parse/hw_parse.h"
1213
#include "shared/test/common/helpers/relaxed_ordering_commands_helper.h"
1314
#include "shared/test/common/libult/ult_command_stream_receiver.h"
1415
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
@@ -1061,6 +1062,29 @@ HWTEST2_F(CopyOffloadInOrderTests, givenProfilingEventWithRelaxedOrderingWhenApp
10611062
}
10621063
}
10631064

1065+
// Checks that a kernel launch appended right after a dual-stream offloaded memory copy
// programs an MI_SEMAPHORE_WAIT.
// Flow: force CopyOffloadModes::dualStream through the debug flag, append a memory copy and
// confirm latestFlushIsDualCopyOffload is set, remember how much of the command stream is used,
// append a kernel launch, then parse only the commands written after that point and expect a
// semaphore wait among them.
HWCMDTEST_F(IGFX_XE_HP_CORE, CopyOffloadInOrderTests, givenCrossEngineDependencyWhenComputeWorkSubmittedThenUseSemaphore) {
1066+
debugManager.flags.OverrideCopyOffloadMode.set(CopyOffloadModes::dualStream);
1067+
1068+
uint32_t counterOffset = 64;
1069+
1070+
auto immCmdList = createImmCmdListWithOffload<FamilyType::gfxCoreFamily>();
1071+
immCmdList->inOrderExecInfo->setAllocationOffset(counterOffset);
1072+
1073+
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
1074+
1075+
immCmdList->appendMemoryCopy(&copyData1, &copyData2, 1, nullptr, 0, nullptr, copyParams);
1076+
EXPECT_TRUE(immCmdList->latestFlushIsDualCopyOffload);
1077+
1078+
// Stream position before the kernel launch; parsing starts here so only the launch's commands are inspected.
auto offset = cmdStream->getUsed();
1079+
1080+
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams);
1081+
1082+
auto hwCmds = HardwareParse::parseCommandBuffer<FamilyType>(*cmdStream, offset);
1083+
1084+
auto itor = find<typename FamilyType::MI_SEMAPHORE_WAIT *>(hwCmds.begin(), hwCmds.end());
1085+
EXPECT_NE(hwCmds.end(), itor);
1086+
}
1087+
10641088
HWTEST2_F(CopyOffloadInOrderTests, givenAtomicSignalingModeWhenUpdatingCounterThenUseCorrectHwCommands, IsAtLeastXe2HpgCore) {
10651089
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
10661090
using ATOMIC_OPCODES = typename FamilyType::MI_ATOMIC::ATOMIC_OPCODES;

shared/test/common/cmd_parse/hw_parse.h

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -218,6 +218,13 @@ struct HardwareParse : NEO::NonCopyableAndNonMovableClass {
218218
return FamilyType::Parse::getCommandName(cmd);
219219
}
220220

221+
// Parses the commands stored in linearStream, starting `offset` bytes past its CPU base and
// covering the remaining used bytes (getUsed() - offset), and returns them as a GenCmdList.
// A failed parse is reported through EXPECT_TRUE; cmds is returned regardless of the result.
// NOTE(review): offset is not validated here — if it exceeds getUsed() the computed length
// presumably wraps around (assuming an unsigned size); confirm callers pass offset <= getUsed().
template <typename FamilyType>
222+
static GenCmdList parseCommandBuffer(const LinearStream &linearStream, size_t offset) {
223+
GenCmdList cmds;
224+
EXPECT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(cmds, ptrOffset(linearStream.getCpuBase(), offset), linearStream.getUsed() - offset));
225+
return cmds;
226+
}
227+
221228
// The starting point of parsing commandBuffers. This is important
222229
// because as buffers get reused, we only want to parse the deltas.
223230
LinearStream *previousCS = nullptr;

0 commit comments

Comments
 (0)