Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions libkineto/src/plugin/xpupti/XpuptiActivityApi.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include "XpuptiActivityApi.h"

#include <assert.h>
Expand Down Expand Up @@ -170,6 +178,43 @@ void XpuptiActivityApi::bufferCompleted(
}
#endif

#if PTI_VERSION_MAJOR > 0 || PTI_VERSION_MINOR > 10
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be useful to adopt a version scheme similar to CUDA's, where the MAJOR, MINOR and PATCH numbers are embedded into a single number. Using OR conditions like this can lead to more complexity if many ifdefs are needed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi, @sraikund16
Thank you for the good suggestion.
For now, Intel GPU doesn't have a capability version control API. Only XPTI has a release version macro, so we use it here for conditional compilation. We will consider adding capability version control and then refine this code.
Thank you.

// Calls ptiViewEnableRuntimeApi once for each API id in a fixed, hand-picked
// list, always with the SYCL API group (pti_api_group_id::PTI_API_GROUP_SYCL).
//
// Every call is wrapped in XPUPTI_CALL; how a failing call is reported is
// determined by that macro, which is defined outside this function.
//
// NOTE(review): the literal 1 passed as the first argument is presumably the
// "enable" flag (as opposed to 0 for disable) - confirm against the PTI
// header.
// NOTE(review): "Specifc" in the function name is a misspelling of
// "Specific"; it is left as-is because callers reference this exact name.
static void enableSpecifcRuntimeAPIsTracing() {
// USM fill / memcpy enqueue entry points (1D and 2D variants).
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urEnqueueUSMFill_id));
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urEnqueueUSMFill2D_id));
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urEnqueueUSMMemcpy_id));
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urEnqueueUSMMemcpy2D_id));
// Kernel launch entry points (regular, custom-exp and cooperative-exp).
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urEnqueueKernelLaunch_id));
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1,
pti_api_group_id::PTI_API_GROUP_SYCL,
urEnqueueKernelLaunchCustomExp_id));
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1,
pti_api_group_id::PTI_API_GROUP_SYCL,
urEnqueueCooperativeKernelLaunchExp_id));
// Memory-buffer fill / read / write / copy enqueue entry points.
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urEnqueueMemBufferFill_id));
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urEnqueueMemBufferRead_id));
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urEnqueueMemBufferWrite_id));
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urEnqueueMemBufferCopy_id));
// USM allocation entry points (host, shared and device).
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urUSMHostAlloc_id));
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urUSMSharedAlloc_id));
XPUPTI_CALL(ptiViewEnableRuntimeApi(
1, pti_api_group_id::PTI_API_GROUP_SYCL, urUSMDeviceAlloc_id));
}

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we still need these UR-specific API ids? Shouldn't we use the API set instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we will use the API set. For now, this just applies the previous patch and opens a draft PR.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then can we mark this PR as a draft?

#endif

void XpuptiActivityApi::enableXpuptiActivities(
const std::set<ActivityType>& selected_activities) {
#ifdef HAS_XPUPTI
Expand All @@ -192,7 +237,16 @@ void XpuptiActivityApi::enableXpuptiActivities(
externalCorrelationEnabled_ = true;
}
if (activity == ActivityType::XPU_RUNTIME) {
#if PTI_VERSION_MAJOR > 0 || PTI_VERSION_MINOR > 11
XPUPTI_CALL(ptiViewEnable(PTI_VIEW_RUNTIME_API));
XPUPTI_CALL(ptiViewEnableRuntimeApiClass(
1, PTI_API_CLASS_GPU_OPERATION_CORE, PTI_API_GROUP_ALL));
#elif PTI_VERSION_MAJOR == 0 && PTI_VERSION_MINOR == 11
XPUPTI_CALL(ptiViewEnable(PTI_VIEW_RUNTIME_API));
enableSpecifcRuntimeAPIsTracing();
#else
XPUPTI_CALL(ptiViewEnable(PTI_VIEW_SYCL_RUNTIME_CALLS));
#endif
}
if (activity == ActivityType::OVERHEAD) {
XPUPTI_CALL(ptiViewEnable(PTI_VIEW_COLLECTION_OVERHEAD));
Expand Down Expand Up @@ -222,7 +276,12 @@ void XpuptiActivityApi::disablePtiActivities(
XPUPTI_CALL(ptiViewDisable(PTI_VIEW_EXTERNAL_CORRELATION));
}
if (activity == ActivityType::XPU_RUNTIME) {
#if PTI_VERSION_MAJOR > 0 || PTI_VERSION_MINOR > 10
XPUPTI_CALL(ptiViewDisable(PTI_VIEW_RUNTIME_API));
#else

XPUPTI_CALL(ptiViewDisable(PTI_VIEW_SYCL_RUNTIME_CALLS));
#endif
}
if (activity == ActivityType::OVERHEAD) {
XPUPTI_CALL(ptiViewDisable(PTI_VIEW_COLLECTION_OVERHEAD));
Expand Down
19 changes: 19 additions & 0 deletions libkineto/src/plugin/xpupti/XpuptiActivityHandlers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,29 @@ inline void XpuptiActivityProfilerSession::handleCorrelationActivity(
}

void XpuptiActivityProfilerSession::handleRuntimeActivity(
#if PTI_VERSION_MAJOR > 0 || PTI_VERSION_MINOR > 10
const pti_view_record_api* activity,
#else
const pti_view_record_sycl_runtime* activity,
#endif
ActivityLogger* logger) {
traceBuffer_.span.opCount += 1;
traceBuffer_.gpuOpCount += 1;
const ITraceActivity* linked =
linkedActivity(activity->_correlation_id, cpuCorrelationMap_);
#if PTI_VERSION_MAJOR > 0 || PTI_VERSION_MINOR > 10
const char* api_name = nullptr;
XPUPTI_CALL(
ptiViewGetApiIdName(activity->_api_group, activity->_api_id, &api_name));
#endif
traceBuffer_.emplace_activity(
traceBuffer_.span,
ActivityType::XPU_RUNTIME,
#if PTI_VERSION_MAJOR > 0 || PTI_VERSION_MINOR > 10
std::string(api_name));
#else
std::string(activity->_name));
#endif
auto& runtime_activity = traceBuffer_.activities.back();
runtime_activity->startTime = activity->_start_timestamp;
runtime_activity->endTime = activity->_end_timestamp;
Expand Down Expand Up @@ -331,9 +344,15 @@ void XpuptiActivityProfilerSession::handlePtiActivity(
reinterpret_cast<const pti_view_record_external_correlation*>(
record));
break;
#if PTI_VERSION_MAJOR > 0 || PTI_VERSION_MINOR > 10
case PTI_VIEW_RUNTIME_API:
handleRuntimeActivity(
reinterpret_cast<const pti_view_record_api*>(record),
#else
case PTI_VIEW_SYCL_RUNTIME_CALLS:
handleRuntimeActivity(
reinterpret_cast<const pti_view_record_sycl_runtime*>(record),
#endif
logger);
break;
case PTI_VIEW_DEVICE_GPU_KERNEL:
Expand Down
4 changes: 4 additions & 0 deletions libkineto/src/plugin/xpupti/XpuptiActivityProfiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ class XpuptiActivityProfilerSession
void handleCorrelationActivity(
const pti_view_record_external_correlation* correlation);
void handleRuntimeActivity(
#if PTI_VERSION_MAJOR > 0 || PTI_VERSION_MINOR > 10
const pti_view_record_api* activity,
#else
const pti_view_record_sycl_runtime* activity,
#endif
ActivityLogger* logger);
void handleKernelActivity(
const pti_view_record_kernel* activity,
Expand Down
Loading