Skip to content

Commit ac61cb0

Browse files
authored
[PROTON] Change the output format of pc sampling lines (#5711)
This way, users can open the file directly in IDEs like VS Code and jump to the corresponding line by holding the `ctrl` key and clicking the line in the terminal. Also, this PR emits an error instead of using workarounds for CUPTI compatibility problems and adds more instructions for using PC sampling in the tutorial. ![image](https://github.com/user-attachments/assets/9ecb90c3-0953-43bd-8db4-605dc13c38a2)
1 parent 050f302 commit ac61cb0

File tree

6 files changed

+37
-23
lines changed

6 files changed

+37
-23
lines changed

third_party/proton/csrc/lib/Profiler/Cupti/CuptiPCSampling.cpp

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -135,15 +135,23 @@ CUpti_PCSamplingData allocPCSamplingData(size_t collectNumPCs,
135135
uint32_t libVersion = 0;
136136
cupti::getVersion<true>(&libVersion);
137137
size_t pcDataSize = sizeof(CUpti_PCSamplingPCData);
138-
// Check cupti api version < 12.4 but cupti header version >= 12.4
139-
// If so, we subtract 4 bytes from the size of CUpti_PCSamplingPCData
140-
// because it introduces a new field (i.e., correlationId) at the end of the
141-
// struct, which is not compatible with the previous versions.
142-
if (libVersion < CUPTI_CUDA12_4_VERSION &&
143-
CUPTI_API_VERSION >= CUPTI_CUDA12_4_VERSION)
144-
pcDataSize -= CUPTI_CUDA12_4_PC_DATA_PADDING_SIZE;
138+
// Since CUPTI 12.4, a new field (i.e., correlationId) is added to
139+
// CUpti_PCSamplingPCData, which breaks the ABI compatibility.
140+
// Instead of using workarounds, we emit an error message and exit the
141+
// application.
142+
if ((libVersion < CUPTI_CUDA12_4_VERSION &&
143+
CUPTI_API_VERSION >= CUPTI_CUDA12_4_VERSION) ||
144+
(libVersion >= CUPTI_CUDA12_4_VERSION &&
145+
CUPTI_API_VERSION < CUPTI_CUDA12_4_VERSION)) {
146+
throw std::runtime_error(
147+
"[PROTON] CUPTI API version: " + std::to_string(CUPTI_API_VERSION) +
148+
" and CUPTI driver version: " + std::to_string(libVersion) +
149+
" are not compatible. Please set the environment variable "
150+
" TRITON_CUPTI_INCLUDE_PATH and TRITON_CUPTI_LIB_PATH to resolve the "
151+
"problem.");
152+
}
145153
CUpti_PCSamplingData pcSamplingData{
146-
/*size=*/pcDataSize,
154+
/*size=*/sizeof(CUpti_PCSamplingData),
147155
/*collectNumPcs=*/collectNumPCs,
148156
/*totalSamples=*/0,
149157
/*droppedSamples=*/0,
@@ -372,16 +380,16 @@ void CuptiPCSampling::processPCSamplingData(ConfigureData *configureData,
372380
auto *stallReason = &pcData->stallReason[j];
373381
if (!configureData->stallReasonIndexToMetricIndex.count(
374382
stallReason->pcSamplingStallReasonIndex))
375-
throw std::runtime_error("Invalid stall reason index");
383+
throw std::runtime_error("[PROTON] Invalid stall reason index");
376384
for (auto *data : dataSet) {
377385
auto scopeId = externId;
378386
if (isAPI)
379387
scopeId = data->addOp(externId, lineInfo.functionName);
380388
if (lineInfo.fileName.size())
381-
scopeId = data->addOp(scopeId,
382-
lineInfo.dirName + "/" + lineInfo.fileName +
383-
":" + lineInfo.functionName + "@" +
384-
std::to_string(lineInfo.lineNumber));
389+
scopeId = data->addOp(
390+
scopeId, lineInfo.dirName + "/" + lineInfo.fileName + ":" +
391+
std::to_string(lineInfo.lineNumber) + "@" +
392+
lineInfo.functionName);
385393
auto metricKind = static_cast<PCSamplingMetric::PCSamplingMetricKind>(
386394
configureData->stallReasonIndexToMetricIndex
387395
[stallReason->pcSamplingStallReasonIndex]);

third_party/proton/csrc/lib/Profiler/Cupti/CuptiProfiler.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,7 +227,7 @@ void CuptiProfiler::CuptiProfilerPimpl::allocBuffer(uint8_t **buffer,
227227
size_t *maxNumRecords) {
228228
*buffer = static_cast<uint8_t *>(aligned_alloc(AlignSize, BufferSize));
229229
if (*buffer == nullptr) {
230-
throw std::runtime_error("aligned_alloc failed");
230+
throw std::runtime_error("[PROTON] aligned_alloc failed");
231231
}
232232
*bufferSize = BufferSize;
233233
*maxNumRecords = 0;
@@ -253,7 +253,7 @@ void CuptiProfiler::CuptiProfilerPimpl::completeBuffer(CUcontext ctx,
253253
} else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) {
254254
break;
255255
} else {
256-
throw std::runtime_error("cupti::activityGetNextRecord failed");
256+
throw std::runtime_error("[PROTON] cupti::activityGetNextRecord failed");
257257
}
258258
} while (true);
259259

third_party/proton/proton/viewer.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,8 @@ def format_frames(gf, format):
214214
elif format == "function_line":
215215
gf.dataframe["name"] = gf.dataframe["name"].apply(lambda x: x.split(":")[-1])
216216
elif format == "file_function":
217-
gf.dataframe["name"] = gf.dataframe["name"].apply(lambda x: x.split("/")[-1].split("@")[0])
217+
gf.dataframe["name"] = gf.dataframe["name"].apply(
218+
lambda x: f"{x.split('/')[-1].split(':')[0]}@{x.split('@')[-1].split(':')[0]}")
218219
return gf
219220

220221

third_party/proton/test/examples/frame.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
{
77
"children": [],
88
"frame": {
9-
"name": "/home/user/projects/example.py/test.py:foo@1",
9+
"name": "/home/user/projects/example.py/test.py:1@foo",
1010
"type": "function"
1111
},
1212
"metrics": {

third_party/proton/test/test_viewer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,13 @@ def test_format_frames(option):
5050
gf, _, _, _ = get_raw_metrics(f)
5151
gf = format_frames(gf, option)
5252
if option == "full":
53-
idx = gf.dataframe["name"] == "/home/user/projects/example.py/test.py:foo@1"
53+
idx = gf.dataframe["name"] == "/home/user/projects/example.py/test.py:1@foo"
5454
elif option == "file_function_line":
55-
idx = gf.dataframe["name"] == "test.py:foo@1"
55+
idx = gf.dataframe["name"] == "test.py:1@foo"
5656
elif option == "function_line":
57-
idx = gf.dataframe["name"] == "foo@1"
57+
idx = gf.dataframe["name"] == "1@foo"
5858
elif option == "file_function":
59-
idx = gf.dataframe["name"] == "test.py:foo"
59+
idx = gf.dataframe["name"] == "test.py@foo"
6060
assert idx.sum() == 1
6161

6262

third_party/proton/tutorials/matmul.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,7 @@ def grid(META):
238238

239239
argparser = argparse.ArgumentParser()
240240
argparser.add_argument("--profile", action="store_true")
241+
argparser.add_argument("--pcsampling", action="store_true", default=False)
241242
argparser.add_argument("--cudagraph", action="store_true", default=False)
242243
args = argparser.parse_args()
243244

@@ -305,9 +306,13 @@ def perf(ms):
305306

306307

307308
if args.profile:
308-
proton.start("matmul", hook="triton")
309+
if args.pcsampling:
310+
# proton-viewer -m num_samples/%,time/s ./matmul.hatchet
311+
proton.start("matmul", hook="triton", backend="cupti_pcsampling")
312+
else:
313+
# proton-viewer -m tflop/s,time/s ./matmul.hatchet
314+
proton.start("matmul", hook="triton")
309315
benchmark.run(show_plots=True, print_data=True)
310316
proton.finalize()
311-
# proton-viewer -m tflop/s,time/s ./matmul.hatchet
312317
else:
313318
benchmark.run(show_plots=True, print_data=True)

0 commit comments

Comments
 (0)