Skip to content

Commit bd042ef

Browse files
committed
dump cudaOccDeviceProp
1 parent 1dbf6e8 commit bd042ef

File tree

3 files changed

+45
-4
lines changed

3 files changed

+45
-4
lines changed

libkineto/src/CudaDeviceProperties.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,7 @@
77

88
#include "CudaDeviceProperties.h"
99

10-
#include <vector>
11-
1210
#include <cuda_runtime.h>
13-
#include <cuda_occupancy.h>
1411

1512
#include "Logger.h"
1613

libkineto/src/CudaDeviceProperties.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,14 @@
77

88
#pragma once
99

10+
#include <vector>
1011
#include <stdint.h>
12+
#include <cuda_occupancy.h>
1113

1214
namespace KINETO_NAMESPACE {
1315

16+
const std::vector<cudaOccDeviceProp>& occDeviceProps();
17+
1418
float kernelOccupancy(
1519
uint32_t deviceId,
1620
uint16_t registersPerThread,

libkineto/src/output_json.cpp

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,44 @@ static constexpr int kSchemaVersion = 1;
3333

3434
void ChromeTraceLogger::handleTraceStart(
3535
const std::unordered_map<std::string, std::string>& metadata) {
36+
std::string cudaOccDeviceProps = "";
37+
#ifdef HAS_CUPTI
38+
const std::vector<cudaOccDeviceProp>& occProps = KINETO_NAMESPACE::occDeviceProps();
39+
if (occProps.size() > 0) {
40+
std::ostringstream oss;
41+
oss << "[";
42+
bool first = true;
43+
for (size_t i = 0; i < occProps.size(); i += 1) {
44+
const cudaOccDeviceProp& occProp = occProps[i];
45+
if (!first) {
46+
oss << ", ";
47+
}
48+
oss << "{";
49+
oss << "\"computeMajor\": " << occProp.computeMajor << ", ";
50+
oss << "\"computeMinor\": " << occProp.computeMinor << ", ";
51+
oss << "\"maxThreadsPerBlock\": " << occProp.maxThreadsPerBlock << ", ";
52+
oss << "\"maxThreadsPerMultiprocessor\": " << occProp.maxThreadsPerMultiprocessor << ", ";
53+
oss << "\"regsPerBlock\": " << occProp.regsPerBlock << ", ";
54+
oss << "\"regsPerMultiprocessor\": " << occProp.regsPerMultiprocessor << ", ";
55+
oss << "\"warpSize\": " << occProp.warpSize << ", ";
56+
oss << "\"sharedMemPerBlock\": " << occProp.sharedMemPerBlock << ", ";
57+
oss << "\"sharedMemPerMultiprocessor\": " << occProp.sharedMemPerMultiprocessor << ", ";
58+
oss << "\"numSms\": " << occProp.numSms << ", ";
59+
oss << "\"sharedMemPerBlockOptin\": " << occProp.sharedMemPerBlockOptin;
60+
oss << "}";
61+
first = false;
62+
}
63+
oss << "]";
64+
cudaOccDeviceProps = oss.str();
65+
}
66+
#endif // HAS_CUPTI
67+
3668
traceOf_ << fmt::format(R"JSON(
3769
{{
3870
"schemaVersion": {},
3971
)JSON", kSchemaVersion);
4072

41-
if (!metadata.empty()) {
73+
if (!metadata.empty() || !cudaOccDeviceProps.empty()) {
4274
traceOf_ << R"JSON(
4375
"metadata": {
4476
)JSON";
@@ -50,6 +82,14 @@ void ChromeTraceLogger::handleTraceStart(
5082
traceOf_ << fmt::format(R"( "{}": "{}")", kv.first, kv.second);
5183
first = false;
5284
}
85+
if (!cudaOccDeviceProps.empty()) {
86+
if (!first) {
87+
traceOf_ << ",\n";
88+
}
89+
traceOf_ << fmt::format(R"( "{}": {})", "cudaOccDeviceProps", cudaOccDeviceProps);
90+
first = false;
91+
}
92+
5393
traceOf_ << R"JSON(
5494
},
5595
)JSON";

0 commit comments

Comments
 (0)