Skip to content

Commit 2caeb94

Browse files
Ilia Cherniavskii authored and
facebook-github-bot committed
Expand Kineto profiler support (part 1) (#57333)
Summary:
Pull Request resolved: pytorch/pytorch#57333
Pull Request resolved: #193
Expanding Kineto support to more platforms.
Reviewed By: gdankel
Differential Revision: D27873669
fbshipit-source-id: 4a72a589f958440cbfff247751b7f4e1910a10c7
1 parent 71ff16a commit 2caeb94

39 files changed

+419
-376
lines changed

libkineto/CMakeLists.txt

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,13 @@ endif()
4848

4949
# Define file lists
5050
if (LIBKINETO_NOCUPTI)
51-
get_filelist("get_libkineto_cpu_only_srcs()" LIBKINETO_SRCS)
51+
get_filelist("get_libkineto_cpu_only_srcs(with_api=False)" LIBKINETO_SRCS)
5252
message(INFO " CUPTI unavailable or disabled - not building GPU profilers")
5353
else()
54-
get_filelist("get_libkineto_srcs()" LIBKINETO_SRCS)
54+
get_filelist("get_libkineto_srcs(with_api=False)" LIBKINETO_SRCS)
5555
endif()
5656
get_filelist("get_libkineto_public_headers()" LIBKINETO_PUBLIC_HEADERS)
57-
set(LIBKINETO_API_SRCS "${LIBKINETO_SOURCE_DIR}/libkineto_api.cpp")
57+
get_filelist("get_libkineto_api_srcs()" LIBKINETO_API_SRCS)
5858

5959
add_library(kineto_base OBJECT ${LIBKINETO_SRCS})
6060
add_library(kineto_api OBJECT ${LIBKINETO_API_SRCS})
@@ -69,13 +69,22 @@ set_target_properties(kineto_base kineto_api PROPERTIES
6969
CXX_EXTENSIONS NO
7070
CXX_VISIBILITY_PRESET hidden)
7171

72-
target_compile_options(kineto_base PRIVATE "-DKINETO_NAMESPACE=libkineto"
73-
"-std=gnu++14")
74-
target_compile_options(kineto_api PRIVATE "-std=gnu++14")
72+
set(KINETO_COMPILE_OPTIONS "-DKINETO_NAMESPACE=libkineto")
73+
list(APPEND KINETO_COMPILE_OPTIONS "-DFMT_HEADER_ONLY")
74+
if(NOT MSVC)
75+
list(APPEND KINETO_COMPILE_OPTIONS "-std=c++14")
76+
else()
77+
list(APPEND KINETO_COMPILE_OPTIONS "/std:c++14")
78+
list(APPEND KINETO_COMPILE_OPTIONS "-DWIN32_LEAN_AND_MEAN")
79+
list(APPEND KINETO_COMPILE_OPTIONS "-DNOGDI")
80+
endif()
7581
if (NOT LIBKINETO_NOCUPTI)
76-
target_compile_options(kineto_base PRIVATE "-DHAS_CUPTI")
82+
list(APPEND KINETO_COMPILE_OPTIONS "-DHAS_CUPTI")
7783
endif()
7884

85+
target_compile_options(kineto_base PRIVATE "${KINETO_COMPILE_OPTIONS}")
86+
target_compile_options(kineto_api PRIVATE "${KINETO_COMPILE_OPTIONS}")
87+
7988
if(NOT TARGET fmt)
8089
if(NOT FMT_SOURCE_DIR)
8190
set(FMT_SOURCE_DIR "${LIBKINETO_THIRDPARTY_DIR}/fmt"
@@ -95,6 +104,8 @@ if(NOT TARGET fmt)
95104
endif()
96105

97106
set(FMT_INCLUDE_DIR "${FMT_SOURCE_DIR}/include")
107+
message(STATUS "Kineto: FMT_SOURCE_DIR = ${FMT_SOURCE_DIR}")
108+
message(STATUS "Kineto: FMT_INCLUDE_DIR = ${FMT_INCLUDE_DIR}")
98109
if (NOT CUPTI_INCLUDE_DIR)
99110
set(CUPTI_INCLUDE_DIR "${CUDA_SOURCE_DIR}/extras/CUPTI/include")
100111
endif()
@@ -134,9 +145,8 @@ else()
134145
endif()
135146

136147
target_link_libraries(kineto "${CUDA_cupti_LIBRARY}")
137-
138-
target_link_libraries(kineto $<BUILD_INTERFACE:fmt>)
139-
add_dependencies(kineto fmt)
148+
target_link_libraries(kineto $<BUILD_INTERFACE:fmt::fmt-header-only>)
149+
add_dependencies(kineto fmt::fmt-header-only)
140150

141151
install(TARGETS kineto EXPORT kinetoLibraryConfig
142152
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}

libkineto/include/ActivityProfilerInterface.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
namespace libkineto {
1919

2020
class ActivityProfilerController;
21-
class CpuTraceBuffer;
21+
struct CpuTraceBuffer;
2222
class Config;
2323

2424
class ActivityProfilerInterface {
@@ -78,9 +78,9 @@ class ActivityProfilerInterface {
7878
return true;
7979
}
8080

81-
// Maps kernel thread id -> pthread id for CPU ops.
81+
// Saves information for the current thread to be used in profiler output
8282
// Client must record any new kernel thread where the activity has occurred.
83-
virtual void recordThreadInfo(pid_t tid, pthread_t pthreadId) {}
83+
virtual void recordThreadInfo() {}
8484

8585
// Record trace metadata, currently supporting only string key and values,
8686
// values with the same key are overwritten

libkineto/include/GenericTraceActivity.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <thread>
1313
#include <vector>
1414

15+
#include "ThreadUtil.h"
1516
#include "TraceActivity.h"
1617

1718
namespace libkineto {
@@ -21,7 +22,7 @@ namespace libkineto {
2122
struct GenericTraceActivity : TraceActivity {
2223

2324
int64_t deviceId() const override {
24-
return cachedPid();
25+
return processId();
2526
}
2627

2728
int64_t resourceId() const override {
@@ -60,15 +61,14 @@ struct GenericTraceActivity : TraceActivity {
6061
metadata_.push_back(std::move(kv));
6162
}
6263

63-
const std::string getMetadata() const {
64+
const std::string getMetadata() const {
6465
return fmt::format("{}", fmt::join(metadata_, ", "));
6566
}
6667

6768
int64_t startTime{0};
6869
int64_t endTime{0};
6970
int64_t correlation{0};
7071
int device{-1};
71-
// TODO: Add OS abstraction
7272
int32_t sysThreadId{0};
7373
std::string activityName;
7474
ActivityType activityType;

libkineto/include/ThreadUtil.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
#pragma once
2+
3+
#include <cstdint>
4+
#include <string>
5+
#include <utility>
6+
#include <vector>
7+
8+
namespace libkineto {
9+
10+
int32_t systemThreadId();
11+
int32_t threadId();
12+
bool setThreadName(const std::string& name);
13+
std::string getThreadName();
14+
15+
int32_t processId();
16+
std::string processName(int32_t pid);
17+
18+
struct ProcessInfo {
19+
int32_t pid;
20+
const std::string name;
21+
const std::string label;
22+
};
23+
24+
struct ThreadInfo {
25+
ThreadInfo(int32_t tid, const std::string& name) :
26+
tid(tid), name(name) {}
27+
int32_t tid;
28+
const std::string name;
29+
};
30+
31+
// Return a list of pids and process names for the current process
32+
// and its parents.
33+
std::vector<std::pair<int32_t, std::string>> pidCommandPairsOfAncestors();
34+
35+
} // namespace libkineto

libkineto/include/TraceActivity.h

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
#pragma once
99

1010
#include <string>
11-
#include <sys/types.h>
12-
#include <unistd.h>
1311

1412
#include "ActivityType.h"
1513

@@ -37,21 +35,12 @@ struct TraceActivity {
3735
virtual const TraceActivity* linkedActivity() const = 0;
3836
// Log activity
3937
virtual void log(ActivityLogger& logger) const = 0;
40-
};
41-
42-
namespace {
43-
// Caching pid is not safe across forks and clones but we currently
44-
// don't support an active profiler in a forked process.
45-
static inline pid_t cachedPid() {
46-
static pid_t pid = getpid();
47-
return pid;
48-
}
4938

50-
static inline int64_t nsToUs(int64_t ns) {
39+
static int64_t nsToUs(int64_t ns) {
5140
// It's important that this conversion is the same everywhere.
5241
// No rounding!
5342
return ns / 1000;
5443
}
55-
}
44+
};
5645

5746
} // namespace libkineto

libkineto/include/libkineto.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
#pragma once
1111

12-
#include <signal.h>
1312
#include <atomic>
1413
#include <chrono>
1514
#include <functional>
@@ -27,6 +26,8 @@
2726
#include "GenericTraceActivity.h"
2827
#include "TraceSpan.h"
2928

29+
#include "ThreadUtil.h"
30+
3031
extern "C" {
3132
void suppressLibkinetoLogMessages();
3233
bool libkineto_init(bool cpuOnly, bool logOnError);
@@ -98,7 +99,7 @@ class LibkinetoApi {
9899

99100
std::unique_ptr<ActivityProfilerInterface> activityProfiler_{};
100101
ClientInterface* client_{};
101-
pthread_t clientRegisterThread_{0};
102+
int32_t clientRegisterThread_{0};
102103

103104
bool isLoaded_{false};
104105
std::atomic_int netSizeThreshold_{};
@@ -108,4 +109,3 @@ class LibkinetoApi {
108109
LibkinetoApi& api();
109110

110111
} // namespace libkineto
111-

libkineto/include/time_since_epoch.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
namespace libkineto {
1313

1414
inline int64_t timeSinceEpoch(
15-
const std::chrono::time_point<std::chrono::high_resolution_clock>& t) {
15+
const std::chrono::time_point<std::chrono::system_clock>& t) {
1616
return std::chrono::duration_cast<std::chrono::microseconds>(
1717
t.time_since_epoch())
1818
.count();

libkineto/libkineto_defs.bzl

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,13 @@
33
# This source code is licensed under the BSD-style license found in the
44
# LICENSE file in the root directory of this source tree.
55

6-
def get_libkineto_srcs():
6+
def get_libkineto_api_srcs():
7+
return [
8+
"src/ThreadUtil.cpp",
9+
"src/libkineto_api.cpp",
10+
]
11+
12+
def get_libkineto_srcs(with_api = True):
713
return [
814
"src/AbstractConfig.cpp",
915
"src/ActivityProfiler.cpp",
@@ -19,17 +25,14 @@ def get_libkineto_srcs():
1925
"src/EventProfilerController.cpp",
2026
"src/GenericTraceActivity.cpp",
2127
"src/Logger.cpp",
22-
"src/ProcessInfo.cpp",
23-
"src/ThreadName.cpp",
2428
"src/WeakSymbols.cpp",
2529
"src/cupti_strings.cpp",
2630
"src/init.cpp",
27-
"src/libkineto_api.cpp",
2831
"src/output_csv.cpp",
2932
"src/output_json.cpp",
30-
]
33+
] + (get_libkineto_api_srcs() if with_api else [])
3134

32-
def get_libkineto_cpu_only_srcs():
35+
def get_libkineto_cpu_only_srcs(with_api = True):
3336
return [
3437
"src/AbstractConfig.cpp",
3538
"src/ActivityProfiler.cpp",
@@ -41,22 +44,20 @@ def get_libkineto_cpu_only_srcs():
4144
"src/Demangle.cpp",
4245
"src/GenericTraceActivity.cpp",
4346
"src/Logger.cpp",
44-
"src/ProcessInfo.cpp",
45-
"src/ThreadName.cpp",
4647
"src/init.cpp",
47-
"src/libkineto_api.cpp",
4848
"src/output_csv.cpp",
4949
"src/output_json.cpp",
50-
]
50+
] + (get_libkineto_api_srcs() if with_api else [])
5151

5252
def get_libkineto_public_headers():
5353
return [
5454
"include/ActivityProfilerInterface.h",
5555
"include/ActivityType.h",
56-
"include/ClientTraceActivity.h",
5756
"include/ClientInterface.h",
57+
"include/GenericTraceActivity.h",
5858
"include/TraceActivity.h",
5959
"include/TraceSpan.h",
60+
"include/ThreadUtil.h",
6061
"include/libkineto.h",
6162
"include/time_since_epoch.h",
6263
]

libkineto/src/AbstractConfig.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "AbstractConfig.h"
99

10+
#include <array>
1011
#include <fmt/format.h>
1112
#include <sstream>
1213

libkineto/src/ActivityProfiler.cpp

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,7 @@
88
#include "ActivityProfiler.h"
99

1010
#include <fmt/format.h>
11-
#include <libgen.h>
12-
#include <sys/types.h>
1311
#include <time.h>
14-
#include <unistd.h>
1512
#include <atomic>
1613
#include <iomanip>
1714
#include <string>
@@ -32,6 +29,7 @@
3229
#include "output_base.h"
3330

3431
#include "Logger.h"
32+
#include "ThreadUtil.h"
3533

3634
using namespace std::chrono;
3735
using namespace libkineto;
@@ -617,7 +615,7 @@ const time_point<system_clock> ActivityProfiler::performRunLoopStep(
617615
// FIXME: Is this a good idea for synced start?
618616
{
619617
std::lock_guard<std::mutex> guard(mutex_);
620-
profileEndTime_ = time_point<high_resolution_clock>(
618+
profileEndTime_ = time_point<system_clock>(
621619
microseconds(captureWindowStartTime_)) +
622620
config_->activitiesOnDemandDuration();
623621
}
@@ -655,23 +653,6 @@ const time_point<system_clock> ActivityProfiler::performRunLoopStep(
655653
return new_wakeup_time;
656654
}
657655

658-
// Extract process name from /proc/pid/cmdline. This does not have
659-
// the 16 character limit that /proc/pid/status and /proc/pid/comm have.
660-
const string processName(pid_t pid) {
661-
FILE* cmdfile = fopen(fmt::format("/proc/{}/cmdline", pid).c_str(), "r");
662-
if (cmdfile != nullptr) {
663-
char* command = nullptr;
664-
int scanned = fscanf(cmdfile, "%ms", &command);
665-
if (scanned > 0 && command) {
666-
string ret(basename(command));
667-
free(command);
668-
return ret;
669-
}
670-
}
671-
VLOG(1) << "Failed to read process name for pid " << pid;
672-
return "";
673-
}
674-
675656
void ActivityProfiler::finalizeTrace(const Config& config, ActivityLogger& logger) {
676657
LOG(INFO) << "Recorded nets:";
677658
{
@@ -682,9 +663,9 @@ void ActivityProfiler::finalizeTrace(const Config& config, ActivityLogger& logge
682663
}
683664

684665
// Process names
685-
string process_name = processName(getpid());
666+
string process_name = processName(processId());
686667
if (!process_name.empty()) {
687-
pid_t pid = getpid();
668+
int32_t pid = processId();
688669
logger.handleProcessInfo(
689670
{pid, process_name, "CPU"}, captureWindowStartTime_);
690671
if (!cpuOnly_) {

0 commit comments

Comments
 (0)