Skip to content

Commit b798c7f

Browse files
authored
Merge pull request #778 from NVIDIA/torchtrtc_cli_cleanup
Refactor the CLI
2 parents 7223fc8 + a182c0e commit b798c7f

File tree

11 files changed

+599
-428
lines changed

11 files changed

+599
-428
lines changed

cpp/bin/torchtrtc/BUILD

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,14 @@ config_setting(
1010
cc_binary(
1111
name = "torchtrtc",
1212
srcs = [
13+
"accuracy.h",
14+
"accuracy.cpp",
15+
"fileio.h",
16+
"fileio.cpp",
17+
"luts.h",
1318
"main.cpp",
19+
"parser_util.h",
20+
"parser_util.cpp"
1421
],
1522
deps = [
1623
"//third_party/args",

cpp/bin/torchtrtc/README.md

Lines changed: 101 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -14,108 +14,110 @@ to standard TorchScript. Load with `torch.jit.load()` and run like you would run
1414

1515
```
1616
torchtrtc [input_file_path] [output_file_path]
17-
[input_specs...] {OPTIONS}
17+
[input_specs...] {OPTIONS}
1818
19-
Torch-TensorRT is a compiler for TorchScript, it will compile and optimize
20-
TorchScript programs to run on NVIDIA GPUs using TensorRT
19+
torchtrtc is a compiler for TorchScript, it will compile and optimize
20+
TorchScript programs to run on NVIDIA GPUs using TensorRT
2121
22-
OPTIONS:
22+
OPTIONS:
23+
24+
-h, --help Display this help menu
25+
Verbosity of the compiler
26+
-v, --verbose Dumps debugging information about the
27+
compilation process onto the console
28+
-w, --warnings Disables warnings generated during
29+
compilation onto the console (warnings
30+
are on by default)
31+
--i, --info Dumps info messages generated during
32+
compilation onto the console
33+
--build-debuggable-engine Creates a debuggable engine
34+
--allow-gpu-fallback (Only used when targeting DLA
35+
(device-type)) Lets engine run layers on
36+
GPU if they are not supported on DLA
37+
--require-full-compilation Require that the model should be fully
38+
compiled to TensorRT or throw an error
39+
--check-method-support=[method_name]
40+
Check the support for end to end
41+
compilation of a specified method in the
42+
TorchScript module
43+
--disable-tf32 Prevent Float32 layers from using the
44+
TF32 data format
45+
--sparse-weights Enable sparsity for weights of conv and
46+
FC layers
47+
-p[precision...],
48+
--enable-precision=[precision...] (Repeatable) Enabling an operating
49+
precision for kernels to use when
50+
building the engine (Int8 requires a
51+
calibration-cache argument) [ float |
52+
float32 | f32 | fp32 | half | float16 |
53+
f16 | fp16 | int8 | i8 | char ]
54+
(default: float)
55+
-d[type], --device-type=[type] The type of device the engine should be
56+
built for [ gpu | dla ] (default: gpu)
57+
--gpu-id=[gpu_id] GPU id if running on multi-GPU platform
58+
(defaults to 0)
59+
--dla-core=[dla_core] DLACore id if running on available DLA
60+
(defaults to 0)
61+
--engine-capability=[capability] The type of device the engine should be
62+
built for [ standard | safety |
63+
dla_standalone ]
64+
--calibration-cache-file=[file_path]
65+
Path to calibration cache file to use
66+
for post training quantization
67+
--teo=[op_name...],
68+
--torch-executed-op=[op_name...] (Repeatable) Operator in the graph that
69+
should always be run in PyTorch for
70+
execution (partial compilation must be
71+
enabled)
72+
--tem=[module_name...],
73+
--torch-executed-mod=[module_name...]
74+
(Repeatable) Module that should always
75+
be run in Pytorch for execution (partial
76+
compilation must be enabled)
77+
--mbs=[num_ops],
78+
--min-block-size=[num_ops] Minimum number of contiguous TensorRT
79+
supported ops to compile a subgraph to
80+
TensorRT
81+
--embed-engine Whether to treat input file as a
82+
serialized TensorRT engine and embed it
83+
into a TorchScript module (device spec
84+
must be provided)
85+
--num-min-timing-iter=[num_iters] Number of minimization timing iterations
86+
used to select kernels
87+
--num-avg-timing-iters=[num_iters]
88+
Number of averaging timing iterations
89+
used to select kernels
90+
--workspace-size=[workspace_size] Maximum size of workspace given to
91+
TensorRT
92+
-t[threshold],
93+
--threshold=[threshold] Maximum acceptable numerical deviation
94+
from standard torchscript output
95+
(default 2e-5)
96+
--no-threshold-check Skip checking threshold compliance
97+
--truncate-long-double,
98+
--truncate, --truncate-64bit Truncate weights that are provided in
99+
64bit to 32bit (Long, Double to Int,
100+
Float)
101+
--save-engine Instead of compiling a full
102+
TorchScript program, save the created
103+
engine to the path specified as the
104+
output path
105+
input_file_path Path to input TorchScript file
106+
output_file_path Path for compiled TorchScript (or
107+
TensorRT engine) file
108+
input_specs... Specs for inputs to engine, can either
109+
be a single size or a range defined by
110+
Min, Optimal, Max sizes, e.g.
111+
"(N,..,C,H,W)"
112+
"[(MIN_N,..,MIN_C,MIN_H,MIN_W);(OPT_N,..,OPT_C,OPT_H,OPT_W);(MAX_N,..,MAX_C,MAX_H,MAX_W)]".
113+
Data Type and format can be specified by
114+
adding an "@" followed by dtype and "%"
115+
followed by format to the end of the
116+
shape spec. e.g. "(3, 3, 32,
117+
32)@f16%NHWC"
118+
"--" can be used to terminate flag options and force all following
119+
arguments to be treated as positional options
23120
24-
-h, --help Display this help menu
25-
Verbiosity of the compiler
26-
-v, --verbose Dumps debugging information about the
27-
compilation process onto the console
28-
-w, --warnings Disables warnings generated during
29-
compilation onto the console (warnings
30-
are on by default)
31-
--i, --info Dumps info messages generated during
32-
compilation onto the console
33-
--build-debuggable-engine Creates a debuggable engine
34-
--allow-gpu-fallback (Only used when targeting DLA
35-
(device-type)) Lets engine run layers on
36-
GPU if they are not supported on DLA
37-
--require-full-compilation Require that the model should be fully
38-
compiled to TensorRT or throw an error
39-
--disable-tf32 Prevent Float32 layers from using the
40-
TF32 data format
41-
--sparse-weights Enable sparsity for weights of conv and
42-
FC layers
43-
-p[precision...],
44-
--enabled-precision=[precision...]
45-
(Repeatable) Enabling an operating
46-
precision for kernels to use when
47-
building the engine (Int8 requires a
48-
calibration-cache argument) [ float |
49-
float32 | f32 | fp32 | half | float16 |
50-
f16 | fp16 | int8 | i8 | char ]
51-
(default: float)
52-
-d[type], --device-type=[type] The type of device the engine should be
53-
built for [ gpu | dla ] (default: gpu)
54-
--gpu-id=[gpu_id] GPU id if running on multi-GPU platform
55-
(defaults to 0)
56-
--dla-core=[dla_core] DLACore id if running on available DLA
57-
(defaults to 0)
58-
--engine-capability=[capability] The type of device the engine should be
59-
built for [ standard | safety |
60-
dla_standalone ]
61-
--calibration-cache-file=[file_path]
62-
Path to calibration cache file to use
63-
for post training quantization
64-
--teo=[torch-executed-ops...],
65-
--torch-executed-ops=[torch-executed-ops...]
66-
(Repeatable) Operator in the graph that
67-
should always be run in PyTorch for
68-
execution (partial compilation must be
69-
enabled)
70-
--tem=[torch-executed-mods...],
71-
--torch-executed-mods=[torch-executed-mods...]
72-
(Repeatable) Module that should always
73-
be run in Pytorch for execution (partial
74-
compilation must be enabled)
75-
--mbs=[torch-executed-mods...],
76-
--min-block-size=[torch-executed-mods...]
77-
Minimum number of contiguous TensorRT
78-
supported ops to compile a subgraph to
79-
TensorRT
80-
--embed-engine Whether to treat input file as a
81-
serialized TensorRT engine and embed it
82-
into a TorchScript module (device spec
83-
must be provided)
84-
--num-min-timing-iter=[num_iters] Number of minimization timing iterations
85-
used to select kernels
86-
--num-avg-timing-iters=[num_iters]
87-
Number of averaging timing iterations
88-
used to select kernels
89-
--workspace-size=[workspace_size] Maximum size of workspace given to
90-
TensorRT
91-
-t[threshold],
92-
--threshold=[threshold] Maximum acceptable numerical deviation
93-
from standard torchscript output
94-
(default 2e-5)
95-
--no-threshold-check Skip checking threshold compliance
96-
--truncate-long-double,
97-
--truncate, --truncate-64bit Truncate weights that are provided in
98-
64bit to 32bit (Long, Double to Int,
99-
Float)
100-
--save-engine Instead of compiling a full a
101-
TorchScript program, save the created
102-
engine to the path specified as the
103-
output path
104-
input_file_path Path to input TorchScript file
105-
output_file_path Path for compiled TorchScript (or
106-
TensorRT engine) file
107-
input_specs... Specs for inputs to engine, can either
108-
be a single size or a range defined by
109-
Min, Optimal, Max sizes, e.g.
110-
"(N,..,C,H,W)"
111-
"[(MIN_N,..,MIN_C,MIN_H,MIN_W);(OPT_N,..,OPT_C,OPT_H,OPT_W);(MAX_N,..,MAX_C,MAX_H,MAX_W)]".
112-
Data Type and format can be specified by
113-
adding an "@" followed by dtype and "%"
114-
followed by format to the end of the
115-
shape spec. e.g. "(3, 3, 32,
116-
32)@f16%NHWC"
117-
"--" can be used to terminate flag options and force all following
118-
arguments to be treated as positional options
119121
```
120122

121123
e.g.

cpp/bin/torchtrtc/accuracy.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#include "accuracy.h"
2+
3+
#include "torch_tensorrt/logging.h"
4+
#include "torch_tensorrt/torch_tensorrt.h"
5+
6+
namespace torchtrtc {
7+
namespace accuracy {
8+
9+
// Checks whether `diff` (an elementwise difference tensor) is within a relative
// tolerance of the largest absolute value found across `inputs`.
//
// @param diff      Elementwise difference between the two tensors being compared.
// @param inputs    Tensors whose max |value| scales the tolerance (kept by-value
//                  to match the declaration in accuracy.h).
// @param threshold Relative tolerance factor (e.g. 2e-5).
// @return true if max|diff| <= threshold * max|inputs|.
bool check_rtol(const at::Tensor& diff, const std::vector<at::Tensor> inputs, float threshold) {
  double max_input_value = 0.0;
  for (const auto& tensor : inputs) {
    max_input_value = std::fmax(tensor.abs().max().item<float>(), max_input_value);
  }
  // Hoist the reduction: the original computed diff.abs().max() twice
  // (once for logging, once for the comparison) — it is a full tensor pass.
  const float max_diff = diff.abs().max().item<float>();
  torchtrt::logging::log(
      torchtrt::logging::Level::kDEBUG, std::string("Max Difference: ") + std::to_string(max_diff));
  torchtrt::logging::log(
      torchtrt::logging::Level::kDEBUG, std::string("Acceptable Threshold: ") + std::to_string(threshold));
  return max_diff <= threshold * max_input_value;
}
21+
22+
// Returns true when tensors `a` and `b` agree within a relative tolerance of
// `threshold`, delegating the actual comparison to check_rtol on (a - b).
bool almost_equal(const at::Tensor& a, const at::Tensor& b, float threshold) {
  const auto diff = a - b;
  return check_rtol(diff, {a, b}, threshold);
}
25+
26+
} // namespace accuracy
27+
} // namespace torchtrtc

cpp/bin/torchtrtc/accuracy.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#pragma once
2+
3+
#include <stdlib.h>
4+
#include <iostream>
5+
#include <sstream>
6+
#include <vector>
7+
8+
#include "torch/script.h"
9+
#include "torch/torch.h"
10+
11+
namespace torchtrtc {
12+
namespace accuracy {
13+
14+
bool check_rtol(const at::Tensor& diff, const std::vector<at::Tensor> inputs, float threshold);
15+
bool almost_equal(const at::Tensor& a, const at::Tensor& b, float threshold);
16+
17+
} // namespace accuracy
18+
} // namespace torchtrtc

cpp/bin/torchtrtc/fileio.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#include "fileio.h"
2+
3+
namespace torchtrtc {
4+
namespace fileio {
5+
6+
// Reads the entire file at `path` into a string (binary mode, byte-exact).
//
// @param path Path to the file to read.
// @return The file's contents, or an empty string if the file cannot be opened.
std::string read_buf(std::string const& path) {
  std::string buf;
  std::ifstream stream(path, std::ios::binary);

  if (stream) {
    // Bulk-read via the stream buffer instead of the original
    // char-by-char istream_iterator copy — same bytes, far fewer
    // virtual calls and reallocations.
    std::ostringstream contents;
    contents << stream.rdbuf();
    buf = contents.str();
  }

  return buf;
}
17+
18+
// Returns the current working directory as a string.
// Logs an error and terminates the process if the cwd cannot be determined.
std::string get_cwd() {
  char cwd_buf[FILENAME_MAX]; // holds the path returned by getcwd
  if (!getcwd(cwd_buf, FILENAME_MAX)) {
    torchtrt::logging::log(torchtrt::logging::Level::kERROR, "Unable to get current directory");
    exit(1);
  }
  return std::string(cwd_buf);
}
28+
29+
// Canonicalizes `path` via realpath (resolving symlinks, "." and "..").
// Logs an error and terminates the process if the path cannot be resolved.
std::string real_path(std::string path) {
  char resolved[PATH_MAX]; // buffer filled by realpath
  if (realpath(path.c_str(), resolved) != nullptr) {
    return std::string(resolved);
  }
  torchtrt::logging::log(torchtrt::logging::Level::kERROR, std::string("Unable to find file ") + path);
  exit(1);
}
40+
41+
std::string resolve_path(std::string path) {
42+
auto rpath = path;
43+
if (!(rpath.rfind("/", 0) == 0)) {
44+
rpath = get_cwd() + '/' + rpath;
45+
}
46+
return rpath;
47+
}
48+
49+
} // namespace fileio
50+
} // namespace torchtrtc

cpp/bin/torchtrtc/fileio.h

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#pragma once
2+
#include <stdlib.h>
3+
#include <iostream>
4+
#include <sstream>
5+
6+
#ifdef __linux__
7+
#include <linux/limits.h>
8+
#else
9+
#define PATH_MAX 260
10+
#endif
11+
12+
#if defined(_WIN32)
13+
#include <direct.h>
14+
#define getcwd _getcwd
15+
#define realpath(N, R) _fullpath((R), (N), PATH_MAX)
16+
#else
17+
#include <unistd.h>
18+
#endif
19+
20+
#include "NvInfer.h"
21+
#include "third_party/args/args.hpp"
22+
#include "torch/script.h"
23+
#include "torch/torch.h"
24+
25+
#include "torch_tensorrt/logging.h"
26+
#include "torch_tensorrt/ptq.h"
27+
#include "torch_tensorrt/torch_tensorrt.h"
28+
29+
namespace torchtrtc {
30+
namespace fileio {
31+
32+
std::string read_buf(std::string const& path);
33+
std::string get_cwd();
34+
std::string real_path(std::string path);
35+
std::string resolve_path(std::string path);
36+
37+
} // namespace fileio
38+
} // namespace torchtrtc

0 commit comments

Comments
 (0)