Skip to content

Commit a182c0e

Browse files
committed
refactor(//cpp/bin/torchtrtc): Address review comments
Signed-off-by: Naren Dasan <[email protected]> Signed-off-by: Naren Dasan <[email protected]>
1 parent b663154 commit a182c0e

File tree

4 files changed

+19
-133
lines changed

4 files changed

+19
-133
lines changed

cpp/bin/torchtrtc/README.md

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,12 @@ torchtrtc [input_file_path] [output_file_path]
3131
--i, --info Dumps info messages generated during
3232
compilation onto the console
3333
--build-debuggable-engine Creates a debuggable engine
34-
--use-strict-types Restrict operating type to only use set
35-
operation precision
3634
--allow-gpu-fallback (Only used when targeting DLA
3735
(device-type)) Lets engine run layers on
3836
GPU if they are not supported on DLA
3937
--require-full-compilation Require that the model should be fully
4038
compiled to TensorRT or throw an error
41-
--is-supported=[method_name],
42-
--supported=[method_name],
43-
--check-support=[method_name],
44-
--check-method-op-support=[method_name]
39+
--check-method-support=[method_name]
4540
Check the support for end to end
4641
compilation of a specified method in the
4742
TorchScript module
@@ -79,8 +74,8 @@ torchtrtc [input_file_path] [output_file_path]
7974
(Repeatable) Module that should always
8075
be run in Pytorch for execution (partial
8176
compilation must be enabled)
82-
--mbs=[min-block-size],
83-
--min-block-size=[min-block-size] Minimum number of contiguous TensorRT
77+
--mbs=[num_ops],
78+
--min-block-size=[num_ops] Minimum number of contiguous TensorRT
8479
supported ops to compile a subgraph to
8580
TensorRT
8681
--embed-engine Whether to treat input file as a
@@ -122,6 +117,7 @@ torchtrtc [input_file_path] [output_file_path]
122117
32)@f16%NHWC"
123118
"--" can be used to terminate flag options and force all following
124119
arguments to be treated as positional options
120+
125121
```
126122

127123
e.g.

cpp/bin/torchtrtc/luts.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
namespace torchtrtc {
99
namespace luts {
1010

11-
at::ScalarType to_torch_dtype(torchtrt::DataType dtype) {
11+
inline at::ScalarType to_torch_dtype(torchtrt::DataType dtype) {
1212
switch (dtype) {
1313
case torchtrt::DataType::kHalf:
1414
return at::kHalf;

cpp/bin/torchtrtc/main.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
#include "NvInfer.h"
66
#include "third_party/args/args.hpp"
77
#include "torch/script.h"
8-
#include "torch/torch.h"
98

109
#include "torch_tensorrt/logging.h"
1110
#include "torch_tensorrt/ptq.h"
@@ -38,8 +37,7 @@ int main(int argc, char** argv) {
3837

3938
args::Flag build_debuggable_engine(
4039
parser, "build-debuggable-engine", "Creates a debuggable engine", {"build-debuggable-engine"});
41-
args::Flag use_strict_types(
42-
parser, "use-strict-types", "Restrict operating type to only use set operation precision", {"use-strict-types"});
40+
4341
args::Flag allow_gpu_fallback(
4442
parser,
4543
"allow-gpu-fallback",
@@ -56,7 +54,7 @@ int main(int argc, char** argv) {
5654
parser,
5755
"method_name",
5856
"Check the support for end to end compilation of a specified method in the TorchScript module",
59-
{"supported", "is-supported", "check-support", "check-method-op-support"});
57+
{"check-method-support"});
6058

6159
args::Flag disable_tf32(
6260
parser, "disable-tf32", "Prevent Float32 layers from using the TF32 data format", {"disable-tf32"});
@@ -105,7 +103,7 @@ int main(int argc, char** argv) {
105103

106104
args::ValueFlag<uint64_t> min_block_size(
107105
parser,
108-
"min-block-size",
106+
"num_ops",
109107
"Minimum number of contiguous TensorRT supported ops to compile a subgraph to TensorRT",
110108
{"mbs", "min-block-size"});
111109

@@ -239,7 +237,6 @@ int main(int argc, char** argv) {
239237
compile_settings.debug = true;
240238
}
241239

242-
243240
if (allow_gpu_fallback) {
244241
compile_settings.device.allow_gpu_fallback = true;
245242
}

docsrc/tutorials/torchtrtc.rst

Lines changed: 11 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,16 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
3939
GPU if they are not supported on DLA
4040
--require-full-compilation Require that the model should be fully
4141
compiled to TensorRT or throw an error
42+
--check-method-support=[method_name]
43+
Check the support for end to end
44+
compilation of a specified method in the
45+
TorchScript module
4246
--disable-tf32 Prevent Float32 layers from using the
4347
TF32 data format
4448
--sparse-weights Enable sparsity for weights of conv and
4549
FC layers
4650
-p[precision...],
47-
--enabled-precision=[precision...]
48-
(Repeatable) Enabling an operating
51+
--enable-precision=[precision...] (Repeatable) Enabling an operating
4952
precision for kernels to use when
5053
building the engine (Int8 requires a
5154
calibration-cache argument) [ float |
@@ -64,20 +67,18 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
6467
--calibration-cache-file=[file_path]
6568
Path to calibration cache file to use
6669
for post training quantization
67-
--teo=[torch-executed-ops...],
68-
--torch-executed-ops=[torch-executed-ops...]
69-
(Repeatable) Operator in the graph that
70+
--teo=[op_name...],
71+
--torch-executed-op=[op_name...] (Repeatable) Operator in the graph that
7072
should always be run in PyTorch for
7173
execution (partial compilation must be
7274
enabled)
73-
--tem=[torch-executed-mods...],
74-
--torch-executed-mods=[torch-executed-mods...]
75+
--tem=[module_name...],
76+
--torch-executed-mod=[module_name...]
7577
(Repeatable) Module that should always
7678
be run in Pytorch for execution (partial
7779
compilation must be enabled)
78-
--mbs=[torch-executed-mods...],
79-
--min-block-size=[torch-executed-mods...]
80-
Minimum number of contiguous TensorRT
80+
--mbs=[num_ops],
81+
--min-block-size=[num_ops] Minimum number of contiguous TensorRT
8182
supported ops to compile a subgraph to
8283
TensorRT
8384
--embed-engine Whether to treat input file as a
@@ -119,114 +120,6 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
119120
32)@f16%NHWC"
120121
"--" can be used to terminate flag options and force all following
121122
arguments to be treated as positional options
122-
[input_specs...] {OPTIONS}
123-
124-
torchtrtc is a compiler for TorchScript, it will compile and optimize
125-
TorchScript programs to run on NVIDIA GPUs using TensorRT
126-
127-
OPTIONS:
128-
129-
-h, --help Display this help menu
130-
Verbiosity of the compiler
131-
-v, --verbose Dumps debugging information about the
132-
compilation process onto the console
133-
-w, --warnings Disables warnings generated during
134-
compilation onto the console (warnings
135-
are on by default)
136-
--i, --info Dumps info messages generated during
137-
compilation onto the console
138-
--build-debuggable-engine Creates a debuggable engine
139-
--use-strict-types Restrict operating type to only use set
140-
operation precision
141-
--allow-gpu-fallback (Only used when targeting DLA
142-
(device-type)) Lets engine run layers on
143-
GPU if they are not supported on DLA
144-
--require-full-compilation Require that the model should be fully
145-
compiled to TensorRT or throw an error
146-
--is-supported=[method_name],
147-
--supported=[method_name],
148-
--check-support=[method_name],
149-
--check-method-op-support=[method_name]
150-
Check the support for end to end
151-
compilation of a specified method in the
152-
TorchScript module
153-
--disable-tf32 Prevent Float32 layers from using the
154-
TF32 data format
155-
--sparse-weights Enable sparsity for weights of conv and
156-
FC layers
157-
-p[precision...],
158-
--enable-precision=[precision...] (Repeatable) Enabling an operating
159-
precision for kernels to use when
160-
building the engine (Int8 requires a
161-
calibration-cache argument) [ float |
162-
float32 | f32 | fp32 | half | float16 |
163-
f16 | fp16 | int8 | i8 | char ]
164-
(default: float)
165-
-d[type], --device-type=[type] The type of device the engine should be
166-
built for [ gpu | dla ] (default: gpu)
167-
--gpu-id=[gpu_id] GPU id if running on multi-GPU platform
168-
(defaults to 0)
169-
--dla-core=[dla_core] DLACore id if running on available DLA
170-
(defaults to 0)
171-
--engine-capability=[capability] The type of device the engine should be
172-
built for [ standard | safety |
173-
dla_standalone ]
174-
--calibration-cache-file=[file_path]
175-
Path to calibration cache file to use
176-
for post training quantization
177-
--teo=[op_name...],
178-
--torch-executed-op=[op_name...] (Repeatable) Operator in the graph that
179-
should always be run in PyTorch for
180-
execution (partial compilation must be
181-
enabled)
182-
--tem=[module_name...],
183-
--torch-executed-mod=[module_name...]
184-
(Repeatable) Module that should always
185-
be run in Pytorch for execution (partial
186-
compilation must be enabled)
187-
--mbs=[min-block-size],
188-
--min-block-size=[min-block-size] Minimum number of contiguous TensorRT
189-
supported ops to compile a subgraph to
190-
TensorRT
191-
--embed-engine Whether to treat input file as a
192-
serialized TensorRT engine and embed it
193-
into a TorchScript module (device spec
194-
must be provided)
195-
--num-min-timing-iter=[num_iters] Number of minimization timing iterations
196-
used to select kernels
197-
--num-avg-timing-iters=[num_iters]
198-
Number of averaging timing iterations
199-
used to select kernels
200-
--workspace-size=[workspace_size] Maximum size of workspace given to
201-
TensorRT
202-
-t[threshold],
203-
--threshold=[threshold] Maximum acceptable numerical deviation
204-
from standard torchscript output
205-
(default 2e-5)
206-
--no-threshold-check Skip checking threshold compliance
207-
--truncate-long-double,
208-
--truncate, --truncate-64bit Truncate weights that are provided in
209-
64bit to 32bit (Long, Double to Int,
210-
Float)
211-
--save-engine Instead of compiling a full a
212-
TorchScript program, save the created
213-
engine to the path specified as the
214-
output path
215-
input_file_path Path to input TorchScript file
216-
output_file_path Path for compiled TorchScript (or
217-
TensorRT engine) file
218-
input_specs... Specs for inputs to engine, can either
219-
be a single size or a range defined by
220-
Min, Optimal, Max sizes, e.g.
221-
"(N,..,C,H,W)"
222-
"[(MIN_N,..,MIN_C,MIN_H,MIN_W);(OPT_N,..,OPT_C,OPT_H,OPT_W);(MAX_N,..,MAX_C,MAX_H,MAX_W)]".
223-
Data Type and format can be specified by
224-
adding an "@" followed by dtype and "%"
225-
followed by format to the end of the
226-
shape spec. e.g. "(3, 3, 32,
227-
32)@f16%NHWC"
228-
"--" can be used to terminate flag options and force all following
229-
arguments to be treated as positional options
230123
231124
e.g.
232125

0 commit comments

Comments
 (0)