
Commit 6bd3c7f

fix comments
1 parent 9289fd4 commit 6bd3c7f

File tree: 5 files changed (+7, -54 lines)

py/torch_tensorrt/dynamo/lowering/passes/_aten_lowering_pass.py

Lines changed: 0 additions & 2 deletions
@@ -10,7 +10,6 @@
 from .constant_folding import constant_fold
 from .fuse_distributed_ops import fuse_distributed_ops
 from .fuse_prims_broadcast import fuse_prims_broadcast
-from .lower_linear import lower_linear
 from .pass_manager import DynamoPassManager
 from .remove_assert_nodes import remove_assert_nodes
 from .remove_detach import remove_detach
@@ -29,7 +28,6 @@
     accumulate_fp32_matmul,
     remove_num_users_is_0_nodes,
     complex_graph_detection,
-    lower_linear,
 ]

 pre_lowering_pass_list = [

py/torch_tensorrt/dynamo/lowering/passes/lower_linear.py

Lines changed: 0 additions & 42 deletions
This file was deleted.

tools/perf/README.md

Lines changed: 2 additions & 2 deletions
@@ -9,8 +9,6 @@ This is a comprehensive Python benchmark suite to run perf runs using different
 5. TensorRT


-Note: Please note that for ONNX models, user can convert the ONNX model to TensorRT serialized engine and then use this package.
-
 ## Prerequisite

 Benchmark scripts depends on following Python packages in addition to requirements.txt packages
@@ -47,13 +45,15 @@ Here are the list of `CompileSpec` options that can be provided directly to comp
 * `--backends` : Comma separated string of backends. Eg: torch, torch_compile, dynamo, tensorrt
 * `--model` : Name of the model file (Can be a torchscript module or a tensorrt engine (ending in `.plan` extension)). If the backend is `dynamo` or `torch_compile`, the input should be a Pytorch module (instead of a torchscript module).
 * `--model_torch` : Name of the PyTorch model file (optional, only necessary if `dynamo` or `torch_compile` is a chosen backend)
+* `--onnx` : ONNX model file which helps bypass the step of exporting ONNX from `model_torch`. If this argument is provided, the ONNX will be directly converted to TRT engine
 * `--inputs` : List of input shapes & dtypes. Eg: (1, 3, 224, 224)@fp32 for Resnet or (1, 128)@int32;(1, 128)@int32 for BERT
 * `--batch_size` : Batch size
 * `--precision` : Comma separated list of precisions to build TensorRT engine Eg: fp32,fp16
 * `--device` : Device ID
 * `--truncate` : Truncate long and double weights in the network in Torch-TensorRT
 * `--is_trt_engine` : Boolean flag to be enabled if the model file provided is a TensorRT engine.
 * `--report` : Path of the output file where performance summary is written.
+* `--optimization_level` : Builder optimization level for TensorRT (from 1 to 5, 5 is the highest optimization).

 Eg:
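The new `--optimization_level` flag corresponds to TensorRT's builder optimization level. As a rough illustration (not part of this commit), this is how such a flag is typically applied when building an engine from an ONNX file; the `build_engine` helper and its arguments are hypothetical:

import tensorrt as trt

def build_engine(onnx_path, level=3):
    # Parse the ONNX model into a TensorRT network definition.
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    )
    parser = trt.OnnxParser(network, logger)
    with open(onnx_path, "rb") as f:
        parser.parse(f.read())

    # A higher builder_optimization_level lets TensorRT search longer for
    # faster tactics, trading build time for runtime performance.
    config = builder.create_builder_config()
    config.builder_optimization_level = level
    return builder.build_serialized_network(network, config)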

tools/perf/perf_run.py

Lines changed: 4 additions & 6 deletions
@@ -444,10 +444,8 @@ def run_tensorrt(
     if params["onnx"]:
         onnx_path = params["onnx"]
     else:
-        # Export an ONNX model and convert to TRT
         onnx_path = "./onnx-trt.onnx"
-        exp_program = torch.export.export(model.eval().cuda(), tuple(input_tensors))
-        torch.onnx.export(exp_program, tuple(input_tensors), onnx_path)
+        torch.onnx.export(model, tuple(input_tensors), onnx_path, dynamo=True)
     builder = trt.Builder(logger)
     network = builder.create_network(
         1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
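The hunk above replaces the two-step torch.export.export followed by torch.onnx.export with a single call to the dynamo-based ONNX exporter. A minimal standalone sketch of the new export path (the ResNet-18 model and input shape are placeholders for illustration):

import torch
import torchvision.models as models

model = models.resnet18().eval().cuda()
example_inputs = (torch.randn(1, 3, 224, 224, device="cuda"),)

# dynamo=True selects the torch.export-based ONNX exporter, so a separate
# torch.export.export() call is no longer needed before exporting.
torch.onnx.export(model, example_inputs, "./onnx-trt.onnx", dynamo=True)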
@@ -472,6 +470,7 @@ def run_tensorrt(
     print("Running TensorRT for precision: ", precision, " batch_size : ", batch_size)
     iters = params.get("iterations", 20)

+    start_time = timeit.default_timer()
     # Get I/O tensor information using TensorRT 10 API
     input_names = []
     output_names = []
@@ -526,16 +525,15 @@

     # Performance measurement
     for i in range(iters):
-        start_time = timeit.default_timer()
         # Wait for current stream to finish
         dedicated_stream.wait_stream(current_stream)
         context.execute_async_v3(dedicated_stream.cuda_stream)
         # Wait for TensorRT stream to finish
         current_stream.wait_stream(dedicated_stream)
         torch.cuda.synchronize()
         end_time = timeit.default_timer()
-        meas_time = end_time - start_time
-        timings.append(meas_time)
+        infer_time = end_time - start_time
+        timings.append(infer_time)

     recordStats("TensorRT", timings, precision, batch_size, compile_time_s)
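For reference, the measurement loop above runs TensorRT inference on a dedicated CUDA stream via execute_async_v3 and synchronizes before reading the timer. A simplified, self-contained sketch of that pattern under the commit's new timing placement (assumes `context` is a TensorRT IExecutionContext whose I/O tensor addresses are already bound):

import timeit
import torch

def measure(context, iters=20):
    # Execute on a dedicated stream, ordered after any pending work on the
    # current stream, and wait for completion before timing each iteration.
    current_stream = torch.cuda.current_stream()
    dedicated_stream = torch.cuda.Stream()
    timings = []

    start_time = timeit.default_timer()
    for _ in range(iters):
        dedicated_stream.wait_stream(current_stream)
        context.execute_async_v3(dedicated_stream.cuda_stream)
        current_stream.wait_stream(dedicated_stream)
        torch.cuda.synchronize()
        end_time = timeit.default_timer()
        # As in the change above, each entry is measured from the single
        # start point taken before the loop.
        timings.append(end_time - start_time)
    return timings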

tools/perf/requirements.txt

Lines changed: 1 addition & 2 deletions
@@ -4,6 +4,5 @@ pyyaml
 onnx
 pandas
 transformers
-diffusers==0.21.4
+diffusers
 timm==0.9.8
-