From e5c3ee814a8dc6b8b2faaad4be2076bd3207d37c Mon Sep 17 00:00:00 2001
From: lanluo-nvidia
Date: Wed, 6 Aug 2025 23:44:10 -0700
Subject: [PATCH 1/8] Initial check-in for removing INT8 calibrator-related code

---
 core/conversion/conversion.cpp                |   6 -
 .../conversionctx/ConversionCtx.cpp           |  13 +-
 core/conversion/conversionctx/ConversionCtx.h |   1 -
 cpp/BUILD                                     |   2 -
 cpp/CMakeLists.txt                            |   2 -
 cpp/bin/torchtrtc/fileio.h                    |   1 -
 cpp/bin/torchtrtc/main.cpp                    |  13 +-
 cpp/bin/torchtrtc/parser_util.h               |   1 -
 cpp/include/torch_tensorrt/macros.h           |   3 -
 cpp/include/torch_tensorrt/ptq.h              | 352 ------------------
 cpp/include/torch_tensorrt/torch_tensorrt.h   |   8 -
 cpp/src/compile_spec.cpp                      |  11 +-
 cpp/src/ptq.cpp                               |  16 -
 py/BUILD.bazel                                |   1 -
 .../csrc/register_tensorrt_classes.cpp        |   1 -
 py/torch_tensorrt/csrc/tensorrt_classes.cpp   |  13 +-
 py/torch_tensorrt/csrc/tensorrt_classes.h     |  10 -
 py/torch_tensorrt/csrc/torch_tensorrt_py.cpp  | 176 ---------
 py/torch_tensorrt/dynamo/_compiler.py         |   4 -
 .../dynamo/conversion/impl/activation/base.py |   8 -
 .../dynamo/conversion/impl/conv.py            |   9 -
 .../dynamo/conversion/impl/deconv.py          |   7 -
 .../runtime/_MutableTorchTensorRTModule.py    |   1 -
 py/torch_tensorrt/fx/converters/__init__.py   |   7 +-
 .../fx/converters/adaptive_avgpool.py         |   6 +-
 py/torch_tensorrt/fx/converters/add.py        |   7 -
 py/torch_tensorrt/fx/converters/batchnorm.py  |   8 +-
 .../fx/converters/converter_utils.py          |  20 -
 .../fx/converters/impl/activation.py          |  13 +-
 .../fx/converters/impl/convolution.py         |  18 +-
 py/torch_tensorrt/fx/converters/linear.py     |  13 +-
 py/torch_tensorrt/fx/converters/maxpool.py    |   6 +-
 py/torch_tensorrt/fx/converters/mul.py        |   4 -
 .../fx/converters/quantization.py             |  66 ----
 .../fx/converters/transformation.py           |   5 -
 py/torch_tensorrt/ts/_compile_spec.py         |   7 -
 py/torch_tensorrt/ts/_compiler.py             |   6 -
 py/torch_tensorrt/ts/ptq.py                   | 233 ------------
 tests/py/ts/BUILD                             |  44 ---
 .../ts/ptq/test_ptq_dataloader_calibrator.py  | 112 ------
 tests/py/ts/ptq/test_ptq_to_backend.py        | 119 ------
 tests/py/ts/ptq/test_ptq_trt_calibrator.py    | 156 --------
 tests/py/ts/qat/test_qat_trt_accuracy.py      |  99 -----
 43 files changed, 26 insertions(+), 1582 deletions(-)
 delete mode 100644 cpp/include/torch_tensorrt/ptq.h
 delete mode 100644 cpp/src/ptq.cpp
 delete mode 100644 py/torch_tensorrt/fx/converters/quantization.py
 delete mode 100644 py/torch_tensorrt/ts/ptq.py
 delete mode 100644 tests/py/ts/ptq/test_ptq_dataloader_calibrator.py
 delete mode 100644 tests/py/ts/ptq/test_ptq_to_backend.py
 delete mode 100644 tests/py/ts/ptq/test_ptq_trt_calibrator.py
 delete mode 100644 tests/py/ts/qat/test_qat_trt_accuracy.py

diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp
index f8a26e8d77..25f6d5da5c 100644
--- a/core/conversion/conversion.cpp
+++ b/core/conversion/conversion.cpp
@@ -202,13 +202,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef input
   TORCHTRT_CHECK(
       profile->isValid(),
       "Optimization profile is invalid, please check the input range provided (conversion.AddInputs)");
-
   ctx->cfg->addOptimizationProfile(profile);
-#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)
-  if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) {
-    ctx->cfg->setCalibrationProfile(profile);
-  }
-#endif
 }
 
 void MarkOutputs(ConversionCtx* ctx, at::ArrayRef outputs) {
diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp
index 2eb363706f..21c1cd9265 100644
--- a/core/conversion/conversionctx/ConversionCtx.cpp
+++ 
b/core/conversion/conversionctx/ConversionCtx.cpp
@@ -31,8 +31,7 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
   if (s.device.device_type == nvinfer1::DeviceType::kDLA) {
     os << "\n DLACore: " << s.device.dla_core;
   }
-  os << "\n Engine Capability: " << s.capability \
-     << "\n Calibrator Created: " << (s.calibrator != nullptr);
+  os << "\n Engine Capability: " << s.capability;
   return os;
 }
 // clang-format on
@@ -64,15 +63,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
       cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
       break;
     case nvinfer1::DataType::kINT8:
-      TORCHTRT_CHECK(
-          builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
-      cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
-      if (!settings.calibrator) {
-        LOG_INFO(
-            "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
-      } else {
-        cfg->setInt8Calibrator(settings.calibrator);
-      }
+      LOG_WARNING("INT8 precision has been enabled; this assumes the network has Q/DQ nodes obtained from NVIDIA ModelOpt");
       break;
     case nvinfer1::DataType::kFLOAT:
       break;
diff --git a/core/conversion/conversionctx/ConversionCtx.h b/core/conversion/conversionctx/ConversionCtx.h
index 8587885eca..0b5a09490b 100644
--- a/core/conversion/conversionctx/ConversionCtx.h
+++ b/core/conversion/conversionctx/ConversionCtx.h
@@ -26,7 +26,6 @@ struct BuilderSettings {
   bool allow_shape_tensors = false;
   ir::Device device;
   nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
-  nvinfer1::IInt8Calibrator* calibrator = nullptr;
   uint64_t num_avg_timing_iters = 1;
   uint64_t workspace_size = 0;
   uint64_t dla_sram_size = DLA_SRAM_SIZE;
diff --git a/cpp/BUILD b/cpp/BUILD
index e5cb1558e9..2dc87c6039 100644
--- a/cpp/BUILD
+++ b/cpp/BUILD
@@ -7,14 +7,12 @@ cc_library(
     srcs = [
         "src/compile_spec.cpp",
         "src/logging.cpp",
-        "src/ptq.cpp",
         "src/torch_tensorrt.cpp",
         "src/types.cpp",
     ],
     hdrs = [
        "include/torch_tensorrt/logging.h",
        "include/torch_tensorrt/macros.h",
-        "include/torch_tensorrt/ptq.h",
        "include/torch_tensorrt/torch_tensorrt.h",
     ],
     linkstatic = True,
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 0c0e5a43f0..690dca2749 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -4,7 +4,6 @@ add_library(${lib_name} OBJECT)
 set(CXX_SRCS
     "${CMAKE_CURRENT_SOURCE_DIR}/src/compile_spec.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/logging.cpp"
-    "${CMAKE_CURRENT_SOURCE_DIR}/src/ptq.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/torch_tensorrt.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/src/types.cpp"
 )
@@ -12,7 +11,6 @@ set(CXX_SRCS
 set(HEADER_FILES
     "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/logging.h"
     "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/macros.h"
-    "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/ptq.h"
     "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/torch_tensorrt.h"
 )
diff --git a/cpp/bin/torchtrtc/fileio.h b/cpp/bin/torchtrtc/fileio.h
index ed52d566a1..c4d8bb50c4 100644
--- a/cpp/bin/torchtrtc/fileio.h
+++ b/cpp/bin/torchtrtc/fileio.h
@@ -23,7 +23,6 @@
 #include "torch/torch.h"
 
 #include "torch_tensorrt/logging.h"
-#include "torch_tensorrt/ptq.h"
 #include "torch_tensorrt/torch_tensorrt.h"
 
 namespace torchtrtc {
diff --git a/cpp/bin/torchtrtc/main.cpp b/cpp/bin/torchtrtc/main.cpp
index c36cfdd0fc..72eddbff71 100644
--- a/cpp/bin/torchtrtc/main.cpp
+++ 
b/cpp/bin/torchtrtc/main.cpp
@@ -7,7 +7,6 @@
 #include "torch/script.h"
 
 #include "torch_tensorrt/logging.h"
-#include "torch_tensorrt/ptq.h"
 #include "torch_tensorrt/torch_tensorrt.h"
 
 #include "accuracy.h"
@@ -335,8 +334,6 @@ int main(int argc, char** argv) {
     calibration_cache_file_path = torchtrtc::fileio::resolve_path(args::get(calibration_cache_file));
   }
 
-  auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path);
-
   compile_settings.require_full_compilation = require_full_compilation;
 
   if (torch_executed_ops || torch_executed_mods) {
@@ -367,13 +364,9 @@ int main(int argc, char** argv) {
       compile_settings.enabled_precisions.insert(torch::kF16);
     } else if (dtype == torchtrt::DataType::kChar) {
       compile_settings.enabled_precisions.insert(torch::kI8);
-      if (calibration_cache_file) {
-        compile_settings.ptq_calibrator = calibrator;
-      } else {
-        torchtrt::logging::log(
-            torchtrt::logging::Level::kINFO,
-            "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
-      }
+      torchtrt::logging::log(
+          torchtrt::logging::Level::kINFO,
+          "Int8 precision has been enabled; this assumes the network has Q/DQ nodes obtained from NVIDIA ModelOpt");
     } else {
       std::stringstream ss;
       ss << "Invalid precision given for enabled kernel precision, options are [ float | float32 | f32 | fp32 | half | float16 | f16 | fp16 | char | int8 | i8 ], found: ";
diff --git a/cpp/bin/torchtrtc/parser_util.h b/cpp/bin/torchtrtc/parser_util.h
index 9ed5f6d06b..9cbb4ff994 100644
--- a/cpp/bin/torchtrtc/parser_util.h
+++ b/cpp/bin/torchtrtc/parser_util.h
@@ -9,7 +9,6 @@
 #include "torch/torch.h"
 
 #include "torch_tensorrt/logging.h"
-#include "torch_tensorrt/ptq.h"
 #include "torch_tensorrt/torch_tensorrt.h"
 
 namespace torchtrtc {
diff --git a/cpp/include/torch_tensorrt/macros.h b/cpp/include/torch_tensorrt/macros.h
index 020b94c114..e31091031b 100644
--- a/cpp/include/torch_tensorrt/macros.h
+++ b/cpp/include/torch_tensorrt/macros.h
@@ -30,9 +30,6 @@
 STR(TORCH_TENSORRT_MAJOR_VERSION) \
 "." STR(TORCH_TENSORRT_MINOR_VERSION) "." STR(TORCH_TENSORRT_PATCH_VERSION)
 
-#define TORCH_TENSORRT_PTQ_DEPRECATION \
-  [[deprecated( \
-      "Int8 PTQ Calibrator has been deprecated by TensorRT, please plan on porting to a NVIDIA Model Optimizer Toolkit based workflow. See: https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_ptq.html for more details")]]
 // Setup namespace aliases for ease of use
 namespace torch_tensorrt {
 namespace torchscript {}
diff --git a/cpp/include/torch_tensorrt/ptq.h b/cpp/include/torch_tensorrt/ptq.h
deleted file mode 100644
index a2f82346c0..0000000000
--- a/cpp/include/torch_tensorrt/ptq.h
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- * Copyright (c) NVIDIA Corporation.
- * All rights reserved.
- *
- * This library is licensed under the BSD-style license found in the
- * LICENSE file in the root directory of this source tree.
- */ -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "NvInfer.h" -#include "torch/torch.h" -#include "torch_tensorrt/logging.h" -#include "torch_tensorrt/macros.h" - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -namespace torch_tensorrt { -namespace ptq { -TORCHTRT_API bool get_batch_impl(void* bindings[], const char* names[], int nbBindings, torch::Tensor& data); -} -} // namespace torch_tensorrt -#endif // DOXYGEN_SHOULD_SKIP_THIS - -namespace torch_tensorrt { -namespace ptq { - -/** - * @brief Generic Int8Calibrator implementation based on a specified - * TensorRT calibration algorithm and a LibTorch DataLoader - * - * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: - * nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use - * @tparam DataLoaderUniquePtr: std::unique_ptr - - * DataLoader type - */ -template -class Int8Calibrator : Algorithm { - using DataLoader = typename DataLoaderUniquePtr::element_type; - using Batch = typename DataLoader::super::BatchType; - - public: - /** - * @brief Construct a new Int8Calibrator object - * - * Using the provided DataLoader, construct a calibrator that can be used for - * PTQ with Torch-TensorRT - * - * @param dataloader: std::unqiue_ptr - A unique - * pointer to the DataLoader, should be what is returned from the - * make_data_loader factory - * @param cache_file_path: const std::string& - A path to store / find the - * calibration cache - * @param use_cache : bool - Whether to use the cache (if it exists) - */ - Int8Calibrator(DataLoaderUniquePtr dataloader, const std::string& cache_file_path, bool use_cache) - : dataloader_(dataloader.get()), cache_file_path_(cache_file_path), use_cache_(use_cache) { - for (auto batch : *dataloader_) { - batched_data_.push_back(batch.data); - } - it_ = batched_data_.begin(); - } - - /** - * @brief Get the Batch Size for the next batch (always 1 due to issues with - * TRT and explicit batch) - * - * @return int - */ - int getBatchSize() const noexcept override { - // HACK: Torch-TensorRT only uses explict batch sizing, INT8 Calibrator does not - // work when reporting the batch size here and having explicity batching. - // So we just report batch size 1 (warnings will still be printed out). 
- return 1; - // return static_cast(dataloader_->options().batch_size); - } - - /** - * @brief Get the next Batch - * - * @param bindings: void*[] - An array of binding pointers (fed in from - * TensorRT calibrator), these buffers should be filed with batch data for - * each input - * @param names: const char*[] - Names of bindings - * @param nbBindings: int - Number of bindings - * @return true - There is a new batch for the calibrator to consume - * @return false - There is not a new batch for the calibrator to consume - */ - bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override { - if (it_ != batched_data_.end()) { - auto status = get_batch_impl(bindings, names, nbBindings, *it_); - it_ = ++it_; - return status; - } else { - // Reset iterator if incase calibrator is going to be used again - it_ = batched_data_.begin(); - return false; - } - } - - /** - * @brief Read calibration cache - * - * How to read from the calibration cache, only enabled if use_cache is set - * - * @param length - * @return const void* - Pointer to cache data - */ - const void* readCalibrationCache(size_t& length) noexcept override { - if (use_cache_) { - std::stringstream ss; - ss << "Reading Calibration Cache from " << cache_file_path_; - logging::log(logging::Level::kINFO, ss.str()); - - cache_.clear(); - std::ifstream input(cache_file_path_, std::ios::binary); - input >> std::noskipws; - if (input.good()) { - std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(cache_)); - logging::log(logging::Level::kDEBUG, "Cache read"); - } - length = cache_.size(); - return length ? cache_.data() : nullptr; - } - return nullptr; - } - - /** - * @brief Write calibration cache - * - * Write a the calibration cache provided by TensorRT to a specified file - * - * @param cache: const void* - cache data - * @param length: size_t - length of cache - */ - void writeCalibrationCache(const void* cache, size_t length) noexcept override { - std::ofstream cache_file(cache_file_path_, std::ios::binary); - cache_file.write(reinterpret_cast(cache), length); - std::stringstream ss; - ss << "Saved Calibration Cache to " << cache_file_path_; - logging::log(logging::Level::kINFO, ss.str()); - } - - /** - * @brief operator to cast to nvinfer1::IInt8Calibrator* - * - * Convience function to convert to a IInt8Calibrator* to easily be assigned - * to the ptq_calibrator field in CompileSpec - * - * @return nvinfer1::IInt8Calibrator* - */ - operator nvinfer1::IInt8Calibrator*() { - return reinterpret_cast(this); - } - - private: - /// Pointer to the dataloader - DataLoader* dataloader_; - /// Path to cache file - const std::string& cache_file_path_; - /// Size of cache - size_t cache_size_ = 0; - /// Whether to use the cache or not - bool use_cache_; - /// Cache data - std::vector cache_; - /// Batched Data - std::vector batched_data_; - /// Iterator to move through dataset - std::vector::iterator it_; -}; - -/** - * @brief Generic Int8Calibrator implementation based on a specified - * TensorRT calibration algorithm that only reads from a calibration file - * - * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: - * nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use - */ -template -class Int8CacheCalibrator : Algorithm { - public: - /** - * @brief Construct a new Int 8 Cache Calibrator object - * - * @param cache_file_path - */ - Int8CacheCalibrator(const std::string& cache_file_path) : cache_file_path_(cache_file_path) {} - - /** - * @brief Get the Batch Size for the 
next batch (always 1 due to issues with - * TRT and explicit batch) - * - * @return int - */ - int getBatchSize() const noexcept override { - // HACK: Torch-TensorRT only uses explict batch sizing, INT8 Calibrator does not - // work when reporting the batch size here and having explicity batching. - // So we just report batch size 1 (warnings will still be printed out). - return 1; - } - - /** - * @brief Get the next Batch - * - * Not used always returns false - * - * @param bindings: void*[] - An array of binding pointers (fed in from - * TensorRT calibrator), these buffers should be filed with batch data for - * each input - * @param names: const char*[] - Names of bindings - * @param nbBindings: int - Number of bindings - * @return false - */ - bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override { - return false; - } - - /** - * @brief Read calibration cache - * - * How to read from the calibration cache, only enabled if use_cache is set - * - * @param length - * @return const void* - Pointer to cache data - */ - const void* readCalibrationCache(size_t& length) noexcept override { - std::stringstream ss; - ss << "Reading Calibration Cache from " << cache_file_path_; - logging::log(logging::Level::kINFO, ss.str()); - - cache_.clear(); - std::ifstream input(cache_file_path_, std::ios::binary); - input >> std::noskipws; - if (input.good()) { - std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(cache_)); - logging::log(logging::Level::kDEBUG, "Cache read"); - } - length = cache_.size(); - return length ? cache_.data() : nullptr; - } - - /** - * @brief Write calibration cache - * - * Write a the calibration cache provided by TensorRT to a specified file - * - * @param cache: const void* - cache data - * @param length: size_t - length of cache - */ - void writeCalibrationCache(const void* cache, size_t length) noexcept override { - std::ofstream cache_file(cache_file_path_, std::ios::binary); - cache_file.write(reinterpret_cast(cache), length); - std::stringstream ss; - ss << "Saved Calibration Cache to " << cache_file_path_; - logging::log(logging::Level::kINFO, ss.str()); - } - - /** - * @brief operator to cast to nvinfer1::IInt8Calibrator* - * - * Convience function to convert to a IInt8Calibrator* to easily be assigned - * to the ptq_calibrator field in CompileSpec - * - * @return nvinfer1::IInt8Calibrator* - */ - operator nvinfer1::IInt8Calibrator*() { - return reinterpret_cast(this); - } - - private: - /// Path to cache file - const std::string& cache_file_path_; - /// Size of cache - size_t cache_size_ = 0; - /// Cache data - std::vector cache_; -}; - -/** - * @brief A factory to build a post training quantization calibrator from a - * torch dataloader - * - * Creates a calibrator to use for post training quantization. By default the - * returned calibrator uses TensorRT Entropy v2 algorithm to perform - * calibration. This is recommended for feed forward networks. You can override - * the algorithm selection (such as to use the MinMax Calibrator recomended for - * NLP tasks) by calling make_int8_calibrator with the calibrator class as a - * template parameter. - * - * e.g. 
- * ``torch_tensorrt::ptq::make_int8_calibrator(std::move(calibration_dataloader), - * calibration_cache_file, use_cache);`` - * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: - * nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use - * @tparam DataLoader: std::unique_ptr - DataLoader - * type - * @param dataloader: std::unique_ptr - DataLoader - * containing data - * @param cache_file_path: const std::string& - Path to read/write calibration - * cache - * @param use_cache: bool - use calibration cache - * @return Int8Calibrator - */ -template -TORCH_TENSORRT_PTQ_DEPRECATION inline Int8Calibrator make_int8_calibrator( - DataLoader dataloader, - const std::string& cache_file_path, - bool use_cache) { - return Int8Calibrator(std::move(dataloader), cache_file_path, use_cache); -} - -/** - * @brief A factory to build a post training quantization calibrator from a - * torch dataloader that only uses the calibration cache - * - * Creates a calibrator to use for post training quantization which reads from a - * previously created calibration cache, therefore you can have a calibration - * cache generating program that requires a dataloader and a dataset, then save - * the cache to use later in a different program that needs to calibrate from - * scratch and not have the dataset dependency. However, the network should also - * be recalibrated if its structure changes, or the input data set changes, and - * it is the responsibility of the application to ensure this. - * - * By default the returned calibrator uses TensorRT Entropy v2 algorithm to - * perform calibration. This is recommended for feed forward networks You can - * override the algorithm selection (such as to use the MinMax Calibrator - * recomended for NLP tasks) by calling make_int8_calibrator with the calibrator - * class as a template parameter. - * - * e.g. 
- * torch_tensorrt::ptq::make_int8_cache_calibrator(calibration_cache_file); - * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: - * nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use - * @param cache_file_path: const std::string& - Path to read/write calibration - * cache - * @return Int8CacheCalibrator - */ -template -TORCH_TENSORRT_PTQ_DEPRECATION inline Int8CacheCalibrator make_int8_cache_calibrator( - const std::string& cache_file_path) { - return Int8CacheCalibrator(cache_file_path); -} - -} // namespace ptq -} // namespace torch_tensorrt diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h index adac75d984..8cf4449e75 100644 --- a/cpp/include/torch_tensorrt/torch_tensorrt.h +++ b/cpp/include/torch_tensorrt/torch_tensorrt.h @@ -34,9 +34,6 @@ template class ArrayRef; } // namespace c10 -namespace nvinfer1 { -class IInt8Calibrator; -} #endif // DOXYGEN_SHOULD_SKIP_THIS namespace torch_tensorrt { @@ -833,11 +830,6 @@ struct CompileSpec { */ uint64_t dla_global_dram_size = 536870912; - /** - * Calibration dataloaders for each input for post training quantizatiom - */ - nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; - /** * Require the full module be compiled to TensorRT instead of potentially running unsupported operations in PyTorch */ diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index 68a25b3912..6e5cf99850 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -156,15 +156,8 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external, bool if (internal.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != internal.convert_info.engine_settings.enabled_precisions.end()) { internal.partitioning_info.cast_int8_inputs = false; - if (external.ptq_calibrator) { - internal.convert_info.engine_settings.calibrator = external.ptq_calibrator; - } else { - internal.lower_info.unfreeze_module = true; - internal.lower_info.disable_cse = true; - internal.convert_info.engine_settings.calibrator = nullptr; - } - } else { - internal.convert_info.engine_settings.calibrator = nullptr; + internal.lower_info.unfreeze_module = true; + internal.lower_info.disable_cse = true; } return internal; diff --git a/cpp/src/ptq.cpp b/cpp/src/ptq.cpp deleted file mode 100644 index 7d36e9ce7d..0000000000 --- a/cpp/src/ptq.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include "torch_tensorrt/ptq.h" -#include "torch/torch.h" - -namespace torch_tensorrt { -namespace ptq { - -bool get_batch_impl(void* bindings[], const char* names[], int nbBindings, torch::Tensor& data) { - for (int i = 0; i < nbBindings; i++) { - data = data.to(at::kCUDA).contiguous(); - bindings[i] = data.data_ptr(); - } - return true; -} - -} // namespace ptq -} // namespace torch_tensorrt diff --git a/py/BUILD.bazel b/py/BUILD.bazel index 57bde9cf0a..dcbb337132 100644 --- a/py/BUILD.bazel +++ b/py/BUILD.bazel @@ -13,7 +13,6 @@ py_library( "torch_tensorrt/_types.py", "torch_tensorrt/_version.py", "torch_tensorrt/logging.py", - "torch_tensorrt/ptq.py", ], data = [ "torch_tensorrt/lib/libtrtorch.so", diff --git a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp index bae61881da..04e9115cd1 100644 --- a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp @@ -67,7 +67,6 @@ void RegisterTRTCompileSpec() { .def("_set_precisions", &torch_tensorrt::pyapi::CompileSpec::setPrecisions) .def("_set_device", 
&torch_tensorrt::pyapi::CompileSpec::setDeviceIntrusive) .def("_set_torch_fallback", &torch_tensorrt::pyapi::CompileSpec::setTorchFallbackIntrusive) - .def("_set_ptq_calibrator", &torch_tensorrt::pyapi::CompileSpec::setPTQCalibratorViaHandle) .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify); ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, sparse_weights); diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp index bd3aa6b305..788a45184b 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp @@ -343,16 +343,11 @@ core::CompileSpec CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi info.partitioning_info.cast_int8_inputs = true; - if (ptq_calibrator) { - info.convert_info.engine_settings.calibrator = ptq_calibrator; + if (info.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != + info.convert_info.engine_settings.enabled_precisions.end()) { info.partitioning_info.cast_int8_inputs = false; - } else { - if (info.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != - info.convert_info.engine_settings.enabled_precisions.end()) { - info.partitioning_info.cast_int8_inputs = false; - info.lower_info.unfreeze_module = true; - info.lower_info.disable_cse = true; - } + info.lower_info.unfreeze_module = true; + info.lower_info.disable_cse = true; } info.convert_info.engine_settings.sparse_weights = sparse_weights; info.convert_info.engine_settings.disable_tf32 = disable_tf32; diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.h b/py/torch_tensorrt/csrc/tensorrt_classes.h index 89c5c8661e..2bf8fe8f52 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.h +++ b/py/torch_tensorrt/csrc/tensorrt_classes.h @@ -140,10 +140,6 @@ struct CompileSpec : torch::CustomClassHolder { } } - int64_t getPTQCalibratorHandle() { - return (int64_t)ptq_calibrator; - } - void setDeviceIntrusive(const c10::intrusive_ptr& d) { device = *d; } @@ -152,10 +148,6 @@ struct CompileSpec : torch::CustomClassHolder { torch_fallback = *fb; } - void setPTQCalibratorViaHandle(int64_t handle) { - ptq_calibrator = (nvinfer1::IInt8Calibrator*)handle; - } - ADD_FIELD_GET_SET(disable_tf32, bool); ADD_FIELD_GET_SET(sparse_weights, bool); ADD_FIELD_GET_SET(refit, bool); @@ -170,11 +162,9 @@ struct CompileSpec : torch::CustomClassHolder { ADD_FIELD_GET_SET(allow_shape_tensors, bool); ADD_FIELD_GET_SET(device, Device); ADD_FIELD_GET_SET(torch_fallback, TorchFallback); - ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*); std::vector inputs; InputSignature input_signature; - nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; std::set enabled_precisions = {}; bool sparse_weights = false; bool disable_tf32 = false; diff --git a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp index e32d102f8b..378f96cd0e 100644 --- a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp +++ b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp @@ -18,135 +18,6 @@ namespace py = pybind11; namespace torch_tensorrt { namespace pyapi { -template -class pyCalibratorTrampoline : public Derived { - public: - using Derived::Derived; // Inherit constructors - - int getBatchSize() const noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME(int, Derived, "get_batch_size", getBatchSize); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in get_batch_size" + std::string(e.what())); - 
} catch (...) { - LOG_ERROR("Exception caught in get_batch_size"); - } - return -1; - } - - bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override { - py::gil_scoped_acquire gil{}; - - py::function pyGetBatch = torch_tensorrt::pyapi::util::getOverload(static_cast(this), "get_batch"); - std::vector namesVec(names, names + nbBindings); - py::object result = pyGetBatch(namesVec); - // Copy over into the other data structure. - if (!result.is_none() && result.cast>().size() != 0) { - std::memcpy(bindings, result.cast>().data(), nbBindings * sizeof(void*)); - return true; - } - return false; - } - - const void* readCalibrationCache(std::size_t& length) noexcept override { - py::gil_scoped_acquire gil{}; - - py::function pyReadCalibrationCache = - torch_tensorrt::pyapi::util::getOverload(static_cast(this), "read_calibration_cache"); - py::buffer cache = pyReadCalibrationCache(); - if (!cache.is_none()) { - py::buffer_info info = cache.request(); - length = info.size * info.itemsize; - return info.ptr; - } - return nullptr; - } - - void writeCalibrationCache(const void* ptr, std::size_t length) noexcept override { - py::gil_scoped_acquire gil{}; - - py::function pyWriteCalibrationCache = - torch_tensorrt::pyapi::util::getOverload(static_cast(this), "write_calibration_cache"); - - py::memoryview cache{py::memoryview::from_buffer(static_cast(ptr), {length}, {sizeof(uint8_t)})}; - pyWriteCalibrationCache(cache); - } -}; - -class pyIInt8Calibrator : public pyCalibratorTrampoline { - public: - using Derived = pyCalibratorTrampoline; - using Derived::Derived; - - nvinfer1::InterfaceInfo getInterfaceInfo() const noexcept override { - return nvinfer1::InterfaceInfo{"PYTHON CALIBRATOR", 1, 0}; - } - - nvinfer1::CalibrationAlgoType getAlgorithm() noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME( - nvinfer1::CalibrationAlgoType, nvinfer1::IInt8Calibrator, "get_algorithm", getAlgorithm); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in get_algorithm: " + std::string(e.what())); - } catch (...) { - LOG_ERROR("Exception caught in get_algorithm"); - } - return {}; - } -}; - -class pyIInt8LegacyCalibrator : public pyCalibratorTrampoline { - public: - using Derived = pyCalibratorTrampoline; - using Derived::Derived; - - double getQuantile() const noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME(double, nvinfer1::IInt8LegacyCalibrator, "get_quantile", getQuantile); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in get_quantile: " + std::string(e.what())); - } catch (...) { - LOG_ERROR("Exception caught in get_quantile"); - } - return -1.0; - } - - double getRegressionCutoff() const noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME( - double, nvinfer1::IInt8LegacyCalibrator, "get_regression_cutoff", getRegressionCutoff); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in get_regression_cutoff: " + std::string(e.what())); - } catch (...) { - LOG_ERROR("Exception caught in get_regression_cutoff"); - } - return -1.0; - } - - const void* readHistogramCache(std::size_t& length) noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME( - const char*, nvinfer1::IInt8LegacyCalibrator, "read_histogram_cache", readHistogramCache, length); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in read_histogram_cache" + std::string(e.what())); - } catch (...) 
{ - LOG_ERROR("Exception caught in read_histogram_cache"); - } - return {}; - } - - void writeHistogramCache(const void* ptr, std::size_t length) noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME( - void, nvinfer1::IInt8LegacyCalibrator, "write_histogram_cache", writeHistogramCache, ptr, length); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in write_histogram_cache" + std::string(e.what())); - } catch (...) { - LOG_ERROR("Exception caught in write_histogram_cache"); - } - } -}; - void set_device(const int device_id) { core::set_device(device_id); } @@ -275,51 +146,6 @@ PYBIND11_MODULE(_C, m) { .value("channels_last", TensorFormat::kChannelsLast, "Channels last memory layout (NHWC)") .export_values(); - py::enum_(m, "CalibrationAlgo", py::module_local(), "Type of calibration algorithm") - .value("LEGACY_CALIBRATION", nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION) - .value("ENTROPY_CALIBRATION", nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION) - .value("ENTROPY_CALIBRATION_2", nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION_2) - .value("MINMAX_CALIBRATION", nvinfer1::CalibrationAlgoType::kMINMAX_CALIBRATION); - - py::class_( - m, "IInt8Calibrator", py::module_local(), "Int8 Calibrator base class") - .def(py::init_alias<>()) // Always initialize trampoline class. - .def("get_batch_size", &nvinfer1::IInt8Calibrator::getBatchSize, "Get batch size") - .def("get_algorithm", &nvinfer1::IInt8Calibrator::getAlgorithm, "Get algorithm"); - - py::class_( - m, "IInt8LegacyCalibrator", py::module_local(), "Int8 Legacy Calibrator class") - .def(py::init_alias<>()) // Always initialize trampoline class. - .def("get_batch_size", &nvinfer1::IInt8LegacyCalibrator::getBatchSize, "Get batch size") - .def("get_algorithm", &nvinfer1::IInt8LegacyCalibrator::getAlgorithm, "Get algorithm"); - - py::class_< - nvinfer1::IInt8EntropyCalibrator, - nvinfer1::IInt8Calibrator, - pyCalibratorTrampoline>( - m, "IInt8EntropyCalibrator", py::module_local(), "Int8 Entropy Calibrator class") - .def(py::init_alias<>()) // Always initialize trampoline class. - .def("get_batch_size", &nvinfer1::IInt8EntropyCalibrator::getBatchSize, "Get batch size") - .def("get_algorithm", &nvinfer1::IInt8EntropyCalibrator::getAlgorithm, "Get algorithm"); - - py::class_< - nvinfer1::IInt8EntropyCalibrator2, - nvinfer1::IInt8Calibrator, - pyCalibratorTrampoline>( - m, "IInt8EntropyCalibrator2", py::module_local(), "Int8 Entropy Calibrator2 class") - .def(py::init_alias<>()) // Always initialize trampoline class. - .def("get_batch_size", &nvinfer1::IInt8EntropyCalibrator2::getBatchSize, "Get batch size") - .def("get_algorithm", &nvinfer1::IInt8EntropyCalibrator2::getAlgorithm, "Get algorithm"); - - py::class_< - nvinfer1::IInt8MinMaxCalibrator, - nvinfer1::IInt8Calibrator, - pyCalibratorTrampoline>( - m, "IInt8MinMaxCalibrator", py::module_local(), "Int8 MinMax Calibrator class") - .def(py::init_alias<>()) // Always initialize trampoline class. 
- .def("get_batch_size", &nvinfer1::IInt8MinMaxCalibrator::getBatchSize, "Get batch size") - .def("get_algorithm", &nvinfer1::IInt8MinMaxCalibrator::getAlgorithm, "Get algorithm"); - py::class_(m, "Device") .def(py::init<>()) .def("__str__", &torch_tensorrt::pyapi::Device::to_str) @@ -362,11 +188,9 @@ PYBIND11_MODULE(_C, m) { py::class_(ts_sub_mod, "CompileSpec") .def(py::init<>()) .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify) - .def("_get_calibrator_handle", &CompileSpec::getPTQCalibratorHandle, "[Internal] gets a handle from a calibrator") .def_readwrite("inputs", &CompileSpec::inputs) .def_readwrite("input_signature", &CompileSpec::input_signature) .def_readwrite("enabled_precisions", &CompileSpec::enabled_precisions) - .def_readwrite("ptq_calibrator", &CompileSpec::ptq_calibrator) .def_readwrite("refit", &CompileSpec::refit) .def_readwrite("sparse_weights", &CompileSpec::sparse_weights) .def_readwrite("disable_tf32", &CompileSpec::disable_tf32) diff --git a/py/torch_tensorrt/dynamo/_compiler.py b/py/torch_tensorrt/dynamo/_compiler.py index 74cab980c4..608c8e84c9 100644 --- a/py/torch_tensorrt/dynamo/_compiler.py +++ b/py/torch_tensorrt/dynamo/_compiler.py @@ -148,7 +148,6 @@ def cross_compile_for_windows( dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution truncate_double (bool): Truncate weights provided in double (float64) to float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration require_full_compilation (bool): Require modules to be compiled end to end or return an error as opposed to returning a hybrid graph where operations that cannot be run in TensorRT are run in PyTorch min_block_size (int): The minimum number of contiguous TensorRT convertible operations in order to run a set of operations in TensorRT torch_executed_ops (Collection[Target]): Set of aten operators that must be run in PyTorch. An error will be thrown if this set is not empty but ``require_full_compilation`` is True @@ -487,7 +486,6 @@ def compile( dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution truncate_double (bool): Truncate weights provided in double (float64) to float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration require_full_compilation (bool): Require modules to be compiled end to end or return an error as opposed to returning a hybrid graph where operations that cannot be run in TensorRT are run in PyTorch min_block_size (int): The minimum number of contiguous TensorRT convertible operations in order to run a set of operations in TensorRT torch_executed_ops (Collection[Target]): Set of aten operators that must be run in PyTorch. 
An error will be thrown if this set is not empty but ``require_full_compilation`` is True @@ -1042,7 +1040,6 @@ def convert_exported_program_to_serialized_trt_engine( dla_sram_size: int = _defaults.DLA_SRAM_SIZE, dla_local_dram_size: int = _defaults.DLA_LOCAL_DRAM_SIZE, dla_global_dram_size: int = _defaults.DLA_GLOBAL_DRAM_SIZE, - calibrator: object = None, allow_shape_tensors: bool = False, timing_cache_path: str = _defaults.TIMING_CACHE_PATH, use_explicit_typing: bool = _defaults.USE_EXPLICIT_TYPING, @@ -1107,7 +1104,6 @@ def convert_exported_program_to_serialized_trt_engine( dla_sram_size (int): Fast software managed RAM used by DLA to communicate within a layer. dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT timing_cache_path (str): Path to the timing cache if it exists (or) where it will be saved after compilation use_explicit_typing (bool): This flag enables strong typing in TensorRT compilation which respects the precisions set in the Pytorch model. This is useful when users have mixed precision graphs. diff --git a/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py b/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py index db257b9c4e..edd289e66e 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py @@ -5,7 +5,6 @@ from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.fx.converters.converter_utils import ( - mark_as_int8_layer, set_layer_name, ) from torch_tensorrt.fx.types import TRTTensor @@ -37,11 +36,4 @@ def convert_activation( layer.beta = beta set_layer_name(layer, target, name, source_ir) - if ( - not ctx.net.get_flag(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) - and input_val.dynamic_range is not None - and dyn_range_fn is not None - ): - dyn_range = dyn_range_fn(input_val.dynamic_range) - mark_as_int8_layer(layer, dyn_range) return layer.get_output(0) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/conv.py b/py/torch_tensorrt/dynamo/conversion/impl/conv.py index 918c87ca70..8e0fa9130b 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/conv.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/conv.py @@ -17,10 +17,6 @@ to_torch, to_trt_weights, ) -from torch_tensorrt.fx.converters.converter_utils import ( - get_dyn_range, - mark_as_int8_layer, -) from torch_tensorrt.fx.types import TRTTensor @@ -172,11 +168,6 @@ def convNd( if groups is not None: conv_layer.num_groups = groups - # Handle quantization cases - if scale is not None and zero_point is not None: - # Assume the dtype of activation is torch.quint8 - mark_as_int8_layer(conv_layer, get_dyn_range(scale, zero_point, torch.quint8)) - result = conv_layer.get_output(0) if is_conv1d: diff --git a/py/torch_tensorrt/dynamo/conversion/impl/deconv.py b/py/torch_tensorrt/dynamo/conversion/impl/deconv.py index 6a21415ffe..dcfb01d15d 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/deconv.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/deconv.py @@ -16,8 +16,6 @@ to_trt_weights, ) from 
torch_tensorrt.fx.converters.converter_utils import ( - get_dyn_range, - mark_as_int8_layer, set_layer_name, ) from torch_tensorrt.fx.types import TRTTensor @@ -174,11 +172,6 @@ def deconvNd( deconv_layer.pre_padding = tuple(pre_padding_values) deconv_layer.post_padding = tuple(post_padding_values) - # Handle quantization cases - if scale is not None and zero_point is not None: - # Assume the dtype of activation is torch.quint8 - mark_as_int8_layer(deconv_layer, get_dyn_range(scale, zero_point, torch.quint8)) - result = deconv_layer.get_output(0) if is_deconv1d: diff --git a/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py index b0e41f7aeb..3197d9f7de 100644 --- a/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py +++ b/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py @@ -95,7 +95,6 @@ def __init__( dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution truncate_double (bool): Truncate weights provided in double (float64) to float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration require_full_compilation (bool): Require modules to be compiled end to end or return an error as opposed to returning a hybrid graph where operations that cannot be run in TensorRT are run in PyTorch min_block_size (int): The minimum number of contiguous TensorRT convertible operations in order to run a set of operations in TensorRT torch_executed_ops (Collection[Target]): Set of aten operators that must be run in PyTorch. 
An error will be thrown if this set is not empty but ``require_full_compilation`` is True diff --git a/py/torch_tensorrt/fx/converters/__init__.py b/py/torch_tensorrt/fx/converters/__init__.py index f037d54ce7..60c20df44d 100644 --- a/py/torch_tensorrt/fx/converters/__init__.py +++ b/py/torch_tensorrt/fx/converters/__init__.py @@ -2,17 +2,16 @@ import tensorrt as trt if hasattr(trt, "__version__"): + from .acc_ops_converters import * # noqa: F401 F403 from .adaptive_avgpool import * # noqa: F401 F403 from .add import * # noqa: F401 F403 + from .aten_ops_converters import * # noqa: F401 F403 from .batchnorm import * # noqa: F401 F403 from .linear import * # noqa: F401 F403 from .maxpool import * # noqa: F401 F403 from .mul import * # noqa: F401 F403 - from .transformation import * # noqa: F401 F403 - from .quantization import * # noqa: F401 F403 - from .acc_ops_converters import * # noqa: F401 F403 - from .aten_ops_converters import * # noqa: F401 F403 from .nn_ops_converters import * # noqa: F401 F403 + from .transformation import * # noqa: F401 F403 TRT_LOGGER = trt.Logger() trt.init_libnvinfer_plugins(TRT_LOGGER, "") diff --git a/py/torch_tensorrt/fx/converters/adaptive_avgpool.py b/py/torch_tensorrt/fx/converters/adaptive_avgpool.py index 8de9987c77..9516be06a3 100644 --- a/py/torch_tensorrt/fx/converters/adaptive_avgpool.py +++ b/py/torch_tensorrt/fx/converters/adaptive_avgpool.py @@ -3,8 +3,7 @@ import torch from ..converter_registry import tensorrt_converter - -from .converter_utils import extend_mod_attr_to_tuple, mark_as_int8_layer +from .converter_utils import extend_mod_attr_to_tuple @tensorrt_converter(torch.nn.modules.pooling.AdaptiveAvgPool2d) @@ -31,7 +30,4 @@ def adaptive_avgpool2d(network, submod, args, kwargs, name): layer.stride = stride layer.name = name - if input_val.dynamic_range: - mark_as_int8_layer(layer, input_val.dynamic_range) - return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/add.py b/py/torch_tensorrt/fx/converters/add.py index c60b0313a3..36dc91b637 100644 --- a/py/torch_tensorrt/fx/converters/add.py +++ b/py/torch_tensorrt/fx/converters/add.py @@ -6,8 +6,6 @@ from ..converter_registry import tensorrt_converter -from .converter_utils import get_dyn_range, mark_as_int8_layer - @tensorrt_converter(operator.add) @tensorrt_converter(torch.add) @@ -43,8 +41,6 @@ def quantized_add(network, target, args, kwargs, layer_name): layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.SUM) layer.name = layer_name - dyn_range = get_dyn_range(kwargs["scale"], kwargs["zero_point"], torch.quint8) - mark_as_int8_layer(layer, dyn_range) return layer.get_output(0) @@ -60,13 +56,10 @@ def quantized_add_relu(network, submod, args, kwargs, layer_name): layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.SUM) layer.name = f"{layer_name}_add" - dyn_range = get_dyn_range(kwargs["scale"], kwargs["zero_point"], torch.quint8) - mark_as_int8_layer(layer, dyn_range) layer = network.add_activation( input=layer.get_output(0), type=trt.ActivationType.RELU ) layer.name = f"{layer_name}_relu" - mark_as_int8_layer(layer, dyn_range) return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/batchnorm.py b/py/torch_tensorrt/fx/converters/batchnorm.py index 130991df54..3e08c93b12 100644 --- a/py/torch_tensorrt/fx/converters/batchnorm.py +++ b/py/torch_tensorrt/fx/converters/batchnorm.py @@ -5,8 +5,7 @@ import torch from ..converter_registry import tensorrt_converter - -from .converter_utils import get_dyn_range, 
mark_as_int8_layer, to_numpy +from .converter_utils import to_numpy def common_batchnorm(network, mod, input_val, layer_name, is_quantized): @@ -17,11 +16,6 @@ def common_batchnorm(network, mod, input_val, layer_name, is_quantized): layer = network.add_scale(input_val, trt.ScaleMode.CHANNEL, bias, scale, power) layer.name = layer_name - if is_quantized: - mark_as_int8_layer( - layer, get_dyn_range(mod.scale, mod.zero_point, torch.quint8) - ) - return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/converter_utils.py b/py/torch_tensorrt/fx/converters/converter_utils.py index 78ea125424..ea4d09bb37 100644 --- a/py/torch_tensorrt/fx/converters/converter_utils.py +++ b/py/torch_tensorrt/fx/converters/converter_utils.py @@ -704,26 +704,6 @@ def get_dyn_range(scale, zero_point, dtype): return (min_val - zero_point) * scale, (max_val - zero_point) * scale -def mark_as_int8_layer(layer, dynamic_range): - """ - Set the precision of a layer to int8 as well as the type of its first output. - Also set the dynamic range of its first output. - """ - if layer.type not in { - trt.LayerType.SHUFFLE, - trt.LayerType.CONCATENATION, - trt.LayerType.CONSTANT, - trt.LayerType.SHAPE, - }: - layer.precision = trt.int8 - - for i in range(layer.num_outputs): - output_val = layer.get_output(i) - output_val.dynamic_range = dynamic_range - layer.set_output_type(i, trt.int8) - # output_val.dtype = trt.int8 - - def get_inputs_from_args_and_kwargs(args, kwargs, input_names): inputs = [] for i, key in enumerate(input_names): diff --git a/py/torch_tensorrt/fx/converters/impl/activation.py b/py/torch_tensorrt/fx/converters/impl/activation.py index 66c16b0892..4b613b5de2 100644 --- a/py/torch_tensorrt/fx/converters/impl/activation.py +++ b/py/torch_tensorrt/fx/converters/impl/activation.py @@ -1,18 +1,14 @@ -import numpy as np import operator import warnings from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union +import numpy as np + # @manual=//deeplearning/trt/python:py_tensorrt import tensorrt as trt import torch from torch.fx.node import Argument, Target - - -from torch_tensorrt.fx.converters.converter_utils import mark_as_int8_layer -from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.converters.converter_utils import SourceIR - +from torch_tensorrt.fx.converters.converter_utils import SourceIR, set_layer_name from torch_tensorrt.fx.types import ( TRTNetwork, TRTTensor, @@ -63,9 +59,6 @@ def convert_activation( layer.beta = beta set_layer_name(layer, target, name, source_ir) - if input_val.dynamic_range is not None: - dyn_range = dyn_range_fn(input_val.dynamic_range) - mark_as_int8_layer(layer, dyn_range) return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/impl/convolution.py b/py/torch_tensorrt/fx/converters/impl/convolution.py index 84071ed2d4..295ded110e 100644 --- a/py/torch_tensorrt/fx/converters/impl/convolution.py +++ b/py/torch_tensorrt/fx/converters/impl/convolution.py @@ -1,23 +1,20 @@ -import numpy as np from typing import Any, Optional, Sequence, Union +import numpy as np + # @manual=//deeplearning/trt/python:py_tensorrt import tensorrt as trt import torch from torch.fx.node import Target - +from torch_tensorrt.fx.converters import acc_ops_converters from torch_tensorrt.fx.converters.converter_utils import ( SourceIR, extend_attr_to_tuple, - get_dyn_range, - mark_as_int8_layer, - set_layer_name, + get_trt_tensor, has_dynamic_shape, + set_layer_name, to_numpy, - get_trt_tensor, ) -from 
torch_tensorrt.fx.converters import acc_ops_converters - from torch_tensorrt.fx.types import ( TRTNetwork, TRTTensor, @@ -124,11 +121,6 @@ def convNd( if groups is not None: conv_layer.num_groups = groups - # Handle quantization cases - if scale is not None and zero_point is not None: - # Assume the dtype of activation is torch.quint8 - mark_as_int8_layer(conv_layer, get_dyn_range(scale, zero_point, torch.quint8)) - result = conv_layer.get_output(0) if is_conv1d: diff --git a/py/torch_tensorrt/fx/converters/linear.py b/py/torch_tensorrt/fx/converters/linear.py index e7cca6f76a..40cec22013 100644 --- a/py/torch_tensorrt/fx/converters/linear.py +++ b/py/torch_tensorrt/fx/converters/linear.py @@ -3,8 +3,7 @@ import torch from ..converter_registry import tensorrt_converter - -from .converter_utils import get_dyn_range, mark_as_int8_layer, to_numpy +from .converter_utils import get_dyn_range, to_numpy def common_linear(network, mod, input_val, layer_name, is_quantized): @@ -23,9 +22,6 @@ def common_linear(network, mod, input_val, layer_name, is_quantized): layer.reshape_dims = tuple(input_val.shape) + (1, 1) layer.name = f"{layer_name}_pre_shuffle" - if is_quantized: - mark_as_int8_layer(layer, input_val.dynamic_range) - kernel = to_numpy(mod.weight if not is_quantized else mod.weight()) bias = to_numpy(mod.bias if not is_quantized else mod.bias()) @@ -38,18 +34,11 @@ def common_linear(network, mod, input_val, layer_name, is_quantized): ) layer.name = f"{layer_name}_linear" - if is_quantized: - dyn_range = get_dyn_range(mod.scale, mod.zero_point, torch.quint8) - mark_as_int8_layer(layer, dyn_range) - # reshape the output from (*, K, 1, 1) to (*, K) layer = network.add_shuffle(layer.get_output(0)) layer.reshape_dims = tuple(input_val.shape[:-1]) + (mod.out_features,) layer.name = f"{layer_name}_post_shuffle" - if is_quantized: - mark_as_int8_layer(layer, dyn_range) - return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/maxpool.py b/py/torch_tensorrt/fx/converters/maxpool.py index 6c64a3b108..dcaf724617 100644 --- a/py/torch_tensorrt/fx/converters/maxpool.py +++ b/py/torch_tensorrt/fx/converters/maxpool.py @@ -3,8 +3,7 @@ import torch from ..converter_registry import tensorrt_converter - -from .converter_utils import extend_mod_attr_to_tuple, mark_as_int8_layer +from .converter_utils import extend_mod_attr_to_tuple def common_maxpool(network, mod, dimension, input_val, layer_name): @@ -23,9 +22,6 @@ def common_maxpool(network, mod, dimension, input_val, layer_name): if mod.ceil_mode: layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP - if input_val.dynamic_range: - mark_as_int8_layer(layer, input_val.dynamic_range) - return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/mul.py b/py/torch_tensorrt/fx/converters/mul.py index a1d9858ebd..24fc884fc5 100644 --- a/py/torch_tensorrt/fx/converters/mul.py +++ b/py/torch_tensorrt/fx/converters/mul.py @@ -6,8 +6,6 @@ from ..converter_registry import tensorrt_converter -from .converter_utils import get_dyn_range, mark_as_int8_layer - @tensorrt_converter(torch.mul) @tensorrt_converter(operator.mul) @@ -42,7 +40,5 @@ def quantized_mul(network, target, args, kwargs, layer_name): layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.PROD) layer.name = layer_name - dyn_range = get_dyn_range(kwargs["scale"], kwargs["zero_point"], torch.quint8) - mark_as_int8_layer(layer, dyn_range) return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/quantization.py 
b/py/torch_tensorrt/fx/converters/quantization.py deleted file mode 100644 index 6b75f93278..0000000000 --- a/py/torch_tensorrt/fx/converters/quantization.py +++ /dev/null @@ -1,66 +0,0 @@ -# @manual=//deeplearning/trt/python:py_tensorrt -import tensorrt as trt -import torch - -from ..converter_registry import tensorrt_converter - -from .converter_utils import get_dyn_range, get_inputs_from_args_and_kwargs - -quantize_per_tensor_inputs = ["input", "scale", "zero_point", "dtype"] - - -@tensorrt_converter("dequantize") -@tensorrt_converter(torch.dequantize) -@tensorrt_converter(torch.nn.quantized.modules.DeQuantize) -def dequantize(network, submod, args, kwargs, layer_name): - input_val = args[0] - - if not isinstance(input_val, trt.tensorrt.ITensor): - raise RuntimeError( - f"Dequantize received input {input_val} that is not part " - "of the TensorRT region!" - ) - - return input_val - - -@tensorrt_converter(torch.quantize_per_tensor) -@tensorrt_converter(torch.nn.quantized.modules.Quantize) -def quantize(network, submod, args, kwargs, layer_name): - # If submod is not nn.Module then it's quantize_per_tensor - if not isinstance(submod, torch.nn.Module): - input_val, scale, zero_point, dtype = get_inputs_from_args_and_kwargs( - args, kwargs, quantize_per_tensor_inputs - ) - else: - input_val = args[0] - scale = submod.scale - zero_point = submod.zero_point - dtype = submod.dtype - - if not isinstance(input_val, trt.tensorrt.ITensor): - raise RuntimeError( - f"Quantize received input {input_val} that is not part " - "of the TensorRT region!" - ) - - if dtype != torch.quint8: - raise RuntimeError( - f"Only support torch.quint8 quantized type for activation, get {dtype}." - ) - - input_val.dynamic_range = get_dyn_range(scale, zero_point, dtype) - return input_val - - -@tensorrt_converter(torch.nn.modules.linear.Identity) -def identity(network, submod, args, kwargs, layer_name): - input_val = kwargs["input"] - - if not isinstance(input_val, trt.tensorrt.ITensor): - raise RuntimeError( - f"Identity received input {input_val} that is not part " - "of the TensorRT region!" 
- ) - - return input_val diff --git a/py/torch_tensorrt/fx/converters/transformation.py b/py/torch_tensorrt/fx/converters/transformation.py index 62cfef8453..bde0f208ec 100644 --- a/py/torch_tensorrt/fx/converters/transformation.py +++ b/py/torch_tensorrt/fx/converters/transformation.py @@ -4,8 +4,6 @@ from ..converter_registry import tensorrt_converter -from .converter_utils import mark_as_int8_layer - @tensorrt_converter(torch.flatten) def torch_flatten(network, target, args, kwargs, name): @@ -45,7 +43,4 @@ def torch_flatten(network, target, args, kwargs, name): layer.reshape_dims = tuple(new_shape) layer.name = name - if input_val.dynamic_range: - mark_as_int8_layer(layer, input_val.dynamic_range) - return layer.get_output(0) diff --git a/py/torch_tensorrt/ts/_compile_spec.py b/py/torch_tensorrt/ts/_compile_spec.py index 6016fe87c5..0d0b12723e 100644 --- a/py/torch_tensorrt/ts/_compile_spec.py +++ b/py/torch_tensorrt/ts/_compile_spec.py @@ -239,9 +239,6 @@ def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec: compile_spec["enabled_precisions"] ) - if "calibrator" in compile_spec and compile_spec["calibrator"]: - info.ptq_calibrator = compile_spec["calibrator"] - if "sparse_weights" in compile_spec: assert isinstance(compile_spec["sparse_weights"], bool) info.sparse_weights = compile_spec["sparse_weights"] @@ -319,7 +316,6 @@ def TensorRTCompileSpec( dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_long_and_double: bool = False, - calibrator: object = None, allow_shape_tensors: bool = False, ) -> torch.classes.tensorrt.CompileSpec: """Utility to create a formatted spec dictionary for using the PyTorch TensorRT backend @@ -354,7 +350,6 @@ def TensorRTCompileSpec( num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels workspace_size (int): Maximum size of workspace given to TensorRT truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT Returns: @@ -378,7 +373,6 @@ def TensorRTCompileSpec( "dla_sram_size": dla_sram_size, # Fast software managed RAM used by DLA to communicate within a layer. 
"dla_local_dram_size": dla_local_dram_size, # Host RAM used by DLA to share intermediate tensor data across operations "dla_global_dram_size": dla_global_dram_size, # Host RAM used by DLA to store weights and metadata for execution - "calibrator": calibrator, "truncate_long_and_double": truncate_long_and_double, "allow_shape_tensors": allow_shape_tensors, } @@ -433,6 +427,5 @@ def TensorRTCompileSpec( backend_spec._set_dla_global_dram_size(parsed_spec.dla_global_dram_size) backend_spec._set_truncate_long_and_double(parsed_spec.truncate_long_and_double) backend_spec._set_allow_shape_tensors(parsed_spec.allow_shape_tensors) - backend_spec._set_ptq_calibrator(parsed_spec._get_calibrator_handle()) return backend_spec diff --git a/py/torch_tensorrt/ts/_compiler.py b/py/torch_tensorrt/ts/_compiler.py index 114398f010..4bcbf058bc 100644 --- a/py/torch_tensorrt/ts/_compiler.py +++ b/py/torch_tensorrt/ts/_compiler.py @@ -27,7 +27,6 @@ def compile( dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, - calibrator: object = None, truncate_long_and_double: bool = False, require_full_compilation: bool = False, min_block_size: int = 3, @@ -92,7 +91,6 @@ def compile( dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration require_full_compilation (bool): Require modules to be compiled end to end or return an error as opposed to returning a hybrid graph where operations that cannot be run in TensorRT are run in PyTorch min_block_size (int): The minimum number of contiguous TensorRT convertible operations in order to run a set of operations in TensorRT torch_executed_ops (List[str]): List of aten operators that must be run in PyTorch. 
An error will be thrown if this list is not empty but ``require_full_compilation`` is True @@ -147,7 +145,6 @@ def compile( "dla_sram_size": dla_sram_size, "dla_local_dram_size": dla_local_dram_size, "dla_global_dram_size": dla_global_dram_size, - "calibrator": calibrator, "truncate_long_and_double": truncate_long_and_double, "torch_fallback": { "enabled": not require_full_compilation, @@ -182,7 +179,6 @@ def convert_method_to_trt_engine( dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_long_and_double: int = False, - calibrator: object = None, allow_shape_tensors: bool = False, ) -> bytes: """Convert a TorchScript module method to a serialized TensorRT engine @@ -241,7 +237,6 @@ def convert_method_to_trt_engine( dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT Returns: @@ -274,7 +269,6 @@ def convert_method_to_trt_engine( "capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels "workspace_size": workspace_size, # Maximum size of workspace given to TensorRT - "calibrator": calibrator, "truncate_long_and_double": truncate_long_and_double, "allow_shape_tensors": allow_shape_tensors, } diff --git a/py/torch_tensorrt/ts/ptq.py b/py/torch_tensorrt/ts/ptq.py deleted file mode 100644 index db55aa47e4..0000000000 --- a/py/torch_tensorrt/ts/ptq.py +++ /dev/null @@ -1,233 +0,0 @@ -import sys -from typing import Any, List, Optional - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self - -import os -import warnings -from enum import Enum - -import torch -from torch_tensorrt import _C -from torch_tensorrt.ts.logging import Level, log - - -class CalibrationAlgo(Enum): - ENTROPY_CALIBRATION = _C.CalibrationAlgo.ENTROPY_CALIBRATION - ENTROPY_CALIBRATION_2 = _C.CalibrationAlgo.ENTROPY_CALIBRATION_2 - LEGACY_CALIBRATION = _C.CalibrationAlgo.LEGACY_CALIBRATION - MINMAX_CALIBRATION = _C.CalibrationAlgo.MINMAX_CALIBRATION - - -def get_cache_mode_batch(self: object) -> None: - return None - - -def get_batch_size(self: object) -> int: - return 1 - - -def get_batch(self: object, _: Any) -> Optional[List[int]]: - if self.current_batch_idx + self.batch_size > len(self.data_loader.dataset): - return None - - batch = next(self.dataset_iterator) - self.current_batch_idx += self.batch_size - inputs_gpu = [] - if isinstance(batch, list): - for example in batch: - inputs_gpu.append(example.to(self.device).data_ptr()) - else: - inputs_gpu.append(batch.to(self.device).data_ptr()) - return inputs_gpu - - -def read_calibration_cache(self: object) -> bytes: - if self.cache_file and self.use_cache: - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - b: bytes = f.read() - return b - else: - raise FileNotFoundError(self.cache_file) - else: - return b"" - - -def write_calibration_cache(self: object, cache: bytes) -> None: - if self.cache_file: - with 
open(self.cache_file, "wb") as f: - f.write(cache) - else: - return - - -# deepcopy (which involves pickling) is performed on the compile_spec internally during compilation. -# We register this __reduce__ function for pickler to identity the calibrator object returned by DataLoaderCalibrator during deepcopy. -# This should be the object's local name relative to the module https://docs.python.org/3/library/pickle.html#object.__reduce__ -def __reduce__(self: object) -> str: - return self.__class__.__name__ - - -class DataLoaderCalibrator(object): - """ - Constructs a calibrator class in TensorRT and uses pytorch dataloader to load/preprocess - data which is passed during calibration. - - Arguments: - dataloader (torch.utils.data.DataLoader): an instance of pytorch dataloader which iterates through a given dataset. - algo_type (CalibrationAlgo): choice of calibration algorithm. - cache_file (str): path to cache file. - use_cache (bool): flag which enables usage of pre-existing cache. - device (Device): device on which calibration data is copied to. - """ - - def __init__(self, **kwargs: Any): - pass - - def __new__(cls, *args: Any, **kwargs: Any) -> Self: - warnings.warn( - "Int8 PTQ Calibrator has been deprecated by TensorRT, please plan on porting to a NVIDIA Model Optimizer Toolkit based workflow. See: https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_ptq.html for more details", - DeprecationWarning, - stacklevel=2, - ) - dataloader = args[0] - algo_type = kwargs.get("algo_type", CalibrationAlgo.ENTROPY_CALIBRATION_2) - cache_file = kwargs.get("cache_file", None) - use_cache = kwargs.get("use_cache", False) - device = kwargs.get("device", torch.device("cuda:0")) - - if not isinstance(dataloader, torch.utils.data.DataLoader): - log( - Level.Error, - "Dataloader : {} is not a valid instance of torch.utils.data.DataLoader".format( - dataloader - ), - ) - - if cache_file: - if use_cache: - log( - Level.Debug, - "Using existing cache_file {} for calibration".format(cache_file), - ) - else: - log(Level.Debug, "Overwriting existing calibration cache file.") - else: - if use_cache: - log( - Level.Warning, - "Input cache file is None but use_cache is set to True in INT8 mode. 
Ignoring use_cache flag in this run.", - ) - - # Define attributes and member functions for the calibrator class - attribute_mapping = { - "data_loader": dataloader, - "current_batch_idx": 0, - "batch_size": dataloader.batch_size, - "dataset_iterator": iter(dataloader), - "cache_file": cache_file, - "device": device, - "use_cache": use_cache, - "get_batch_size": get_batch_size, - "get_batch": get_cache_mode_batch if use_cache else get_batch, - "read_calibration_cache": read_calibration_cache, - "write_calibration_cache": write_calibration_cache, - "__reduce__": __reduce__, # used when you deepcopy the DataLoaderCalibrator object - } - - # Using type metaclass to construct calibrator class based on algorithm type - if algo_type == CalibrationAlgo.ENTROPY_CALIBRATION: - calib_ec: Self = type( - "Int8EntropyCalibrator", (_C.IInt8EntropyCalibrator,), attribute_mapping - )() - return calib_ec - elif algo_type == CalibrationAlgo.ENTROPY_CALIBRATION_2: - calib_ec2: Self = type( - "Int8EntropyCalibrator2", - (_C.IInt8EntropyCalibrator2,), - attribute_mapping, - )() - return calib_ec2 - elif algo_type == CalibrationAlgo.LEGACY_CALIBRATION: - calib_lc: Self = type( - "Int8LegacyCalibrator", (_C.IInt8LegacyCalibrator,), attribute_mapping - )() - return calib_lc - elif algo_type == CalibrationAlgo.MINMAX_CALIBRATION: - calib_mmc: Self = type( - "Int8MinMaxCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping - )() - return calib_mmc - else: - raise ValueError( - "Invalid calibration algorithm type. Please select among ENTROPY_CALIBRATION, ENTROPY_CALIBRATION, LEGACY_CALIBRATION or MINMAX_CALIBRATION" - ) - - -class CacheCalibrator(object): - """ - Constructs a calibrator class in TensorRT which directly uses pre-existing cache file for calibration. - - Arguments: - cache_file (str): path to cache file. - algo_type (CalibrationAlgo): choice of calibration algorithm. - """ - - def __init__(self, **kwargs: Any): - pass - - def __new__(cls, *args: Any, **kwargs: Any) -> Self: - warnings.warn( - "Int8 PTQ Calibrator has been deprecated by TensorRT, please plan on porting to a NVIDIA Model Optimizer Toolkit based workflow. 
See: https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_ptq.html for more details", - DeprecationWarning, - stacklevel=2, - ) - cache_file = args[0] - algo_type = kwargs.get("algo_type", CalibrationAlgo.ENTROPY_CALIBRATION_2) - - if os.path.isfile(cache_file): - log( - Level.Debug, - "Using existing cache_file {} for calibration".format(cache_file), - ) - else: - log(Level.Error, "Invalid calibration cache file.") - - # Define attributes and member functions for the calibrator class - attribute_mapping = { - "use_cache": True, - "cache_file": cache_file, - "get_batch_size": get_batch_size, - "get_batch": get_cache_mode_batch, - "read_calibration_cache": read_calibration_cache, - "write_calibration_cache": write_calibration_cache, - } - # Using type metaclass to construct calibrator class based on algorithm type - if algo_type == CalibrationAlgo.ENTROPY_CALIBRATION: - calib_ec: Self = type( - "DataLoaderCalibrator", (_C.IInt8EntropyCalibrator,), attribute_mapping - )() - return calib_ec - elif algo_type == CalibrationAlgo.ENTROPY_CALIBRATION_2: - calib_ec2: Self = type( - "DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping - )() - return calib_ec2 - elif algo_type == CalibrationAlgo.LEGACY_CALIBRATION: - calib_lc: Self = type( - "DataLoaderCalibrator", (_C.IInt8LegacyCalibrator,), attribute_mapping - )() - return calib_lc - elif algo_type == CalibrationAlgo.MINMAX_CALIBRATION: - calib_mmc: Self = type( - "DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping - )() - return calib_mmc - else: - raise ValueError( - "Invalid calibration algorithm type. Please select among ENTROPY_CALIBRATION, ENTROPY_CALIBRATION, LEGACY_CALIBRATION or MINMAX_CALIBRATION" - ) diff --git a/tests/py/ts/BUILD b/tests/py/ts/BUILD index 98db68fc44..0da75f7b10 100644 --- a/tests/py/ts/BUILD +++ b/tests/py/ts/BUILD @@ -27,30 +27,6 @@ py_test( ], ) -py_test( - name = "test_ptq_dataloader_calibrator", - srcs = [ - "model_test_case.py", - "test_ptq_dataloader_calibrator.py", - ], - deps = [ - requirement("torchvision"), - ], -) - -# This test is not included in the main test suite by default. This test checks -# if trtorch can use pre-existing trt calibrators already implemented by users. -py_test( - name = "test_ptq_trt_calibrator", - srcs = [ - "model_test_case.py", - "test_ptq_trt_calibrator.py", - ], - deps = [ - requirement("torchvision"), - ], -) - # Following multi_gpu test is only targeted for multi-gpu configurations. It is not included in the test suite by default. 
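# The DataLoaderCalibrator/CacheCalibrator path removed above is superseded by the
# NVIDIA Model Optimizer (modelopt) workflow that the deprecation warning links to.
# A minimal sketch of that replacement, assuming the modelopt package and the names
# used in the linked vgg16_ptq tutorial (mtq.INT8_DEFAULT_CFG, mtq.quantize), with
# testing_dataloader and example_input as placeholders:
#
#   import modelopt.torch.quantization as mtq
#   import torch
#   import torch_tensorrt
#
#   def forward_loop(model):
#       # feed a few calibration batches through the model
#       for data, _ in testing_dataloader:
#           model(data.to("cuda"))
#
#   # insert Q/DQ nodes and calibrate their amax values in place
#   model = mtq.quantize(model, mtq.INT8_DEFAULT_CFG, forward_loop)
#
#   # compilation then only needs INT8 enabled; no calibrator is passed
#   exp_program = torch.export.export(model, (example_input,))
#   trt_model = torch_tensorrt.dynamo.compile(
#       exp_program, inputs=[example_input], enabled_precisions={torch.int8}
#   )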
py_test( name = "test_multi_gpu", @@ -84,23 +60,3 @@ py_test( requirement("torchvision"), ], ) - -py_test( - name = "test_ptq_to_backend", - srcs = [ - "model_test_case.py", - "test_ptq_to_backend.py", - ], - deps = [ - requirement("torchvision"), - ], -) - -test_suite( - name = "py_calibrator_tests", - tests = [ - ":test_ptq_dataloader_calibrator", - ":test_ptq_to_backend", - ":test_ptq_trt_calibrator", - ], -) diff --git a/tests/py/ts/ptq/test_ptq_dataloader_calibrator.py b/tests/py/ts/ptq/test_ptq_dataloader_calibrator.py deleted file mode 100644 index 9e3068ec3b..0000000000 --- a/tests/py/ts/ptq/test_ptq_dataloader_calibrator.py +++ /dev/null @@ -1,112 +0,0 @@ -import os -import unittest - -import torch -import torch.nn as nn -import torch_tensorrt as torchtrt -import torch_tensorrt.ts.ptq as PTQ -import torchvision -import torchvision.transforms as transforms -from torch.nn import functional as F -from torch_tensorrt.ts.logging import * - - -def find_repo_root(max_depth=10): - dir_path = os.path.dirname(os.path.realpath(__file__)) - for i in range(max_depth): - files = os.listdir(dir_path) - if "MODULE.bazel" in files: - return dir_path - else: - dir_path = os.path.dirname(dir_path) - - raise RuntimeError("Could not find repo root") - - -MODULE_DIR = find_repo_root() + "/tests/modules" - - -def compute_accuracy(testing_dataloader, model): - total = 0 - correct = 0 - loss = 0.0 - class_probs = [] - class_preds = [] - device = torch.device("cuda:0") - with torch.no_grad(): - idx = 0 - for data, labels in testing_dataloader: - data, labels = data.to(device), labels.to(device) - out = model(data) - preds = torch.max(out, 1)[1] - class_probs.append([F.softmax(i, dim=0) for i in out]) - class_preds.append(preds) - total += labels.size(0) - correct += (preds == labels).sum().item() - idx += 1 - - test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) - test_preds = torch.cat(class_preds) - return correct / total - - -@unittest.skipIf( - not torchtrt.ENABLED_FEATURES.torchscript_frontend, - "TorchScript Frontend is not available", -) -class TestAccuracy(unittest.TestCase): - def test_compile_script(self): - self.model = ( - torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda") - ) - self.input = torch.randn((1, 3, 32, 32)).to("cuda") - self.testing_dataset = torchvision.datasets.CIFAR10( - root="./data", - train=False, - download=True, - transform=transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), - ] - ), - ) - - self.testing_dataloader = torch.utils.data.DataLoader( - self.testing_dataset, batch_size=1, shuffle=False, num_workers=1 - ) - self.calibrator = PTQ.DataLoaderCalibrator( - self.testing_dataloader, - cache_file="./calibration.cache", - use_cache=False, - algo_type=PTQ.CalibrationAlgo.ENTROPY_CALIBRATION_2, - device=torch.device("cuda:0"), - ) - - compile_spec = { - "inputs": [torchtrt.Input([1, 3, 32, 32])], - "enabled_precisions": {torch.float, torch.int8}, - "calibrator": self.calibrator, - "truncate_long_and_double": True, - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - "dla_core": 0, - "allow_gpu_fallback": False, - }, - } - trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - - fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) - log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) - - int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod) - log(Level.Info, "[TRT INT8] Test Acc: 
{:.2f}%".format(100 * int8_test_acc)) - acc_diff = fp32_test_acc - int8_test_acc - self.assertTrue(abs(acc_diff) < 3) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/py/ts/ptq/test_ptq_to_backend.py b/tests/py/ts/ptq/test_ptq_to_backend.py deleted file mode 100644 index 015ce97126..0000000000 --- a/tests/py/ts/ptq/test_ptq_to_backend.py +++ /dev/null @@ -1,119 +0,0 @@ -import os -import unittest - -import torch -import torch.nn as nn -import torch_tensorrt as torchtrt -import torchvision -import torchvision.transforms as transforms -import torch_tensorrt.ts.ptq as PTQ -from torch.nn import functional as F -from torch_tensorrt.ts.logging import * - - -def find_repo_root(max_depth=10): - dir_path = os.path.dirname(os.path.realpath(__file__)) - for i in range(max_depth): - files = os.listdir(dir_path) - if "WORKSPACE" in files: - return dir_path - else: - dir_path = os.path.dirname(dir_path) - - raise RuntimeError("Could not find repo root") - - -MODULE_DIR = find_repo_root() + "/tests/modules" - - -def compute_accuracy(testing_dataloader, model): - total = 0 - correct = 0 - loss = 0.0 - class_probs = [] - class_preds = [] - device = torch.device("cuda:0") - with torch.no_grad(): - idx = 0 - for data, labels in testing_dataloader: - data, labels = data.to(device), labels.to(device) - out = model(data) - preds = torch.max(out, 1)[1] - class_probs.append([F.softmax(i, dim=0) for i in out]) - class_preds.append(preds) - total += labels.size(0) - correct += (preds == labels).sum().item() - idx += 1 - - test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) - test_preds = torch.cat(class_preds) - return correct / total - - -@unittest.skipIf( - not torchtrt.ENABLED_FEATURES.torchscript_frontend, - "TorchScript Frontend is not available", -) -class TestAccuracy(unittest.TestCase): - def test_compile_script(self): - self.model = ( - torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda") - ) - self.input = torch.randn((1, 3, 32, 32)).to("cuda") - self.testing_dataset = torchvision.datasets.CIFAR10( - root="./data", - train=False, - download=True, - transform=transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), - ] - ), - ) - - self.testing_dataloader = torch.utils.data.DataLoader( - self.testing_dataset, batch_size=1, shuffle=False, num_workers=1 - ) - self.calibrator = PTQ.DataLoaderCalibrator( - self.testing_dataloader, - cache_file="./calibration.cache", - use_cache=False, - algo_type=PTQ.CalibrationAlgo.ENTROPY_CALIBRATION_2, - device=torch.device("cuda:0"), - ) - - self.spec = { - "forward": torchtrt.ts.TensorRTCompileSpec( - **{ - "inputs": [torchtrt.Input([1, 3, 32, 32])], - "enabled_precisions": {torch.float, torch.half, torch.int8}, - "calibrator": self.calibrator, - "truncate_long_and_double": True, - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - "dla_core": 0, - "allow_gpu_fallback": False, - }, - } - ) - } - - fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) - log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) - - trt_mod = torch._C._jit_to_backend("tensorrt", self.model, self.spec) - int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod) - log( - Level.Info, - "[TRT INT8 Backend] Test Acc: {:.2f}%".format(100 * int8_test_acc), - ) - acc_diff = fp32_test_acc - int8_test_acc - self.assertTrue(abs(acc_diff) < 3) - - -if __name__ == "__main__": - unittest.main() diff --git 
a/tests/py/ts/ptq/test_ptq_trt_calibrator.py b/tests/py/ts/ptq/test_ptq_trt_calibrator.py deleted file mode 100644 index bef057081b..0000000000 --- a/tests/py/ts/ptq/test_ptq_trt_calibrator.py +++ /dev/null @@ -1,156 +0,0 @@ -import os -import unittest - -import tensorrt as trt -import torch -import torch.nn as nn -import torch_tensorrt as torchtrt -import torchvision -import torchvision.transforms as transforms -from torch.nn import functional as F -from torch_tensorrt.ts.logging import * - - -def find_repo_root(max_depth=10): - dir_path = os.path.dirname(os.path.realpath(__file__)) - for i in range(max_depth): - files = os.listdir(dir_path) - if "MODULE.bazel" in files: - return dir_path - else: - dir_path = os.path.dirname(dir_path) - - raise RuntimeError("Could not find repo root") - - -MODULE_DIR = find_repo_root() + "/tests/modules" - - -def compute_accuracy(testing_dataloader, model): - total = 0 - correct = 0 - loss = 0.0 - class_probs = [] - class_preds = [] - device = torch.device("cuda:0") - with torch.no_grad(): - idx = 0 - for data, labels in testing_dataloader: - data, labels = data.to(device), labels.to(device) - out = model(data) - preds = torch.max(out, 1)[1] - class_probs.append([F.softmax(i, dim=0) for i in out]) - class_preds.append(preds) - total += labels.size(0) - correct += (preds == labels).sum().item() - idx += 1 - - test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) - test_preds = torch.cat(class_preds) - return correct / total - - -@unittest.skipIf( - not torchtrt.ENABLED_FEATURES.torchscript_frontend, - "TorchScript Frontend is not available", -) -class TRTEntropyCalibrator(trt.IInt8EntropyCalibrator2): - def __init__(self, dataloader, **kwargs): - trt.IInt8EntropyCalibrator2.__init__(self) - - self.cache_file = kwargs.get("cache_file", None) - self.use_cache = kwargs.get("use_cache", False) - self.device = kwargs.get("device", torch.device("cuda:0")) - - self.dataloader = dataloader - self.dataset_iterator = iter(dataloader) - self.batch_size = dataloader.batch_size - self.current_batch_idx = 0 - - def get_batch_size(self): - return 1 - - # TensorRT passes along the names of the engine bindings to the get_batch function. - # You don't necessarily have to use them, but they can be useful to understand the order of - # the inputs. The bindings list is expected to have the same ordering as 'names'. - def get_batch(self, names): - if ( - self.current_batch_idx + self.batch_size - > self.dataloader.dataset.data.shape[0] - ): - return None - - batch = next(self.dataset_iterator) - self.current_batch_idx += self.batch_size - # Treat the first element as input and others as targets. - if isinstance(batch, list): - batch = batch[0].to(self.device) - return [batch.data_ptr()] - - def read_calibration_cache(self): - # If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None. 
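# (Returning None here tells TensorRT that no calibration cache exists, so it
# runs a fresh calibration pass by pulling data through get_batch(); returning
# the cached bytes makes TensorRT skip calibration entirely.)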
- if self.use_cache: - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - if self.cache_file: - with open(self.cache_file, "wb") as f: - f.write(cache) - - -@unittest.skipIf( - not torchtrt.ENABLED_FEATURES.torchscript_frontend, - "TorchScript Frontend is not available", -) -class TestAccuracy(unittest.TestCase): - def test_compile_script(self): - self.model = ( - torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda") - ) - self.input = torch.randn((1, 3, 32, 32)).to("cuda") - self.testing_dataset = torchvision.datasets.CIFAR10( - root="./data", - train=False, - download=True, - transform=transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), - ] - ), - ) - - self.testing_dataloader = torch.utils.data.DataLoader( - self.testing_dataset, batch_size=1, shuffle=False, num_workers=1 - ) - # Test cases can assume using GPU id: 0 - self.calibrator = TRTEntropyCalibrator(self.testing_dataloader) - - fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) - log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) - - compile_spec = { - "inputs": [torchtrt.Input([1, 3, 32, 32])], - "enabled_precisions": {torch.float, torch.int8}, - "calibrator": self.calibrator, - "truncate_long_and_double": True, - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - "dla_core": 0, - "allow_gpu_fallback": False, - }, - } - - trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod) - log(Level.Info, "[TRT INT8] Test Acc: {:.2f}%".format(100 * int8_test_acc)) - acc_diff = fp32_test_acc - int8_test_acc - self.assertTrue(abs(acc_diff) < 3) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/py/ts/qat/test_qat_trt_accuracy.py b/tests/py/ts/qat/test_qat_trt_accuracy.py deleted file mode 100644 index ade2cfc865..0000000000 --- a/tests/py/ts/qat/test_qat_trt_accuracy.py +++ /dev/null @@ -1,99 +0,0 @@ -import os -import sys -import unittest - -import torch -import torch.nn as nn -import torch_tensorrt as torchtrt -import torchvision -import torchvision.transforms as transforms -from torch.nn import functional as F -from torch_tensorrt.ts.logging import * - - -def find_repo_root(max_depth=10): - dir_path = os.path.dirname(os.path.realpath(__file__)) - for i in range(max_depth): - files = os.listdir(dir_path) - if "WORKSPACE" in files: - return dir_path - else: - dir_path = os.path.dirname(dir_path) - - raise RuntimeError("Could not find repo root") - - -MODULE_DIR = find_repo_root() + "/tests/modules" - -set_reportable_log_level(Level.Graph) - - -def compute_accuracy(testing_dataloader, model): - total = 0 - correct = 0 - loss = 0.0 - class_probs = [] - class_preds = [] - device = torch.device("cuda:0") - with torch.no_grad(): - idx = 0 - for data, labels in testing_dataloader: - data, labels = data.to(device), labels.to(device) - out = model(data) - preds = torch.max(out, 1)[1] - class_probs.append([F.softmax(i, dim=0) for i in out]) - class_preds.append(preds) - total += labels.size(0) - correct += (preds == labels).sum().item() - idx += 1 - - test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) - test_preds = torch.cat(class_preds) - return correct / total - - -@unittest.skipIf( - not torchtrt.ENABLED_FEATURES.torchscript_frontend, - "TorchScript Frontend is not available", -) -class TestAccuracy(unittest.TestCase): - def 
test_compile_script(self): - self.model = ( - torch.jit.load(MODULE_DIR + "/trained_vgg16_qat.jit.pt").eval().to("cuda") - ) - self.testing_dataset = torchvision.datasets.CIFAR10( - root="./data", - train=False, - download=True, - transform=transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), - ] - ), - ) - - self.testing_dataloader = torch.utils.data.DataLoader( - self.testing_dataset, batch_size=16, shuffle=False, num_workers=1 - ) - - fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) - log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) - - compile_spec = { - "inputs": [torchtrt.Input([16, 3, 32, 32])], - "enabled_precisions": {torch.int8}, - # "enabled_precision": {torch.float32, torch.int8}, - } - - trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod) - log(Level.Info, "[TRT QAT INT8] Test Acc: {:.2f}%".format(100 * int8_test_acc)) - acc_diff = fp32_test_acc - int8_test_acc - self.assertTrue(abs(acc_diff) < 3) - - -if __name__ == "__main__": - unittest.main() From 18d179e663587a1d46731d598fb4e52291f5dced Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 7 Aug 2025 15:44:04 -0700 Subject: [PATCH 2/8] resolve comments --- core/conversion/conversionctx/ConversionCtx.cpp | 2 +- cpp/bin/torchtrtc/main.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp index 21c1cd9265..c0dbacabc5 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -63,7 +63,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) cfg->setFlag(nvinfer1::BuilderFlag::kFP16); break; case nvinfer1::DataType::kINT8: - LOG_WARNING("INT8 precision has been enabled, we assume the network has Q/DQ nodes obtained from modelopt"); + LOG_DEBUG("INT8 precision has been enabled, we assume the network has Q/DQ nodes obtained from modelopt"); break; case nvinfer1::DataType::kFLOAT: break; diff --git a/cpp/bin/torchtrtc/main.cpp b/cpp/bin/torchtrtc/main.cpp index 72eddbff71..874cb96ef3 100644 --- a/cpp/bin/torchtrtc/main.cpp +++ b/cpp/bin/torchtrtc/main.cpp @@ -365,7 +365,7 @@ int main(int argc, char** argv) { } else if (dtype == torchtrt::DataType::kChar) { compile_settings.enabled_precisions.insert(torch::kI8); torchtrt::logging::log( - torchtrt::logging::Level::kINFO, + torchtrt::logging::Level::kDEBUG, "Int8 precision has been enabled which assumes the network has Q/DQ nodes obtained"); } else { std::stringstream ss; From 1ae794fff0230a92ffbca89ef1956da42d044a7d Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 7 Aug 2025 22:03:29 -0700 Subject: [PATCH 3/8] revert changes to fx --- py/torch_tensorrt/fx/converters/__init__.py | 1 + .../fx/converters/adaptive_avgpool.py | 5 +- py/torch_tensorrt/fx/converters/add.py | 6 ++ py/torch_tensorrt/fx/converters/batchnorm.py | 7 +- .../fx/converters/converter_utils.py | 20 ++++++ .../fx/converters/impl/activation.py | 9 ++- .../fx/converters/impl/convolution.py | 7 ++ py/torch_tensorrt/fx/converters/linear.py | 12 +++- py/torch_tensorrt/fx/converters/maxpool.py | 5 +- py/torch_tensorrt/fx/converters/mul.py | 3 + .../fx/converters/quantization.py | 65 +++++++++++++++++++ .../fx/converters/transformation.py | 4 ++ 12 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 
py/torch_tensorrt/fx/converters/quantization.py diff --git a/py/torch_tensorrt/fx/converters/__init__.py b/py/torch_tensorrt/fx/converters/__init__.py index 60c20df44d..b3edcafc4c 100644 --- a/py/torch_tensorrt/fx/converters/__init__.py +++ b/py/torch_tensorrt/fx/converters/__init__.py @@ -11,6 +11,7 @@ from .maxpool import * # noqa: F401 F403 from .mul import * # noqa: F401 F403 from .nn_ops_converters import * # noqa: F401 F403 + from .quantization import * # noqa: F401 F403 from .transformation import * # noqa: F401 F403 TRT_LOGGER = trt.Logger() diff --git a/py/torch_tensorrt/fx/converters/adaptive_avgpool.py b/py/torch_tensorrt/fx/converters/adaptive_avgpool.py index 9516be06a3..47112c39ba 100644 --- a/py/torch_tensorrt/fx/converters/adaptive_avgpool.py +++ b/py/torch_tensorrt/fx/converters/adaptive_avgpool.py @@ -3,7 +3,7 @@ import torch from ..converter_registry import tensorrt_converter -from .converter_utils import extend_mod_attr_to_tuple +from .converter_utils import extend_mod_attr_to_tuple, mark_as_int8_layer @tensorrt_converter(torch.nn.modules.pooling.AdaptiveAvgPool2d) @@ -30,4 +30,7 @@ def adaptive_avgpool2d(network, submod, args, kwargs, name): layer.stride = stride layer.name = name + if input_val.dynamic_range: + mark_as_int8_layer(layer, input_val.dynamic_range) + return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/add.py b/py/torch_tensorrt/fx/converters/add.py index 36dc91b637..60f6837752 100644 --- a/py/torch_tensorrt/fx/converters/add.py +++ b/py/torch_tensorrt/fx/converters/add.py @@ -5,6 +5,7 @@ import torch from ..converter_registry import tensorrt_converter +from .converter_utils import get_dyn_range, mark_as_int8_layer @tensorrt_converter(operator.add) @@ -41,6 +42,8 @@ def quantized_add(network, target, args, kwargs, layer_name): layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.SUM) layer.name = layer_name + dyn_range = get_dyn_range(kwargs["scale"], kwargs["zero_point"], torch.quint8) + mark_as_int8_layer(layer, dyn_range) return layer.get_output(0) @@ -56,10 +59,13 @@ def quantized_add_relu(network, submod, args, kwargs, layer_name): layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.SUM) layer.name = f"{layer_name}_add" + dyn_range = get_dyn_range(kwargs["scale"], kwargs["zero_point"], torch.quint8) + mark_as_int8_layer(layer, dyn_range) layer = network.add_activation( input=layer.get_output(0), type=trt.ActivationType.RELU ) layer.name = f"{layer_name}_relu" + mark_as_int8_layer(layer, dyn_range) return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/batchnorm.py b/py/torch_tensorrt/fx/converters/batchnorm.py index 3e08c93b12..873b2169a7 100644 --- a/py/torch_tensorrt/fx/converters/batchnorm.py +++ b/py/torch_tensorrt/fx/converters/batchnorm.py @@ -5,7 +5,7 @@ import torch from ..converter_registry import tensorrt_converter -from .converter_utils import to_numpy +from .converter_utils import get_dyn_range, mark_as_int8_layer, to_numpy def common_batchnorm(network, mod, input_val, layer_name, is_quantized): @@ -16,6 +16,11 @@ def common_batchnorm(network, mod, input_val, layer_name, is_quantized): layer = network.add_scale(input_val, trt.ScaleMode.CHANNEL, bias, scale, power) layer.name = layer_name + if is_quantized: + mark_as_int8_layer( + layer, get_dyn_range(mod.scale, mod.zero_point, torch.quint8) + ) + return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/converter_utils.py b/py/torch_tensorrt/fx/converters/converter_utils.py index 
ea4d09bb37..78ea125424 100644 --- a/py/torch_tensorrt/fx/converters/converter_utils.py +++ b/py/torch_tensorrt/fx/converters/converter_utils.py @@ -704,6 +704,26 @@ def get_dyn_range(scale, zero_point, dtype): return (min_val - zero_point) * scale, (max_val - zero_point) * scale +def mark_as_int8_layer(layer, dynamic_range): + """ + Set the precision of a layer to int8 as well as the type of its first output. + Also set the dynamic range of its first output. + """ + if layer.type not in { + trt.LayerType.SHUFFLE, + trt.LayerType.CONCATENATION, + trt.LayerType.CONSTANT, + trt.LayerType.SHAPE, + }: + layer.precision = trt.int8 + + for i in range(layer.num_outputs): + output_val = layer.get_output(i) + output_val.dynamic_range = dynamic_range + layer.set_output_type(i, trt.int8) + # output_val.dtype = trt.int8 + + def get_inputs_from_args_and_kwargs(args, kwargs, input_names): inputs = [] for i, key in enumerate(input_names): diff --git a/py/torch_tensorrt/fx/converters/impl/activation.py b/py/torch_tensorrt/fx/converters/impl/activation.py index 4b613b5de2..5c3a0ac5a5 100644 --- a/py/torch_tensorrt/fx/converters/impl/activation.py +++ b/py/torch_tensorrt/fx/converters/impl/activation.py @@ -8,7 +8,11 @@ import tensorrt as trt import torch from torch.fx.node import Argument, Target -from torch_tensorrt.fx.converters.converter_utils import SourceIR, set_layer_name +from torch_tensorrt.fx.converters.converter_utils import ( + SourceIR, + mark_as_int8_layer, + set_layer_name, +) from torch_tensorrt.fx.types import ( TRTNetwork, TRTTensor, @@ -59,6 +63,9 @@ def convert_activation( layer.beta = beta set_layer_name(layer, target, name, source_ir) + if input_val.dynamic_range is not None: + dyn_range = dyn_range_fn(input_val.dynamic_range) + mark_as_int8_layer(layer, dyn_range) return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/impl/convolution.py b/py/torch_tensorrt/fx/converters/impl/convolution.py index 295ded110e..c2a3fa1b79 100644 --- a/py/torch_tensorrt/fx/converters/impl/convolution.py +++ b/py/torch_tensorrt/fx/converters/impl/convolution.py @@ -10,8 +10,10 @@ from torch_tensorrt.fx.converters.converter_utils import ( SourceIR, extend_attr_to_tuple, + get_dyn_range, get_trt_tensor, has_dynamic_shape, + mark_as_int8_layer, set_layer_name, to_numpy, ) @@ -121,6 +123,11 @@ def convNd( if groups is not None: conv_layer.num_groups = groups + # Handle quantization cases + if scale is not None and zero_point is not None: + # Assume the dtype of activation is torch.quint8 + mark_as_int8_layer(conv_layer, get_dyn_range(scale, zero_point, torch.quint8)) + result = conv_layer.get_output(0) if is_conv1d: diff --git a/py/torch_tensorrt/fx/converters/linear.py b/py/torch_tensorrt/fx/converters/linear.py index 40cec22013..f234b769b2 100644 --- a/py/torch_tensorrt/fx/converters/linear.py +++ b/py/torch_tensorrt/fx/converters/linear.py @@ -3,7 +3,7 @@ import torch from ..converter_registry import tensorrt_converter -from .converter_utils import get_dyn_range, to_numpy +from .converter_utils import get_dyn_range, mark_as_int8_layer, to_numpy def common_linear(network, mod, input_val, layer_name, is_quantized): @@ -22,6 +22,9 @@ def common_linear(network, mod, input_val, layer_name, is_quantized): layer.reshape_dims = tuple(input_val.shape) + (1, 1) layer.name = f"{layer_name}_pre_shuffle" + if is_quantized: + mark_as_int8_layer(layer, input_val.dynamic_range) + kernel = to_numpy(mod.weight if not is_quantized else mod.weight()) bias = to_numpy(mod.bias if not is_quantized else mod.bias()) 
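# Worked example for the two helpers above (illustrative values, not taken from
# the source): a torch.quint8 activation stores values in 0..255, so with
# scale=0.1 and zero_point=128, get_dyn_range returns
#   lo = (0   - 128) * 0.1 = -12.8
#   hi = (255 - 128) * 0.1 =  12.7
# mark_as_int8_layer then assigns that (lo, hi) range to every layer output and
# requests trt.int8 output types, only skipping layer.precision for the
# SHUFFLE/CONCATENATION/CONSTANT/SHAPE layer types, whose precision TensorRT
# manages itself.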
@@ -34,11 +37,18 @@ def common_linear(network, mod, input_val, layer_name, is_quantized): ) layer.name = f"{layer_name}_linear" + if is_quantized: + dyn_range = get_dyn_range(mod.scale, mod.zero_point, torch.quint8) + mark_as_int8_layer(layer, dyn_range) + # reshape the output from (*, K, 1, 1) to (*, K) layer = network.add_shuffle(layer.get_output(0)) layer.reshape_dims = tuple(input_val.shape[:-1]) + (mod.out_features,) layer.name = f"{layer_name}_post_shuffle" + if is_quantized: + mark_as_int8_layer(layer, dyn_range) + return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/maxpool.py b/py/torch_tensorrt/fx/converters/maxpool.py index dcaf724617..467a03359d 100644 --- a/py/torch_tensorrt/fx/converters/maxpool.py +++ b/py/torch_tensorrt/fx/converters/maxpool.py @@ -3,7 +3,7 @@ import torch from ..converter_registry import tensorrt_converter -from .converter_utils import extend_mod_attr_to_tuple +from .converter_utils import extend_mod_attr_to_tuple, mark_as_int8_layer def common_maxpool(network, mod, dimension, input_val, layer_name): @@ -22,6 +22,9 @@ def common_maxpool(network, mod, dimension, input_val, layer_name): if mod.ceil_mode: layer.padding_mode = trt.PaddingMode.EXPLICIT_ROUND_UP + if input_val.dynamic_range: + mark_as_int8_layer(layer, input_val.dynamic_range) + return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/mul.py b/py/torch_tensorrt/fx/converters/mul.py index 24fc884fc5..9ce185d73b 100644 --- a/py/torch_tensorrt/fx/converters/mul.py +++ b/py/torch_tensorrt/fx/converters/mul.py @@ -5,6 +5,7 @@ import torch from ..converter_registry import tensorrt_converter +from .converter_utils import get_dyn_range, mark_as_int8_layer @tensorrt_converter(torch.mul) @@ -40,5 +41,7 @@ def quantized_mul(network, target, args, kwargs, layer_name): layer = network.add_elementwise(lhs_val, rhs_val, trt.ElementWiseOperation.PROD) layer.name = layer_name + dyn_range = get_dyn_range(kwargs["scale"], kwargs["zero_point"], torch.quint8) + mark_as_int8_layer(layer, dyn_range) return layer.get_output(0) diff --git a/py/torch_tensorrt/fx/converters/quantization.py b/py/torch_tensorrt/fx/converters/quantization.py new file mode 100644 index 0000000000..c97da536ca --- /dev/null +++ b/py/torch_tensorrt/fx/converters/quantization.py @@ -0,0 +1,65 @@ +# @manual=//deeplearning/trt/python:py_tensorrt +import tensorrt as trt +import torch + +from ..converter_registry import tensorrt_converter +from .converter_utils import get_dyn_range, get_inputs_from_args_and_kwargs + +quantize_per_tensor_inputs = ["input", "scale", "zero_point", "dtype"] + + +@tensorrt_converter("dequantize") +@tensorrt_converter(torch.dequantize) +@tensorrt_converter(torch.nn.quantized.modules.DeQuantize) +def dequantize(network, submod, args, kwargs, layer_name): + input_val = args[0] + + if not isinstance(input_val, trt.tensorrt.ITensor): + raise RuntimeError( + f"Dequantize received input {input_val} that is not part " + "of the TensorRT region!" 
+ ) + + return input_val + + +@tensorrt_converter(torch.quantize_per_tensor) +@tensorrt_converter(torch.nn.quantized.modules.Quantize) +def quantize(network, submod, args, kwargs, layer_name): + # If submod is not nn.Module then it's quantize_per_tensor + if not isinstance(submod, torch.nn.Module): + input_val, scale, zero_point, dtype = get_inputs_from_args_and_kwargs( + args, kwargs, quantize_per_tensor_inputs + ) + else: + input_val = args[0] + scale = submod.scale + zero_point = submod.zero_point + dtype = submod.dtype + + if not isinstance(input_val, trt.tensorrt.ITensor): + raise RuntimeError( + f"Quantize received input {input_val} that is not part " + "of the TensorRT region!" + ) + + if dtype != torch.quint8: + raise RuntimeError( + f"Only support torch.quint8 quantized type for activation, get {dtype}." + ) + + input_val.dynamic_range = get_dyn_range(scale, zero_point, dtype) + return input_val + + +@tensorrt_converter(torch.nn.modules.linear.Identity) +def identity(network, submod, args, kwargs, layer_name): + input_val = kwargs["input"] + + if not isinstance(input_val, trt.tensorrt.ITensor): + raise RuntimeError( + f"Identity received input {input_val} that is not part " + "of the TensorRT region!" + ) + + return input_val diff --git a/py/torch_tensorrt/fx/converters/transformation.py b/py/torch_tensorrt/fx/converters/transformation.py index bde0f208ec..c5d7e08f62 100644 --- a/py/torch_tensorrt/fx/converters/transformation.py +++ b/py/torch_tensorrt/fx/converters/transformation.py @@ -3,6 +3,7 @@ import torch from ..converter_registry import tensorrt_converter +from .converter_utils import mark_as_int8_layer @tensorrt_converter(torch.flatten) @@ -43,4 +44,7 @@ def torch_flatten(network, target, args, kwargs, name): layer.reshape_dims = tuple(new_shape) layer.name = name + if input_val.dynamic_range: + mark_as_int8_layer(layer, input_val.dynamic_range) + return layer.get_output(0) From 0639733303a286344a5fdfa6d14044944627a517 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 7 Aug 2025 22:07:35 -0700 Subject: [PATCH 4/8] revert fx changes --- py/torch_tensorrt/fx/converters/__init__.py | 8 ++++---- .../fx/converters/adaptive_avgpool.py | 1 + py/torch_tensorrt/fx/converters/add.py | 1 + py/torch_tensorrt/fx/converters/batchnorm.py | 1 + py/torch_tensorrt/fx/converters/impl/activation.py | 14 +++++++------- .../fx/converters/impl/convolution.py | 11 ++++++----- py/torch_tensorrt/fx/converters/linear.py | 1 + py/torch_tensorrt/fx/converters/maxpool.py | 1 + py/torch_tensorrt/fx/converters/mul.py | 1 + py/torch_tensorrt/fx/converters/quantization.py | 1 + py/torch_tensorrt/fx/converters/transformation.py | 1 + 11 files changed, 25 insertions(+), 16 deletions(-) diff --git a/py/torch_tensorrt/fx/converters/__init__.py b/py/torch_tensorrt/fx/converters/__init__.py index b3edcafc4c..f037d54ce7 100644 --- a/py/torch_tensorrt/fx/converters/__init__.py +++ b/py/torch_tensorrt/fx/converters/__init__.py @@ -2,17 +2,17 @@ import tensorrt as trt if hasattr(trt, "__version__"): - from .acc_ops_converters import * # noqa: F401 F403 from .adaptive_avgpool import * # noqa: F401 F403 from .add import * # noqa: F401 F403 - from .aten_ops_converters import * # noqa: F401 F403 from .batchnorm import * # noqa: F401 F403 from .linear import * # noqa: F401 F403 from .maxpool import * # noqa: F401 F403 from .mul import * # noqa: F401 F403 - from .nn_ops_converters import * # noqa: F401 F403 - from .quantization import * # noqa: F401 F403 from .transformation import * # noqa: F401 F403 + from 
.quantization import * # noqa: F401 F403 + from .acc_ops_converters import * # noqa: F401 F403 + from .aten_ops_converters import * # noqa: F401 F403 + from .nn_ops_converters import * # noqa: F401 F403 TRT_LOGGER = trt.Logger() trt.init_libnvinfer_plugins(TRT_LOGGER, "") diff --git a/py/torch_tensorrt/fx/converters/adaptive_avgpool.py b/py/torch_tensorrt/fx/converters/adaptive_avgpool.py index 47112c39ba..8de9987c77 100644 --- a/py/torch_tensorrt/fx/converters/adaptive_avgpool.py +++ b/py/torch_tensorrt/fx/converters/adaptive_avgpool.py @@ -3,6 +3,7 @@ import torch from ..converter_registry import tensorrt_converter + from .converter_utils import extend_mod_attr_to_tuple, mark_as_int8_layer diff --git a/py/torch_tensorrt/fx/converters/add.py b/py/torch_tensorrt/fx/converters/add.py index 60f6837752..c60b0313a3 100644 --- a/py/torch_tensorrt/fx/converters/add.py +++ b/py/torch_tensorrt/fx/converters/add.py @@ -5,6 +5,7 @@ import torch from ..converter_registry import tensorrt_converter + from .converter_utils import get_dyn_range, mark_as_int8_layer diff --git a/py/torch_tensorrt/fx/converters/batchnorm.py b/py/torch_tensorrt/fx/converters/batchnorm.py index 873b2169a7..130991df54 100644 --- a/py/torch_tensorrt/fx/converters/batchnorm.py +++ b/py/torch_tensorrt/fx/converters/batchnorm.py @@ -5,6 +5,7 @@ import torch from ..converter_registry import tensorrt_converter + from .converter_utils import get_dyn_range, mark_as_int8_layer, to_numpy diff --git a/py/torch_tensorrt/fx/converters/impl/activation.py b/py/torch_tensorrt/fx/converters/impl/activation.py index 5c3a0ac5a5..66c16b0892 100644 --- a/py/torch_tensorrt/fx/converters/impl/activation.py +++ b/py/torch_tensorrt/fx/converters/impl/activation.py @@ -1,18 +1,18 @@ +import numpy as np import operator import warnings from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union -import numpy as np - # @manual=//deeplearning/trt/python:py_tensorrt import tensorrt as trt import torch from torch.fx.node import Argument, Target -from torch_tensorrt.fx.converters.converter_utils import ( - SourceIR, - mark_as_int8_layer, - set_layer_name, -) + + +from torch_tensorrt.fx.converters.converter_utils import mark_as_int8_layer +from torch_tensorrt.fx.converters.converter_utils import set_layer_name +from torch_tensorrt.fx.converters.converter_utils import SourceIR + from torch_tensorrt.fx.types import ( TRTNetwork, TRTTensor, diff --git a/py/torch_tensorrt/fx/converters/impl/convolution.py b/py/torch_tensorrt/fx/converters/impl/convolution.py index c2a3fa1b79..84071ed2d4 100644 --- a/py/torch_tensorrt/fx/converters/impl/convolution.py +++ b/py/torch_tensorrt/fx/converters/impl/convolution.py @@ -1,22 +1,23 @@ -from typing import Any, Optional, Sequence, Union - import numpy as np +from typing import Any, Optional, Sequence, Union # @manual=//deeplearning/trt/python:py_tensorrt import tensorrt as trt import torch from torch.fx.node import Target -from torch_tensorrt.fx.converters import acc_ops_converters + from torch_tensorrt.fx.converters.converter_utils import ( SourceIR, extend_attr_to_tuple, get_dyn_range, - get_trt_tensor, - has_dynamic_shape, mark_as_int8_layer, set_layer_name, + has_dynamic_shape, to_numpy, + get_trt_tensor, ) +from torch_tensorrt.fx.converters import acc_ops_converters + from torch_tensorrt.fx.types import ( TRTNetwork, TRTTensor, diff --git a/py/torch_tensorrt/fx/converters/linear.py b/py/torch_tensorrt/fx/converters/linear.py index f234b769b2..e7cca6f76a 100644 --- 
a/py/torch_tensorrt/fx/converters/linear.py +++ b/py/torch_tensorrt/fx/converters/linear.py @@ -3,6 +3,7 @@ import torch from ..converter_registry import tensorrt_converter + from .converter_utils import get_dyn_range, mark_as_int8_layer, to_numpy diff --git a/py/torch_tensorrt/fx/converters/maxpool.py b/py/torch_tensorrt/fx/converters/maxpool.py index 467a03359d..6c64a3b108 100644 --- a/py/torch_tensorrt/fx/converters/maxpool.py +++ b/py/torch_tensorrt/fx/converters/maxpool.py @@ -3,6 +3,7 @@ import torch from ..converter_registry import tensorrt_converter + from .converter_utils import extend_mod_attr_to_tuple, mark_as_int8_layer diff --git a/py/torch_tensorrt/fx/converters/mul.py b/py/torch_tensorrt/fx/converters/mul.py index 9ce185d73b..a1d9858ebd 100644 --- a/py/torch_tensorrt/fx/converters/mul.py +++ b/py/torch_tensorrt/fx/converters/mul.py @@ -5,6 +5,7 @@ import torch from ..converter_registry import tensorrt_converter + from .converter_utils import get_dyn_range, mark_as_int8_layer diff --git a/py/torch_tensorrt/fx/converters/quantization.py b/py/torch_tensorrt/fx/converters/quantization.py index c97da536ca..6b75f93278 100644 --- a/py/torch_tensorrt/fx/converters/quantization.py +++ b/py/torch_tensorrt/fx/converters/quantization.py @@ -3,6 +3,7 @@ import torch from ..converter_registry import tensorrt_converter + from .converter_utils import get_dyn_range, get_inputs_from_args_and_kwargs quantize_per_tensor_inputs = ["input", "scale", "zero_point", "dtype"] diff --git a/py/torch_tensorrt/fx/converters/transformation.py b/py/torch_tensorrt/fx/converters/transformation.py index c5d7e08f62..62cfef8453 100644 --- a/py/torch_tensorrt/fx/converters/transformation.py +++ b/py/torch_tensorrt/fx/converters/transformation.py @@ -3,6 +3,7 @@ import torch from ..converter_registry import tensorrt_converter + from .converter_utils import mark_as_int8_layer From d3e7fcb061ca0f55ecabdab9c12a08f3bac1f4e3 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Fri, 8 Aug 2025 12:22:15 -0700 Subject: [PATCH 5/8] fix import issue (resolve Evan's comments) --- py/torch_tensorrt/dynamo/conversion/impl/activation/base.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/addmm.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/arange.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/cat.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/conv.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/deconv.py | 2 +- .../dynamo/conversion/impl/dynamic_block_quantize.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/embedding.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/full.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/permutation.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/pool.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/quantize.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/reduce.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/select.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/shape.py | 5 ++--- py/torch_tensorrt/dynamo/conversion/impl/shuffle.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/split.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/squeeze.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/unary/base.py | 2 +- py/torch_tensorrt/dynamo/conversion/ops_evaluators.py | 2 +- py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py | 2 +- 23 files changed, 24 insertions(+), 25 deletions(-) diff --git 
a/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py b/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py index edd289e66e..96764dcadc 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py @@ -4,10 +4,10 @@ from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.converters.converter_utils import ( set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor def convert_activation( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/addmm.py b/py/torch_tensorrt/dynamo/conversion/impl/addmm.py index 1a0690852a..1c273dfe7a 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/addmm.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/addmm.py @@ -6,7 +6,7 @@ from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor def addmm( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/arange.py b/py/torch_tensorrt/dynamo/conversion/impl/arange.py index baaf690010..af67a642a0 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/arange.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/arange.py @@ -11,7 +11,7 @@ get_trt_tensor, set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor def arange( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/cat.py b/py/torch_tensorrt/dynamo/conversion/impl/cat.py index 096bc1aa24..b4545cac64 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/cat.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/cat.py @@ -12,8 +12,8 @@ get_positive_dim, get_trt_tensor, ) +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor def cat( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py index e21e7f32a1..960b3ed13e 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py @@ -15,7 +15,7 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.elementwise import ne -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor def where( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/conv.py b/py/torch_tensorrt/dynamo/conversion/impl/conv.py index 8e0fa9130b..26871b644f 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/conv.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/conv.py @@ -17,7 +17,7 @@ to_torch, to_trt_weights, ) -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor def convNd( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/deconv.py b/py/torch_tensorrt/dynamo/conversion/impl/deconv.py index dcfb01d15d..3ee0f24dd6 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/deconv.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/deconv.py @@ -15,10 +15,10 @@ to_torch, to_trt_weights, ) +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.converters.converter_utils import ( set_layer_name, ) -from 
torch_tensorrt.fx.types import TRTTensor def deconvNd( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py index f76a84dea5..cfed9baddf 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py @@ -10,8 +10,8 @@ from torch_tensorrt.dynamo.conversion.converter_utils import ( get_trt_tensor, ) +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor def quantize( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py index 1bfb8c7242..f46f8e9a63 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py @@ -21,7 +21,7 @@ ) from torch_tensorrt.dynamo.conversion.impl.unary import atan, sign from torch_tensorrt.dynamo.conversion.impl.unary.base import convert_unary -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor def trunc_div( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/embedding.py b/py/torch_tensorrt/dynamo/conversion/impl/embedding.py index 4188c63e30..deafe40751 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/embedding.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/embedding.py @@ -16,8 +16,8 @@ set_item, to_numpy, ) +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor def embedding( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/full.py b/py/torch_tensorrt/dynamo/conversion/impl/full.py index fc079f7f32..3eaf6c0f53 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/full.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/full.py @@ -12,7 +12,7 @@ cast_trt_tensor, get_trt_tensor, ) -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor def full( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/permutation.py b/py/torch_tensorrt/dynamo/conversion/impl/permutation.py index 1537d0fdbe..b63bdcbb32 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/permutation.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/permutation.py @@ -12,7 +12,7 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor def permute( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/pool.py b/py/torch_tensorrt/dynamo/conversion/impl/pool.py index 4e18aaaef2..4035a9f7aa 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/pool.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/pool.py @@ -10,11 +10,11 @@ extend_attr_to_tuple, get_positive_dim, ) +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.converters.converter_utils import ( has_dynamic_shape, set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor def avg_poolNd( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py index 2aeedb144e..319e767835 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py @@ -9,8 +9,8 @@ from torch_tensorrt.dynamo.conversion 
import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import get_trt_tensor, to_torch +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor def get_ir(target: Target) -> SourceIR: diff --git a/py/torch_tensorrt/dynamo/conversion/impl/reduce.py b/py/torch_tensorrt/dynamo/conversion/impl/reduce.py index a61a11772d..9e96d867d7 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/reduce.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/reduce.py @@ -10,8 +10,8 @@ get_axes_for_reduce_op, get_positive_dim, ) +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor def amax( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/select.py b/py/torch_tensorrt/dynamo/conversion/impl/select.py index fe6ade2e68..744f88b875 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/select.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/select.py @@ -17,12 +17,12 @@ ) from torch_tensorrt.dynamo.conversion.impl.elementwise import convert_binary_elementwise from torch_tensorrt.dynamo.conversion.impl.shape import shape as get_shape +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.dynamo.utils import DYNAMIC_DIM from torch_tensorrt.fx.converters.converter_utils import ( has_dynamic_shape, set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/shape.py b/py/torch_tensorrt/dynamo/conversion/impl/shape.py index c2dfac802b..31a124759f 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/shape.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/shape.py @@ -3,6 +3,7 @@ from typing import List, Optional, Tuple import numpy as np +import tensorrt as trt import torch from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -15,14 +16,12 @@ from torch_tensorrt.dynamo.conversion.impl.elementwise.base import ( convert_binary_elementwise, ) +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.converters.converter_utils import ( Frameworks, set_layer_name, unified_dtype_converter, ) -from torch_tensorrt.fx.types import TRTTensor - -import tensorrt as trt def shape( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py b/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py index 975480f390..7e0b1b810a 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py @@ -14,7 +14,7 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor def reshape( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/split.py b/py/torch_tensorrt/dynamo/conversion/impl/split.py index 0f07ceb7ab..0113d2fa16 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/split.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/split.py @@ -4,11 +4,11 @@ from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape +from torch_tensorrt.dynamo.types import TRTTensor from 
torch_tensorrt.fx.converters.converter_utils import ( has_dynamic_shape, set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor def split( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py b/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py index dd6a2b9863..dfcc03b248 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py @@ -7,7 +7,7 @@ get_positive_dim, set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor def squeeze( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py b/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py index 5da8bad252..1930455ba1 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py @@ -4,8 +4,8 @@ from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor def convert_unary( diff --git a/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py b/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py index a2feb99d56..87152427d6 100644 --- a/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py +++ b/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py @@ -13,7 +13,7 @@ ConverterRegistry, dynamo_tensorrt_converter, ) -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.fx.utils import Frameworks, unified_dtype_converter _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py index 923ca9be6c..5576a44e3f 100644 --- a/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py @@ -10,7 +10,7 @@ from torch_tensorrt.dynamo.conversion._ConverterRegistry import ( dynamo_tensorrt_converter, ) -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.types import TRTTensor _LOGGER: logging.Logger = logging.getLogger(__name__) From ae71013a847d21e55e041da44703e1d5683d8633 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Fri, 8 Aug 2025 12:28:17 -0700 Subject: [PATCH 6/8] resolve Evan's comments --- py/torch_tensorrt/dynamo/conversion/impl/cat.py | 2 +- .../dynamo/conversion/impl/dynamic_block_quantize.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/embedding.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/quantize.py | 7 +++++-- py/torch_tensorrt/dynamo/conversion/impl/reduce.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/unary/base.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py | 2 +- 7 files changed, 11 insertions(+), 8 deletions(-) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/cat.py b/py/torch_tensorrt/dynamo/conversion/impl/cat.py index b4545cac64..5ba5e59639 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/cat.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/cat.py @@ -11,9 +11,9 @@ cast_trt_tensor, get_positive_dim, get_trt_tensor, + set_layer_name, ) from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import set_layer_name def cat( diff --git 
a/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py index cfed9baddf..4b4b5278d1 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py @@ -9,9 +9,9 @@ from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import ( get_trt_tensor, + set_layer_name, ) from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import set_layer_name def quantize( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/embedding.py b/py/torch_tensorrt/dynamo/conversion/impl/embedding.py index deafe40751..3bea454bdd 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/embedding.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/embedding.py @@ -14,10 +14,10 @@ cast_trt_tensor, get_trt_tensor, set_item, + set_layer_name, to_numpy, ) from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import set_layer_name def embedding( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py index 319e767835..96ee9b2ba4 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py @@ -8,9 +8,12 @@ from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.dynamo.conversion.converter_utils import get_trt_tensor, to_torch +from torch_tensorrt.dynamo.conversion.converter_utils import ( + get_trt_tensor, + set_layer_name, + to_torch, +) from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import set_layer_name def get_ir(target: Target) -> SourceIR: diff --git a/py/torch_tensorrt/dynamo/conversion/impl/reduce.py b/py/torch_tensorrt/dynamo/conversion/impl/reduce.py index 9e96d867d7..68a9151e91 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/reduce.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/reduce.py @@ -9,9 +9,9 @@ cast_trt_tensor, get_axes_for_reduce_op, get_positive_dim, + set_layer_name, ) from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import set_layer_name def amax( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py b/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py index 1930455ba1..dff3147726 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py @@ -4,8 +4,8 @@ from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext +from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import set_layer_name def convert_unary( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py index 89e490392d..7256a61c19 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py @@ -10,9 +10,9 @@ from torch_tensorrt.dynamo.conversion.converter_utils import ( 
cast_trt_tensor, get_trt_tensor, + set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.unary.base import convert_unary -from torch_tensorrt.fx.converters.converter_utils import set_layer_name from torch_tensorrt.fx.types import TRTDataType, TRTTensor From bfbac9a6d8c028e06b050b00385dcea3200fd47b Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Fri, 8 Aug 2025 13:11:27 -0700 Subject: [PATCH 7/8] resolve comments --- .../dynamo/conversion/aten_ops_converters.py | 2 +- .../dynamo/conversion/impl/activation/base.py | 4 ++-- .../dynamo/conversion/impl/activation/ops.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/addmm.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/arange.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/cast.py | 3 ++- py/torch_tensorrt/dynamo/conversion/impl/cat.py | 2 +- .../dynamo/conversion/impl/condition/ops.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/conv.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/deconv.py | 6 ++---- .../dynamo/conversion/impl/dynamic_block_quantize.py | 2 +- .../dynamo/conversion/impl/elementwise/ops.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/embedding.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/full.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/grid.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/linear.py | 5 +++-- py/torch_tensorrt/dynamo/conversion/impl/matmul.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/nccl_ops.py | 3 ++- py/torch_tensorrt/dynamo/conversion/impl/pad.py | 2 +- .../dynamo/conversion/impl/permutation.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/pool.py | 4 +--- py/torch_tensorrt/dynamo/conversion/impl/prelu.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/quantize.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/reduce.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/select.py | 8 +++----- py/torch_tensorrt/dynamo/conversion/impl/shape.py | 10 ++++------ py/torch_tensorrt/dynamo/conversion/impl/shuffle.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/slice/base.py | 7 ++++--- py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py | 4 ++-- py/torch_tensorrt/dynamo/conversion/impl/split.py | 6 +++--- py/torch_tensorrt/dynamo/conversion/impl/squeeze.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/topk.py | 4 ++-- py/torch_tensorrt/dynamo/conversion/impl/unary/base.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py | 3 ++- py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py | 2 +- py/torch_tensorrt/dynamo/conversion/impl/upsample.py | 2 +- py/torch_tensorrt/dynamo/conversion/ops_evaluators.py | 7 +++++-- .../dynamo/conversion/prims_ops_converters.py | 2 +- 38 files changed, 61 insertions(+), 61 deletions(-) diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py index fe9a01b06c..c3f79d1233 100644 --- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py @@ -6,6 +6,7 @@ import numpy as np import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Argument, Node, Target from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -21,7 +22,6 @@ get_positive_dim, is_only_operator_on_placeholder, ) -from torch_tensorrt.dynamo.types import TRTTensor _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py 
b/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py index 96764dcadc..ed30e2ff18 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py @@ -1,11 +1,11 @@ from typing import Any, Callable, Optional import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import ( +from torch_tensorrt.dynamo.conversion.converter_utils import ( set_layer_name, ) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/activation/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/activation/ops.py index eb981f2031..af47a8e2c9 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/activation/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/activation/ops.py @@ -3,11 +3,11 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.impl.activation.base import convert_activation -from torch_tensorrt.dynamo.types import TRTTensor def relu( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/addmm.py b/py/torch_tensorrt/dynamo/conversion/impl/addmm.py index 1c273dfe7a..46ee1f974c 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/addmm.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/addmm.py @@ -2,11 +2,11 @@ import numpy as np import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.dynamo.types import TRTTensor def addmm( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/arange.py b/py/torch_tensorrt/dynamo/conversion/impl/arange.py index af67a642a0..7595e97171 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/arange.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/arange.py @@ -2,6 +2,7 @@ import numpy as np import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -11,7 +12,6 @@ get_trt_tensor, set_layer_name, ) -from torch_tensorrt.dynamo.types import TRTTensor def arange( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/cast.py b/py/torch_tensorrt/dynamo/conversion/impl/cast.py index 0b69f98fc9..4ad39d4563 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/cast.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/cast.py @@ -4,6 +4,8 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import DataType as TRTDataType +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -13,7 +15,6 @@ cast_trt_tensor, get_trt_tensor, ) -from torch_tensorrt.fx.types import TRTDataType, TRTTensor LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/cat.py b/py/torch_tensorrt/dynamo/conversion/impl/cat.py index 
5ba5e59639..68bbcc31d0 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/cat.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/cat.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -13,7 +14,6 @@ get_trt_tensor, set_layer_name, ) -from torch_tensorrt.dynamo.types import TRTTensor def cat( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py index 960b3ed13e..b7739c3b3f 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -15,7 +16,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.elementwise import ne -from torch_tensorrt.dynamo.types import TRTTensor def where( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/conv.py b/py/torch_tensorrt/dynamo/conversion/impl/conv.py index 26871b644f..513346a63b 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/conv.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/conv.py @@ -5,6 +5,7 @@ # @manual=//deeplearning/trt/python:py_tensorrt import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -17,7 +18,6 @@ to_torch, to_trt_weights, ) -from torch_tensorrt.dynamo.types import TRTTensor def convNd( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/deconv.py b/py/torch_tensorrt/dynamo/conversion/impl/deconv.py index 3ee0f24dd6..b9ee582d26 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/deconv.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/deconv.py @@ -5,6 +5,7 @@ # @manual=//deeplearning/trt/python:py_tensorrt import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -12,13 +13,10 @@ SourceIR, get_trt_tensor, has_dynamic_shape, + set_layer_name, to_torch, to_trt_weights, ) -from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import ( - set_layer_name, -) def deconvNd( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py index 4b4b5278d1..b84c7a2248 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.experimental.proxy_tensor import unset_fake_temporarily from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -11,7 +12,6 @@ get_trt_tensor, set_layer_name, ) -from torch_tensorrt.dynamo.types import TRTTensor def quantize( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py 
b/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py index f46f8e9a63..b425973661 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py @@ -4,6 +4,7 @@ import tensorrt as trt import torch import torch_tensorrt.dynamo.conversion.impl as impl +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -21,7 +22,6 @@ ) from torch_tensorrt.dynamo.conversion.impl.unary import atan, sign from torch_tensorrt.dynamo.conversion.impl.unary.base import convert_unary -from torch_tensorrt.dynamo.types import TRTTensor def trunc_div( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/embedding.py b/py/torch_tensorrt/dynamo/conversion/impl/embedding.py index 3bea454bdd..a712641f44 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/embedding.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/embedding.py @@ -6,6 +6,7 @@ import tensorrt as trt import torch import torch_tensorrt.dynamo.conversion.impl as impl +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -17,7 +18,6 @@ set_layer_name, to_numpy, ) -from torch_tensorrt.dynamo.types import TRTTensor def embedding( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/full.py b/py/torch_tensorrt/dynamo/conversion/impl/full.py index 3eaf6c0f53..5c70d4772f 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/full.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/full.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo.conversion import impl @@ -12,7 +13,6 @@ cast_trt_tensor, get_trt_tensor, ) -from torch_tensorrt.dynamo.types import TRTTensor def full( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/grid.py b/py/torch_tensorrt/dynamo/conversion/impl/grid.py index 302d286237..00211fb520 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/grid.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/grid.py @@ -1,11 +1,11 @@ from typing import Optional import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name -from torch_tensorrt.dynamo.types import TRTTensor # bilinear, nearest, bicubic GridSamplerInterpolationMode = { diff --git a/py/torch_tensorrt/dynamo/conversion/impl/linear.py b/py/torch_tensorrt/dynamo/conversion/impl/linear.py index 5e859a46d3..3827284950 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/linear.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/linear.py @@ -3,11 +3,12 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target +from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.dynamo.conversion.converter_utils import SourceIR, get_trt_tensor -from torch_tensorrt.dynamo.types import TRTTensor +from 
torch_tensorrt.dynamo.conversion.converter_utils import get_trt_tensor def linear( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/matmul.py b/py/torch_tensorrt/dynamo/conversion/impl/matmul.py index 83ea3dd99b..65e4f53328 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/matmul.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/matmul.py @@ -2,6 +2,7 @@ import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -12,7 +13,6 @@ get_trt_tensor, set_layer_name, ) -from torch_tensorrt.dynamo.types import TRTTensor def matrix_multiply( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/nccl_ops.py b/py/torch_tensorrt/dynamo/conversion/impl/nccl_ops.py index c28c5bcc7d..e64c06ca39 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/nccl_ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/nccl_ops.py @@ -5,8 +5,9 @@ import numpy as np import tensorrt as trt from torch.fx.node import Argument, Target +from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.fx.converters.converter_utils import SourceIR, set_layer_name +from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name # class for AllReduce diff --git a/py/torch_tensorrt/dynamo/conversion/impl/pad.py b/py/torch_tensorrt/dynamo/conversion/impl/pad.py index 731058a122..863b6bc218 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/pad.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/pad.py @@ -2,6 +2,7 @@ import numpy as np import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -11,7 +12,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.dynamo.types import TRTTensor """ Note: IPaddingLayer is deprecated in TensorRT 8.2 and will be removed in TensorRT 10.0. 
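The recurring edit across these converter modules is replacing the torch_tensorrt re-export of TRTTensor with a direct import from the tensorrt package, aliased so existing type annotations are untouched. A minimal sketch of the pattern, assuming only an installed tensorrt package (the describe helper is illustrative, not part of the patch):

import tensorrt as trt
from tensorrt import ITensor as TRTTensor  # alias: TRTTensor is exactly trt.ITensor

def describe(tensor: TRTTensor) -> str:
    # The alias is the same class, so isinstance checks against either name agree.
    assert isinstance(tensor, trt.ITensor)
    return f"{tensor.name}: shape={tensor.shape}, dtype={tensor.dtype}"

Importing the alias straight from tensorrt drops the converters' dependency on the legacy torch_tensorrt.fx and torch_tensorrt.dynamo type shims without changing any call sites.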
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/permutation.py b/py/torch_tensorrt/dynamo/conversion/impl/permutation.py index b63bdcbb32..60ab762fa6 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/permutation.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/permutation.py @@ -1,6 +1,7 @@ from typing import Optional, Sequence, Union import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -12,7 +13,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.dynamo.types import TRTTensor def permute( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/pool.py b/py/torch_tensorrt/dynamo/conversion/impl/pool.py index 4035a9f7aa..757f7209d9 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/pool.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/pool.py @@ -3,15 +3,13 @@ import tensorrt as trt import torch_tensorrt.dynamo.conversion.impl as impl +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import ( extend_attr_to_tuple, get_positive_dim, -) -from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import ( has_dynamic_shape, set_layer_name, ) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/prelu.py b/py/torch_tensorrt/dynamo/conversion/impl/prelu.py index 166ce16367..8e218f49cb 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/prelu.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/prelu.py @@ -1,10 +1,10 @@ from typing import Optional +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name -from torch_tensorrt.dynamo.types import TRTTensor def prelu( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py index 96ee9b2ba4..8dd32b11fc 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.experimental.proxy_tensor import unset_fake_temporarily from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -13,7 +14,6 @@ set_layer_name, to_torch, ) -from torch_tensorrt.dynamo.types import TRTTensor def get_ir(target: Target) -> SourceIR: diff --git a/py/torch_tensorrt/dynamo/conversion/impl/reduce.py b/py/torch_tensorrt/dynamo/conversion/impl/reduce.py index 68a9151e91..2bd7d7de36 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/reduce.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/reduce.py @@ -1,6 +1,7 @@ from typing import Optional, Sequence, Tuple, Union import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -11,7 +12,6 @@ get_positive_dim, set_layer_name, ) -from torch_tensorrt.dynamo.types import TRTTensor def amax( diff 
--git a/py/torch_tensorrt/dynamo/conversion/impl/select.py b/py/torch_tensorrt/dynamo/conversion/impl/select.py index 744f88b875..c4d44a07ea 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/select.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/select.py @@ -4,6 +4,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -13,16 +14,13 @@ cast_trt_tensor, get_positive_dim, get_trt_tensor, + has_dynamic_shape, + set_layer_name, to_numpy, ) from torch_tensorrt.dynamo.conversion.impl.elementwise import convert_binary_elementwise from torch_tensorrt.dynamo.conversion.impl.shape import shape as get_shape -from torch_tensorrt.dynamo.types import TRTTensor from torch_tensorrt.dynamo.utils import DYNAMIC_DIM -from torch_tensorrt.fx.converters.converter_utils import ( - has_dynamic_shape, - set_layer_name, -) _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/shape.py b/py/torch_tensorrt/dynamo/conversion/impl/shape.py index 31a124759f..526d43713a 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/shape.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/shape.py @@ -5,23 +5,21 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import ( + Frameworks, cast_trt_tensor, get_positive_dim, get_trt_tensor, + set_layer_name, + unified_dtype_converter, ) from torch_tensorrt.dynamo.conversion.impl.elementwise.base import ( convert_binary_elementwise, ) -from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import ( - Frameworks, - set_layer_name, - unified_dtype_converter, -) def shape( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py b/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py index 7e0b1b810a..2ef6c740ae 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch_tensorrt.dynamo.conversion.impl as impl +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -14,7 +15,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.dynamo.types import TRTTensor def reshape( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/slice/base.py b/py/torch_tensorrt/dynamo/conversion/impl/slice/base.py index a2af840a1f..c2edaceafb 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/slice/base.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/slice/base.py @@ -1,14 +1,15 @@ from typing import Optional +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.fx.converters.converter_utils import ( +from 
torch_tensorrt.dynamo.conversion.converter_utils import ( has_dynamic_shape, set_layer_name, ) -from torch_tensorrt.fx.types import Shape, TRTTensor +from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape +from torch_tensorrt.dynamo.types import Shape def slice( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py index 203bb03553..6a59cfda4c 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py @@ -4,8 +4,8 @@ import numpy as np import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target - from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -26,8 +26,8 @@ from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape from torch_tensorrt.dynamo.conversion.impl.shape import shape as get_shape from torch_tensorrt.dynamo.conversion.impl.slice.base import slice +from torch_tensorrt.dynamo.types import Shape from torch_tensorrt.dynamo.utils import DYNAMIC_DIM -from torch_tensorrt.fx.types import Shape, TRTTensor def slice_op( # TODO: This should be slice not whatever is in base diff --git a/py/torch_tensorrt/dynamo/conversion/impl/split.py b/py/torch_tensorrt/dynamo/conversion/impl/split.py index 0113d2fa16..143a05a5b2 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/split.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/split.py @@ -1,14 +1,14 @@ from typing import List, Optional, Sequence, Union +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.dynamo.types import TRTTensor -from torch_tensorrt.fx.converters.converter_utils import ( +from torch_tensorrt.dynamo.conversion.converter_utils import ( has_dynamic_shape, set_layer_name, ) +from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape def split( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py b/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py index dfcc03b248..371b0a3c72 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py @@ -1,5 +1,6 @@ from typing import Optional, Sequence, Union +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -7,7 +8,6 @@ get_positive_dim, set_layer_name, ) -from torch_tensorrt.dynamo.types import TRTTensor def squeeze( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/topk.py b/py/torch_tensorrt/dynamo/conversion/impl/topk.py index 3b6549d285..053a46ce2b 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/topk.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/topk.py @@ -1,6 +1,7 @@ from typing import Optional, Tuple, Union import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -10,14 +11,13 @@ flatten_dims, get_axes_for_reduce_op, get_positive_dim, - 
set_layer_name, get_trt_tensor, has_dynamic_shape, + set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.elementwise import convert_binary_elementwise from torch_tensorrt.dynamo.conversion.impl.shape import shape as get_shape from torch_tensorrt.dynamo.utils import DYNAMIC_DIM -from torch_tensorrt.dynamo.types import TRTTensor def argmax_argmin( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py b/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py index dff3147726..51521ceac9 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py @@ -1,11 +1,11 @@ from typing import Optional import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name -from torch_tensorrt.dynamo.types import TRTTensor def convert_unary( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py index 7256a61c19..12f6051457 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py @@ -4,6 +4,8 @@ import tensorrt as trt import torch import torch_tensorrt.dynamo.conversion.impl as impl +from tensorrt import DataType as TRTDataType +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -13,7 +15,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.unary.base import convert_unary -from torch_tensorrt.fx.types import TRTDataType, TRTTensor def exp( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py b/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py index 35f21198d4..1a54b470f9 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py @@ -1,6 +1,7 @@ import logging from typing import List, Optional, Sequence, cast +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -9,7 +10,6 @@ get_trt_tensor, set_layer_name, ) -from torch_tensorrt.dynamo.types import TRTTensor logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/upsample.py b/py/torch_tensorrt/dynamo/conversion/impl/upsample.py index 247179455c..4b47ca5dec 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/upsample.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/upsample.py @@ -1,6 +1,7 @@ from typing import Optional, Sequence import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -9,7 +10,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.dynamo.types import TRTTensor def upsample( diff --git a/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py b/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py index 87152427d6..0c0f2ce043 100644 --- 
a/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py
+++ b/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py
@@ -6,6 +6,7 @@
 import numpy as np
 import torch
+from tensorrt import ITensor as TRTTensor
 from torch.fx.node import Argument, Node, Target
 from torch_tensorrt.dynamo._settings import CompilationSettings
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
@@ -13,8 +14,10 @@
     ConverterRegistry,
     dynamo_tensorrt_converter,
 )
-from torch_tensorrt.dynamo.types import TRTTensor
-from torch_tensorrt.fx.utils import Frameworks, unified_dtype_converter
+from torch_tensorrt.dynamo.conversion.converter_utils import (
+    Frameworks,
+    unified_dtype_converter,
+)

 _LOGGER: logging.Logger = logging.getLogger(__name__)

diff --git a/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py
index 5576a44e3f..8f2da209b1 100644
--- a/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py
+++ b/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py
@@ -2,6 +2,7 @@
 from typing import Dict, Sequence, Tuple, Union

 import torch
+from tensorrt import ITensor as TRTTensor
 from torch.fx.node import Argument, Target
 from torch_tensorrt.dynamo._settings import CompilationSettings
 from torch_tensorrt.dynamo._SourceIR import SourceIR
@@ -10,7 +11,6 @@
 from torch_tensorrt.dynamo.conversion._ConverterRegistry import (
     dynamo_tensorrt_converter,
 )
-from torch_tensorrt.dynamo.types import TRTTensor

 _LOGGER: logging.Logger = logging.getLogger(__name__)

From 232b04683ec72960f38b232a6cae124d44eea9a6 Mon Sep 17 00:00:00 2001
From: lanluo-nvidia
Date: Sun, 10 Aug 2025 11:57:53 -0700
Subject: [PATCH 8/8] resolve comments

---
 .../dynamo/conversion/converter_utils.py |  3 +-
 .../dynamo/conversion/impl/shape.py      |  6 ++--
 .../dynamo/conversion/ops_evaluators.py  |  2 +-
 py/torch_tensorrt/dynamo/utils.py        | 31 +++++++++++++++++++
 4 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/py/torch_tensorrt/dynamo/conversion/converter_utils.py b/py/torch_tensorrt/dynamo/conversion/converter_utils.py
index 896bf37b42..2c554f57c4 100644
--- a/py/torch_tensorrt/dynamo/conversion/converter_utils.py
+++ b/py/torch_tensorrt/dynamo/conversion/converter_utils.py
@@ -19,11 +19,10 @@
 import numpy as np
 import tensorrt as trt
 import torch
+import torch_tensorrt.dynamo.conversion.impl as impl
 from torch.fx.experimental.proxy_tensor import unset_fake_temporarily
 from torch.fx.node import Argument, Target
 from torch.fx.passes.shape_prop import TensorMetadata
-
-import torch_tensorrt.dynamo.conversion.impl as impl
 from torch_tensorrt import _enums
 from torch_tensorrt.dynamo._settings import CompilationSettings
 from torch_tensorrt.dynamo._SourceIR import SourceIR
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/shape.py b/py/torch_tensorrt/dynamo/conversion/impl/shape.py
index 526d43713a..27af02e5bb 100644
--- a/py/torch_tensorrt/dynamo/conversion/impl/shape.py
+++ b/py/torch_tensorrt/dynamo/conversion/impl/shape.py
@@ -10,16 +10,18 @@
 from torch_tensorrt.dynamo._SourceIR import SourceIR
 from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext
 from torch_tensorrt.dynamo.conversion.converter_utils import (
-    Frameworks,
     cast_trt_tensor,
     get_positive_dim,
     get_trt_tensor,
     set_layer_name,
-    unified_dtype_converter,
 )
 from torch_tensorrt.dynamo.conversion.impl.elementwise.base import (
     convert_binary_elementwise,
 )
+from torch_tensorrt.dynamo.utils import (
+    Frameworks,
+    unified_dtype_converter,
+)


 def shape(
diff --git a/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py b/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py
index 0c0f2ce043..9401e3d99d 100644
--- a/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py
+++ b/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py
@@ -14,7 +14,7 @@
     ConverterRegistry,
     dynamo_tensorrt_converter,
 )
-from torch_tensorrt.dynamo.conversion.converter_utils import (
+from torch_tensorrt.dynamo.utils import (
     Frameworks,
     unified_dtype_converter,
 )
diff --git a/py/torch_tensorrt/dynamo/utils.py b/py/torch_tensorrt/dynamo/utils.py
index 0703fd1cb9..de736db1bf 100644
--- a/py/torch_tensorrt/dynamo/utils.py
+++ b/py/torch_tensorrt/dynamo/utils.py
@@ -84,6 +84,37 @@ class Frameworks(Enum):
 }


+def unified_dtype_converter(
+    dtype: Union[TRTDataType, torch.dtype, np.dtype], to: Frameworks
+) -> Union[np.dtype, torch.dtype, TRTDataType]:
+    """
+    Convert TensorRT, Numpy, or Torch data types to any other of those data types.
+
+    Args:
+        dtype (TRTDataType, torch.dtype, np.dtype): A TensorRT, Numpy, or Torch data type.
+        to (Frameworks): The framework to convert the data type to.
+
+    Returns:
+        The equivalent data type in the requested framework.
+    """
+    assert to in Frameworks, f"Expected valid Framework for translation, got {to}"
+    trt_major_version = int(trt.__version__.split(".")[0])
+    if dtype in (np.int8, torch.int8, trt.int8):
+        return DataTypeEquivalence[trt.int8][to]
+    elif trt_major_version >= 7 and dtype in (np.bool_, torch.bool, trt.bool):
+        return DataTypeEquivalence[trt.bool][to]
+    elif dtype in (np.int32, torch.int32, trt.int32):
+        return DataTypeEquivalence[trt.int32][to]
+    elif dtype in (np.int64, torch.int64, trt.int64):
+        return DataTypeEquivalence[trt.int64][to]
+    elif dtype in (np.float16, torch.float16, trt.float16):
+        return DataTypeEquivalence[trt.float16][to]
+    elif dtype in (np.float32, torch.float32, trt.float32):
+        return DataTypeEquivalence[trt.float32][to]
+    else:
+        raise TypeError("%s is not a supported dtype" % dtype)
+
 def deallocate_module(module: torch.fx.GraphModule, delete_module: bool = True) -> None:
     """
     This is a helper function to delete the instance of module. We first move it to CPU and then
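With the helper relocated, callers import it from torch_tensorrt.dynamo.utils instead of the conversion utilities. A usage sketch, assuming the Frameworks enum keeps the NUMPY, TORCH, and TRT members it had in the pre-existing fx utilities:

import numpy as np
import tensorrt as trt
import torch
from torch_tensorrt.dynamo.utils import Frameworks, unified_dtype_converter

# Translate a dtype from any supported framework into its equivalent in another.
assert unified_dtype_converter(torch.float16, Frameworks.TRT) == trt.float16
assert unified_dtype_converter(trt.int32, Frameworks.NUMPY) == np.int32
assert unified_dtype_converter(np.float32, Frameworks.TORCH) == torch.float32

Keeping the dtype table in dynamo/utils.py lets ops_evaluators.py and impl/shape.py share one converter without reaching back into the deprecated torch_tensorrt.fx package.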