Breaking Change: Remove the deprecated int8 calibrator related code #3759

Merged: 10 commits, Aug 10, 2025
6 changes: 0 additions & 6 deletions core/conversion/conversion.cpp
@@ -202,13 +202,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef<const torch::jit::Value*> input
TORCHTRT_CHECK(
profile->isValid(),
"Optimization profile is invalid, please check the input range provided (conversion.AddInputs)");

ctx->cfg->addOptimizationProfile(profile);
#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)
if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) {
ctx->cfg->setCalibrationProfile(profile);
}
#endif
}

void MarkOutputs(ConversionCtx* ctx, at::ArrayRef<const torch::jit::Value*> outputs) {
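
Note on the hunk above: dynamic-shape engines still register an optimization profile, but with calibrator-driven PTQ removed the profile is no longer mirrored into a calibration profile. A minimal sketch of the TensorRT builder-config calls involved, assuming a builder and config already exist and using a made-up input name and shapes:

#include "NvInfer.h"

// Sketch only (not part of this PR): register a dynamic-shape optimization profile.
// The tensor name "input" and the dimensions are illustrative.
void add_profile(nvinfer1::IBuilder* builder, nvinfer1::IBuilderConfig* config) {
  nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
  profile->setDimensions("input", nvinfer1::OptProfileSelector::kMIN, nvinfer1::Dims4{1, 3, 224, 224});
  profile->setDimensions("input", nvinfer1::OptProfileSelector::kOPT, nvinfer1::Dims4{8, 3, 224, 224});
  profile->setDimensions("input", nvinfer1::OptProfileSelector::kMAX, nvinfer1::Dims4{16, 3, 224, 224});
  config->addOptimizationProfile(profile);
  // Previously, when kINT8 was among the enabled precisions, the same profile was
  // also handed to config->setCalibrationProfile(profile); that call is gone after this PR.
}
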
13 changes: 2 additions & 11 deletions core/conversion/conversionctx/ConversionCtx.cpp
@@ -31,8 +31,7 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
if (s.device.device_type == nvinfer1::DeviceType::kDLA) {
os << "\n DLACore: " << s.device.dla_core;
}
os << "\n Engine Capability: " << s.capability \
<< "\n Calibrator Created: " << (s.calibrator != nullptr);
os << "\n Engine Capability: " << s.capability;
return os;
}
// clang-format on
@@ -64,15 +63,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
cfg->setFlag(nvinfer1::BuilderFlag::kFP16);
break;
case nvinfer1::DataType::kINT8:
TORCHTRT_CHECK(
builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8");
cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
if (!settings.calibrator) {
LOG_INFO(
"Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
} else {
cfg->setInt8Calibrator(settings.calibrator);
}
LOG_DEBUG("INT8 precision has been enabled, we assume the network has Q/DQ nodes obtained from modelopt");
break;
case nvinfer1::DataType::kFLOAT:
break;
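
Note on the INT8 case above: the builder flag is still set, but no calibrator is attached and no platformHasFastInt8 check is made; scale information is expected to come from Q/DQ layers already present in the network. A rough sketch of what the simplified path amounts to, with illustrative names:

#include "NvInfer.h"

// Sketch of the behavior after this change (names are illustrative): INT8 is
// enabled purely via the builder flag, and quantization parameters come from
// IQuantizeLayer / IDequantizeLayer nodes inserted by QAT or a toolkit such as
// NVIDIA ModelOpt, not from an IInt8Calibrator.
void enable_int8(nvinfer1::IBuilderConfig* cfg) {
  cfg->setFlag(nvinfer1::BuilderFlag::kINT8);
  // Before this PR, a user-provided calibrator would have been attached here via
  // cfg->setInt8Calibrator(settings.calibrator); that path no longer exists.
}
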
1 change: 0 additions & 1 deletion core/conversion/conversionctx/ConversionCtx.h
@@ -26,7 +26,6 @@ struct BuilderSettings {
bool allow_shape_tensors = false;
ir::Device device;
nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
nvinfer1::IInt8Calibrator* calibrator = nullptr;
uint64_t num_avg_timing_iters = 1;
uint64_t workspace_size = 0;
uint64_t dla_sram_size = DLA_SRAM_SIZE;
2 changes: 0 additions & 2 deletions cpp/BUILD
@@ -7,14 +7,12 @@ cc_library(
srcs = [
"src/compile_spec.cpp",
"src/logging.cpp",
"src/ptq.cpp",
"src/torch_tensorrt.cpp",
"src/types.cpp",
],
hdrs = [
"include/torch_tensorrt/logging.h",
"include/torch_tensorrt/macros.h",
"include/torch_tensorrt/ptq.h",
"include/torch_tensorrt/torch_tensorrt.h",
],
linkstatic = True,
2 changes: 0 additions & 2 deletions cpp/CMakeLists.txt
@@ -4,15 +4,13 @@ add_library(${lib_name} OBJECT)
set(CXX_SRCS
"${CMAKE_CURRENT_SOURCE_DIR}/src/compile_spec.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/logging.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ptq.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/torch_tensorrt.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/src/types.cpp"
)

set(HEADER_FILES
"${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/logging.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/macros.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/ptq.h"
"${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/torch_tensorrt.h"
)

1 change: 0 additions & 1 deletion cpp/bin/torchtrtc/fileio.h
@@ -23,7 +23,6 @@
#include "torch/torch.h"

#include "torch_tensorrt/logging.h"
#include "torch_tensorrt/ptq.h"
#include "torch_tensorrt/torch_tensorrt.h"

namespace torchtrtc {
13 changes: 3 additions & 10 deletions cpp/bin/torchtrtc/main.cpp
@@ -7,7 +7,6 @@
#include "torch/script.h"

#include "torch_tensorrt/logging.h"
#include "torch_tensorrt/ptq.h"
#include "torch_tensorrt/torch_tensorrt.h"

#include "accuracy.h"
@@ -335,8 +334,6 @@ int main(int argc, char** argv) {
calibration_cache_file_path = torchtrtc::fileio::resolve_path(args::get(calibration_cache_file));
}

auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path);

compile_settings.require_full_compilation = require_full_compilation;

if (torch_executed_ops || torch_executed_mods) {
@@ -367,13 +364,9 @@ int main(int argc, char** argv) {
compile_settings.enabled_precisions.insert(torch::kF16);
} else if (dtype == torchtrt::DataType::kChar) {
compile_settings.enabled_precisions.insert(torch::kI8);
if (calibration_cache_file) {
compile_settings.ptq_calibrator = calibrator;
} else {
torchtrt::logging::log(
torchtrt::logging::Level::kINFO,
"Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks");
}
torchtrt::logging::log(
torchtrt::logging::Level::kDEBUG,
"Int8 precision has been enabled which assumes the network has Q/DQ nodes obtained");
} else {
std::stringstream ss;
ss << "Invalid precision given for enabled kernel precision, options are [ float | float32 | f32 | fp32 | half | float16 | f16 | fp16 | char | int8 | i8 ], found: ";
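
For C++ API users, the net effect of the torchtrtc change above is that int8 is requested like any other precision; there is no calibrator to construct and no ptq_calibrator field to populate. A hedged sketch using the public TorchScript frontend (the module and input shape are placeholders, not code from this repo):

#include "torch/script.h"
#include "torch_tensorrt/torch_tensorrt.h"

// Hedged sketch: compile a TorchScript module with int8 enabled. Since the
// calibrator is gone, the module is expected to already carry Q/DQ nodes
// (explicit quantization).
torch::jit::Module compile_int8(torch::jit::Module& mod) {
  auto spec = torch_tensorrt::torchscript::CompileSpec({{1, 3, 224, 224}});  // placeholder input shape
  spec.enabled_precisions.insert(torch::kI8);
  return torch_tensorrt::torchscript::compile(mod, spec);
}
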
1 change: 0 additions & 1 deletion cpp/bin/torchtrtc/parser_util.h
@@ -9,7 +9,6 @@
#include "torch/torch.h"

#include "torch_tensorrt/logging.h"
#include "torch_tensorrt/ptq.h"
#include "torch_tensorrt/torch_tensorrt.h"

namespace torchtrtc {
3 changes: 0 additions & 3 deletions cpp/include/torch_tensorrt/macros.h
@@ -30,9 +30,6 @@
STR(TORCH_TENSORRT_MAJOR_VERSION) \
"." STR(TORCH_TENSORRT_MINOR_VERSION) "." STR(TORCH_TENSORRT_PATCH_VERSION)

#define TORCH_TENSORRT_PTQ_DEPRECATION \
[[deprecated( \
"Int8 PTQ Calibrator has been deprecated by TensorRT, please plan on porting to a NVIDIA Model Optimizer Toolkit based workflow. See: https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_ptq.html for more details")]]
// Setup namespace aliases for ease of use
namespace torch_tensorrt {
namespace torchscript {}
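
The deleted TORCH_TENSORRT_PTQ_DEPRECATION macro expanded to a C++14 [[deprecated]] attribute that was stamped onto the PTQ calibrator factory declarations. A generic illustration of that pattern, with hypothetical macro and function names:

// Illustration only; the names below are hypothetical, not the Torch-TensorRT ones.
#define MY_PTQ_DEPRECATION \
  [[deprecated("PTQ calibrators are deprecated; move to a Q/DQ (ModelOpt) based workflow")]]

MY_PTQ_DEPRECATION
int make_calibrator();  // any declaration tagged with the macro emits a compile-time warning at call sites
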