diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp index f8a26e8d77..25f6d5da5c 100644 --- a/core/conversion/conversion.cpp +++ b/core/conversion/conversion.cpp @@ -202,13 +202,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef input TORCHTRT_CHECK( profile->isValid(), "Optimization profile is invalid, please check the input range provided (conversion.AddInputs)"); - ctx->cfg->addOptimizationProfile(profile); -#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1) - if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) { - ctx->cfg->setCalibrationProfile(profile); - } -#endif } void MarkOutputs(ConversionCtx* ctx, at::ArrayRef outputs) { diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp index 2eb363706f..c0dbacabc5 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -31,8 +31,7 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) { if (s.device.device_type == nvinfer1::DeviceType::kDLA) { os << "\n DLACore: " << s.device.dla_core; } - os << "\n Engine Capability: " << s.capability \ - << "\n Calibrator Created: " << (s.calibrator != nullptr); + os << "\n Engine Capability: " << s.capability; return os; } // clang-format on @@ -64,15 +63,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) cfg->setFlag(nvinfer1::BuilderFlag::kFP16); break; case nvinfer1::DataType::kINT8: - TORCHTRT_CHECK( - builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8"); - cfg->setFlag(nvinfer1::BuilderFlag::kINT8); - if (!settings.calibrator) { - LOG_INFO( - "Int8 precision has been enabled but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. 
For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks"); - } else { - cfg->setInt8Calibrator(settings.calibrator); - } + LOG_DEBUG("INT8 precision has been enabled, we assume the network has Q/DQ nodes obtained from modelopt"); break; case nvinfer1::DataType::kFLOAT: break; diff --git a/core/conversion/conversionctx/ConversionCtx.h b/core/conversion/conversionctx/ConversionCtx.h index 8587885eca..0b5a09490b 100644 --- a/core/conversion/conversionctx/ConversionCtx.h +++ b/core/conversion/conversionctx/ConversionCtx.h @@ -26,7 +26,6 @@ struct BuilderSettings { bool allow_shape_tensors = false; ir::Device device; nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD; - nvinfer1::IInt8Calibrator* calibrator = nullptr; uint64_t num_avg_timing_iters = 1; uint64_t workspace_size = 0; uint64_t dla_sram_size = DLA_SRAM_SIZE; diff --git a/cpp/BUILD b/cpp/BUILD index e5cb1558e9..2dc87c6039 100644 --- a/cpp/BUILD +++ b/cpp/BUILD @@ -7,14 +7,12 @@ cc_library( srcs = [ "src/compile_spec.cpp", "src/logging.cpp", - "src/ptq.cpp", "src/torch_tensorrt.cpp", "src/types.cpp", ], hdrs = [ "include/torch_tensorrt/logging.h", "include/torch_tensorrt/macros.h", - "include/torch_tensorrt/ptq.h", "include/torch_tensorrt/torch_tensorrt.h", ], linkstatic = True, diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0c0e5a43f0..690dca2749 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -4,7 +4,6 @@ add_library(${lib_name} OBJECT) set(CXX_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/src/compile_spec.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/logging.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/src/ptq.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/torch_tensorrt.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/types.cpp" ) @@ -12,7 +11,6 @@ set(CXX_SRCS set(HEADER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/logging.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/macros.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/ptq.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/torch_tensorrt.h" ) diff --git a/cpp/bin/torchtrtc/fileio.h b/cpp/bin/torchtrtc/fileio.h index ed52d566a1..c4d8bb50c4 100644 --- a/cpp/bin/torchtrtc/fileio.h +++ b/cpp/bin/torchtrtc/fileio.h @@ -23,7 +23,6 @@ #include "torch/torch.h" #include "torch_tensorrt/logging.h" -#include "torch_tensorrt/ptq.h" #include "torch_tensorrt/torch_tensorrt.h" namespace torchtrtc { diff --git a/cpp/bin/torchtrtc/main.cpp b/cpp/bin/torchtrtc/main.cpp index c36cfdd0fc..874cb96ef3 100644 --- a/cpp/bin/torchtrtc/main.cpp +++ b/cpp/bin/torchtrtc/main.cpp @@ -7,7 +7,6 @@ #include "torch/script.h" #include "torch_tensorrt/logging.h" -#include "torch_tensorrt/ptq.h" #include "torch_tensorrt/torch_tensorrt.h" #include "accuracy.h" @@ -335,8 +334,6 @@ int main(int argc, char** argv) { calibration_cache_file_path = torchtrtc::fileio::resolve_path(args::get(calibration_cache_file)); } - auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path); - compile_settings.require_full_compilation = require_full_compilation; if (torch_executed_ops || torch_executed_mods) { @@ -367,13 +364,9 @@ int main(int argc, char** argv) { compile_settings.enabled_precisions.insert(torch::kF16); } else if (dtype == torchtrt::DataType::kChar) { compile_settings.enabled_precisions.insert(torch::kI8); - if (calibration_cache_file) { - compile_settings.ptq_calibrator = calibrator; - } else { - torchtrt::logging::log( - torchtrt::logging::Level::kINFO, - "Int8 precision has been enabled 
but no calibrator provided. This assumes the network has Q/DQ nodes obtained from Quantization aware training. For more details, refer to https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#work-with-qat-networks"); - } + torchtrt::logging::log( + torchtrt::logging::Level::kDEBUG, + "Int8 precision has been enabled which assumes the network has Q/DQ nodes obtained"); } else { std::stringstream ss; ss << "Invalid precision given for enabled kernel precision, options are [ float | float32 | f32 | fp32 | half | float16 | f16 | fp16 | char | int8 | i8 ], found: "; diff --git a/cpp/bin/torchtrtc/parser_util.h b/cpp/bin/torchtrtc/parser_util.h index 9ed5f6d06b..9cbb4ff994 100644 --- a/cpp/bin/torchtrtc/parser_util.h +++ b/cpp/bin/torchtrtc/parser_util.h @@ -9,7 +9,6 @@ #include "torch/torch.h" #include "torch_tensorrt/logging.h" -#include "torch_tensorrt/ptq.h" #include "torch_tensorrt/torch_tensorrt.h" namespace torchtrtc { diff --git a/cpp/include/torch_tensorrt/macros.h b/cpp/include/torch_tensorrt/macros.h index 020b94c114..e31091031b 100644 --- a/cpp/include/torch_tensorrt/macros.h +++ b/cpp/include/torch_tensorrt/macros.h @@ -30,9 +30,6 @@ STR(TORCH_TENSORRT_MAJOR_VERSION) \ "." STR(TORCH_TENSORRT_MINOR_VERSION) "." STR(TORCH_TENSORRT_PATCH_VERSION) -#define TORCH_TENSORRT_PTQ_DEPRECATION \ - [[deprecated( \ - "Int8 PTQ Calibrator has been deprecated by TensorRT, please plan on porting to a NVIDIA Model Optimizer Toolkit based workflow. See: https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_ptq.html for more details")]] // Setup namespace aliases for ease of use namespace torch_tensorrt { namespace torchscript {} diff --git a/cpp/include/torch_tensorrt/ptq.h b/cpp/include/torch_tensorrt/ptq.h deleted file mode 100644 index a2f82346c0..0000000000 --- a/cpp/include/torch_tensorrt/ptq.h +++ /dev/null @@ -1,352 +0,0 @@ -/* - * Copyright (c) NVIDIA Corporation. - * All rights reserved. - * - * This library is licensed under the BSD-style license found in the - * LICENSE file in the root directory of this source tree. 
- */ -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "NvInfer.h" -#include "torch/torch.h" -#include "torch_tensorrt/logging.h" -#include "torch_tensorrt/macros.h" - -#ifndef DOXYGEN_SHOULD_SKIP_THIS -namespace torch_tensorrt { -namespace ptq { -TORCHTRT_API bool get_batch_impl(void* bindings[], const char* names[], int nbBindings, torch::Tensor& data); -} -} // namespace torch_tensorrt -#endif // DOXYGEN_SHOULD_SKIP_THIS - -namespace torch_tensorrt { -namespace ptq { - -/** - * @brief Generic Int8Calibrator implementation based on a specified - * TensorRT calibration algorithm and a LibTorch DataLoader - * - * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: - * nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use - * @tparam DataLoaderUniquePtr: std::unique_ptr - - * DataLoader type - */ -template -class Int8Calibrator : Algorithm { - using DataLoader = typename DataLoaderUniquePtr::element_type; - using Batch = typename DataLoader::super::BatchType; - - public: - /** - * @brief Construct a new Int8Calibrator object - * - * Using the provided DataLoader, construct a calibrator that can be used for - * PTQ with Torch-TensorRT - * - * @param dataloader: std::unqiue_ptr - A unique - * pointer to the DataLoader, should be what is returned from the - * make_data_loader factory - * @param cache_file_path: const std::string& - A path to store / find the - * calibration cache - * @param use_cache : bool - Whether to use the cache (if it exists) - */ - Int8Calibrator(DataLoaderUniquePtr dataloader, const std::string& cache_file_path, bool use_cache) - : dataloader_(dataloader.get()), cache_file_path_(cache_file_path), use_cache_(use_cache) { - for (auto batch : *dataloader_) { - batched_data_.push_back(batch.data); - } - it_ = batched_data_.begin(); - } - - /** - * @brief Get the Batch Size for the next batch (always 1 due to issues with - * TRT and explicit batch) - * - * @return int - */ - int getBatchSize() const noexcept override { - // HACK: Torch-TensorRT only uses explict batch sizing, INT8 Calibrator does not - // work when reporting the batch size here and having explicity batching. - // So we just report batch size 1 (warnings will still be printed out). 
- return 1; - // return static_cast(dataloader_->options().batch_size); - } - - /** - * @brief Get the next Batch - * - * @param bindings: void*[] - An array of binding pointers (fed in from - * TensorRT calibrator), these buffers should be filed with batch data for - * each input - * @param names: const char*[] - Names of bindings - * @param nbBindings: int - Number of bindings - * @return true - There is a new batch for the calibrator to consume - * @return false - There is not a new batch for the calibrator to consume - */ - bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override { - if (it_ != batched_data_.end()) { - auto status = get_batch_impl(bindings, names, nbBindings, *it_); - it_ = ++it_; - return status; - } else { - // Reset iterator if incase calibrator is going to be used again - it_ = batched_data_.begin(); - return false; - } - } - - /** - * @brief Read calibration cache - * - * How to read from the calibration cache, only enabled if use_cache is set - * - * @param length - * @return const void* - Pointer to cache data - */ - const void* readCalibrationCache(size_t& length) noexcept override { - if (use_cache_) { - std::stringstream ss; - ss << "Reading Calibration Cache from " << cache_file_path_; - logging::log(logging::Level::kINFO, ss.str()); - - cache_.clear(); - std::ifstream input(cache_file_path_, std::ios::binary); - input >> std::noskipws; - if (input.good()) { - std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(cache_)); - logging::log(logging::Level::kDEBUG, "Cache read"); - } - length = cache_.size(); - return length ? cache_.data() : nullptr; - } - return nullptr; - } - - /** - * @brief Write calibration cache - * - * Write a the calibration cache provided by TensorRT to a specified file - * - * @param cache: const void* - cache data - * @param length: size_t - length of cache - */ - void writeCalibrationCache(const void* cache, size_t length) noexcept override { - std::ofstream cache_file(cache_file_path_, std::ios::binary); - cache_file.write(reinterpret_cast(cache), length); - std::stringstream ss; - ss << "Saved Calibration Cache to " << cache_file_path_; - logging::log(logging::Level::kINFO, ss.str()); - } - - /** - * @brief operator to cast to nvinfer1::IInt8Calibrator* - * - * Convience function to convert to a IInt8Calibrator* to easily be assigned - * to the ptq_calibrator field in CompileSpec - * - * @return nvinfer1::IInt8Calibrator* - */ - operator nvinfer1::IInt8Calibrator*() { - return reinterpret_cast(this); - } - - private: - /// Pointer to the dataloader - DataLoader* dataloader_; - /// Path to cache file - const std::string& cache_file_path_; - /// Size of cache - size_t cache_size_ = 0; - /// Whether to use the cache or not - bool use_cache_; - /// Cache data - std::vector cache_; - /// Batched Data - std::vector batched_data_; - /// Iterator to move through dataset - std::vector::iterator it_; -}; - -/** - * @brief Generic Int8Calibrator implementation based on a specified - * TensorRT calibration algorithm that only reads from a calibration file - * - * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: - * nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use - */ -template -class Int8CacheCalibrator : Algorithm { - public: - /** - * @brief Construct a new Int 8 Cache Calibrator object - * - * @param cache_file_path - */ - Int8CacheCalibrator(const std::string& cache_file_path) : cache_file_path_(cache_file_path) {} - - /** - * @brief Get the Batch Size for the 
next batch (always 1 due to issues with - * TRT and explicit batch) - * - * @return int - */ - int getBatchSize() const noexcept override { - // HACK: Torch-TensorRT only uses explict batch sizing, INT8 Calibrator does not - // work when reporting the batch size here and having explicity batching. - // So we just report batch size 1 (warnings will still be printed out). - return 1; - } - - /** - * @brief Get the next Batch - * - * Not used always returns false - * - * @param bindings: void*[] - An array of binding pointers (fed in from - * TensorRT calibrator), these buffers should be filed with batch data for - * each input - * @param names: const char*[] - Names of bindings - * @param nbBindings: int - Number of bindings - * @return false - */ - bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override { - return false; - } - - /** - * @brief Read calibration cache - * - * How to read from the calibration cache, only enabled if use_cache is set - * - * @param length - * @return const void* - Pointer to cache data - */ - const void* readCalibrationCache(size_t& length) noexcept override { - std::stringstream ss; - ss << "Reading Calibration Cache from " << cache_file_path_; - logging::log(logging::Level::kINFO, ss.str()); - - cache_.clear(); - std::ifstream input(cache_file_path_, std::ios::binary); - input >> std::noskipws; - if (input.good()) { - std::copy(std::istream_iterator(input), std::istream_iterator(), std::back_inserter(cache_)); - logging::log(logging::Level::kDEBUG, "Cache read"); - } - length = cache_.size(); - return length ? cache_.data() : nullptr; - } - - /** - * @brief Write calibration cache - * - * Write a the calibration cache provided by TensorRT to a specified file - * - * @param cache: const void* - cache data - * @param length: size_t - length of cache - */ - void writeCalibrationCache(const void* cache, size_t length) noexcept override { - std::ofstream cache_file(cache_file_path_, std::ios::binary); - cache_file.write(reinterpret_cast(cache), length); - std::stringstream ss; - ss << "Saved Calibration Cache to " << cache_file_path_; - logging::log(logging::Level::kINFO, ss.str()); - } - - /** - * @brief operator to cast to nvinfer1::IInt8Calibrator* - * - * Convience function to convert to a IInt8Calibrator* to easily be assigned - * to the ptq_calibrator field in CompileSpec - * - * @return nvinfer1::IInt8Calibrator* - */ - operator nvinfer1::IInt8Calibrator*() { - return reinterpret_cast(this); - } - - private: - /// Path to cache file - const std::string& cache_file_path_; - /// Size of cache - size_t cache_size_ = 0; - /// Cache data - std::vector cache_; -}; - -/** - * @brief A factory to build a post training quantization calibrator from a - * torch dataloader - * - * Creates a calibrator to use for post training quantization. By default the - * returned calibrator uses TensorRT Entropy v2 algorithm to perform - * calibration. This is recommended for feed forward networks. You can override - * the algorithm selection (such as to use the MinMax Calibrator recomended for - * NLP tasks) by calling make_int8_calibrator with the calibrator class as a - * template parameter. - * - * e.g. 
- * ``torch_tensorrt::ptq::make_int8_calibrator(std::move(calibration_dataloader), - * calibration_cache_file, use_cache);`` - * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: - * nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use - * @tparam DataLoader: std::unique_ptr - DataLoader - * type - * @param dataloader: std::unique_ptr - DataLoader - * containing data - * @param cache_file_path: const std::string& - Path to read/write calibration - * cache - * @param use_cache: bool - use calibration cache - * @return Int8Calibrator - */ -template -TORCH_TENSORRT_PTQ_DEPRECATION inline Int8Calibrator make_int8_calibrator( - DataLoader dataloader, - const std::string& cache_file_path, - bool use_cache) { - return Int8Calibrator(std::move(dataloader), cache_file_path, use_cache); -} - -/** - * @brief A factory to build a post training quantization calibrator from a - * torch dataloader that only uses the calibration cache - * - * Creates a calibrator to use for post training quantization which reads from a - * previously created calibration cache, therefore you can have a calibration - * cache generating program that requires a dataloader and a dataset, then save - * the cache to use later in a different program that needs to calibrate from - * scratch and not have the dataset dependency. However, the network should also - * be recalibrated if its structure changes, or the input data set changes, and - * it is the responsibility of the application to ensure this. - * - * By default the returned calibrator uses TensorRT Entropy v2 algorithm to - * perform calibration. This is recommended for feed forward networks You can - * override the algorithm selection (such as to use the MinMax Calibrator - * recomended for NLP tasks) by calling make_int8_calibrator with the calibrator - * class as a template parameter. - * - * e.g. 
- * torch_tensorrt::ptq::make_int8_cache_calibrator(calibration_cache_file); - * @tparam Algorithm: class nvinfer1::IInt8Calibrator (Default: - * nvinfer1::IInt8EntropyCalibrator2) - Algorithm to use - * @param cache_file_path: const std::string& - Path to read/write calibration - * cache - * @return Int8CacheCalibrator - */ -template -TORCH_TENSORRT_PTQ_DEPRECATION inline Int8CacheCalibrator make_int8_cache_calibrator( - const std::string& cache_file_path) { - return Int8CacheCalibrator(cache_file_path); -} - -} // namespace ptq -} // namespace torch_tensorrt diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h index adac75d984..8cf4449e75 100644 --- a/cpp/include/torch_tensorrt/torch_tensorrt.h +++ b/cpp/include/torch_tensorrt/torch_tensorrt.h @@ -34,9 +34,6 @@ template class ArrayRef; } // namespace c10 -namespace nvinfer1 { -class IInt8Calibrator; -} #endif // DOXYGEN_SHOULD_SKIP_THIS namespace torch_tensorrt { @@ -833,11 +830,6 @@ struct CompileSpec { */ uint64_t dla_global_dram_size = 536870912; - /** - * Calibration dataloaders for each input for post training quantizatiom - */ - nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; - /** * Require the full module be compiled to TensorRT instead of potentially running unsupported operations in PyTorch */ diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index 68a25b3912..6e5cf99850 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -156,15 +156,8 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external, bool if (internal.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != internal.convert_info.engine_settings.enabled_precisions.end()) { internal.partitioning_info.cast_int8_inputs = false; - if (external.ptq_calibrator) { - internal.convert_info.engine_settings.calibrator = external.ptq_calibrator; - } else { - internal.lower_info.unfreeze_module = true; - internal.lower_info.disable_cse = true; - internal.convert_info.engine_settings.calibrator = nullptr; - } - } else { - internal.convert_info.engine_settings.calibrator = nullptr; + internal.lower_info.unfreeze_module = true; + internal.lower_info.disable_cse = true; } return internal; diff --git a/cpp/src/ptq.cpp b/cpp/src/ptq.cpp deleted file mode 100644 index 7d36e9ce7d..0000000000 --- a/cpp/src/ptq.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include "torch_tensorrt/ptq.h" -#include "torch/torch.h" - -namespace torch_tensorrt { -namespace ptq { - -bool get_batch_impl(void* bindings[], const char* names[], int nbBindings, torch::Tensor& data) { - for (int i = 0; i < nbBindings; i++) { - data = data.to(at::kCUDA).contiguous(); - bindings[i] = data.data_ptr(); - } - return true; -} - -} // namespace ptq -} // namespace torch_tensorrt diff --git a/py/BUILD.bazel b/py/BUILD.bazel index 57bde9cf0a..dcbb337132 100644 --- a/py/BUILD.bazel +++ b/py/BUILD.bazel @@ -13,7 +13,6 @@ py_library( "torch_tensorrt/_types.py", "torch_tensorrt/_version.py", "torch_tensorrt/logging.py", - "torch_tensorrt/ptq.py", ], data = [ "torch_tensorrt/lib/libtrtorch.so", diff --git a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp index bae61881da..04e9115cd1 100644 --- a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp @@ -67,7 +67,6 @@ void RegisterTRTCompileSpec() { .def("_set_precisions", &torch_tensorrt::pyapi::CompileSpec::setPrecisions) .def("_set_device", 
&torch_tensorrt::pyapi::CompileSpec::setDeviceIntrusive) .def("_set_torch_fallback", &torch_tensorrt::pyapi::CompileSpec::setTorchFallbackIntrusive) - .def("_set_ptq_calibrator", &torch_tensorrt::pyapi::CompileSpec::setPTQCalibratorViaHandle) .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify); ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, sparse_weights); diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp index bd3aa6b305..788a45184b 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp @@ -343,16 +343,11 @@ core::CompileSpec CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi info.partitioning_info.cast_int8_inputs = true; - if (ptq_calibrator) { - info.convert_info.engine_settings.calibrator = ptq_calibrator; + if (info.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != + info.convert_info.engine_settings.enabled_precisions.end()) { info.partitioning_info.cast_int8_inputs = false; - } else { - if (info.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != - info.convert_info.engine_settings.enabled_precisions.end()) { - info.partitioning_info.cast_int8_inputs = false; - info.lower_info.unfreeze_module = true; - info.lower_info.disable_cse = true; - } + info.lower_info.unfreeze_module = true; + info.lower_info.disable_cse = true; } info.convert_info.engine_settings.sparse_weights = sparse_weights; info.convert_info.engine_settings.disable_tf32 = disable_tf32; diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.h b/py/torch_tensorrt/csrc/tensorrt_classes.h index 89c5c8661e..2bf8fe8f52 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.h +++ b/py/torch_tensorrt/csrc/tensorrt_classes.h @@ -140,10 +140,6 @@ struct CompileSpec : torch::CustomClassHolder { } } - int64_t getPTQCalibratorHandle() { - return (int64_t)ptq_calibrator; - } - void setDeviceIntrusive(const c10::intrusive_ptr& d) { device = *d; } @@ -152,10 +148,6 @@ struct CompileSpec : torch::CustomClassHolder { torch_fallback = *fb; } - void setPTQCalibratorViaHandle(int64_t handle) { - ptq_calibrator = (nvinfer1::IInt8Calibrator*)handle; - } - ADD_FIELD_GET_SET(disable_tf32, bool); ADD_FIELD_GET_SET(sparse_weights, bool); ADD_FIELD_GET_SET(refit, bool); @@ -170,11 +162,9 @@ struct CompileSpec : torch::CustomClassHolder { ADD_FIELD_GET_SET(allow_shape_tensors, bool); ADD_FIELD_GET_SET(device, Device); ADD_FIELD_GET_SET(torch_fallback, TorchFallback); - ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*); std::vector inputs; InputSignature input_signature; - nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; std::set enabled_precisions = {}; bool sparse_weights = false; bool disable_tf32 = false; diff --git a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp index e32d102f8b..378f96cd0e 100644 --- a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp +++ b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp @@ -18,135 +18,6 @@ namespace py = pybind11; namespace torch_tensorrt { namespace pyapi { -template -class pyCalibratorTrampoline : public Derived { - public: - using Derived::Derived; // Inherit constructors - - int getBatchSize() const noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME(int, Derived, "get_batch_size", getBatchSize); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in get_batch_size" + std::string(e.what())); - 
} catch (...) { - LOG_ERROR("Exception caught in get_batch_size"); - } - return -1; - } - - bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override { - py::gil_scoped_acquire gil{}; - - py::function pyGetBatch = torch_tensorrt::pyapi::util::getOverload(static_cast(this), "get_batch"); - std::vector namesVec(names, names + nbBindings); - py::object result = pyGetBatch(namesVec); - // Copy over into the other data structure. - if (!result.is_none() && result.cast>().size() != 0) { - std::memcpy(bindings, result.cast>().data(), nbBindings * sizeof(void*)); - return true; - } - return false; - } - - const void* readCalibrationCache(std::size_t& length) noexcept override { - py::gil_scoped_acquire gil{}; - - py::function pyReadCalibrationCache = - torch_tensorrt::pyapi::util::getOverload(static_cast(this), "read_calibration_cache"); - py::buffer cache = pyReadCalibrationCache(); - if (!cache.is_none()) { - py::buffer_info info = cache.request(); - length = info.size * info.itemsize; - return info.ptr; - } - return nullptr; - } - - void writeCalibrationCache(const void* ptr, std::size_t length) noexcept override { - py::gil_scoped_acquire gil{}; - - py::function pyWriteCalibrationCache = - torch_tensorrt::pyapi::util::getOverload(static_cast(this), "write_calibration_cache"); - - py::memoryview cache{py::memoryview::from_buffer(static_cast(ptr), {length}, {sizeof(uint8_t)})}; - pyWriteCalibrationCache(cache); - } -}; - -class pyIInt8Calibrator : public pyCalibratorTrampoline { - public: - using Derived = pyCalibratorTrampoline; - using Derived::Derived; - - nvinfer1::InterfaceInfo getInterfaceInfo() const noexcept override { - return nvinfer1::InterfaceInfo{"PYTHON CALIBRATOR", 1, 0}; - } - - nvinfer1::CalibrationAlgoType getAlgorithm() noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME( - nvinfer1::CalibrationAlgoType, nvinfer1::IInt8Calibrator, "get_algorithm", getAlgorithm); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in get_algorithm: " + std::string(e.what())); - } catch (...) { - LOG_ERROR("Exception caught in get_algorithm"); - } - return {}; - } -}; - -class pyIInt8LegacyCalibrator : public pyCalibratorTrampoline { - public: - using Derived = pyCalibratorTrampoline; - using Derived::Derived; - - double getQuantile() const noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME(double, nvinfer1::IInt8LegacyCalibrator, "get_quantile", getQuantile); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in get_quantile: " + std::string(e.what())); - } catch (...) { - LOG_ERROR("Exception caught in get_quantile"); - } - return -1.0; - } - - double getRegressionCutoff() const noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME( - double, nvinfer1::IInt8LegacyCalibrator, "get_regression_cutoff", getRegressionCutoff); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in get_regression_cutoff: " + std::string(e.what())); - } catch (...) { - LOG_ERROR("Exception caught in get_regression_cutoff"); - } - return -1.0; - } - - const void* readHistogramCache(std::size_t& length) noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME( - const char*, nvinfer1::IInt8LegacyCalibrator, "read_histogram_cache", readHistogramCache, length); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in read_histogram_cache" + std::string(e.what())); - } catch (...) 
{ - LOG_ERROR("Exception caught in read_histogram_cache"); - } - return {}; - } - - void writeHistogramCache(const void* ptr, std::size_t length) noexcept override { - try { - PYBIND11_OVERLOAD_PURE_NAME( - void, nvinfer1::IInt8LegacyCalibrator, "write_histogram_cache", writeHistogramCache, ptr, length); - } catch (std::exception const& e) { - LOG_ERROR("Exception caught in write_histogram_cache" + std::string(e.what())); - } catch (...) { - LOG_ERROR("Exception caught in write_histogram_cache"); - } - } -}; - void set_device(const int device_id) { core::set_device(device_id); } @@ -275,51 +146,6 @@ PYBIND11_MODULE(_C, m) { .value("channels_last", TensorFormat::kChannelsLast, "Channels last memory layout (NHWC)") .export_values(); - py::enum_(m, "CalibrationAlgo", py::module_local(), "Type of calibration algorithm") - .value("LEGACY_CALIBRATION", nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION) - .value("ENTROPY_CALIBRATION", nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION) - .value("ENTROPY_CALIBRATION_2", nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION_2) - .value("MINMAX_CALIBRATION", nvinfer1::CalibrationAlgoType::kMINMAX_CALIBRATION); - - py::class_( - m, "IInt8Calibrator", py::module_local(), "Int8 Calibrator base class") - .def(py::init_alias<>()) // Always initialize trampoline class. - .def("get_batch_size", &nvinfer1::IInt8Calibrator::getBatchSize, "Get batch size") - .def("get_algorithm", &nvinfer1::IInt8Calibrator::getAlgorithm, "Get algorithm"); - - py::class_( - m, "IInt8LegacyCalibrator", py::module_local(), "Int8 Legacy Calibrator class") - .def(py::init_alias<>()) // Always initialize trampoline class. - .def("get_batch_size", &nvinfer1::IInt8LegacyCalibrator::getBatchSize, "Get batch size") - .def("get_algorithm", &nvinfer1::IInt8LegacyCalibrator::getAlgorithm, "Get algorithm"); - - py::class_< - nvinfer1::IInt8EntropyCalibrator, - nvinfer1::IInt8Calibrator, - pyCalibratorTrampoline>( - m, "IInt8EntropyCalibrator", py::module_local(), "Int8 Entropy Calibrator class") - .def(py::init_alias<>()) // Always initialize trampoline class. - .def("get_batch_size", &nvinfer1::IInt8EntropyCalibrator::getBatchSize, "Get batch size") - .def("get_algorithm", &nvinfer1::IInt8EntropyCalibrator::getAlgorithm, "Get algorithm"); - - py::class_< - nvinfer1::IInt8EntropyCalibrator2, - nvinfer1::IInt8Calibrator, - pyCalibratorTrampoline>( - m, "IInt8EntropyCalibrator2", py::module_local(), "Int8 Entropy Calibrator2 class") - .def(py::init_alias<>()) // Always initialize trampoline class. - .def("get_batch_size", &nvinfer1::IInt8EntropyCalibrator2::getBatchSize, "Get batch size") - .def("get_algorithm", &nvinfer1::IInt8EntropyCalibrator2::getAlgorithm, "Get algorithm"); - - py::class_< - nvinfer1::IInt8MinMaxCalibrator, - nvinfer1::IInt8Calibrator, - pyCalibratorTrampoline>( - m, "IInt8MinMaxCalibrator", py::module_local(), "Int8 MinMax Calibrator class") - .def(py::init_alias<>()) // Always initialize trampoline class. 
- .def("get_batch_size", &nvinfer1::IInt8MinMaxCalibrator::getBatchSize, "Get batch size") - .def("get_algorithm", &nvinfer1::IInt8MinMaxCalibrator::getAlgorithm, "Get algorithm"); - py::class_(m, "Device") .def(py::init<>()) .def("__str__", &torch_tensorrt::pyapi::Device::to_str) @@ -362,11 +188,9 @@ PYBIND11_MODULE(_C, m) { py::class_(ts_sub_mod, "CompileSpec") .def(py::init<>()) .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify) - .def("_get_calibrator_handle", &CompileSpec::getPTQCalibratorHandle, "[Internal] gets a handle from a calibrator") .def_readwrite("inputs", &CompileSpec::inputs) .def_readwrite("input_signature", &CompileSpec::input_signature) .def_readwrite("enabled_precisions", &CompileSpec::enabled_precisions) - .def_readwrite("ptq_calibrator", &CompileSpec::ptq_calibrator) .def_readwrite("refit", &CompileSpec::refit) .def_readwrite("sparse_weights", &CompileSpec::sparse_weights) .def_readwrite("disable_tf32", &CompileSpec::disable_tf32) diff --git a/py/torch_tensorrt/dynamo/_compiler.py b/py/torch_tensorrt/dynamo/_compiler.py index 74cab980c4..608c8e84c9 100644 --- a/py/torch_tensorrt/dynamo/_compiler.py +++ b/py/torch_tensorrt/dynamo/_compiler.py @@ -148,7 +148,6 @@ def cross_compile_for_windows( dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution truncate_double (bool): Truncate weights provided in double (float64) to float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration require_full_compilation (bool): Require modules to be compiled end to end or return an error as opposed to returning a hybrid graph where operations that cannot be run in TensorRT are run in PyTorch min_block_size (int): The minimum number of contiguous TensorRT convertible operations in order to run a set of operations in TensorRT torch_executed_ops (Collection[Target]): Set of aten operators that must be run in PyTorch. An error will be thrown if this set is not empty but ``require_full_compilation`` is True @@ -487,7 +486,6 @@ def compile( dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution truncate_double (bool): Truncate weights provided in double (float64) to float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration require_full_compilation (bool): Require modules to be compiled end to end or return an error as opposed to returning a hybrid graph where operations that cannot be run in TensorRT are run in PyTorch min_block_size (int): The minimum number of contiguous TensorRT convertible operations in order to run a set of operations in TensorRT torch_executed_ops (Collection[Target]): Set of aten operators that must be run in PyTorch. 
An error will be thrown if this set is not empty but ``require_full_compilation`` is True @@ -1042,7 +1040,6 @@ def convert_exported_program_to_serialized_trt_engine( dla_sram_size: int = _defaults.DLA_SRAM_SIZE, dla_local_dram_size: int = _defaults.DLA_LOCAL_DRAM_SIZE, dla_global_dram_size: int = _defaults.DLA_GLOBAL_DRAM_SIZE, - calibrator: object = None, allow_shape_tensors: bool = False, timing_cache_path: str = _defaults.TIMING_CACHE_PATH, use_explicit_typing: bool = _defaults.USE_EXPLICIT_TYPING, @@ -1107,7 +1104,6 @@ def convert_exported_program_to_serialized_trt_engine( dla_sram_size (int): Fast software managed RAM used by DLA to communicate within a layer. dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT timing_cache_path (str): Path to the timing cache if it exists (or) where it will be saved after compilation use_explicit_typing (bool): This flag enables strong typing in TensorRT compilation which respects the precisions set in the Pytorch model. This is useful when users have mixed precision graphs. diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py index f81f7cab32..6006484f19 100644 --- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py @@ -6,6 +6,7 @@ import numpy as np import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Argument, Node, Target from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo._settings import CompilationSettings @@ -22,7 +23,6 @@ get_positive_dim, is_only_operator_on_placeholder, ) -from torch_tensorrt.dynamo.types import TRTTensor _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/converter_utils.py b/py/torch_tensorrt/dynamo/conversion/converter_utils.py index 53835ba1d5..7d7f4274ff 100644 --- a/py/torch_tensorrt/dynamo/conversion/converter_utils.py +++ b/py/torch_tensorrt/dynamo/conversion/converter_utils.py @@ -19,11 +19,10 @@ import numpy as np import tensorrt as trt import torch +import torch_tensorrt.dynamo.conversion.impl as impl from torch.fx.experimental.proxy_tensor import unset_fake_temporarily from torch.fx.node import Argument, Target from torch.fx.passes.shape_prop import TensorMetadata - -import torch_tensorrt.dynamo.conversion.impl as impl from torch_tensorrt import _enums from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo._SourceIR import SourceIR diff --git a/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py b/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py index db257b9c4e..ed30e2ff18 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/activation/base.py @@ -1,14 +1,13 @@ from typing import Any, Callable, Optional import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import 
ConversionContext -from torch_tensorrt.fx.converters.converter_utils import ( - mark_as_int8_layer, +from torch_tensorrt.dynamo.conversion.converter_utils import ( set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor def convert_activation( @@ -37,11 +36,4 @@ def convert_activation( layer.beta = beta set_layer_name(layer, target, name, source_ir) - if ( - not ctx.net.get_flag(trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED) - and input_val.dynamic_range is not None - and dyn_range_fn is not None - ): - dyn_range = dyn_range_fn(input_val.dynamic_range) - mark_as_int8_layer(layer, dyn_range) return layer.get_output(0) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/activation/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/activation/ops.py index eb981f2031..af47a8e2c9 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/activation/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/activation/ops.py @@ -3,11 +3,11 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.impl.activation.base import convert_activation -from torch_tensorrt.dynamo.types import TRTTensor def relu( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/addmm.py b/py/torch_tensorrt/dynamo/conversion/impl/addmm.py index 1a0690852a..46ee1f974c 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/addmm.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/addmm.py @@ -2,11 +2,11 @@ import numpy as np import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.fx.types import TRTTensor def addmm( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/arange.py b/py/torch_tensorrt/dynamo/conversion/impl/arange.py index baaf690010..7595e97171 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/arange.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/arange.py @@ -2,6 +2,7 @@ import numpy as np import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -11,7 +12,6 @@ get_trt_tensor, set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor def arange( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/cast.py b/py/torch_tensorrt/dynamo/conversion/impl/cast.py index 0b69f98fc9..4ad39d4563 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/cast.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/cast.py @@ -4,6 +4,8 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import DataType as TRTDataType +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -13,7 +15,6 @@ cast_trt_tensor, get_trt_tensor, ) -from torch_tensorrt.fx.types import TRTDataType, TRTTensor LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/cat.py b/py/torch_tensorrt/dynamo/conversion/impl/cat.py index 096bc1aa24..68bbcc31d0 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/cat.py +++ 
b/py/torch_tensorrt/dynamo/conversion/impl/cat.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -11,9 +12,8 @@ cast_trt_tensor, get_positive_dim, get_trt_tensor, + set_layer_name, ) -from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor def cat( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py index e21e7f32a1..b7739c3b3f 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/condition/ops.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -15,7 +16,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.elementwise import ne -from torch_tensorrt.fx.types import TRTTensor def where( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/conv.py b/py/torch_tensorrt/dynamo/conversion/impl/conv.py index 918c87ca70..513346a63b 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/conv.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/conv.py @@ -5,6 +5,7 @@ # @manual=//deeplearning/trt/python:py_tensorrt import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -17,11 +18,6 @@ to_torch, to_trt_weights, ) -from torch_tensorrt.fx.converters.converter_utils import ( - get_dyn_range, - mark_as_int8_layer, -) -from torch_tensorrt.fx.types import TRTTensor def convNd( @@ -172,11 +168,6 @@ def convNd( if groups is not None: conv_layer.num_groups = groups - # Handle quantization cases - if scale is not None and zero_point is not None: - # Assume the dtype of activation is torch.quint8 - mark_as_int8_layer(conv_layer, get_dyn_range(scale, zero_point, torch.quint8)) - result = conv_layer.get_output(0) if is_conv1d: diff --git a/py/torch_tensorrt/dynamo/conversion/impl/deconv.py b/py/torch_tensorrt/dynamo/conversion/impl/deconv.py index 6a21415ffe..b9ee582d26 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/deconv.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/deconv.py @@ -5,6 +5,7 @@ # @manual=//deeplearning/trt/python:py_tensorrt import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -12,15 +13,10 @@ SourceIR, get_trt_tensor, has_dynamic_shape, + set_layer_name, to_torch, to_trt_weights, ) -from torch_tensorrt.fx.converters.converter_utils import ( - get_dyn_range, - mark_as_int8_layer, - set_layer_name, -) -from torch_tensorrt.fx.types import TRTTensor def deconvNd( @@ -174,11 +170,6 @@ def deconvNd( deconv_layer.pre_padding = tuple(pre_padding_values) deconv_layer.post_padding = tuple(post_padding_values) - # Handle quantization cases - if scale is not None and zero_point is not None: - # Assume the dtype of activation is torch.quint8 - mark_as_int8_layer(deconv_layer, 
get_dyn_range(scale, zero_point, torch.quint8)) - result = deconv_layer.get_output(0) if is_deconv1d: diff --git a/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py index e935992bda..040828e297 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/dynamic_block_quantize.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.experimental.proxy_tensor import unset_fake_temporarily from torch.fx.node import Target from torch_tensorrt._utils import is_tensorrt_version_supported @@ -10,9 +11,8 @@ from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import ( get_trt_tensor, + set_layer_name, ) -from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor if is_tensorrt_version_supported("10.8.0"): diff --git a/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py index 1bfb8c7242..b425973661 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/elementwise/ops.py @@ -4,6 +4,7 @@ import tensorrt as trt import torch import torch_tensorrt.dynamo.conversion.impl as impl +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -21,7 +22,6 @@ ) from torch_tensorrt.dynamo.conversion.impl.unary import atan, sign from torch_tensorrt.dynamo.conversion.impl.unary.base import convert_unary -from torch_tensorrt.fx.types import TRTTensor def trunc_div( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/embedding.py b/py/torch_tensorrt/dynamo/conversion/impl/embedding.py index 4188c63e30..a712641f44 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/embedding.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/embedding.py @@ -6,6 +6,7 @@ import tensorrt as trt import torch import torch_tensorrt.dynamo.conversion.impl as impl +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -14,10 +15,9 @@ cast_trt_tensor, get_trt_tensor, set_item, + set_layer_name, to_numpy, ) -from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor def embedding( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/full.py b/py/torch_tensorrt/dynamo/conversion/impl/full.py index fc079f7f32..5c70d4772f 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/full.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/full.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo.conversion import impl @@ -12,7 +13,6 @@ cast_trt_tensor, get_trt_tensor, ) -from torch_tensorrt.fx.types import TRTTensor def full( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/grid.py b/py/torch_tensorrt/dynamo/conversion/impl/grid.py index 302d286237..00211fb520 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/grid.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/grid.py 
@@ -1,11 +1,11 @@ from typing import Optional import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name -from torch_tensorrt.dynamo.types import TRTTensor # bilinear, nearest, bicubic GridSamplerInterpolationMode = { diff --git a/py/torch_tensorrt/dynamo/conversion/impl/linear.py b/py/torch_tensorrt/dynamo/conversion/impl/linear.py index 5e859a46d3..3827284950 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/linear.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/linear.py @@ -3,11 +3,12 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target +from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.dynamo.conversion.converter_utils import SourceIR, get_trt_tensor -from torch_tensorrt.dynamo.types import TRTTensor +from torch_tensorrt.dynamo.conversion.converter_utils import get_trt_tensor def linear( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/matmul.py b/py/torch_tensorrt/dynamo/conversion/impl/matmul.py index 83ea3dd99b..65e4f53328 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/matmul.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/matmul.py @@ -2,6 +2,7 @@ import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -12,7 +13,6 @@ get_trt_tensor, set_layer_name, ) -from torch_tensorrt.dynamo.types import TRTTensor def matrix_multiply( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/nccl_ops.py b/py/torch_tensorrt/dynamo/conversion/impl/nccl_ops.py index c28c5bcc7d..e64c06ca39 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/nccl_ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/nccl_ops.py @@ -5,8 +5,9 @@ import numpy as np import tensorrt as trt from torch.fx.node import Argument, Target +from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.fx.converters.converter_utils import SourceIR, set_layer_name +from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name # class for AllReduce diff --git a/py/torch_tensorrt/dynamo/conversion/impl/pad.py b/py/torch_tensorrt/dynamo/conversion/impl/pad.py index 731058a122..863b6bc218 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/pad.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/pad.py @@ -2,6 +2,7 @@ import numpy as np import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -11,7 +12,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.dynamo.types import TRTTensor """ Note: IPaddingLayer is deprecated in TensorRT 8.2 and will be removed in TensorRT 10.0. 
diff --git a/py/torch_tensorrt/dynamo/conversion/impl/permutation.py b/py/torch_tensorrt/dynamo/conversion/impl/permutation.py index 1537d0fdbe..60ab762fa6 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/permutation.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/permutation.py @@ -1,6 +1,7 @@ from typing import Optional, Sequence, Union import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -12,7 +13,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.fx.types import TRTTensor def permute( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/pool.py b/py/torch_tensorrt/dynamo/conversion/impl/pool.py index 4e18aaaef2..757f7209d9 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/pool.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/pool.py @@ -3,18 +3,16 @@ import tensorrt as trt import torch_tensorrt.dynamo.conversion.impl as impl +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import ( extend_attr_to_tuple, get_positive_dim, -) -from torch_tensorrt.fx.converters.converter_utils import ( has_dynamic_shape, set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor def avg_poolNd( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/prelu.py b/py/torch_tensorrt/dynamo/conversion/impl/prelu.py index 166ce16367..8e218f49cb 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/prelu.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/prelu.py @@ -1,10 +1,10 @@ from typing import Optional +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name -from torch_tensorrt.dynamo.types import TRTTensor def prelu( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py index 2aeedb144e..8dd32b11fc 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py @@ -3,14 +3,17 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.experimental.proxy_tensor import unset_fake_temporarily from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.dynamo.conversion.converter_utils import get_trt_tensor, to_torch -from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.conversion.converter_utils import ( + get_trt_tensor, + set_layer_name, + to_torch, +) def get_ir(target: Target) -> SourceIR: diff --git a/py/torch_tensorrt/dynamo/conversion/impl/reduce.py b/py/torch_tensorrt/dynamo/conversion/impl/reduce.py index a61a11772d..2bd7d7de36 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/reduce.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/reduce.py @@ -1,6 +1,7 @@ from typing 
import Optional, Sequence, Tuple, Union import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -9,9 +10,8 @@ cast_trt_tensor, get_axes_for_reduce_op, get_positive_dim, + set_layer_name, ) -from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor def amax( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/select.py b/py/torch_tensorrt/dynamo/conversion/impl/select.py index fe6ade2e68..c4d44a07ea 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/select.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/select.py @@ -4,6 +4,7 @@ import numpy as np import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -13,16 +14,13 @@ cast_trt_tensor, get_positive_dim, get_trt_tensor, + has_dynamic_shape, + set_layer_name, to_numpy, ) from torch_tensorrt.dynamo.conversion.impl.elementwise import convert_binary_elementwise from torch_tensorrt.dynamo.conversion.impl.shape import shape as get_shape from torch_tensorrt.dynamo.utils import DYNAMIC_DIM -from torch_tensorrt.fx.converters.converter_utils import ( - has_dynamic_shape, - set_layer_name, -) -from torch_tensorrt.fx.types import TRTTensor _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/shape.py b/py/torch_tensorrt/dynamo/conversion/impl/shape.py index c2dfac802b..27af02e5bb 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/shape.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/shape.py @@ -3,7 +3,9 @@ from typing import List, Optional, Tuple import numpy as np +import tensorrt as trt import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -11,18 +13,15 @@ cast_trt_tensor, get_positive_dim, get_trt_tensor, + set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.elementwise.base import ( convert_binary_elementwise, ) -from torch_tensorrt.fx.converters.converter_utils import ( +from torch_tensorrt.dynamo.utils import ( Frameworks, - set_layer_name, unified_dtype_converter, ) -from torch_tensorrt.fx.types import TRTTensor - -import tensorrt as trt def shape( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py b/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py index 975480f390..2ef6c740ae 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/shuffle.py @@ -3,6 +3,7 @@ import numpy as np import tensorrt as trt import torch_tensorrt.dynamo.conversion.impl as impl +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt import _enums from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -14,7 +15,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.fx.types import TRTTensor def reshape( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/slice/base.py b/py/torch_tensorrt/dynamo/conversion/impl/slice/base.py index a2af840a1f..c2edaceafb 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/slice/base.py +++ 
b/py/torch_tensorrt/dynamo/conversion/impl/slice/base.py @@ -1,14 +1,15 @@ from typing import Optional +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.fx.converters.converter_utils import ( +from torch_tensorrt.dynamo.conversion.converter_utils import ( has_dynamic_shape, set_layer_name, ) -from torch_tensorrt.fx.types import Shape, TRTTensor +from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape +from torch_tensorrt.dynamo.types import Shape def slice( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py index 203bb03553..6a59cfda4c 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/slice/ops.py @@ -4,8 +4,8 @@ import numpy as np import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target - from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -26,8 +26,8 @@ from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape from torch_tensorrt.dynamo.conversion.impl.shape import shape as get_shape from torch_tensorrt.dynamo.conversion.impl.slice.base import slice +from torch_tensorrt.dynamo.types import Shape from torch_tensorrt.dynamo.utils import DYNAMIC_DIM -from torch_tensorrt.fx.types import Shape, TRTTensor def slice_op( # TODO: This should be slice not whatever is in base diff --git a/py/torch_tensorrt/dynamo/conversion/impl/split.py b/py/torch_tensorrt/dynamo/conversion/impl/split.py index 0f07ceb7ab..143a05a5b2 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/split.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/split.py @@ -1,14 +1,14 @@ from typing import List, Optional, Sequence, Union +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.fx.converters.converter_utils import ( +from torch_tensorrt.dynamo.conversion.converter_utils import ( has_dynamic_shape, set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape def split( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py b/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py index dd6a2b9863..371b0a3c72 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/squeeze.py @@ -1,5 +1,6 @@ from typing import Optional, Sequence, Union +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -7,7 +8,6 @@ get_positive_dim, set_layer_name, ) -from torch_tensorrt.fx.types import TRTTensor def squeeze( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/topk.py b/py/torch_tensorrt/dynamo/conversion/impl/topk.py index 3b6549d285..053a46ce2b 
100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/topk.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/topk.py @@ -1,6 +1,7 @@ from typing import Optional, Tuple, Union import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -10,14 +11,13 @@ flatten_dims, get_axes_for_reduce_op, get_positive_dim, - set_layer_name, get_trt_tensor, has_dynamic_shape, + set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.elementwise import convert_binary_elementwise from torch_tensorrt.dynamo.conversion.impl.shape import shape as get_shape from torch_tensorrt.dynamo.utils import DYNAMIC_DIM -from torch_tensorrt.dynamo.types import TRTTensor def argmax_argmin( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py b/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py index 5da8bad252..51521ceac9 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unary/base.py @@ -1,11 +1,11 @@ from typing import Optional import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTTensor +from torch_tensorrt.dynamo.conversion.converter_utils import set_layer_name def convert_unary( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py index 89e490392d..12f6051457 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unary/ops.py @@ -4,16 +4,17 @@ import tensorrt as trt import torch import torch_tensorrt.dynamo.conversion.impl as impl +from tensorrt import DataType as TRTDataType +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import ( cast_trt_tensor, get_trt_tensor, + set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.unary.base import convert_unary -from torch_tensorrt.fx.converters.converter_utils import set_layer_name -from torch_tensorrt.fx.types import TRTDataType, TRTTensor def exp( diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py b/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py index 35f21198d4..1a54b470f9 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py @@ -1,6 +1,7 @@ import logging from typing import List, Optional, Sequence, cast +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -9,7 +10,6 @@ get_trt_tensor, set_layer_name, ) -from torch_tensorrt.dynamo.types import TRTTensor logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/upsample.py b/py/torch_tensorrt/dynamo/conversion/impl/upsample.py index 247179455c..4b47ca5dec 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/upsample.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/upsample.py @@ 
-1,6 +1,7 @@ from typing import Optional, Sequence import tensorrt as trt +from tensorrt import ITensor as TRTTensor from torch.fx.node import Target from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -9,7 +10,6 @@ set_layer_name, ) from torch_tensorrt.dynamo.conversion.impl.shape import get_shape_with_dynamic_shape -from torch_tensorrt.dynamo.types import TRTTensor def upsample( diff --git a/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py b/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py index a2feb99d56..9401e3d99d 100644 --- a/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py +++ b/py/torch_tensorrt/dynamo/conversion/ops_evaluators.py @@ -6,6 +6,7 @@ import numpy as np import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Argument, Node, Target from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -13,8 +14,10 @@ ConverterRegistry, dynamo_tensorrt_converter, ) -from torch_tensorrt.fx.types import TRTTensor -from torch_tensorrt.fx.utils import Frameworks, unified_dtype_converter +from torch_tensorrt.dynamo.utils import ( + Frameworks, + unified_dtype_converter, +) _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py index 923ca9be6c..8f2da209b1 100644 --- a/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/prims_ops_converters.py @@ -2,6 +2,7 @@ from typing import Dict, Sequence, Tuple, Union import torch +from tensorrt import ITensor as TRTTensor from torch.fx.node import Argument, Target from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo._SourceIR import SourceIR @@ -10,7 +11,6 @@ from torch_tensorrt.dynamo.conversion._ConverterRegistry import ( dynamo_tensorrt_converter, ) -from torch_tensorrt.fx.types import TRTTensor _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py index b0e41f7aeb..3197d9f7de 100644 --- a/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py +++ b/py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py @@ -95,7 +95,6 @@ def __init__( dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution truncate_double (bool): Truncate weights provided in double (float64) to float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration require_full_compilation (bool): Require modules to be compiled end to end or return an error as opposed to returning a hybrid graph where operations that cannot be run in TensorRT are run in PyTorch min_block_size (int): The minimum number of contiguous TensorRT convertible operations in order to run a set of operations in TensorRT torch_executed_ops (Collection[Target]): Set of aten operators that must be run in PyTorch. 
An error will be thrown if this set is not empty but ``require_full_compilation`` is True diff --git a/py/torch_tensorrt/dynamo/utils.py b/py/torch_tensorrt/dynamo/utils.py index 0703fd1cb9..de736db1bf 100644 --- a/py/torch_tensorrt/dynamo/utils.py +++ b/py/torch_tensorrt/dynamo/utils.py @@ -84,6 +84,37 @@ class Frameworks(Enum): } +def unified_dtype_converter( + dtype: Union[TRTDataType, torch.dtype, np.dtype], to: Frameworks +) -> Union[np.dtype, torch.dtype, TRTDataType]: + """ + Convert TensorRT, Numpy, or Torch data types to any other of those data types. + + Args: + dtype (TRTDataType, torch.dtype, np.dtype): A TensorRT, Numpy, or Torch data type. + to (Frameworks): The framework to convert the data type to. + + Returns: + The equivalent data type in the requested framework. + """ + assert to in Frameworks, f"Expected valid Framework for translation, got {to}" + trt_major_version = int(trt.__version__.split(".")[0]) + if dtype in (np.int8, torch.int8, trt.int8): + return DataTypeEquivalence[trt.int8][to] + elif trt_major_version >= 7 and dtype in (np.bool_, torch.bool, trt.bool): + return DataTypeEquivalence[trt.bool][to] + elif dtype in (np.int32, torch.int32, trt.int32): + return DataTypeEquivalence[trt.int32][to] + elif dtype in (np.int64, torch.int64, trt.int64): + return DataTypeEquivalence[trt.int64][to] + elif dtype in (np.float16, torch.float16, trt.float16): + return DataTypeEquivalence[trt.float16][to] + elif dtype in (np.float32, torch.float32, trt.float32): + return DataTypeEquivalence[trt.float32][to] + else: + raise TypeError("%s is not a supported dtype" % dtype) + + def deallocate_module(module: torch.fx.GraphModule, delete_module: bool = True) -> None: """ This is a helper function to delete the instance of module. We first move it to CPU and then diff --git a/py/torch_tensorrt/ts/_compile_spec.py b/py/torch_tensorrt/ts/_compile_spec.py index 6016fe87c5..0d0b12723e 100644 --- a/py/torch_tensorrt/ts/_compile_spec.py +++ b/py/torch_tensorrt/ts/_compile_spec.py @@ -239,9 +239,6 @@ def _parse_compile_spec(compile_spec_: Dict[str, Any]) -> _ts_C.CompileSpec: compile_spec["enabled_precisions"] ) - if "calibrator" in compile_spec and compile_spec["calibrator"]: - info.ptq_calibrator = compile_spec["calibrator"] - if "sparse_weights" in compile_spec: assert isinstance(compile_spec["sparse_weights"], bool) info.sparse_weights = compile_spec["sparse_weights"] @@ -319,7 +316,6 @@ def TensorRTCompileSpec( dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_long_and_double: bool = False, - calibrator: object = None, allow_shape_tensors: bool = False, ) -> torch.classes.tensorrt.CompileSpec: """Utility to create a formatted spec dictionary for using the PyTorch TensorRT backend @@ -354,7 +350,6 @@ def TensorRTCompileSpec( num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels workspace_size (int): Maximum size of workspace given to TensorRT truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT Returns: @@ -378,7 +373,6 @@ def TensorRTCompileSpec( "dla_sram_size": dla_sram_size, # Fast software managed RAM used by DLA to communicate within a layer. 
"dla_local_dram_size": dla_local_dram_size, # Host RAM used by DLA to share intermediate tensor data across operations "dla_global_dram_size": dla_global_dram_size, # Host RAM used by DLA to store weights and metadata for execution - "calibrator": calibrator, "truncate_long_and_double": truncate_long_and_double, "allow_shape_tensors": allow_shape_tensors, } @@ -433,6 +427,5 @@ def TensorRTCompileSpec( backend_spec._set_dla_global_dram_size(parsed_spec.dla_global_dram_size) backend_spec._set_truncate_long_and_double(parsed_spec.truncate_long_and_double) backend_spec._set_allow_shape_tensors(parsed_spec.allow_shape_tensors) - backend_spec._set_ptq_calibrator(parsed_spec._get_calibrator_handle()) return backend_spec diff --git a/py/torch_tensorrt/ts/_compiler.py b/py/torch_tensorrt/ts/_compiler.py index 114398f010..4bcbf058bc 100644 --- a/py/torch_tensorrt/ts/_compiler.py +++ b/py/torch_tensorrt/ts/_compiler.py @@ -27,7 +27,6 @@ def compile( dla_sram_size: int = 1048576, dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, - calibrator: object = None, truncate_long_and_double: bool = False, require_full_compilation: bool = False, min_block_size: int = 3, @@ -92,7 +91,6 @@ def compile( dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration require_full_compilation (bool): Require modules to be compiled end to end or return an error as opposed to returning a hybrid graph where operations that cannot be run in TensorRT are run in PyTorch min_block_size (int): The minimum number of contiguous TensorRT convertible operations in order to run a set of operations in TensorRT torch_executed_ops (List[str]): List of aten operators that must be run in PyTorch. 
An error will be thrown if this list is not empty but ``require_full_compilation`` is True @@ -147,7 +145,6 @@ def compile( "dla_sram_size": dla_sram_size, "dla_local_dram_size": dla_local_dram_size, "dla_global_dram_size": dla_global_dram_size, - "calibrator": calibrator, "truncate_long_and_double": truncate_long_and_double, "torch_fallback": { "enabled": not require_full_compilation, @@ -182,7 +179,6 @@ def convert_method_to_trt_engine( dla_local_dram_size: int = 1073741824, dla_global_dram_size: int = 536870912, truncate_long_and_double: int = False, - calibrator: object = None, allow_shape_tensors: bool = False, ) -> bytes: """Convert a TorchScript module method to a serialized TensorRT engine @@ -241,7 +237,6 @@ def convert_method_to_trt_engine( dla_local_dram_size (int): Host RAM used by DLA to share intermediate tensor data across operations dla_global_dram_size (int): Host RAM used by DLA to store weights and metadata for execution truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32 - calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration allow_shape_tensors: (Experimental) Allow aten::size to output shape tensors using IShapeLayer in TensorRT Returns: @@ -274,7 +269,6 @@ def convert_method_to_trt_engine( "capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels "num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels "workspace_size": workspace_size, # Maximum size of workspace given to TensorRT - "calibrator": calibrator, "truncate_long_and_double": truncate_long_and_double, "allow_shape_tensors": allow_shape_tensors, } diff --git a/py/torch_tensorrt/ts/ptq.py b/py/torch_tensorrt/ts/ptq.py deleted file mode 100644 index db55aa47e4..0000000000 --- a/py/torch_tensorrt/ts/ptq.py +++ /dev/null @@ -1,233 +0,0 @@ -import sys -from typing import Any, List, Optional - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self - -import os -import warnings -from enum import Enum - -import torch -from torch_tensorrt import _C -from torch_tensorrt.ts.logging import Level, log - - -class CalibrationAlgo(Enum): - ENTROPY_CALIBRATION = _C.CalibrationAlgo.ENTROPY_CALIBRATION - ENTROPY_CALIBRATION_2 = _C.CalibrationAlgo.ENTROPY_CALIBRATION_2 - LEGACY_CALIBRATION = _C.CalibrationAlgo.LEGACY_CALIBRATION - MINMAX_CALIBRATION = _C.CalibrationAlgo.MINMAX_CALIBRATION - - -def get_cache_mode_batch(self: object) -> None: - return None - - -def get_batch_size(self: object) -> int: - return 1 - - -def get_batch(self: object, _: Any) -> Optional[List[int]]: - if self.current_batch_idx + self.batch_size > len(self.data_loader.dataset): - return None - - batch = next(self.dataset_iterator) - self.current_batch_idx += self.batch_size - inputs_gpu = [] - if isinstance(batch, list): - for example in batch: - inputs_gpu.append(example.to(self.device).data_ptr()) - else: - inputs_gpu.append(batch.to(self.device).data_ptr()) - return inputs_gpu - - -def read_calibration_cache(self: object) -> bytes: - if self.cache_file and self.use_cache: - if os.path.exists(self.cache_file): - with open(self.cache_file, "rb") as f: - b: bytes = f.read() - return b - else: - raise FileNotFoundError(self.cache_file) - else: - return b"" - - -def write_calibration_cache(self: object, cache: bytes) -> None: - if self.cache_file: - with 
open(self.cache_file, "wb") as f: - f.write(cache) - else: - return - - -# deepcopy (which involves pickling) is performed on the compile_spec internally during compilation. -# We register this __reduce__ function for pickler to identity the calibrator object returned by DataLoaderCalibrator during deepcopy. -# This should be the object's local name relative to the module https://docs.python.org/3/library/pickle.html#object.__reduce__ -def __reduce__(self: object) -> str: - return self.__class__.__name__ - - -class DataLoaderCalibrator(object): - """ - Constructs a calibrator class in TensorRT and uses pytorch dataloader to load/preprocess - data which is passed during calibration. - - Arguments: - dataloader (torch.utils.data.DataLoader): an instance of pytorch dataloader which iterates through a given dataset. - algo_type (CalibrationAlgo): choice of calibration algorithm. - cache_file (str): path to cache file. - use_cache (bool): flag which enables usage of pre-existing cache. - device (Device): device on which calibration data is copied to. - """ - - def __init__(self, **kwargs: Any): - pass - - def __new__(cls, *args: Any, **kwargs: Any) -> Self: - warnings.warn( - "Int8 PTQ Calibrator has been deprecated by TensorRT, please plan on porting to a NVIDIA Model Optimizer Toolkit based workflow. See: https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_ptq.html for more details", - DeprecationWarning, - stacklevel=2, - ) - dataloader = args[0] - algo_type = kwargs.get("algo_type", CalibrationAlgo.ENTROPY_CALIBRATION_2) - cache_file = kwargs.get("cache_file", None) - use_cache = kwargs.get("use_cache", False) - device = kwargs.get("device", torch.device("cuda:0")) - - if not isinstance(dataloader, torch.utils.data.DataLoader): - log( - Level.Error, - "Dataloader : {} is not a valid instance of torch.utils.data.DataLoader".format( - dataloader - ), - ) - - if cache_file: - if use_cache: - log( - Level.Debug, - "Using existing cache_file {} for calibration".format(cache_file), - ) - else: - log(Level.Debug, "Overwriting existing calibration cache file.") - else: - if use_cache: - log( - Level.Warning, - "Input cache file is None but use_cache is set to True in INT8 mode. 
Ignoring use_cache flag in this run.", - ) - - # Define attributes and member functions for the calibrator class - attribute_mapping = { - "data_loader": dataloader, - "current_batch_idx": 0, - "batch_size": dataloader.batch_size, - "dataset_iterator": iter(dataloader), - "cache_file": cache_file, - "device": device, - "use_cache": use_cache, - "get_batch_size": get_batch_size, - "get_batch": get_cache_mode_batch if use_cache else get_batch, - "read_calibration_cache": read_calibration_cache, - "write_calibration_cache": write_calibration_cache, - "__reduce__": __reduce__, # used when you deepcopy the DataLoaderCalibrator object - } - - # Using type metaclass to construct calibrator class based on algorithm type - if algo_type == CalibrationAlgo.ENTROPY_CALIBRATION: - calib_ec: Self = type( - "Int8EntropyCalibrator", (_C.IInt8EntropyCalibrator,), attribute_mapping - )() - return calib_ec - elif algo_type == CalibrationAlgo.ENTROPY_CALIBRATION_2: - calib_ec2: Self = type( - "Int8EntropyCalibrator2", - (_C.IInt8EntropyCalibrator2,), - attribute_mapping, - )() - return calib_ec2 - elif algo_type == CalibrationAlgo.LEGACY_CALIBRATION: - calib_lc: Self = type( - "Int8LegacyCalibrator", (_C.IInt8LegacyCalibrator,), attribute_mapping - )() - return calib_lc - elif algo_type == CalibrationAlgo.MINMAX_CALIBRATION: - calib_mmc: Self = type( - "Int8MinMaxCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping - )() - return calib_mmc - else: - raise ValueError( - "Invalid calibration algorithm type. Please select among ENTROPY_CALIBRATION, ENTROPY_CALIBRATION, LEGACY_CALIBRATION or MINMAX_CALIBRATION" - ) - - -class CacheCalibrator(object): - """ - Constructs a calibrator class in TensorRT which directly uses pre-existing cache file for calibration. - - Arguments: - cache_file (str): path to cache file. - algo_type (CalibrationAlgo): choice of calibration algorithm. - """ - - def __init__(self, **kwargs: Any): - pass - - def __new__(cls, *args: Any, **kwargs: Any) -> Self: - warnings.warn( - "Int8 PTQ Calibrator has been deprecated by TensorRT, please plan on porting to a NVIDIA Model Optimizer Toolkit based workflow. 
See: https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_ptq.html for more details", - DeprecationWarning, - stacklevel=2, - ) - cache_file = args[0] - algo_type = kwargs.get("algo_type", CalibrationAlgo.ENTROPY_CALIBRATION_2) - - if os.path.isfile(cache_file): - log( - Level.Debug, - "Using existing cache_file {} for calibration".format(cache_file), - ) - else: - log(Level.Error, "Invalid calibration cache file.") - - # Define attributes and member functions for the calibrator class - attribute_mapping = { - "use_cache": True, - "cache_file": cache_file, - "get_batch_size": get_batch_size, - "get_batch": get_cache_mode_batch, - "read_calibration_cache": read_calibration_cache, - "write_calibration_cache": write_calibration_cache, - } - # Using type metaclass to construct calibrator class based on algorithm type - if algo_type == CalibrationAlgo.ENTROPY_CALIBRATION: - calib_ec: Self = type( - "DataLoaderCalibrator", (_C.IInt8EntropyCalibrator,), attribute_mapping - )() - return calib_ec - elif algo_type == CalibrationAlgo.ENTROPY_CALIBRATION_2: - calib_ec2: Self = type( - "DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping - )() - return calib_ec2 - elif algo_type == CalibrationAlgo.LEGACY_CALIBRATION: - calib_lc: Self = type( - "DataLoaderCalibrator", (_C.IInt8LegacyCalibrator,), attribute_mapping - )() - return calib_lc - elif algo_type == CalibrationAlgo.MINMAX_CALIBRATION: - calib_mmc: Self = type( - "DataLoaderCalibrator", (_C.IInt8MinMaxCalibrator,), attribute_mapping - )() - return calib_mmc - else: - raise ValueError( - "Invalid calibration algorithm type. Please select among ENTROPY_CALIBRATION, ENTROPY_CALIBRATION, LEGACY_CALIBRATION or MINMAX_CALIBRATION" - ) diff --git a/tests/py/ts/BUILD b/tests/py/ts/BUILD index 98db68fc44..0da75f7b10 100644 --- a/tests/py/ts/BUILD +++ b/tests/py/ts/BUILD @@ -27,30 +27,6 @@ py_test( ], ) -py_test( - name = "test_ptq_dataloader_calibrator", - srcs = [ - "model_test_case.py", - "test_ptq_dataloader_calibrator.py", - ], - deps = [ - requirement("torchvision"), - ], -) - -# This test is not included in the main test suite by default. This test checks -# if trtorch can use pre-existing trt calibrators already implemented by users. -py_test( - name = "test_ptq_trt_calibrator", - srcs = [ - "model_test_case.py", - "test_ptq_trt_calibrator.py", - ], - deps = [ - requirement("torchvision"), - ], -) - # Following multi_gpu test is only targeted for multi-gpu configurations. It is not included in the test suite by default. 
py_test( name = "test_multi_gpu", @@ -84,23 +60,3 @@ py_test( requirement("torchvision"), ], ) - -py_test( - name = "test_ptq_to_backend", - srcs = [ - "model_test_case.py", - "test_ptq_to_backend.py", - ], - deps = [ - requirement("torchvision"), - ], -) - -test_suite( - name = "py_calibrator_tests", - tests = [ - ":test_ptq_dataloader_calibrator", - ":test_ptq_to_backend", - ":test_ptq_trt_calibrator", - ], -) diff --git a/tests/py/ts/ptq/test_ptq_dataloader_calibrator.py b/tests/py/ts/ptq/test_ptq_dataloader_calibrator.py deleted file mode 100644 index 9e3068ec3b..0000000000 --- a/tests/py/ts/ptq/test_ptq_dataloader_calibrator.py +++ /dev/null @@ -1,112 +0,0 @@ -import os -import unittest - -import torch -import torch.nn as nn -import torch_tensorrt as torchtrt -import torch_tensorrt.ts.ptq as PTQ -import torchvision -import torchvision.transforms as transforms -from torch.nn import functional as F -from torch_tensorrt.ts.logging import * - - -def find_repo_root(max_depth=10): - dir_path = os.path.dirname(os.path.realpath(__file__)) - for i in range(max_depth): - files = os.listdir(dir_path) - if "MODULE.bazel" in files: - return dir_path - else: - dir_path = os.path.dirname(dir_path) - - raise RuntimeError("Could not find repo root") - - -MODULE_DIR = find_repo_root() + "/tests/modules" - - -def compute_accuracy(testing_dataloader, model): - total = 0 - correct = 0 - loss = 0.0 - class_probs = [] - class_preds = [] - device = torch.device("cuda:0") - with torch.no_grad(): - idx = 0 - for data, labels in testing_dataloader: - data, labels = data.to(device), labels.to(device) - out = model(data) - preds = torch.max(out, 1)[1] - class_probs.append([F.softmax(i, dim=0) for i in out]) - class_preds.append(preds) - total += labels.size(0) - correct += (preds == labels).sum().item() - idx += 1 - - test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) - test_preds = torch.cat(class_preds) - return correct / total - - -@unittest.skipIf( - not torchtrt.ENABLED_FEATURES.torchscript_frontend, - "TorchScript Frontend is not available", -) -class TestAccuracy(unittest.TestCase): - def test_compile_script(self): - self.model = ( - torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda") - ) - self.input = torch.randn((1, 3, 32, 32)).to("cuda") - self.testing_dataset = torchvision.datasets.CIFAR10( - root="./data", - train=False, - download=True, - transform=transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), - ] - ), - ) - - self.testing_dataloader = torch.utils.data.DataLoader( - self.testing_dataset, batch_size=1, shuffle=False, num_workers=1 - ) - self.calibrator = PTQ.DataLoaderCalibrator( - self.testing_dataloader, - cache_file="./calibration.cache", - use_cache=False, - algo_type=PTQ.CalibrationAlgo.ENTROPY_CALIBRATION_2, - device=torch.device("cuda:0"), - ) - - compile_spec = { - "inputs": [torchtrt.Input([1, 3, 32, 32])], - "enabled_precisions": {torch.float, torch.int8}, - "calibrator": self.calibrator, - "truncate_long_and_double": True, - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - "dla_core": 0, - "allow_gpu_fallback": False, - }, - } - trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - - fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) - log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) - - int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod) - log(Level.Info, "[TRT INT8] Test Acc: 
{:.2f}%".format(100 * int8_test_acc)) - acc_diff = fp32_test_acc - int8_test_acc - self.assertTrue(abs(acc_diff) < 3) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/py/ts/ptq/test_ptq_to_backend.py b/tests/py/ts/ptq/test_ptq_to_backend.py deleted file mode 100644 index 015ce97126..0000000000 --- a/tests/py/ts/ptq/test_ptq_to_backend.py +++ /dev/null @@ -1,119 +0,0 @@ -import os -import unittest - -import torch -import torch.nn as nn -import torch_tensorrt as torchtrt -import torchvision -import torchvision.transforms as transforms -import torch_tensorrt.ts.ptq as PTQ -from torch.nn import functional as F -from torch_tensorrt.ts.logging import * - - -def find_repo_root(max_depth=10): - dir_path = os.path.dirname(os.path.realpath(__file__)) - for i in range(max_depth): - files = os.listdir(dir_path) - if "WORKSPACE" in files: - return dir_path - else: - dir_path = os.path.dirname(dir_path) - - raise RuntimeError("Could not find repo root") - - -MODULE_DIR = find_repo_root() + "/tests/modules" - - -def compute_accuracy(testing_dataloader, model): - total = 0 - correct = 0 - loss = 0.0 - class_probs = [] - class_preds = [] - device = torch.device("cuda:0") - with torch.no_grad(): - idx = 0 - for data, labels in testing_dataloader: - data, labels = data.to(device), labels.to(device) - out = model(data) - preds = torch.max(out, 1)[1] - class_probs.append([F.softmax(i, dim=0) for i in out]) - class_preds.append(preds) - total += labels.size(0) - correct += (preds == labels).sum().item() - idx += 1 - - test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) - test_preds = torch.cat(class_preds) - return correct / total - - -@unittest.skipIf( - not torchtrt.ENABLED_FEATURES.torchscript_frontend, - "TorchScript Frontend is not available", -) -class TestAccuracy(unittest.TestCase): - def test_compile_script(self): - self.model = ( - torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda") - ) - self.input = torch.randn((1, 3, 32, 32)).to("cuda") - self.testing_dataset = torchvision.datasets.CIFAR10( - root="./data", - train=False, - download=True, - transform=transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), - ] - ), - ) - - self.testing_dataloader = torch.utils.data.DataLoader( - self.testing_dataset, batch_size=1, shuffle=False, num_workers=1 - ) - self.calibrator = PTQ.DataLoaderCalibrator( - self.testing_dataloader, - cache_file="./calibration.cache", - use_cache=False, - algo_type=PTQ.CalibrationAlgo.ENTROPY_CALIBRATION_2, - device=torch.device("cuda:0"), - ) - - self.spec = { - "forward": torchtrt.ts.TensorRTCompileSpec( - **{ - "inputs": [torchtrt.Input([1, 3, 32, 32])], - "enabled_precisions": {torch.float, torch.half, torch.int8}, - "calibrator": self.calibrator, - "truncate_long_and_double": True, - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - "dla_core": 0, - "allow_gpu_fallback": False, - }, - } - ) - } - - fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) - log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) - - trt_mod = torch._C._jit_to_backend("tensorrt", self.model, self.spec) - int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod) - log( - Level.Info, - "[TRT INT8 Backend] Test Acc: {:.2f}%".format(100 * int8_test_acc), - ) - acc_diff = fp32_test_acc - int8_test_acc - self.assertTrue(abs(acc_diff) < 3) - - -if __name__ == "__main__": - unittest.main() diff --git 
a/tests/py/ts/ptq/test_ptq_trt_calibrator.py b/tests/py/ts/ptq/test_ptq_trt_calibrator.py deleted file mode 100644 index bef057081b..0000000000 --- a/tests/py/ts/ptq/test_ptq_trt_calibrator.py +++ /dev/null @@ -1,156 +0,0 @@ -import os -import unittest - -import tensorrt as trt -import torch -import torch.nn as nn -import torch_tensorrt as torchtrt -import torchvision -import torchvision.transforms as transforms -from torch.nn import functional as F -from torch_tensorrt.ts.logging import * - - -def find_repo_root(max_depth=10): - dir_path = os.path.dirname(os.path.realpath(__file__)) - for i in range(max_depth): - files = os.listdir(dir_path) - if "MODULE.bazel" in files: - return dir_path - else: - dir_path = os.path.dirname(dir_path) - - raise RuntimeError("Could not find repo root") - - -MODULE_DIR = find_repo_root() + "/tests/modules" - - -def compute_accuracy(testing_dataloader, model): - total = 0 - correct = 0 - loss = 0.0 - class_probs = [] - class_preds = [] - device = torch.device("cuda:0") - with torch.no_grad(): - idx = 0 - for data, labels in testing_dataloader: - data, labels = data.to(device), labels.to(device) - out = model(data) - preds = torch.max(out, 1)[1] - class_probs.append([F.softmax(i, dim=0) for i in out]) - class_preds.append(preds) - total += labels.size(0) - correct += (preds == labels).sum().item() - idx += 1 - - test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) - test_preds = torch.cat(class_preds) - return correct / total - - -@unittest.skipIf( - not torchtrt.ENABLED_FEATURES.torchscript_frontend, - "TorchScript Frontend is not available", -) -class TRTEntropyCalibrator(trt.IInt8EntropyCalibrator2): - def __init__(self, dataloader, **kwargs): - trt.IInt8EntropyCalibrator2.__init__(self) - - self.cache_file = kwargs.get("cache_file", None) - self.use_cache = kwargs.get("use_cache", False) - self.device = kwargs.get("device", torch.device("cuda:0")) - - self.dataloader = dataloader - self.dataset_iterator = iter(dataloader) - self.batch_size = dataloader.batch_size - self.current_batch_idx = 0 - - def get_batch_size(self): - return 1 - - # TensorRT passes along the names of the engine bindings to the get_batch function. - # You don't necessarily have to use them, but they can be useful to understand the order of - # the inputs. The bindings list is expected to have the same ordering as 'names'. - def get_batch(self, names): - if ( - self.current_batch_idx + self.batch_size - > self.dataloader.dataset.data.shape[0] - ): - return None - - batch = next(self.dataset_iterator) - self.current_batch_idx += self.batch_size - # Treat the first element as input and others as targets. - if isinstance(batch, list): - batch = batch[0].to(self.device) - return [batch.data_ptr()] - - def read_calibration_cache(self): - # If there is a cache, use it instead of calibrating again. Otherwise, implicitly return None. 
- if self.use_cache: - with open(self.cache_file, "rb") as f: - return f.read() - - def write_calibration_cache(self, cache): - if self.cache_file: - with open(self.cache_file, "wb") as f: - f.write(cache) - - -@unittest.skipIf( - not torchtrt.ENABLED_FEATURES.torchscript_frontend, - "TorchScript Frontend is not available", -) -class TestAccuracy(unittest.TestCase): - def test_compile_script(self): - self.model = ( - torch.jit.load(MODULE_DIR + "/trained_vgg16.jit.pt").eval().to("cuda") - ) - self.input = torch.randn((1, 3, 32, 32)).to("cuda") - self.testing_dataset = torchvision.datasets.CIFAR10( - root="./data", - train=False, - download=True, - transform=transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), - ] - ), - ) - - self.testing_dataloader = torch.utils.data.DataLoader( - self.testing_dataset, batch_size=1, shuffle=False, num_workers=1 - ) - # Test cases can assume using GPU id: 0 - self.calibrator = TRTEntropyCalibrator(self.testing_dataloader) - - fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) - log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) - - compile_spec = { - "inputs": [torchtrt.Input([1, 3, 32, 32])], - "enabled_precisions": {torch.float, torch.int8}, - "calibrator": self.calibrator, - "truncate_long_and_double": True, - "device": { - "device_type": torchtrt.DeviceType.GPU, - "gpu_id": 0, - "dla_core": 0, - "allow_gpu_fallback": False, - }, - } - - trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod) - log(Level.Info, "[TRT INT8] Test Acc: {:.2f}%".format(100 * int8_test_acc)) - acc_diff = fp32_test_acc - int8_test_acc - self.assertTrue(abs(acc_diff) < 3) - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/py/ts/qat/test_qat_trt_accuracy.py b/tests/py/ts/qat/test_qat_trt_accuracy.py deleted file mode 100644 index ade2cfc865..0000000000 --- a/tests/py/ts/qat/test_qat_trt_accuracy.py +++ /dev/null @@ -1,99 +0,0 @@ -import os -import sys -import unittest - -import torch -import torch.nn as nn -import torch_tensorrt as torchtrt -import torchvision -import torchvision.transforms as transforms -from torch.nn import functional as F -from torch_tensorrt.ts.logging import * - - -def find_repo_root(max_depth=10): - dir_path = os.path.dirname(os.path.realpath(__file__)) - for i in range(max_depth): - files = os.listdir(dir_path) - if "WORKSPACE" in files: - return dir_path - else: - dir_path = os.path.dirname(dir_path) - - raise RuntimeError("Could not find repo root") - - -MODULE_DIR = find_repo_root() + "/tests/modules" - -set_reportable_log_level(Level.Graph) - - -def compute_accuracy(testing_dataloader, model): - total = 0 - correct = 0 - loss = 0.0 - class_probs = [] - class_preds = [] - device = torch.device("cuda:0") - with torch.no_grad(): - idx = 0 - for data, labels in testing_dataloader: - data, labels = data.to(device), labels.to(device) - out = model(data) - preds = torch.max(out, 1)[1] - class_probs.append([F.softmax(i, dim=0) for i in out]) - class_preds.append(preds) - total += labels.size(0) - correct += (preds == labels).sum().item() - idx += 1 - - test_probs = torch.cat([torch.stack(batch) for batch in class_probs]) - test_preds = torch.cat(class_preds) - return correct / total - - -@unittest.skipIf( - not torchtrt.ENABLED_FEATURES.torchscript_frontend, - "TorchScript Frontend is not available", -) -class TestAccuracy(unittest.TestCase): - def 
test_compile_script(self): - self.model = ( - torch.jit.load(MODULE_DIR + "/trained_vgg16_qat.jit.pt").eval().to("cuda") - ) - self.testing_dataset = torchvision.datasets.CIFAR10( - root="./data", - train=False, - download=True, - transform=transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize( - (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010) - ), - ] - ), - ) - - self.testing_dataloader = torch.utils.data.DataLoader( - self.testing_dataset, batch_size=16, shuffle=False, num_workers=1 - ) - - fp32_test_acc = compute_accuracy(self.testing_dataloader, self.model) - log(Level.Info, "[Pyt FP32] Test Acc: {:.2f}%".format(100 * fp32_test_acc)) - - compile_spec = { - "inputs": [torchtrt.Input([16, 3, 32, 32])], - "enabled_precisions": {torch.int8}, - # "enabled_precision": {torch.float32, torch.int8}, - } - - trt_mod = torchtrt.ts.compile(self.model, **compile_spec) - int8_test_acc = compute_accuracy(self.testing_dataloader, trt_mod) - log(Level.Info, "[TRT QAT INT8] Test Acc: {:.2f}%".format(100 * int8_test_acc)) - acc_diff = fp32_test_acc - int8_test_acc - self.assertTrue(abs(acc_diff) < 3) - - -if __name__ == "__main__": - unittest.main()
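
For reference, a minimal usage sketch of the `unified_dtype_converter` helper this patch adds to `py/torch_tensorrt/dynamo/utils.py` (and which `ops_evaluators.py` and `impl/shape.py` now import from there instead of from `torch_tensorrt.fx`). The `Frameworks` member names `NUMPY`/`TORCH`/`TRT` are assumed to match the `fx.utils` enum being superseded:

```python
# Illustrative only: dtype translation via the helper added in this patch.
# Assumes torch_tensorrt is built with this change and that Frameworks exposes
# NUMPY / TORCH / TRT members, as in the fx.utils module it replaces.
import numpy as np
import tensorrt as trt
import torch

from torch_tensorrt.dynamo.utils import Frameworks, unified_dtype_converter

print(unified_dtype_converter(torch.float16, Frameworks.TRT))  # trt.float16
print(unified_dtype_converter(trt.int32, Frameworks.NUMPY))    # np.int32
print(unified_dtype_converter(np.int64, Frameworks.TORCH))     # torch.int64

try:
    # uint8 is not in the dispatch table, so the helper raises TypeError.
    unified_dtype_converter(np.uint8, Frameworks.TRT)
except TypeError as err:
    print(err)
```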
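
With the `DataLoaderCalibrator`/`CacheCalibrator` classes, the `calibrator` compile option, and the TorchScript PTQ/QAT accuracy tests removed above, INT8 compilation relies on networks that already carry Q/DQ nodes. A hedged sketch of the NVIDIA Model Optimizer workflow that the deprecation warnings point to (the linked `vgg16_ptq` tutorial); the model, config name, export context manager, and compile arguments are assumptions for illustration, not part of this patch:

```python
# Hypothetical modelopt-based INT8 flow replacing the removed calibrator path.
# Assumes nvidia-modelopt is installed; INT8_DEFAULT_CFG and export_torch_mode
# are taken from the tutorial referenced in the deprecation warnings above.
import modelopt.torch.quantization as mtq
import torch
import torch_tensorrt
from modelopt.torch.quantization.utils import export_torch_mode

# Placeholder model and calibration data standing in for a real network/dataset.
model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, kernel_size=3, padding=1),
    torch.nn.ReLU(),
    torch.nn.AdaptiveAvgPool2d(1),
    torch.nn.Flatten(),
    torch.nn.Linear(8, 10),
).eval().cuda()
calib_inputs = [torch.randn(1, 3, 32, 32).cuda() for _ in range(8)]

def calibrate_loop(m: torch.nn.Module) -> None:
    # Run representative data through the model so modelopt can record ranges.
    with torch.no_grad():
        for x in calib_inputs:
            m(x)

# Insert and calibrate Q/DQ nodes in PyTorch; no TensorRT calibrator object.
model = mtq.quantize(model, mtq.INT8_DEFAULT_CFG, forward_loop=calibrate_loop)

# Export the quantized module and compile with INT8 enabled; the Q/DQ nodes
# carry the scales into the TensorRT engine.
with torch.no_grad(), export_torch_mode():
    exp_program = torch.export.export(model, (calib_inputs[0],))
    trt_model = torch_tensorrt.dynamo.compile(
        exp_program,
        inputs=[calib_inputs[0]],
        enabled_precisions={torch.int8},
    )
```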