diff --git a/WORKSPACE b/WORKSPACE
index f2b8755115..5253f52500 100644
--- a/WORKSPACE
+++ b/WORKSPACE
@@ -86,10 +86,10 @@ http_archive(
 http_archive(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    sha256 = "3177435024ff4aa5a6dba8c1ed06ab11cc0e1bf3bb712dfa63a43422f41313f3",
-    strip_prefix = "TensorRT-8.0.3.4",
+    sha256 = "da130296ac6636437ff8465812eb55dbab0621747d82dc4fe9b9376f00d214af",
+    strip_prefix = "TensorRT-8.2.2.1",
     urls = [
-        "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.0.3/tars/tensorrt-8.0.3.4.linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz",
+        "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.2.2.1/tars/tensorrt-8.2.2.1.linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz",
     ],
 )
diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp
index 5dde1f9d9a..0d7b7084d9 100644
--- a/core/conversion/conversionctx/ConversionCtx.cpp
+++ b/core/conversion/conversionctx/ConversionCtx.cpp
@@ -18,19 +18,12 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
      << "\n    Truncate Long and Double: " << s.truncate_long_and_double \
      << "\n    Make Refittable Engine: " << s.refit \
      << "\n    Debuggable Engine: " << s.debug \
-     << "\n    Strict Types: " << s.strict_types \
      << "\n    GPU ID: " << s.device.gpu_id \
      << "\n    Allow GPU Fallback (if running on DLA): " << s.device.allow_gpu_fallback \
      << "\n    Min Timing Iterations: " << s.num_min_timing_iters \
      << "\n    Avg Timing Iterations: " << s.num_avg_timing_iters \
      << "\n    Max Workspace Size: " << s.workspace_size;
-  if (s.max_batch_size != 0) {
-    os << "\n    Max Batch Size: " << s.max_batch_size;
-  } else {
-    os << "\n    Max Batch Size: Not set";
-  }
-
   os << "\n    Device Type: " << s.device.device_type \
      << "\n    GPU ID: " << s.device.gpu_id;
   if (s.device.device_type == nvinfer1::DeviceType::kDLA) {
@@ -107,18 +100,10 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
     cfg->setFlag(nvinfer1::BuilderFlag::kDEBUG);
   }
 
-  if (settings.strict_types) {
-    cfg->setFlag(nvinfer1::BuilderFlag::kSTRICT_TYPES);
-  }
-
   if (settings.device.allow_gpu_fallback) {
     cfg->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
   }
 
-  if (settings.max_batch_size != 0) {
-    builder->setMaxBatchSize(settings.max_batch_size);
-  }
-
   cfg->setMinTimingIterations(settings.num_min_timing_iters);
   cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
   cfg->setMaxWorkspaceSize(settings.workspace_size);
diff --git a/core/conversion/conversionctx/ConversionCtx.h b/core/conversion/conversionctx/ConversionCtx.h
index c14b792f6b..11a06b0f20 100644
--- a/core/conversion/conversionctx/ConversionCtx.h
+++ b/core/conversion/conversionctx/ConversionCtx.h
@@ -29,7 +29,6 @@ struct BuilderSettings {
   bool disable_tf32 = false;
   bool refit = false;
   bool debug = false;
-  bool strict_types = false;
   bool truncate_long_and_double = false;
   Device device;
   nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
@@ -37,7 +36,6 @@ struct BuilderSettings {
   uint64_t num_min_timing_iters = 2;
   uint64_t num_avg_timing_iters = 1;
   uint64_t workspace_size = 0;
-  uint64_t max_batch_size = 0;
 
   BuilderSettings() = default;
   BuilderSettings(const BuilderSettings& other) = default;
diff --git a/cpp/bin/torchtrtc/README.md b/cpp/bin/torchtrtc/README.md
index ed82ec590d..033b36052c 100644
--- a/cpp/bin/torchtrtc/README.md
+++ b/cpp/bin/torchtrtc/README.md
@@ -31,8 +31,6 @@ OPTIONS:
       --i, --info                       Dumps info messages generated during
                                         compilation onto the console
       --build-debuggable-engine         Creates a debuggable engine
-      --use-strict-types                Restrict operating type to only use set
-                                        operation precision
       --allow-gpu-fallback              (Only used when targeting DLA
                                         (device-type)) Lets engine run layers on
                                         GPU if they are not supported on DLA
@@ -90,8 +88,6 @@ OPTIONS:
                                         used to select kernels
       --workspace-size=[workspace_size] Maximum size of workspace given to
                                         TensorRT
-      --max-batch-size=[max_batch_size] Maximum batch size (must be >= 1 to be
-                                        set, 0 means not set)
   -t[threshold], --threshold=[threshold]
                                         Maximum acceptable numerical deviation
                                         from standard torchscript output
diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h
index c13bc0c488..ace05d33f5 100644
--- a/cpp/include/torch_tensorrt/torch_tensorrt.h
+++ b/cpp/include/torch_tensorrt/torch_tensorrt.h
@@ -626,12 +626,6 @@ struct TORCHTRT_API CompileSpec {
    */
   bool truncate_long_and_double = false;
 
-  /**
-   * Restrict operating type to only the lowest enabled operation precision
-   * (enabled_precisions)
-   */
-  bool strict_types = false;
-
   /**
    * Target Device
    */
@@ -656,11 +650,6 @@
    */
   uint64_t workspace_size = 0;
 
-  /**
-   * Maximum batch size (must be >= 1 to be set, 0 means not set)
-   */
-  uint64_t max_batch_size = 0;
-
   /**
    * Calibration dataloaders for each input for post training quantizatiom
    */
diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp
index 3fc286fa8e..3058b23ce0 100644
--- a/cpp/src/compile_spec.cpp
+++ b/cpp/src/compile_spec.cpp
@@ -40,9 +40,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) {
   internal.convert_info.engine_settings.refit = external.refit;
   internal.convert_info.engine_settings.debug = external.debug;
   internal.convert_info.engine_settings.truncate_long_and_double = external.truncate_long_and_double;
-  internal.convert_info.engine_settings.strict_types = external.strict_types;
   internal.convert_info.engine_settings.device.allow_gpu_fallback = external.device.allow_gpu_fallback;
-  internal.convert_info.engine_settings.max_batch_size = external.max_batch_size;
 
   TORCHTRT_CHECK(
       !(external.require_full_compilation && (external.torch_executed_ops.size() > 0)),
diff --git a/docsrc/tutorials/ptq.rst b/docsrc/tutorials/ptq.rst
index 0d87eeb9f3..7b7617289b 100644
--- a/docsrc/tutorials/ptq.rst
+++ b/docsrc/tutorials/ptq.rst
@@ -194,7 +194,6 @@ to use ``CacheCalibrator`` to use in INT8 mode.
         "inputs": [torch_tensorrt.Input([1, 3, 32, 32])],
         "enabled_precisions": {torch.float, torch.half, torch.int8},
         "calibrator": calibrator,
-        "max_batch_size": 32,
     }
 
     trt_mod = torch_tensorrt.compile(model, compile_settings)
diff --git a/docsrc/tutorials/torchtrtc.rst b/docsrc/tutorials/torchtrtc.rst
index 9d870d9ed6..f1741f373a 100644
--- a/docsrc/tutorials/torchtrtc.rst
+++ b/docsrc/tutorials/torchtrtc.rst
@@ -34,8 +34,6 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would run any other module.
       --i, --info                       Dumps info messages generated during
                                         compilation onto the console
       --build-debuggable-engine         Creates a debuggable engine
-      --use-strict-types                Restrict operating type to only use set
-                                        operation precision
       --allow-gpu-fallback              (Only used when targeting DLA
                                         (device-type)) Lets engine run layers on
                                         GPU if they are not supported on DLA
@@ -93,8 +91,6 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would run any other module.
                                         used to select kernels
       --workspace-size=[workspace_size] Maximum size of workspace given to
                                         TensorRT
-      --max-batch-size=[max_batch_size] Maximum batch size (must be >= 1 to be
-                                        set, 0 means not set)
   -t[threshold], --threshold=[threshold]
                                         Maximum acceptable numerical deviation
                                         from standard torchscript output
diff --git a/docsrc/tutorials/use_from_pytorch.rst b/docsrc/tutorials/use_from_pytorch.rst
index c2e3c99b39..0c616e9414 100644
--- a/docsrc/tutorials/use_from_pytorch.rst
+++ b/docsrc/tutorials/use_from_pytorch.rst
@@ -38,7 +38,6 @@ at the documentation for the Torch-TensorRT ``TensorRTCompileSpec`` API.
             "enabled_precisions": {torch.float, torch.half},
             "refit": False,
             "debug": False,
-            "strict_types": False,
             "device": {
                 "device_type": torch_tensorrt.DeviceType.GPU,
                 "gpu_id": 0,
@@ -48,7 +47,6 @@ at the documentation for the Torch-TensorRT ``TensorRTCompileSpec`` API.
             "capability": torch_tensorrt.EngineCapability.default,
             "num_min_timing_iters": 2,
             "num_avg_timing_iters": 1,
-            "max_batch_size": 0,
         })
     }
diff --git a/examples/int8/ptq/main.cpp b/examples/int8/ptq/main.cpp
index fed2b2337a..da8328c61b 100644
--- a/examples/int8/ptq/main.cpp
+++ b/examples/int8/ptq/main.cpp
@@ -49,8 +49,6 @@ torch::jit::Module compile_int8_model(const std::string& data_dir, torch::jit::Module& mod) {
   compile_spec.enabled_precisions.insert(torch::kI8);
   /// Use the TensorRT Entropy Calibrator
   compile_spec.ptq_calibrator = calibrator;
-  /// Set max batch size for the engine
-  compile_spec.max_batch_size = 32;
   /// Set a larger workspace
   compile_spec.workspace_size = 1 << 28;
 
diff --git a/examples/int8/qat/main.cpp b/examples/int8/qat/main.cpp
index b1bec3e6a2..0d83beb22f 100644
--- a/examples/int8/qat/main.cpp
+++ b/examples/int8/qat/main.cpp
@@ -33,8 +33,6 @@ torch::jit::Module compile_int8_qat_model(const std::string& data_dir, torch::jit::Module& mod) {
   auto compile_spec = torch_tensorrt::ts::CompileSpec(inputs);
   /// Set operating precision to INT8
   compile_spec.enabled_precisions.insert(torch::kI8);
-  /// Set max batch size for the engine
-  compile_spec.max_batch_size = 32;
   /// Set a larger workspace
   compile_spec.workspace_size = 1 << 28;
 
@@ -126,4 +124,3 @@ int main(int argc, const char* argv[]) {
   print_avg_std_dev("TRT quantized model", trt_runtimes, dims[0][0]);
   trt_mod.save("/tmp/qat_vgg16.trt.ts");
 }
-
diff --git a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
index a0b9251712..53b9fc2cdb 100644
--- a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
+++ b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
@@ -59,14 +59,12 @@ void RegisterTRTCompileSpec() {
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, disable_tf32);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, refit);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, debug);
-  ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, strict_types);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, capability);
   ADD_FIELD_GET_SET_REGISTRATION(
       TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, num_min_timing_iters);
   ADD_FIELD_GET_SET_REGISTRATION(
       TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, num_avg_timing_iters);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, workspace_size);
-  ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, max_batch_size);
   ADD_FIELD_GET_SET_REGISTRATION(
       TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, truncate_long_and_double);
 }
diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp
index e705ca4e5f..2361abd457 100644
--- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp
+++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp
@@ -209,7 +209,6 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
   info.convert_info.engine_settings.disable_tf32 = disable_tf32;
   info.convert_info.engine_settings.refit = refit;
   info.convert_info.engine_settings.debug = debug;
-  info.convert_info.engine_settings.strict_types = strict_types;
   info.convert_info.engine_settings.device.device_type = toTRTDeviceType(device.device_type);
   info.convert_info.engine_settings.device.gpu_id = device.gpu_id;
   info.convert_info.engine_settings.device.dla_core = device.dla_core;
@@ -227,8 +226,6 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
   info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters;
   TORCHTRT_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater");
   info.convert_info.engine_settings.workspace_size = workspace_size;
-  TORCHTRT_CHECK(max_batch_size >= 0, "max_batch_size must be 0 or greater");
-  info.convert_info.engine_settings.max_batch_size = max_batch_size;
 
   return info;
 }
@@ -249,13 +246,11 @@ std::string CompileSpec::stringify() {
     ss << "    \"Sparsity\": " << sparse_weights << std::endl;
     ss << "    \"Refit\": " << refit << std::endl;
     ss << "    \"Debug\": " << debug << std::endl;
-    ss << "    \"Strict Types\": " << strict_types << std::endl;
     ss << "    \"Device\": " << device.to_str() << std::endl;
     ss << "    \"Engine Capability\": " << to_str(capability) << std::endl;
     ss << "    \"Num Min Timing Iters\": " << num_min_timing_iters << std::endl;
     ss << "    \"Num Avg Timing Iters\": " << num_avg_timing_iters << std::endl;
     ss << "    \"Workspace Size\": " << workspace_size << std::endl;
-    ss << "    \"Max Batch Size\": " << max_batch_size << std::endl;
     ss << "    \"Truncate long and double\": " << truncate_long_and_double << std::endl;
     ss << "    \"Torch Fallback\": " << torch_fallback.to_str();
     ss << "}";
diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.h b/py/torch_tensorrt/csrc/tensorrt_classes.h
index e5d783e049..0c80641005 100644
--- a/py/torch_tensorrt/csrc/tensorrt_classes.h
+++ b/py/torch_tensorrt/csrc/tensorrt_classes.h
@@ -146,13 +146,11 @@ struct CompileSpec : torch::CustomClassHolder {
   ADD_FIELD_GET_SET(sparse_weights, bool);
   ADD_FIELD_GET_SET(refit, bool);
   ADD_FIELD_GET_SET(debug, bool);
-  ADD_FIELD_GET_SET(strict_types, bool);
   ADD_ENUM_GET_SET(capability, EngineCapability, static_cast<int64_t>(EngineCapability::kSAFE_DLA));
   ADD_FIELD_GET_SET(num_min_timing_iters, int64_t);
   ADD_FIELD_GET_SET(num_avg_timing_iters, int64_t);
   ADD_FIELD_GET_SET(workspace_size, int64_t);
   ADD_FIELD_GET_SET(truncate_long_and_double, bool);
-  ADD_FIELD_GET_SET(max_batch_size, int64_t);
   ADD_FIELD_GET_SET(device, Device);
   ADD_FIELD_GET_SET(torch_fallback, TorchFallback);
   ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*);
@@ -164,7 +162,6 @@ struct CompileSpec : torch::CustomClassHolder {
   bool disable_tf32 = false;
   bool refit = false;
   bool debug = false;
-  bool strict_types = false;
   bool truncate_long_and_double = false;
   Device device;
   TorchFallback torch_fallback;
@@ -172,7 +169,6 @@ struct CompileSpec : torch::CustomClassHolder {
   int64_t num_min_timing_iters = 2;
   int64_t num_avg_timing_iters = 1;
   int64_t workspace_size = 0;
-  int64_t max_batch_size = 0;
 };
 
 } // namespace pyapi
diff --git a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
index 481aeb1f7f..2b9c0a1db0 100644
--- a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
+++ b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp
@@ -298,13 +298,11 @@ PYBIND11_MODULE(_C, m) {
       .def_readwrite("sparse_weights", &CompileSpec::sparse_weights)
       .def_readwrite("disable_tf32", &CompileSpec::disable_tf32)
       .def_readwrite("debug", &CompileSpec::debug)
-      .def_readwrite("strict_types", &CompileSpec::strict_types)
      .def_readwrite("device", &CompileSpec::device)
       .def_readwrite("capability", &CompileSpec::capability)
       .def_readwrite("num_min_timing_iters", &CompileSpec::num_min_timing_iters)
       .def_readwrite("num_avg_timing_iters", &CompileSpec::num_avg_timing_iters)
       .def_readwrite("workspace_size", &CompileSpec::workspace_size)
-      .def_readwrite("max_batch_size", &CompileSpec::max_batch_size)
       .def_readwrite("torch_fallback", &CompileSpec::torch_fallback)
       .def_readwrite("truncate_long_and_double", &CompileSpec::truncate_long_and_double);
diff --git a/py/torch_tensorrt/ts/_compile_spec.py b/py/torch_tensorrt/ts/_compile_spec.py
index 995e2cf5ae..e406096677 100644
--- a/py/torch_tensorrt/ts/_compile_spec.py
+++ b/py/torch_tensorrt/ts/_compile_spec.py
@@ -196,10 +196,6 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> _ts_C.CompileSpec:
         assert isinstance(compile_spec["debug"], bool)
         info.debug = compile_spec["debug"]
 
-    if "strict_types" in compile_spec:
-        assert isinstance(compile_spec["strict_types"], bool)
-        info.strict_types = compile_spec["strict_types"]
-
     if "device" in compile_spec:
         info.device = _parse_device(compile_spec["device"])
 
@@ -219,10 +215,6 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> _ts_C.CompileSpec:
         assert type(compile_spec["workspace_size"]) is int
         info.workspace_size = compile_spec["workspace_size"]
 
-    if "max_batch_size" in compile_spec:
-        assert type(compile_spec["max_batch_size"]) is int
-        info.max_batch_size = compile_spec["max_batch_size"]
-
     if "truncate_long_and_double" in compile_spec:
         assert type(compile_spec["truncate_long_and_double"]) is bool
         info.truncate_long_and_double = compile_spec["truncate_long_and_double"]
@@ -240,12 +232,10 @@ def TensorRTCompileSpec(inputs=[],
                         enabled_precisions=set(),
                         refit=False,
                         debug=False,
-                        strict_types=False,
                         capability=_enums.EngineCapability.default,
                         num_min_timing_iters=2,
                         num_avg_timing_iters=1,
                         workspace_size=0,
-                        max_batch_size=0,
                         truncate_long_and_double=False,
                         calibrator=None) -> torch.classes.tensorrt.CompileSpec:
     """Utility to create a formated spec dictionary for using the PyTorch TensorRT backend
@@ -276,12 +266,10 @@ def TensorRTCompileSpec(inputs=[],
        enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels
        refit (bool): Enable refitting
        debug (bool): Enable debuggable engine
-       strict_types (bool): Kernels should strictly run in a particular operating precision. Enabled precision should only have one type in the set
        capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels
        num_min_timing_iters (int): Number of minimization timing iterations used to select kernels
        num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
        workspace_size (int): Maximum size of workspace given to TensorRT
-       max_batch_size (int): Maximum batch size (must be >= 1 to be set, 0 means not set)
        truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32
        calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration
 
@@ -298,12 +286,10 @@ def TensorRTCompileSpec(inputs=[],
        "enabled_precisions": enabled_precisions,  # Enabling FP16 kernels
        "refit": refit,  # enable refit
        "debug": debug,  # enable debuggable engine
-       "strict_types": strict_types,  # kernels should strictly run in operating precision
        "capability": capability,  # Restrict kernel selection to safe gpu kernels or safe dla kernels
        "num_min_timing_iters": num_min_timing_iters,  # Number of minimization timing iterations used to select kernels
        "num_avg_timing_iters": num_avg_timing_iters,  # Number of averaging timing iterations used to select kernels
        "workspace_size": workspace_size,  # Maximum size of workspace given to TensorRT
-       "max_batch_size": max_batch_size,  # Maximum batch size (must be >= 1 to be set, 0 means not set)
        "calibrator": calibrator,
        "truncate_long_and_double": truncate_long_and_double
     }
@@ -348,12 +334,10 @@ def TensorRTCompileSpec(inputs=[],
     backend_spec._set_refit(parsed_spec.refit)
     backend_spec._set_debug(parsed_spec.debug)
     backend_spec._set_refit(parsed_spec.refit)
-    backend_spec._set_strict_types(parsed_spec.strict_types)
     backend_spec._set_capability(int(parsed_spec.capability))
     backend_spec._set_num_min_timing_iters(parsed_spec.num_min_timing_iters)
     backend_spec._set_num_avg_timing_iters(parsed_spec.num_avg_timing_iters)
     backend_spec._set_workspace_size(parsed_spec.workspace_size)
-    backend_spec._set_max_batch_size(parsed_spec.max_batch_size)
     backend_spec._set_truncate_long_and_double(parsed_spec.truncate_long_and_double)
     backend_spec._set_ptq_calibrator(parsed_spec._get_calibrator_handle())
diff --git a/py/torch_tensorrt/ts/_compiler.py b/py/torch_tensorrt/ts/_compiler.py
index 22de8d2c60..f5c14a4f46 100644
--- a/py/torch_tensorrt/ts/_compiler.py
+++ b/py/torch_tensorrt/ts/_compiler.py
@@ -22,7 +22,6 @@ def compile(module: torch.jit.ScriptModule,
             num_min_timing_iters=2,
             num_avg_timing_iters=1,
             workspace_size=0,
-            max_batch_size=0,
             calibrator=None,
             truncate_long_and_double=False,
             require_full_compilation=False,
@@ -71,7 +70,6 @@ def compile(module: torch.jit.ScriptModule,
         num_min_timing_iters (int): Number of minimization timing iterations used to select kernels
         num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
         workspace_size (int): Maximum size of workspace given to TensorRT
-        max_batch_size (int): Maximum batch size (must be >= 1 to be set, 0 means not set)
         truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32
         calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration
         require_full_compilation (bool): Require modules to be compiled end to end or return an error as opposed to returning a hybrid graph where operations that cannot be run in TensorRT are run in PyTorch
@@ -105,7 +103,6 @@ def compile(module: torch.jit.ScriptModule,
         "num_min_timing_iters": num_min_timing_iters,  # Number of minimization timing iterations used to select kernels
         "num_avg_timing_iters": num_avg_timing_iters,  # Number of averaging timing iterations used to select kernels
         "workspace_size": workspace_size,  # Maximum size of workspace given to TensorRT
-        "max_batch_size": max_batch_size,  # Maximum batch size (must be >= 1 to be set, 0 means not set)
         "calibrator": calibrator,
         "truncate_long_and_double": truncate_long_and_double,
         "torch_fallback": {
@@ -135,7 +132,6 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule,
                                  num_min_timing_iters=2,
                                  num_avg_timing_iters=1,
                                  workspace_size=0,
-                                 max_batch_size=0,
                                  truncate_long_and_double=False,
                                  calibrator=None) -> str:
     """Convert a TorchScript module method to a serialized TensorRT engine
@@ -178,7 +174,6 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule,
         num_min_timing_iters (int): Number of minimization timing iterations used to select kernels
         num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
         workspace_size (int): Maximum size of workspace given to TensorRT
-        max_batch_size (int): Maximum batch size (must be >= 1 to be set, 0 means not set)
         truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32
         calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration
 
@@ -203,7 +198,6 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule,
         "num_min_timing_iters": num_min_timing_iters,  # Number of minimization timing iterations used to select kernels
         "num_avg_timing_iters": num_avg_timing_iters,  # Number of averaging timing iterations used to select kernels
         "workspace_size": workspace_size,  # Maximum size of workspace given to TensorRT
-        "max_batch_size": max_batch_size,  # Maximum batch size (must be >= 1 to be set, 0 means not set)
         "calibrator": calibrator,
         "truncate_long_and_double": truncate_long_and_double
     }
diff --git a/tests/accuracy/test_dla_int8_accuracy.cpp b/tests/accuracy/test_dla_int8_accuracy.cpp
index df371e642d..93ebc545d7 100644
--- a/tests/accuracy/test_dla_int8_accuracy.cpp
+++ b/tests/accuracy/test_dla_int8_accuracy.cpp
@@ -25,8 +25,6 @@ TEST_P(AccuracyTests, DLAINT8AccuracyIsClose) {
   compile_spec.enabled_precisions = {torch::kF16, torch::kI8};
   // Use the TensorRT Entropy Calibrator
   compile_spec.ptq_calibrator = calibrator;
-  // Set max batch size for the engine
-  compile_spec.max_batch_size = 32;
   // Set a larger workspace
   compile_spec.workspace_size = 1 << 28;
 
diff --git a/tests/accuracy/test_int8_accuracy.cpp b/tests/accuracy/test_int8_accuracy.cpp
index b32c1b0b16..8f41fb615a 100644
--- a/tests/accuracy/test_int8_accuracy.cpp
+++ b/tests/accuracy/test_int8_accuracy.cpp
@@ -27,8 +27,6 @@ TEST_P(AccuracyTests, INT8AccuracyIsClose) {
   compile_spec.enabled_precisions.insert(torch::kI8);
   // Use the TensorRT Entropy Calibrator
   compile_spec.ptq_calibrator = calibrator;
-  // Set max batch size for the engine
-  compile_spec.max_batch_size = 32;
   // Set a larger workspace
   compile_spec.workspace_size = 1 << 28;
 
diff --git a/tests/cpp/test_runtime_thread_safety.cpp b/tests/cpp/test_runtime_thread_safety.cpp
index 792463043a..17873b8b41 100644
--- a/tests/cpp/test_runtime_thread_safety.cpp
+++ b/tests/cpp/test_runtime_thread_safety.cpp
@@ -53,7 +53,6 @@ TEST(CppAPITests, RuntimeThreadSafety) {
 
   // FP32 execution
   compile_settings.enabled_precisions = {torch::kFloat};
-  compile_settings.strict_types = true;
 
   auto trt_mod = torch_tensorrt::ts::compile(mod, compile_settings);
   std::cout << "torch_tensorrt::ts::compile" << std::endl;
diff --git a/tests/py/test_to_backend_api.py b/tests/py/test_to_backend_api.py
index 6aef956440..11c411ff56 100644
--- a/tests/py/test_to_backend_api.py
+++ b/tests/py/test_to_backend_api.py
@@ -19,7 +19,6 @@ def setUp(self):
             "enabled_precisions": {torch.float},
             "refit": False,
             "debug": False,
-            "strict_types": False,
             "device": {
                 "device_type": torchtrt.DeviceType.GPU,
                 "gpu_id": 0,
@@ -29,7 +28,6 @@ def setUp(self):
             "capability": torchtrt.EngineCapability.default,
             "num_min_timing_iters": 2,
             "num_avg_timing_iters": 1,
-            "max_batch_size": 0,
             "disable_tf32": False,
         })
     }
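Migration note (commentary, not part of the patch above): `strict_types` and `max_batch_size` are removed rather than ported because TensorRT 8.2 deprecates `BuilderFlag::kSTRICT_TYPES` in favor of its newer precision-constraint flags, and Torch-TensorRT builds explicit-batch engines, where `setMaxBatchSize` has no effect. For callers, the batch capacity formerly capped by `max_batch_size` is expressed through the input shape range instead. The sketch below shows one way to migrate a compile call; it is illustrative only, and `TinyNet` plus the shape values are placeholders, not code from this change.

```python
import torch
import torch_tensorrt

# Placeholder module, used only to make the example self-contained.
class TinyNet(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x)

model = torch.jit.script(TinyNet().eval().cuda())

# Before this change: compile_spec["max_batch_size"] = 32 capped the engine's batch.
# After: the supported batch range lives in the Input spec's shape bounds.
trt_mod = torch_tensorrt.compile(
    model,
    inputs=[
        torch_tensorrt.Input(
            min_shape=[1, 3, 32, 32],
            opt_shape=[16, 3, 32, 32],
            max_shape=[32, 3, 32, 32],  # batch-dimension bound replaces max_batch_size
            dtype=torch.float,
        )
    ],
    enabled_precisions={torch.float},
)
```

There is no one-to-one replacement for `strict_types`; restricting `enabled_precisions` to a single dtype is the closest equivalent under the new precision-constraint model.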