refactor: removing the strict_types and max_batch_size apis #782

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged · 1 commit · Feb 11, 2022

WORKSPACE (3 additions, 3 deletions)

@@ -86,10 +86,10 @@ http_archive(
 http_archive(
     name = "tensorrt",
     build_file = "@//third_party/tensorrt/archive:BUILD",
-    sha256 = "3177435024ff4aa5a6dba8c1ed06ab11cc0e1bf3bb712dfa63a43422f41313f3",
-    strip_prefix = "TensorRT-8.0.3.4",
+    sha256 = "da130296ac6636437ff8465812eb55dbab0621747d82dc4fe9b9376f00d214af",
+    strip_prefix = "TensorRT-8.2.2.1",
     urls = [
-        "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.0.3/tars/tensorrt-8.0.3.4.linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz",
+        "https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.2.2.1/tars/tensorrt-8.2.2.1.linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz",
     ],
 )

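Side note: if you mirror the new tarball, the sha256 in this rule can be checked locally before updating WORKSPACE. A short sketch (the local filename is assumed from the URL above):

    import hashlib

    def sha256_of(path: str) -> str:
        """Stream the file so large tarballs don't need to fit in memory."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        return digest.hexdigest()

    # Should print da130296ac6636437ff8465812eb55dbab0621747d82dc4fe9b9376f00d214af
    print(sha256_of("tensorrt-8.2.2.1.linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz"))
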
core/conversion/conversionctx/ConversionCtx.cpp (0 additions, 15 deletions)

@@ -18,19 +18,12 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
        << "\n    Truncate Long and Double: " << s.truncate_long_and_double \
        << "\n    Make Refittable Engine: " << s.refit \
        << "\n    Debuggable Engine: " << s.debug \
-       << "\n    Strict Types: " << s.strict_types \
        << "\n    GPU ID: " << s.device.gpu_id \
        << "\n    Allow GPU Fallback (if running on DLA): " << s.device.allow_gpu_fallback \
        << "\n    Min Timing Iterations: " << s.num_min_timing_iters \
        << "\n    Avg Timing Iterations: " << s.num_avg_timing_iters \
        << "\n    Max Workspace Size: " << s.workspace_size;
-
-  if (s.max_batch_size != 0) {
-    os << "\n    Max Batch Size: " << s.max_batch_size;
-  } else {
-    os << "\n    Max Batch Size: Not set";
-  }

   os << "\n    Device Type: " << s.device.device_type \
      << "\n    GPU ID: " << s.device.gpu_id;
   if (s.device.device_type == nvinfer1::DeviceType::kDLA) {

@@ -107,18 +100,10 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
     cfg->setFlag(nvinfer1::BuilderFlag::kDEBUG);
   }

-  if (settings.strict_types) {
-    cfg->setFlag(nvinfer1::BuilderFlag::kSTRICT_TYPES);
-  }
-
   if (settings.device.allow_gpu_fallback) {
     cfg->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
   }

-  if (settings.max_batch_size != 0) {
-    builder->setMaxBatchSize(settings.max_batch_size);
-  }
-
   cfg->setMinTimingIterations(settings.num_min_timing_iters);
   cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
   cfg->setMaxWorkspaceSize(settings.workspace_size);

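Background, not stated in the diff itself: TensorRT 8.2, which the WORKSPACE change above moves to, deprecates the kSTRICT_TYPES builder flag in favor of per-layer precision constraints, and setMaxBatchSize only affects implicit-batch networks, which Torch-TensorRT does not build. A minimal sketch of the replacement mechanism using the TensorRT Python API (assuming TensorRT >= 8.2; network construction is elided):

    import tensorrt as trt

    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)
    # Explicit-batch networks carry the batch dimension in the input shape,
    # so builder-level max_batch_size is unnecessary.
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    config = builder.create_builder_config()

    # Replacement for the deprecated kSTRICT_TYPES flag: set per-layer
    # precisions on the network, then require the builder to honor them.
    # Use PREFER_PRECISION_CONSTRAINTS to treat them as hints instead.
    config.set_flag(trt.BuilderFlag.OBEY_PRECISION_CONSTRAINTS)
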
core/conversion/conversionctx/ConversionCtx.h (0 additions, 2 deletions)

@@ -29,15 +29,13 @@ struct BuilderSettings {
   bool disable_tf32 = false;
   bool refit = false;
   bool debug = false;
-  bool strict_types = false;
   bool truncate_long_and_double = false;
   Device device;
   nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
   nvinfer1::IInt8Calibrator* calibrator = nullptr;
   uint64_t num_min_timing_iters = 2;
   uint64_t num_avg_timing_iters = 1;
   uint64_t workspace_size = 0;
-  uint64_t max_batch_size = 0;

   BuilderSettings() = default;
   BuilderSettings(const BuilderSettings& other) = default;

cpp/bin/torchtrtc/README.md (0 additions, 4 deletions)

@@ -31,8 +31,6 @@ OPTIONS:
     --i, --info                        Dumps info messages generated during
                                        compilation onto the console
     --build-debuggable-engine          Creates a debuggable engine
-    --use-strict-types                 Restrict operating type to only use set
-                                       operation precision
     --allow-gpu-fallback               (Only used when targeting DLA
                                        (device-type)) Lets engine run layers on
                                        GPU if they are not supported on DLA

@@ -90,8 +88,6 @@
                                        used to select kernels
     --workspace-size=[workspace_size]  Maximum size of workspace given to
                                        TensorRT
-    --max-batch-size=[max_batch_size]  Maximum batch size (must be >= 1 to be
-                                       set, 0 means not set)
     -t[threshold],
     --threshold=[threshold]            Maximum acceptable numerical deviation
                                        from standard torchscript output

cpp/include/torch_tensorrt/torch_tensorrt.h (0 additions, 11 deletions)

@@ -626,12 +626,6 @@ struct TORCHTRT_API CompileSpec {
    */
   bool truncate_long_and_double = false;

-  /**
-   * Restrict operating type to only the lowest enabled operation precision
-   * (enabled_precisions)
-   */
-  bool strict_types = false;
-
   /**
    * Target Device
    */

@@ -656,11 +650,6 @@
    */
   uint64_t workspace_size = 0;

-  /**
-   * Maximum batch size (must be >= 1 to be set, 0 means not set)
-   */
-  uint64_t max_batch_size = 0;
-
   /**
    * Calibration dataloaders for each input for post training quantization
    */

cpp/src/compile_spec.cpp (0 additions, 2 deletions)

@@ -40,9 +40,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) {
   internal.convert_info.engine_settings.refit = external.refit;
   internal.convert_info.engine_settings.debug = external.debug;
   internal.convert_info.engine_settings.truncate_long_and_double = external.truncate_long_and_double;
-  internal.convert_info.engine_settings.strict_types = external.strict_types;
   internal.convert_info.engine_settings.device.allow_gpu_fallback = external.device.allow_gpu_fallback;
-  internal.convert_info.engine_settings.max_batch_size = external.max_batch_size;

   TORCHTRT_CHECK(
       !(external.require_full_compilation && (external.torch_executed_ops.size() > 0)),

docsrc/tutorials/ptq.rst (0 additions, 1 deletion)

@@ -194,7 +194,6 @@ to use ``CacheCalibrator`` to use in INT8 mode.
         "inputs": [torch_tensorrt.Input([1, 3, 32, 32])],
         "enabled_precisions": {torch.float, torch.half, torch.int8},
         "calibrator": calibrator,
-        "max_batch_size": 32,
     }

     trt_mod = torch_tensorrt.compile(model, compile_settings)

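After this change, the PTQ tutorial's settings reduce to the following (``model`` and ``calibrator`` are defined earlier in that tutorial):

    compile_settings = {
        "inputs": [torch_tensorrt.Input([1, 3, 32, 32])],
        "enabled_precisions": {torch.float, torch.half, torch.int8},
        "calibrator": calibrator,
    }

    trt_mod = torch_tensorrt.compile(model, compile_settings)
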
docsrc/tutorials/torchtrtc.rst (0 additions, 4 deletions)

@@ -34,8 +34,6 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
     --i, --info                        Dumps info messages generated during
                                        compilation onto the console
     --build-debuggable-engine          Creates a debuggable engine
-    --use-strict-types                 Restrict operating type to only use set
-                                       operation precision
     --allow-gpu-fallback               (Only used when targeting DLA
                                        (device-type)) Lets engine run layers on
                                        GPU if they are not supported on DLA

@@ -93,8 +91,6 @@
                                        used to select kernels
     --workspace-size=[workspace_size]  Maximum size of workspace given to
                                        TensorRT
-    --max-batch-size=[max_batch_size]  Maximum batch size (must be >= 1 to be
-                                       set, 0 means not set)
     -t[threshold],
     --threshold=[threshold]            Maximum acceptable numerical deviation
                                        from standard torchscript output

docsrc/tutorials/use_from_pytorch.rst (0 additions, 2 deletions)

@@ -38,7 +38,6 @@ at the documentation for the Torch-TensorRT ``TensorRTCompileSpec`` API.
             "enabled_precisions": {torch.float, torch.half},
             "refit": False,
             "debug": False,
-            "strict_types": False,
             "device": {
                 "device_type": torch_tensorrt.DeviceType.GPU,
                 "gpu_id": 0,

@@ -48,7 +47,6 @@
             "capability": torch_tensorrt.EngineCapability.default,
             "num_min_timing_iters": 2,
             "num_avg_timing_iters": 1,
-            "max_batch_size": 0,
         })
     }

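Pieced together from the context lines above, the tutorial's spec after this PR looks roughly like the following sketch; the wrapper and any keys not visible in the diff (marked "assumed") are filled in for illustration only:

    spec = {
        "forward": torch_tensorrt.ts.TensorRTCompileSpec({
            "inputs": [torch_tensorrt.Input([1, 3, 300, 300])],  # assumed
            "enabled_precisions": {torch.float, torch.half},
            "refit": False,
            "debug": False,
            "device": {
                "device_type": torch_tensorrt.DeviceType.GPU,
                "gpu_id": 0,
                "dla_core": 0,               # assumed
                "allow_gpu_fallback": True,  # assumed
            },
            "capability": torch_tensorrt.EngineCapability.default,
            "num_min_timing_iters": 2,
            "num_avg_timing_iters": 1,
        })
    }
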
examples/int8/ptq/main.cpp (0 additions, 2 deletions)

@@ -49,8 +49,6 @@ torch::jit::Module compile_int8_model(const std::string& data_dir, torch::jit::M
   compile_spec.enabled_precisions.insert(torch::kI8);
   /// Use the TensorRT Entropy Calibrator
   compile_spec.ptq_calibrator = calibrator;
-  /// Set max batch size for the engine
-  compile_spec.max_batch_size = 32;
   /// Set a larger workspace
   compile_spec.workspace_size = 1 << 28;

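Both INT8 examples keep the ``workspace_size = 1 << 28`` line. For reference, a quick check of what that shift evaluates to:

    workspace_size = 1 << 28                    # shift 1 left by 28 bits, i.e. 2**28
    assert workspace_size == 268435456          # bytes
    assert workspace_size == 256 * 1024 * 1024  # 256 MiB
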
examples/int8/qat/main.cpp (0 additions, 3 deletions)

@@ -33,8 +33,6 @@ torch::jit::Module compile_int8_qat_model(const std::string& data_dir, torch::ji
   auto compile_spec = torch_tensorrt::ts::CompileSpec(inputs);
   /// Set operating precision to INT8
   compile_spec.enabled_precisions.insert(torch::kI8);
-  /// Set max batch size for the engine
-  compile_spec.max_batch_size = 32;
   /// Set a larger workspace
   compile_spec.workspace_size = 1 << 28;

@@ -126,4 +124,3 @@ int main(int argc, const char* argv[]) {
   print_avg_std_dev("TRT quantized model", trt_runtimes, dims[0][0]);
   trt_mod.save("/tmp/qat_vgg16.trt.ts");
 }
-

py/torch_tensorrt/csrc/register_tensorrt_classes.cpp (0 additions, 2 deletions)

@@ -59,14 +59,12 @@ void RegisterTRTCompileSpec() {
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, disable_tf32);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, refit);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, debug);
-  ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, strict_types);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, capability);
   ADD_FIELD_GET_SET_REGISTRATION(
       TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, num_min_timing_iters);
   ADD_FIELD_GET_SET_REGISTRATION(
       TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, num_avg_timing_iters);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, workspace_size);
-  ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, max_batch_size);
   ADD_FIELD_GET_SET_REGISTRATION(
       TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, truncate_long_and_double);
 }

py/torch_tensorrt/csrc/tensorrt_classes.cpp (0 additions, 5 deletions)

@@ -209,7 +209,6 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
   info.convert_info.engine_settings.disable_tf32 = disable_tf32;
   info.convert_info.engine_settings.refit = refit;
   info.convert_info.engine_settings.debug = debug;
-  info.convert_info.engine_settings.strict_types = strict_types;
   info.convert_info.engine_settings.device.device_type = toTRTDeviceType(device.device_type);
   info.convert_info.engine_settings.device.gpu_id = device.gpu_id;
   info.convert_info.engine_settings.device.dla_core = device.dla_core;

@@ -227,8 +226,6 @@
   info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters;
   TORCHTRT_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater");
   info.convert_info.engine_settings.workspace_size = workspace_size;
-  TORCHTRT_CHECK(max_batch_size >= 0, "max_batch_size must be 0 or greater");
-  info.convert_info.engine_settings.max_batch_size = max_batch_size;
   return info;
 }

@@ -249,13 +246,11 @@ std::string CompileSpec::stringify() {
   ss << "    \"Sparsity\": " << sparse_weights << std::endl;
   ss << "    \"Refit\": " << refit << std::endl;
   ss << "    \"Debug\": " << debug << std::endl;
-  ss << "    \"Strict Types\": " << strict_types << std::endl;
   ss << "    \"Device\": " << device.to_str() << std::endl;
   ss << "    \"Engine Capability\": " << to_str(capability) << std::endl;
   ss << "    \"Num Min Timing Iters\": " << num_min_timing_iters << std::endl;
   ss << "    \"Num Avg Timing Iters\": " << num_avg_timing_iters << std::endl;
   ss << "    \"Workspace Size\": " << workspace_size << std::endl;
-  ss << "    \"Max Batch Size\": " << max_batch_size << std::endl;
   ss << "    \"Truncate long and double\": " << truncate_long_and_double << std::endl;
   ss << "    \"Torch Fallback\": " << torch_fallback.to_str();
   ss << "}";

py/torch_tensorrt/csrc/tensorrt_classes.h (0 additions, 4 deletions)

@@ -146,13 +146,11 @@ struct CompileSpec : torch::CustomClassHolder {
   ADD_FIELD_GET_SET(sparse_weights, bool);
   ADD_FIELD_GET_SET(refit, bool);
   ADD_FIELD_GET_SET(debug, bool);
-  ADD_FIELD_GET_SET(strict_types, bool);
   ADD_ENUM_GET_SET(capability, EngineCapability, static_cast<int64_t>(EngineCapability::kSAFE_DLA));
   ADD_FIELD_GET_SET(num_min_timing_iters, int64_t);
   ADD_FIELD_GET_SET(num_avg_timing_iters, int64_t);
   ADD_FIELD_GET_SET(workspace_size, int64_t);
   ADD_FIELD_GET_SET(truncate_long_and_double, bool);
-  ADD_FIELD_GET_SET(max_batch_size, int64_t);
   ADD_FIELD_GET_SET(device, Device);
   ADD_FIELD_GET_SET(torch_fallback, TorchFallback);
   ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*);

@@ -164,15 +162,13 @@
   bool disable_tf32 = false;
   bool refit = false;
   bool debug = false;
-  bool strict_types = false;
   bool truncate_long_and_double = false;
   Device device;
   TorchFallback torch_fallback;
   EngineCapability capability = EngineCapability::kDEFAULT;
   int64_t num_min_timing_iters = 2;
   int64_t num_avg_timing_iters = 1;
   int64_t workspace_size = 0;
-  int64_t max_batch_size = 0;
 };

 } // namespace pyapi

py/torch_tensorrt/csrc/torch_tensorrt_py.cpp (0 additions, 2 deletions)

@@ -298,13 +298,11 @@ PYBIND11_MODULE(_C, m) {
       .def_readwrite("sparse_weights", &CompileSpec::sparse_weights)
       .def_readwrite("disable_tf32", &CompileSpec::disable_tf32)
       .def_readwrite("debug", &CompileSpec::debug)
-      .def_readwrite("strict_types", &CompileSpec::strict_types)
       .def_readwrite("device", &CompileSpec::device)
      .def_readwrite("capability", &CompileSpec::capability)
       .def_readwrite("num_min_timing_iters", &CompileSpec::num_min_timing_iters)
       .def_readwrite("num_avg_timing_iters", &CompileSpec::num_avg_timing_iters)
       .def_readwrite("workspace_size", &CompileSpec::workspace_size)
-      .def_readwrite("max_batch_size", &CompileSpec::max_batch_size)
       .def_readwrite("torch_fallback", &CompileSpec::torch_fallback)
       .def_readwrite("truncate_long_and_double", &CompileSpec::truncate_long_and_double);

py/torch_tensorrt/ts/_compile_spec.py (0 additions, 16 deletions)

@@ -196,10 +196,6 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> _ts_C.CompileSpec:
         assert isinstance(compile_spec["debug"], bool)
         info.debug = compile_spec["debug"]

-    if "strict_types" in compile_spec:
-        assert isinstance(compile_spec["strict_types"], bool)
-        info.strict_types = compile_spec["strict_types"]
-
     if "device" in compile_spec:
         info.device = _parse_device(compile_spec["device"])

@@ -219,10 +215,6 @@
         assert type(compile_spec["workspace_size"]) is int
         info.workspace_size = compile_spec["workspace_size"]

-    if "max_batch_size" in compile_spec:
-        assert type(compile_spec["max_batch_size"]) is int
-        info.max_batch_size = compile_spec["max_batch_size"]
-
     if "truncate_long_and_double" in compile_spec:
         assert type(compile_spec["truncate_long_and_double"]) is bool
         info.truncate_long_and_double = compile_spec["truncate_long_and_double"]

@@ -240,12 +232,10 @@ def TensorRTCompileSpec(inputs=[],
                         enabled_precisions=set(),
                         refit=False,
                         debug=False,
-                        strict_types=False,
                         capability=_enums.EngineCapability.default,
                         num_min_timing_iters=2,
                         num_avg_timing_iters=1,
                         workspace_size=0,
-                        max_batch_size=0,
                         truncate_long_and_double=False,
                         calibrator=None) -> torch.classes.tensorrt.CompileSpec:
     """Utility to create a formatted spec dictionary for using the PyTorch TensorRT backend

@@ -276,12 +266,10 @@
         enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels
         refit (bool): Enable refitting
         debug (bool): Enable debuggable engine
-        strict_types (bool): Kernels should strictly run in a particular operating precision. Enabled precision should only have one type in the set
         capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels
         num_min_timing_iters (int): Number of minimization timing iterations used to select kernels
         num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
         workspace_size (int): Maximum size of workspace given to TensorRT
-        max_batch_size (int): Maximum batch size (must be >= 1 to be set, 0 means not set)
         truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32
         calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration

@@ -298,12 +286,10 @@
             "enabled_precisions": enabled_precisions,  # Enabling FP16 kernels
             "refit": refit,  # enable refit
             "debug": debug,  # enable debuggable engine
-            "strict_types": strict_types,  # kernels should strictly run in operating precision
             "capability": capability,  # Restrict kernel selection to safe gpu kernels or safe dla kernels
             "num_min_timing_iters": num_min_timing_iters,  # Number of minimization timing iterations used to select kernels
             "num_avg_timing_iters": num_avg_timing_iters,  # Number of averaging timing iterations used to select kernels
             "workspace_size": workspace_size,  # Maximum size of workspace given to TensorRT
-            "max_batch_size": max_batch_size,  # Maximum batch size (must be >= 1 to be set, 0 means not set)
             "calibrator": calibrator,
             "truncate_long_and_double": truncate_long_and_double
         }

@@ -348,12 +334,10 @@
     backend_spec._set_refit(parsed_spec.refit)
     backend_spec._set_debug(parsed_spec.debug)
     backend_spec._set_refit(parsed_spec.refit)
-    backend_spec._set_strict_types(parsed_spec.strict_types)
     backend_spec._set_capability(int(parsed_spec.capability))
     backend_spec._set_num_min_timing_iters(parsed_spec.num_min_timing_iters)
     backend_spec._set_num_avg_timing_iters(parsed_spec.num_avg_timing_iters)
     backend_spec._set_workspace_size(parsed_spec.workspace_size)
-    backend_spec._set_max_batch_size(parsed_spec.max_batch_size)
     backend_spec._set_truncate_long_and_double(parsed_spec.truncate_long_and_double)
     backend_spec._set_ptq_calibrator(parsed_spec._get_calibrator_handle())

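With the parser changes above, a post-refactor call to ``TensorRTCompileSpec`` simply omits the two removed keyword arguments. A minimal sketch using only parameters visible in the signature above (the input shape and precision set are illustrative, not from the diff):

    import torch
    import torch_tensorrt

    spec = torch_tensorrt.ts.TensorRTCompileSpec(
        inputs=[torch_tensorrt.Input([1, 3, 224, 224])],  # illustrative shape
        enabled_precisions={torch.float, torch.half},
        refit=False,
        debug=False,
        num_min_timing_iters=2,
        num_avg_timing_iters=1,
        workspace_size=1 << 28,
    )
    # Passing strict_types= or max_batch_size= now raises a TypeError.
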