
Commit f91b24e

toothache authored and apsonawane committed
[TensorRT] Fix DDS output bug during engine update (#26272)
### Description

Fix a bug in the TRT Execution Provider where a DDS (data-dependent shape) output tensor was not bound after an engine update.

### Motivation and Context

The `dds_output_allocator_map` is not cleared on engine update, so an output still listed there is mis-recognized as an already-known DDS output, and the output allocation is never bound to the new execution context. Script to reproduce the issue:

```python
# create an onnx model with:
#   inputs: data -> NonZero(data) -> GatherND -> output
# then run the model with onnxruntime
def create_model():
    import onnx
    from onnx import helper, TensorProto

    input = helper.make_tensor_value_info("data", TensorProto.FLOAT, ["d1", "d2"])
    output = helper.make_tensor_value_info("output", TensorProto.FLOAT, ["nzr"])

    nonzeros_node = helper.make_node("NonZero", ["data"], ["nonzeros"], "nonzeros_node")
    transpose_node = helper.make_node(
        "Transpose", ["nonzeros"], ["nonzeros_t"], "transpose_node"
    )
    gathernd_node = helper.make_node(
        "GatherND", ["data", "nonzeros_t"], ["output"], "gathernd_node"
    )

    value_info = [
        helper.make_tensor_value_info("nonzeros", TensorProto.INT64, [2, "nzr"]),
        helper.make_tensor_value_info("nonzeros_t", TensorProto.INT64, ["nzr", 2]),
    ]

    graph = helper.make_graph(
        [nonzeros_node, transpose_node, gathernd_node],
        "test_graph",
        [input],
        [output],
        value_info=value_info,
    )
    model = helper.make_model(graph)
    onnx.save(model, "model_dds.onnx")


def run_model():
    import numpy as np
    import onnxruntime as ort

    sess = ort.InferenceSession(
        "model_dds.onnx",
        providers=["TensorrtExecutionProvider", "CUDAExecutionProvider", "CPUExecutionProvider"],
    )

    print("Running with data shape (3,4)")
    data = np.random.randn(3, 4).astype(np.float32)
    sess.run(None, {"data": data})

    print("Running with data shape (5,6)")
    data = np.random.randn(5, 6).astype(np.float32)
    sess.run(None, {"data": data})


create_model()
run_model()
```

Before the change:

> IExecutionContext::enqueueV3: Error Code 3: API Usage Error (Parameter check failed, condition: mContext.profileObliviousBindings.at(profileObliviousIndex) || getPtrOrNull(mOutputAllocators, profileObliviousIndex). Neither address or allocator is set for output tensor scores. Call setOutputTensorAddress, setTensorAddress or setOutputAllocator before enqueue/execute.)
> ...
> Status Message: TensorRT EP execution context enqueue failed.
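For background on why an unbound DDS output is fatal: TensorRT requires every output of an execution context to be bound before `enqueueV3`, either to a fixed device address via `setTensorAddress` when the shape is known up front, or to an `nvinfer1::IOutputAllocator` via `setOutputAllocator` when the shape is data-dependent, as with `NonZero`. The sketch below illustrates that contract; it assumes the TensorRT 8.5+ API, and the `DDSAllocator` class with its CUDA buffer handling is illustrative, not the EP's actual allocator:

```cpp
#include <NvInfer.h>
#include <cuda_runtime_api.h>

// Illustrative DDS output allocator. During enqueueV3(), TensorRT calls
// reallocateOutput() once the real output byte size is known, then
// notifyShape() with the final dimensions.
class DDSAllocator : public nvinfer1::IOutputAllocator {
 public:
  ~DDSAllocator() override { cudaFree(buffer_); }

  void* reallocateOutput(char const* /*tensorName*/, void* /*currentMemory*/,
                         uint64_t size, uint64_t /*alignment*/) noexcept override {
    cudaFree(buffer_);  // release any previous allocation
    if (cudaMalloc(&buffer_, size) != cudaSuccess) {
      buffer_ = nullptr;  // returning nullptr tells TensorRT the allocation failed
    }
    return buffer_;
  }

  void notifyShape(char const* /*tensorName*/, nvinfer1::Dims const& dims) noexcept override {
    shape_ = dims;  // final runtime shape of the DDS output
  }

  void* buffer_{nullptr};
  nvinfer1::Dims shape_{};
};

// Every output must be bound one way or the other before enqueueV3():
//   context->setTensorAddress("output", device_ptr);    // shape known up front
//   context->setOutputAllocator("output", &allocator);  // data-dependent shape
// If neither happens on the current context, enqueueV3() fails with exactly the
// "Neither address or allocator is set for output tensor ..." error quoted above.
```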
1 parent 584e35b commit f91b24e

File tree

4 files changed: +104 −0 lines changed


onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc

Lines changed: 4 additions & 0 deletions
```diff
@@ -3976,6 +3976,10 @@ Status TensorrtExecutionProvider::CreateNodeComputeInfoFromGraph(const GraphView
       // Destroy the IExecutionContext objects before destroying an engine object, otherwise it will lead to undefined behavior.
       trt_state->context->reset();
       trt_state->engine->reset();
+
+      // Clear dds output allocator map since the engine and context will be recreated.
+      dds_output_allocator_map.clear();
+
       auto trt_config = std::unique_ptr<nvinfer1::IBuilderConfig>(trt_builder->createBuilderConfig());
       if (max_workspace_size_ > 0) {
         trt_config->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, max_workspace_size_);
```
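Why a one-line `clear()` is sufficient: when binding outputs, the EP skips allocator registration for any output name already present in `dds_output_allocator_map`, on the assumption that the allocator is already attached to the current context. After an engine update the `IExecutionContext` is recreated with no allocators attached, so a stale map entry leaves the output unbound. Below is a simplified paraphrase of that guard, not the verbatim ORT source; it reuses the illustrative `DDSAllocator` from the sketch above:

```cpp
#include <memory>
#include <string>
#include <unordered_map>

#include <NvInfer.h>

// DDSAllocator: the illustrative IOutputAllocator subclass sketched earlier.
using DDSOutputAllocatorMap =
    std::unordered_map<std::string, std::unique_ptr<DDSAllocator>>;

// Paraphrased binding step for one DDS output (not the verbatim EP code).
void BindDDSOutput(nvinfer1::IExecutionContext& context,
                   DDSOutputAllocatorMap& dds_output_allocator_map,
                   const std::string& output_name) {
  if (dds_output_allocator_map.find(output_name) == dds_output_allocator_map.end()) {
    auto allocator = std::make_unique<DDSAllocator>();
    context.setOutputAllocator(output_name.c_str(), allocator.get());
    dds_output_allocator_map.emplace(output_name, std::move(allocator));
  }
  // Before this fix: after trt_state->engine/context were reset and rebuilt,
  // output_name was still in the map, the branch above was skipped, and the
  // brand-new context reached enqueueV3() with no allocator bound for the
  // output -- the API Usage Error quoted in the description. Clearing the map
  // at engine-update time forces re-registration on the new context.
}
```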

onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc

Lines changed: 46 additions & 0 deletions
```diff
@@ -713,6 +713,52 @@ TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) {
   ASSERT_TRUE(status.IsOK());
 }
 
+TEST(TensorrtExecutionProviderTest, DDSOutputTest) {
+  PathString model_name = ORT_TSTR("testdata/ort_github_issue_26272_dds.onnx");
+  SessionOptions so;
+  so.session_logid = "TensorrtExecutionProviderRunWithDDSOutput";
+  RunOptions run_options;
+  run_options.run_tag = so.session_logid;
+  InferenceSession session_object{so, GetEnvironment()};
+  auto cuda_provider = DefaultCudaExecutionProvider();
+  auto cuda_allocator = cuda_provider->CreatePreferredAllocators()[1];
+  std::vector<int64_t> dims_op_x = {3, 4};
+  std::vector<float> values_op_x(12, 0.f);  // 12 = 3 * 4
+  OrtValue ml_value_x;
+  CreateMLValue<float>(cuda_allocator, dims_op_x, values_op_x, &ml_value_x);
+
+  NameMLValMap feeds;
+  feeds.insert(std::make_pair("data", ml_value_x));
+
+  // prepare outputs
+  std::vector<std::string> output_names;
+  output_names.push_back("output");
+  std::vector<OrtValue> fetches;
+
+  OrtTensorRTProviderOptionsV2 params;
+  std::unique_ptr<IExecutionProvider> execution_provider = TensorrtExecutionProviderWithOptions(&params);
+  EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK());
+  auto status = session_object.Load(model_name);
+  ASSERT_TRUE(status.IsOK());
+  status = session_object.Initialize();
+  ASSERT_TRUE(status.IsOK());
+
+  // First pass run
+  status = session_object.Run(run_options, feeds, output_names, &fetches);
+  ASSERT_TRUE(status.IsOK());
+
+  // Second pass run with new shape
+  dims_op_x = {6, 4};
+  values_op_x.resize(24, 0.f);  // 24 = 6 * 4
+  CreateMLValue<float>(cuda_allocator, dims_op_x, values_op_x, &ml_value_x);
+  feeds.clear();
+
+  feeds.insert(std::make_pair("data", ml_value_x));
+
+  status = session_object.Run(run_options, feeds, output_names, &fetches);
+  ASSERT_TRUE(status.IsOK());
+}
+
 TEST_P(TensorrtExecutionProviderCacheTest, Run) {
   // GetParam() returns the parameter of following format:
   // ##cache type##_##input shape type##
```
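A note on the shapes the test uses: the first `Run` with `{3, 4}` builds an engine covering that input shape; the second `Run` with `{6, 4}` falls outside that range, which (with default profile settings) sends the EP down the engine-update path patched above (`context->reset()` / `engine->reset()`). Without the new `dds_output_allocator_map.clear()`, that second run is exactly the case that hit the unbound-output error.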
(New file: model-generation script for the test model.)

Lines changed: 26 additions & 0 deletions

```python
import onnx
from onnx import TensorProto, helper

# Create a simple ONNX model with DDS output
input = helper.make_tensor_value_info("data", TensorProto.FLOAT, ["d1", "d2"])
output = helper.make_tensor_value_info("output", TensorProto.FLOAT, ["nzr"])

nonzeros_node = helper.make_node("NonZero", ["data"], ["nonzeros"], "nonzeros_node")
transpose_node = helper.make_node("Transpose", ["nonzeros"], ["nonzeros_t"], "transpose_node")
gathernd_node = helper.make_node("GatherND", ["data", "nonzeros_t"], ["output"], "gathernd_node")

value_info = [
    helper.make_tensor_value_info("nonzeros", TensorProto.INT64, [2, "nzr"]),
    helper.make_tensor_value_info("nonzeros_t", TensorProto.INT64, ["nzr", 2]),
]

graph = helper.make_graph(
    [nonzeros_node, transpose_node, gathernd_node],
    "test_graph",
    [input],
    [output],
    value_info=value_info,
)

model = helper.make_model(graph)
onnx.save(model, "ort_github_issue_26272_dds.onnx")
```
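Running this script regenerates `ort_github_issue_26272_dds.onnx`, the checked-in model below that `DDSOutputTest` loads from `testdata/`. The number of indices `NonZero` emits depends on the input values, which is what makes `output` a DDS tensor.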

onnxruntime/test/testdata/ort_github_issue_26272_dds.onnx

Lines changed: 28 additions & 0 deletions
(Binary file: the serialized ONNX model produced by the generation script above; contents not human-readable.)
