Commit 5972370

Remove onnx from valgrind suppressions, and fix leaks that were found.
Extend tests to make a more precise check of the allocator usage.
1 parent dd19238 commit 5972370

3 files changed: +45 −21 lines changed

opt/redis_valgrind.sup

Lines changed: 0 additions & 7 deletions
@@ -12,13 +12,6 @@
    obj:*/libtensorflow_framework.so.*
 }
 
-{
-   ignore_unversioned_libs
-   Memcheck:Leak
-   ...
-   obj:*/libonnxruntime.so.*
-}
-
 {
    ignore_unversioned_libs
    Memcheck:Leak
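With this block gone, Memcheck no longer discards leak reports whose call stacks pass through libonnxruntime.so. Running the suite without the suppression is what surfaced the leaks fixed in src/backends/onnxruntime.c below: a per-call OrtMemoryInfo, an unreleased OrtSessionOptions, and the input/output name strings.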

src/backends/onnxruntime.c

Lines changed: 15 additions & 4 deletions
@@ -17,16 +17,19 @@
 OrtEnv *env = NULL;
 
 // For model that run on GPU, onnx will not use the custom allocator (redis allocator), but
-// the onnx allocator for GPU. But for the auxilery allocations of the input and output names,
-// we will use the custom global allocator for models that run on GPU as well
+// the onnx allocator for GPU. But for the auxiliary allocations of the input and output names,
+// we will use the custom global allocator for models that run on GPU as well.
+OrtMemoryInfo *mem_info = NULL;
 OrtAllocator *global_allocator = NULL;
 unsigned long long OnnxMemory = 0;
 unsigned long long OnnxMemoryAccessCounter = 0;
 
 const OrtMemoryInfo *AllocatorInfo(const OrtAllocator *allocator) {
     (void)allocator;
     const OrtApi *ort = OrtGetApiBase()->GetApi(1);
-    OrtMemoryInfo *mem_info;
+    if (mem_info != NULL) {
+        return mem_info;
+    }
     if (ort->CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault, &mem_info) != NULL) {
         return NULL;
     }
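The first leak: AllocatorInfo() is the Info callback of the custom OrtAllocator, and ONNX Runtime may invoke it on every tracked allocation. The old code created a fresh OrtMemoryInfo per call and never released it; promoting mem_info to a file-scope global that is created once and then reused plugs the leak. A minimal self-contained sketch of this counting-allocator-with-cached-Info pattern, assuming the public onnxruntime C API; MyAlloc/MyFree/MyInfo and counting_allocator are illustrative names, not RedisAI's identifiers:

/* Sketch only. Initialize once with:
 *     ort = OrtGetApiBase()->GetApi(ORT_API_VERSION);             */
#include <stdlib.h>
#include "onnxruntime_c_api.h"

static const OrtApi *ort = NULL;
static OrtMemoryInfo *cached_mem_info = NULL;  /* created lazily, then reused */
static unsigned long long access_counter = 0;  /* every Alloc/Free bumps this */

static void *ORT_API_CALL MyAlloc(OrtAllocator *self, size_t size) {
    (void)self;
    access_counter++;
    return malloc(size);  /* RedisAI routes this through the Redis allocator */
}

static void ORT_API_CALL MyFree(OrtAllocator *self, void *p) {
    (void)self;
    access_counter++;
    free(p);
}

static const OrtMemoryInfo *ORT_API_CALL MyInfo(const OrtAllocator *self) {
    (void)self;
    if (cached_mem_info != NULL) {
        return cached_mem_info;  /* the fix: reuse the single instance */
    }
    /* The API call returns a non-NULL OrtStatus* on failure. */
    if (ort->CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault,
                                 &cached_mem_info) != NULL) {
        return NULL;
    }
    return cached_mem_info;
}

static OrtAllocator counting_allocator = {ORT_API_VERSION, MyAlloc, MyFree, MyInfo};

RedisAI's real callbacks additionally account bytes per block so that OnnxMemory can return to zero when everything is freed; the sketch only counts accesses, the other metric the tests below assert on.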
@@ -369,6 +372,7 @@ RAI_Model *RAI_ModelCreateORT(RAI_Backend backend, const char *devicestr, RAI_Mo
 
     ONNX_VALIDATE_STATUS(
         ort->CreateSessionFromArray(env, modeldef, modellen, session_options, &session))
+    ort->ReleaseSessionOptions(session_options);
 
     size_t n_input_nodes;
     ONNX_VALIDATE_STATUS(ort->SessionGetInputCount(session, &n_input_nodes))
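The second leak: CreateSessionOptions hands ownership of the options object to the caller, and the session does not need it once created, so the OrtSessionOptions can be released as soon as CreateSessionFromArray returns. Before the added line, one options object leaked per AI.MODELSET.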
@@ -550,7 +554,14 @@ int RAI_ModelRunORT(RAI_ModelRunCtx **mctxs, RAI_Error *error) {
     OrtRunOptions *run_options = NULL;
     ONNX_VALIDATE_STATUS(ort->Run(session, run_options, input_names,
                                   (const OrtValue *const *)inputs, n_input_nodes, output_names,
-                                  n_output_nodes, outputs))
+                                  n_output_nodes, outputs));
+
+    for (uint32_t i = 0; i < ninputs; i++) {
+        status = ort->AllocatorFree(global_allocator, (void *)input_names[i]);
+    }
+    for (uint32_t i = 0; i < noutputs; i++) {
+        status = ort->AllocatorFree(global_allocator, (void *)output_names[i]);
+    }
 
     for (size_t i = 0; i < n_output_nodes; i++) {
         if (nbatches > 1) {
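The third leak, and the bulk of the commit: each entry of input_names and output_names is a C string that ONNX Runtime allocates through the caller-supplied allocator in SessionGetInputName / SessionGetOutputName, so it must be handed back to that same allocator once the run no longer needs it. Previously every AI.MODELRUN leaked one string per input and per output. A hedged fragment of the pairing, reusing the file's ONNX_VALIDATE_STATUS macro; the index and variable names are illustrative:

char *name = NULL;
/* ONNX Runtime allocates the name through the allocator we pass in... */
ONNX_VALIDATE_STATUS(ort->SessionGetInputName(session, 0, global_allocator, &name))
/* ...the string must stay alive while ort->Run() uses it... */
OrtStatus *status = ort->AllocatorFree(global_allocator, name); /* ...then be returned */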

tests/flow/tests_onnx.py

Lines changed: 30 additions & 10 deletions
@@ -478,16 +478,28 @@ def test_onnx_use_custom_allocator(env):
                         for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
 
     # Expect using at least 130+63+(size of an address) + 2*(2+63+(size of an address)) bytes.
-    env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) > 334)
+    model_allocation_bytes_used = int(ai_memory_config["ai_onnxruntime_memory"])
+    env.assertTrue(model_allocation_bytes_used > 334)
     env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 3)
+    con.execute_command('AI.TENSORSET', 'a_mul{1}', 'FLOAT', 3, 2, 'VALUES', 1.0, 2.0, 3.0, 4.0, 5.0, 6.0)
 
-    # Expect using the allocator free function when releasing the model and input and output names.
+    # Running the model should access the allocator 6 times: allocating+freeing input+output names,
+    # and allocating+freeing the output as OrtValue.
+    con.execute_command('AI.MODELRUN', 'm{1}', 'INPUTS', 'a_mul{1}', 'OUTPUTS', 'b{1}')
+    values = con.execute_command('AI.TENSORGET', 'b{1}', 'VALUES')
+    env.assertEqual(values, [b'1', b'4', b'9', b'16', b'25', b'36'])
+    ai_memory_config = {k.split(":")[0]: k.split(":")[1]
+                        for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
+    env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 9)
+    env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory"]), model_allocation_bytes_used)
+
+    # Expect using the allocator free function 3 times: when releasing the model, input name and output name.
     con.execute_command('AI.MODELDEL', 'm{1}')
     env.assertFalse(con.execute_command('EXISTS', 'm{1}'))
     ai_memory_config = {k.split(":")[0]: k.split(":")[1]
                         for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
     env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory"]), 0)
-    env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 6)
+    env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 12)
 
     # test the use of Redis allocator in model run op.
     model_filename = os.path.join(test_data_path, 'mnist.onnx')
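The counter arithmetic behind these assertions: model creation touches the allocator 3 times, the run adds 6 accesses (an allocation and a free each for the input name, the output name, and the output OrtValue), and AI.MODELDEL adds the final 3 frees, so 3 + 6 + 3 = 12.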
@@ -502,7 +514,7 @@ def test_onnx_use_custom_allocator(env):
     env.assertEqual(ret, b'OK')
     con.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 1, 1, 28, 28, 'BLOB', sample_raw)
 
-    # Expect 16 allocator's access from onnx during the run (in addition to the allocations that were made while
+    # Expect 18 allocator's access from onnx during the run (in addition to the allocations that were made while
     # creating the model).
     ai_memory_config = {k.split(":")[0]: k.split(":")[1]
                         for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
@@ -511,7 +523,7 @@ def test_onnx_use_custom_allocator(env):
     ai_memory_config = {k.split(":")[0]: k.split(":")[1]
                         for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
     allocator_access_num_after = ai_memory_config["ai_onnxruntime_memory_access_num"]
-    env.assertEqual(int(allocator_access_num_after) - int(allocator_access_num_before), 16)
+    env.assertEqual(int(allocator_access_num_after) - int(allocator_access_num_before), 18)
 
     values = con.execute_command('AI.TENSORGET', 'b{1}', 'VALUES')
     argmax = max(range(len(values)), key=lambda i: values[i])
@@ -549,18 +561,26 @@ def test_onnx_use_custom_allocator_with_GPU(env):
                         for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
 
     # Expect using at least 130+63+(size of an address) + 4*(2+63+(size of an address)) bytes.
-    env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) > 472)
-    env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) < 705)
+    model_allocation_bytes_used = int(ai_memory_config["ai_onnxruntime_memory"])
+    env.assertTrue(model_allocation_bytes_used > 472)
+    env.assertTrue(model_allocation_bytes_used < 705)
     env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 5)
 
-    # Make sure that allocator is not used for running and freeing the GPU model.
+    # Make sure that allocator is not used for running and freeing the GPU model, except for
+    # the input and output names allocations (and deallocations).
     con.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 3, 2, 'VALUES', 1.0, 2.0, 3.0, 4.0, 5.0, 6.0)
     con.execute_command('AI.MODELRUN', 'm_gpu{1}', 'INPUTS', 'a{1}', 'OUTPUTS', 'b{1}')
     values = con.execute_command('AI.TENSORGET', 'b{1}', 'VALUES')
     env.assertEqual(values, [b'1', b'4', b'9', b'16', b'25', b'36'])
+    # Expect that memory usage didn't change, and for another 4 accesses to the allocator (input and output names
+    # allocation and free)
+    env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory"]), model_allocation_bytes_used)
+    env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 9)
+
+    # Expect only 2 more accesses in delete - for deallocating input and output names
    con.execute_command('AI.MODELDEL', 'm_gpu{1}')
     env.assertFalse(con.execute_command('EXISTS', 'm_gpu{1}'))
     ai_memory_config = {k.split(":")[0]: k.split(":")[1]
                         for k in con.execute_command("INFO MODULES").decode().split("#")[4].split()[1:]}
-    env.assertTrue(int(ai_memory_config["ai_onnxruntime_memory"]) < 705)
-    env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 5)
+    env.assertEqual(int(ai_memory_config["ai_onnxruntime_memory_access_num"]), 11)
+
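Same accounting for the GPU path: 5 allocator accesses at creation, 4 more during the run (the input and output names go through the global allocator even though the tensors themselves use onnx's GPU allocator), and 2 more on delete, giving 5 + 4 + 2 = 11, while the tracked memory never grows beyond what model setup allocated.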
