RedisAI
diff --git a/‎.circleci/config.yml‎
Lines changed: 14 additions & 43 deletions b/‎.circleci/config.yml‎
Lines changed: 14 additions & 43 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions b/‎CMakeLists.txt‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/backends/backends.c‎
Lines changed: 2 additions & 0 deletions b/‎src/backends/backends.c‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/backends/backends_api.h‎
Lines changed: 10 additions & 2 deletions b/‎src/backends/backends_api.h‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎src/backends/onnx_allocator/CMakeLists.txt‎
Lines changed: 3 additions & 0 deletions b/‎src/backends/onnx_allocator/CMakeLists.txt‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎src/backends/onnx_allocator/onnx_allocator.cpp‎
Lines changed: 114 additions & 0 deletions b/‎src/backends/onnx_allocator/onnx_allocator.cpp‎
Lines changed: 114 additions & 0 deletions
diff --git a/‎src/backends/onnx_allocator/onnx_allocator.h‎
Lines changed: 17 additions & 0 deletions b/‎src/backends/onnx_allocator/onnx_allocator.h‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎src/backends/onnxruntime.c‎
Lines changed: 5 additions & 58 deletions b/‎src/backends/onnxruntime.c‎
Lines changed: 5 additions & 58 deletions
diff --git a/‎src/backends/onnxruntime.h‎
Lines changed: 0 additions & 4 deletions b/‎src/backends/onnxruntime.h‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎src/backends/tensorflow.c‎
Lines changed: 6 additions & 2 deletions b/‎src/backends/tensorflow.c‎
Lines changed: 6 additions & 2 deletions
@@ -221,22 +221,32 @@ jobs:
           platform: debian
 
   platforms-build:
+    machine:
+      enabled: true
+      docker_layer_caching: true
+      resource_class: small
+      image: ubuntu-2004:202107-02
     parameters:
       lite:  # LITE value during make
         type: string
       osnick:  # OSNICK value for the base platform of the docker
         type: string
       target:  # CPU|GPU
         type: string
-    docker:
-      - image: redisfab/rmbuilder:6.2.5-x64-buster
     steps:
       - abort_for_docs
       - abort_for_noci
       - early_return_for_forked_pull_requests
-      - setup_remote_docker:
-          docker_layer_caching: true
       - checkout-all
+      - run:
+          name: Relocate docker overlay2 dir
+          command: |
+            sudo systemctl stop docker
+            sudo mkdir -p /var2/lib/docker
+            sudo mv /var/lib/docker/overlay2 /var2/lib/docker
+            sudo mkdir /var/lib/docker/overlay2
+            sudo mount --bind /var2/lib/docker/overlay2 /var/lib/docker/overlay2
+            sudo systemctl start docker
       - restore_cache:
           keys:
           - v1.2.5-deps-{{ checksum "get_deps.sh" }}-<<parameters.osnick>>-<<parameters.target>>
@@ -342,45 +352,6 @@ jobs:
     steps:
       - valgrind-general-steps
 
-#  build-macos:
-#    macos:
-#      xcode: 11.3.0
-#    steps:
-#      - abort_for_docs
-#      - run:
-#          name: Fix macOS Python installation
-#          command: |
-#            brew reinstall -f python2
-#      - build-steps:
-#          platform: macos
-#
-#  build-multiarch-docker:
-#    machine:
-#      enabled: true
-#      image: cimg/base:2020.01
-#    steps:
-#      - abort_for_docs
-#      - checkout-all
-#      - run:
-#          name: Checkout LFS
-#          command: |
-#            curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash
-#            sudo apt-get install -y git-lfs
-#            git lfs install
-#            git lfs pull
-#      - run:
-#          name: Setup Docker client experimental features
-#          command: |
-#            sudo ./opt/readies/bin/getdocker --just-enable-exp
-#            docker version
-#      - run:
-#          name: Build
-#          command: |
-#            sudo docker login -u redisfab -p $DOCKER_REDISFAB_PWD
-#            cd opt/build/docker
-#            make build
-#            sudo make publish
-
   # internal PRs execute build-and-test either in a workflow or
   # via a github action trigger
   build-and-test-gpu:
 
@@ -160,6 +160,7 @@ ENDIF()
 IF(BUILD_ORT)
     FIND_LIBRARY(ORT_LIBRARIES NAMES onnxruntime
             PATHS ${depsAbs}/onnxruntime/lib)
+    ADD_SUBDIRECTORY(src/backends/onnx_allocator)
     MESSAGE(STATUS "Found ONNXRuntime Libraries: \"${ORT_LIBRARIES}\")")
     IF (NOT ORT_LIBRARIES)
         MESSAGE(FATAL_ERROR "Could not find ONNXRuntime")
@@ -293,6 +294,7 @@ ENDIF()
 
 IF(BUILD_ORT)
     ADD_LIBRARY(redisai_onnxruntime SHARED $<TARGET_OBJECTS:redisai_onnxruntime_obj>)
+    TARGET_LINK_LIBRARIES(redisai_onnxruntime onnx_allocator ${ORT_LIBRARIES})
     TARGET_LINK_LIBRARIES(redisai_onnxruntime ${ORT_LIBRARIES})
     SET_TARGET_PROPERTIES(redisai_onnxruntime PROPERTIES PREFIX "")
     SET_TARGET_PROPERTIES(redisai_onnxruntime PROPERTIES SUFFIX ".so")
 
@@ -50,6 +50,8 @@ int RAI_ExportFunc(const char *func_name, void **targetFuncPtr) {
         *targetFuncPtr = Config_GetModelExecutionTimeout;
     } else if (strcmp("GetThreadsCount", func_name) == 0) {
         *targetFuncPtr = BGWorker_GetThreadsCount;
+    } else if (strcmp("GetBackendMemoryLimit", func_name) == 0) {
+        *targetFuncPtr = Config_GetBackendMemoryLimit;
 
         // Export RedisAI low level API functions.
     } else if (strcmp("RedisAI_InitError", func_name) == 0) {
 
@@ -37,12 +37,20 @@ BACKENDS_API uintptr_t (*RedisAI_GetThreadsCount)(void);
 BACKENDS_API long long (*RedisAI_GetNumThreadsPerQueue)(void);
 
 /**
- * @return The maximal number of milliseconds that a model run session should run
+ * @return The maximum number of milliseconds that a model run session should run
  * before it is terminated forcefully (load time config).
- * Currently supported only fo onnxruntime backend.
+ * Currently supported only for onnxruntime backend.
  */
 BACKENDS_API long long (*RedisAI_GetModelExecutionTimeout)(void);
 
+/**
+ * @return The maximum amount of memory (in MB) that a backend can consume
+ * for creating and running inference sessions. When the memory limit is exceeded,
+ * the operation is not permitted and an error is returned.
+ * Currently supported only for onnxruntime backend.
+ */
+BACKENDS_API long long (*RedisAI_GetMemoryLimit)(void);
+
 /**
  * The following functions are part of RedisAI low level API (the full low level
  * API is defined in redisai.h). For every function below named "RedisAI_X", its
 
@@ -0,0 +1,3 @@
+add_library(onnx_allocator STATIC onnx_allocator.cpp)
+target_link_libraries(onnx_allocator "${ONNX_LIBRARIES}")
+set_property(TARGET onnx_allocator PROPERTY CXX_STANDARD 14)
@@ -0,0 +1,114 @@
+#include "onnx_allocator.h"
+#include "../onnxruntime.h"
+#include "onnxruntime_cxx_api.h"
+#include <atomic>
+
+struct RAIOrtAllocator : OrtAllocator {
+    RAIOrtAllocator();
+    ~RAIOrtAllocator();
+    RAIOrtAllocator(const RAIOrtAllocator&) = delete;
+    RAIOrtAllocator& operator=(const RAIOrtAllocator&) = delete;
+
+    void* Alloc(size_t size);
+    void Free(void* p);
+    const OrtMemoryInfo* Info() const;
+    unsigned long long NumAllocatorAccess() const;
+    unsigned long long MemoryInUse() const;
+    void SetMemoryLimit(unsigned long long max_memory);
+    static RAIOrtAllocator *GetInstance();
+
+private:
+    std::atomic<unsigned long long> memory_inuse{0};
+    std::atomic<unsigned long long> num_allocator_access{0};
+    unsigned long long memory_limit = 0;
+    OrtMemoryInfo* cpu_memory_info;
+    static RAIOrtAllocator* allocator_instance;
+};
+
+RAIOrtAllocator* RAIOrtAllocator::allocator_instance = nullptr;
+
+RAIOrtAllocator::RAIOrtAllocator() {
+    OrtAllocator::version = ORT_API_VERSION;
+    OrtAllocator::Alloc = [](OrtAllocator* this_, size_t size) { return static_cast<RAIOrtAllocator*>(this_)->Alloc(size); };
+    OrtAllocator::Free = [](OrtAllocator* this_, void* p) { static_cast<RAIOrtAllocator*>(this_)->Free(p); };
+    OrtAllocator::Info = [](const OrtAllocator* this_) { return static_cast<const RAIOrtAllocator*>(this_)->Info(); };
+    Ort::ThrowOnError(Ort::GetApi().CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault, &cpu_memory_info));
+    RAIOrtAllocator::allocator_instance = this;
+}
+
+RAIOrtAllocator::~RAIOrtAllocator() {
+    Ort::GetApi().ReleaseMemoryInfo(cpu_memory_info);
+}
+
+void* RAIOrtAllocator::Alloc(size_t size) {
+    // Allocate an additional 63 bytes to ensure that we can return an address which is
+    // 64-byte aligned, and an additional space in the size of a pointer to store
+    // the address that RedisModule_Alloc returns.
+    int offset = 63 + sizeof(void *);
+    void *allocated_address = (void *)RedisModule_Alloc(size + offset);
+    size_t allocated_size = RedisModule_MallocSize(allocated_address);
+    // Update the total number of bytes that onnx is using and the number of accesses
+    // that onnx made to the allocator.
+    size_t cur_memory = memory_inuse.load();
+    if (memory_limit && cur_memory + allocated_size > memory_limit) {
+        RedisModule_Free(allocated_address);
+        throw Ort::Exception("Onnxruntime memory limit exceeded, memory allocation failed.", ORT_RUNTIME_EXCEPTION);
+    }
+    memory_inuse.fetch_add(allocated_size);
+    num_allocator_access.fetch_add(1);
+    // Round ("allocated_address" + offset) down to a 64-byte boundary; the result is 64-byte aligned and leaves at least sizeof(void *) bytes before it.
+    void **aligned_address = (void **)(((size_t)(allocated_address) + offset) & (~63));
+    // This stores the address "allocated_address" right before "aligned_address" (so we can retrieve it when we free).
+    aligned_address[-1] = allocated_address;
+    return aligned_address;
+}
+
+void RAIOrtAllocator::Free(void* p) {
+    if (p == nullptr) {
+        return;
+    }
+    // Retrieve the address that we originally received from RedisModule_Alloc
+    // (this is the address that we need to pass to RedisModule_Free).
+    void *allocated_address = ((void **)p)[-1];
+    size_t allocated_size = RedisModule_MallocSize(allocated_address);
+    // Update the total number of bytes that onnx is using and the number of accesses
+    // that onnx made to the allocator.
+    memory_inuse.fetch_sub(allocated_size);
+    num_allocator_access.fetch_add(1);
+    RedisModule_Free(allocated_address);
+}
+
+const OrtMemoryInfo* RAIOrtAllocator::Info() const {
+    return cpu_memory_info;
+}
+
+unsigned long long RAIOrtAllocator::NumAllocatorAccess() const {
+    return num_allocator_access.load();
+}
+
+unsigned long long RAIOrtAllocator::MemoryInUse() const {
+    return memory_inuse.load();
+}
+
+void RAIOrtAllocator::SetMemoryLimit(unsigned long long max_memory) {
+    // max_memory is given in MB; it is converted to bytes using 1 MB = 1,000,000 bytes.
+    memory_limit = 1000000*max_memory;
+}
+
+RAIOrtAllocator *RAIOrtAllocator::GetInstance() {
+    return RAIOrtAllocator::allocator_instance;
+}
+
+OrtAllocator *CreateCustomAllocator(unsigned long long max_memory) {
+    auto *allocator = new RAIOrtAllocator();
+    allocator->SetMemoryLimit(max_memory);
+    return allocator;
+}
+
+unsigned long long RAI_GetMemoryInfoORT() {
+    return RAIOrtAllocator::GetInstance()->MemoryInUse();
+}
+
+unsigned long long RAI_GetMemoryAccessORT() {
+    return RAIOrtAllocator::GetInstance()->NumAllocatorAccess();
+}
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "onnxruntime_c_api.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+OrtAllocator *CreateCustomAllocator(unsigned long long max_memory);
+
+unsigned long long RAI_GetMemoryInfoORT();
+
+unsigned long long RAI_GetMemoryAccessORT();
+
+#ifdef __cplusplus
+}
+#endif
@@ -6,6 +6,7 @@
 #include "util/arr.h"
 #include "backends/onnxruntime.h"
 #include "redis_ai_objects/tensor.h"
+#include "onnx_allocator/onnx_allocator.h"
 
 #include "onnxruntime_c_api.h"
 #include "backends_api.h"
@@ -21,63 +22,7 @@ OrtEnv *env = NULL;
 // For model that run on GPU, onnx will not use the custom allocator (redis allocator), but
 // the onnx allocator for GPU. But for the auxiliary allocations of the input and output names,
 // we will use the custom global allocator for models that run on GPU as well.
-OrtMemoryInfo *mem_info = NULL;
 OrtAllocator *global_allocator = NULL;
-unsigned long long OnnxMemory = 0;
-unsigned long long OnnxMemoryAccessCounter = 0;
-
-const OrtMemoryInfo *AllocatorInfo(const OrtAllocator *allocator) {
-    (void)allocator;
-    const OrtApi *ort = OrtGetApiBase()->GetApi(1);
-    if (mem_info != NULL) {
-        return mem_info;
-    }
-    if (ort->CreateCpuMemoryInfo(OrtDeviceAllocator, OrtMemTypeDefault, &mem_info) != NULL) {
-        return NULL;
-    }
-    return mem_info;
-}
-
-// Allocate address with 64-byte alignment to cope with onnx optimizations.
-void *AllocatorAlloc(OrtAllocator *ptr, size_t size) {
-
-    (void)ptr;
-    // Allocate an additional 63 bytes to ensure that we can return an address which is
-    // 64-byte aligned, and an additional space in the size of a pointer to store
-    // the address that RedisModule_Alloc returns.
-    int offset = 63 + sizeof(void *);
-    void *allocated_address = (void *)RedisModule_Alloc(size + offset);
-    size_t allocated_size = RedisModule_MallocSize(allocated_address);
-    // Update the total number of bytes that onnx is using and the number of accesses
-    // that onnx made to the allocator.
-    atomic_fetch_add(&OnnxMemory, allocated_size);
-    atomic_fetch_add(&OnnxMemoryAccessCounter, 1);
-    // This operation guarantees that p2 is the closest 64-aligned address to (p1+size_t).
-    void **aligned_address = (void **)(((size_t)(allocated_address) + offset) & (~63));
-    // This stores the address p1 right before p2 (so we can retrieve it when we free).
-    aligned_address[-1] = allocated_address;
-    return aligned_address;
-}
-
-void AllocatorFree(OrtAllocator *ptr, void *aligned_address) {
-    (void)ptr;
-    if (aligned_address == NULL) {
-        return;
-    }
-    // Retrieve the address that we originally received from RedisModule_Alloc
-    // (this is the address that we need to sent to RedisModule_Free).
-    void *allocated_address = ((void **)aligned_address)[-1];
-    size_t allocated_size = RedisModule_MallocSize(allocated_address);
-    // Update the total number of bytes that onnx is using and the number of accesses
-    // that onnx made to the allocator.
-    atomic_fetch_sub(&OnnxMemory, allocated_size);
-    atomic_fetch_add(&OnnxMemoryAccessCounter, 1);
-    return RedisModule_Free(allocated_address);
-}
-
-unsigned long long RAI_GetMemoryInfoORT() { return OnnxMemory; }
-
-unsigned long long RAI_GetMemoryAccessORT() { return OnnxMemoryAccessCounter; }
 
 int RAI_InitBackendORT(int (*get_api_fn)(const char *, void **)) {
     // Export redis callbacks.
@@ -95,6 +40,7 @@ int RAI_InitBackendORT(int (*get_api_fn)(const char *, void **)) {
     get_api_fn("GetThreadId", ((void **)&RedisAI_GetThreadId));
     get_api_fn("GetNumThreadsPerQueue", ((void **)&RedisAI_GetNumThreadsPerQueue));
     get_api_fn("GetModelExecutionTimeout", ((void **)&RedisAI_GetModelExecutionTimeout));
+    get_api_fn("GetBackendMemoryLimit", ((void **)&RedisAI_GetMemoryLimit));
     get_api_fn("GetThreadsCount", ((void **)&RedisAI_GetThreadsCount));
 
     // Create a global array of onnx runSessions, with an entry for every working thread.
@@ -389,8 +335,9 @@ RAI_Model *RAI_ModelCreateORT(RAI_Backend backend, const char *devicestr, RAI_Mo
     // allocating buffers when creating and running models that run on CPU, and for allocations of
     // models inputs and outputs names (for both models that run on CPU and GPU)
     if (env == NULL) {
-        ONNX_VALIDATE_STATUS(ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "test", &env))
-        ONNX_VALIDATE_STATUS(ort->GetAllocatorWithDefaultOptions(&global_allocator));
+        ONNX_VALIDATE_STATUS(ort->CreateEnv(ORT_LOGGING_LEVEL_WARNING, "RedisAI", &env))
+        global_allocator = CreateCustomAllocator(RedisAI_GetMemoryLimit());
+        ONNX_VALIDATE_STATUS(ort->RegisterAllocator(env, global_allocator))
     }
 
     ONNX_VALIDATE_STATUS(ort->CreateSessionOptions(&session_options))
 
@@ -5,10 +5,6 @@
 #include "redis_ai_objects/model.h"
 #include "execution/execution_contexts/execution_ctx.h"
 
-unsigned long long RAI_GetMemoryInfoORT(void);
-
-unsigned long long RAI_GetMemoryAccessORT(void);
-
 int RAI_InitBackendORT(int (*get_api_fn)(const char *, void **));
 
 RAI_Model *RAI_ModelCreateORT(RAI_Backend backend, const char *devicestr, RAI_ModelOpts opts,
 
@@ -530,8 +530,10 @@ int RAI_ModelRunTF(RAI_Model *model, RAI_ExecutionCtx **ectxs, RAI_Error *error)
                   outputTensorsValues, noutputs, NULL /* target_opers */, 0 /* ntargets */,
                   NULL /* run_Metadata */, status);
 
+    bool delete_output = true;
     if (TF_GetCode(status) != TF_OK) {
         RAI_SetError(error, RAI_EMODELRUN, TF_Message(status));
+        delete_output = false;
         goto cleanup;
     }
 
@@ -575,8 +577,10 @@ int RAI_ModelRunTF(RAI_Model *model, RAI_ExecutionCtx **ectxs, RAI_Error *error)
         }
         TF_DeleteTensor(inputTensorsValues[i]);
     }
-    for (size_t i = 0; i < noutputs; i++) {
-        TF_DeleteTensor(outputTensorsValues[i]);
+    if (delete_output) {
+        for (size_t i = 0; i < noutputs; i++) {
+            TF_DeleteTensor(outputTensorsValues[i]);
+        }
     }
     return res;
 }
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+add_library(onnx_allocator STATIC onnx_allocator.cpp)`
	`2`	`+target_link_libraries(onnx_allocator "${ONNX_LIBRARIES}")`
	`3`	`+set_property(TARGET onnx_allocator PROPERTY CXX_STANDARD 14)`
Original file line number	Diff line number	Diff line change
`@@ -530,8 +530,10 @@ int RAI_ModelRunTF(RAI_Model *model, RAI_ExecutionCtx **ectxs, RAI_Error *error)`
`530`	`530`	`outputTensorsValues, noutputs, NULL /* target_opers */, 0 /* ntargets */,`
`531`	`531`	`NULL /* run_Metadata */, status);`
`532`	`532`
	`533`	`+ bool delete_output = true;`
`533`	`534`	`if (TF_GetCode(status) != TF_OK) {`
`534`	`535`	`RAI_SetError(error, RAI_EMODELRUN, TF_Message(status));`
	`536`	`+ delete_output = false;`
`535`	`537`	`goto cleanup;`
`536`	`538`	`}`
`537`	`539`
`@@ -575,8 +577,10 @@ int RAI_ModelRunTF(RAI_Model *model, RAI_ExecutionCtx **ectxs, RAI_Error *error)`
`575`	`577`	`}`
`576`	`578`	`TF_DeleteTensor(inputTensorsValues[i]);`
`577`	`579`	`}`
`578`		`- for (size_t i = 0; i < noutputs; i++) {`
`579`		`- TF_DeleteTensor(outputTensorsValues[i]);`
	`580`	`+ if (delete_output) {`
	`581`	`+ for (size_t i = 0; i < noutputs; i++) {`
	`582`	`+ TF_DeleteTensor(outputTensorsValues[i]);`
	`583`	`+ }`
`580`	`584`	`}`
`581`	`585`	`return res;`
`582`	`586`	`}`