RedisAI · alonre24 · Mar 11, 2021 · Feb 16, 2021 · Feb 16, 2021 · Feb 16, 2021
diff --git a/get_deps.sh b/get_deps.sh
@@ -12,7 +12,7 @@ HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
 if [[ $1 == --help || $1 == help ]]; then
 	cat <<-END
 		[ARGVARS...] get_deps.sh [cpu|gpu] [--help|help]
-		
+
 		Argument variables:
 		CPU=1              Get CPU dependencies
 		GPU=1              Get GPU dependencies
@@ -103,7 +103,7 @@ if [[ $WITH_TF != 0 ]]; then
 			fi
 			if [[ $ARCH == x64 ]]; then
 				TF_ARCH=x86_64
-				
+
 				LIBTF_URL_BASE=https://storage.googleapis.com/tensorflow/libtensorflow
 			elif [[ $ARCH == arm64v8 ]]; then
 				TF_ARCH=arm64
@@ -134,7 +134,7 @@ if [[ $WITH_TF != 0 ]]; then
 		mkdir $LIBTENSORFLOW.x
 		tar xf $LIBTF_ARCHIVE --no-same-owner -C $LIBTENSORFLOW.x
 		mv $LIBTENSORFLOW.x $LIBTENSORFLOW
-		
+
 		echo "Done."
 	else
 		echo "TensorFlow is in place."
@@ -152,7 +152,7 @@ if [[ $WITH_TFLITE != 0 ]]; then
 
 	if [[ ! -d $LIBTFLITE ]]; then
 		echo "Installing TensorFlow Lite ..."
-		
+
 		LIBTF_URL_BASE=https://s3.amazonaws.com/redismodules/tensorflow
 		if [[ $OS == linux ]]; then
 			TFLITE_OS="linux"
@@ -183,7 +183,7 @@ if [[ $WITH_TFLITE != 0 ]]; then
 		mkdir $LIBTFLITE.x
 		tar xf $LIBTFLITE_ARCHIVE --no-same-owner -C $LIBTFLITE.x
 		mv $LIBTFLITE.x $LIBTFLITE
-		
+
 		echo "Done."
 	else
 		echo "TensorFlow Lite is in place."
@@ -241,14 +241,14 @@ if [[ $WITH_PT != 0 ]]; then
 
 			[[ ! -f $LIBTORCH_ARCHIVE || $FORCE == 1 ]] && wget -q $LIBTORCH_URL
 		fi
-		
+
 		rm -rf $LIBTORCH.x
 		mkdir $LIBTORCH.x
 
 		tar xf $LIBTORCH_ARCHIVE --no-same-owner -C $LIBTORCH.x
 		mv $LIBTORCH.x/libtorch $LIBTORCH
 		rmdir $LIBTORCH.x
-		
+
 		echo "Done."
 	else
 		echo "libtorch is in place."
@@ -282,7 +282,7 @@ fi
 
 ################################################################################### ONNXRUNTIME
 
-ORT_VERSION="1.6.0"
+ORT_VERSION="1.7.1"
 
 if [[ $WITH_ORT != 0 ]]; then
 	[[ $FORCE == 1 ]] && rm -rf $ONNXRUNTIME
@@ -297,15 +297,13 @@ if [[ $WITH_ORT != 0 ]]; then
 			else
 				ORT_BUILD="-gpu"
 			fi
+			ORT_URL_BASE=https://s3.amazonaws.com/redismodules/onnxruntime
 			if [[ $ARCH == x64 ]]; then
 				ORT_ARCH=x64
-				ORT_URL_BASE=https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}
 			elif [[ $ARCH == arm64v8 ]]; then
 				ORT_ARCH=arm64
-				ORT_URL_BASE=https://s3.amazonaws.com/redismodules/onnxruntime
 			elif [[ $ARCH == arm32v7 ]]; then
 				ORT_ARCH=arm
-				ORT_URL_BASE=https://s3.amazonaws.com/redismodules/onnxruntime
 			fi
 		elif [[ $OS == macos ]]; then
 			ORT_OS=osx
@@ -322,7 +320,7 @@ if [[ $WITH_ORT != 0 ]]; then
 		mkdir $ONNXRUNTIME.x
 		tar xzf ${ORT_ARCHIVE} --no-same-owner --strip-components=1 -C $ONNXRUNTIME.x
 		mv $ONNXRUNTIME.x $ONNXRUNTIME
-		
+
 		echo "Done."
 	else
 		echo "ONNXRuntime is in place."

diff --git a/opt/build/onnxruntime/Dockerfile.arm7 b/opt/build/onnxruntime/Dockerfile.arm7
@@ -4,6 +4,7 @@ ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
 ARG ONNXRUNTIME_BRANCH=rel-1.0.0
 ARG ONNXRUNTIME_VER=1.0.0
 ARG ARCH_FLAG="--arm"
+ARG ARCH=arm64v7
 
 RUN [ "cross-build-start" ]
 

diff --git a/opt/build/onnxruntime/Dockerfile.x64 b/opt/build/onnxruntime/Dockerfile.x64
@@ -5,30 +5,36 @@ ARG OS=debian:buster
 #----------------------------------------------------------------------------------------------
 FROM ${OS}
 
-ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
-ARG ONNXRUNTIME_VER=1.0.0
+ARG ONNXRUNTIME_REPO=https://github.com/RedisAI/onnxruntime
+ARG ONNXRUNTIME_VER=1.7.1
+ARG ARCH=x64
 
 RUN apt-get -qq update
-RUN apt-get -qq install -y curl wget tar git
-RUN apt-get -qq install -y build-essential cmake
-RUN apt-get -qq install -y libcurl4-openssl-dev libssl-dev libatlas-base-dev zlib1g-dev
-
-RUN apt-get -qq install -y python3 python3-pip python3-dev
+RUN apt-get -qq install -y curl wget tar git \
+            build-essential cmake \
+            libcurl4-openssl-dev libssl-dev libatlas-base-dev zlib1g-dev \
+            python3 python3-pip python3-dev python3-numpy
 RUN pip3 install --upgrade pip setuptools wheel
-# RUN pip3 install numpy
-RUN apt-get -q install -y python3-numpy
+
+ENV LANG=en_US.UTF-8
+RUN apt-get install -y locales && \
+    sed -i -e "s/# $LANG.*/$LANG UTF-8/" /etc/locale.gen && \
+    dpkg-reconfigure --frontend=noninteractive locales && \
+    update-locale LANG=$LANG
 
 WORKDIR /build
 
-ADD ./pack.sh /build/
 ARG BUILDTYPE=MinSizeRel
+
 ARG BUILDARGS="--config ${BUILDTYPE} --parallel"
 
-RUN git clone --single-branch --branch rel-${ONNXRUNTIME_VER} --recursive ${ONNXRUNTIME_REPO} onnxruntime
-	
-RUN	cd onnxruntime ;\
-	./build.sh ${BUILDARGS} --update --build ;\
-	./build.sh ${BUILDARGS} --build_shared_lib
+RUN git clone --single-branch --branch rel-${ONNXRUNTIME_VER} ${ONNXRUNTIME_REPO} onnxruntime
+WORKDIR /build/onnxruntime
+RUN git fetch --recurse-submodules -j4
+RUN ./build.sh ${BUILDARGS} --update --build
+RUN ./build.sh ${BUILDARGS} --build_shared_lib
 # RUN ./build.sh ${BUILDARGS} --enable_pybind --build_wheel
 
-RUN ./pack.sh ${ONNXRUNTIME_VER}
+ADD ./pack.sh /build
+WORKDIR /build
+RUN ./pack.sh ${ONNXRUNTIME_VER} ${ARCH}
diff --git a/opt/build/onnxruntime/Dockerfile.x64-gpu b/opt/build/onnxruntime/Dockerfile.x64-gpu
@@ -0,0 +1,43 @@
+ARG OS=ubuntu18.04
+ARG CUDA_VER=11.0-cudnn8
+
+FROM nvidia/cuda:11.0-cudnn8-devel-ubuntu18.04
+
+ARG ONNXRUNTIME_REPO=https://github.com/RedisAI/onnxruntime
+ARG ONNXRUNTIME_VER=1.7.1
+ARG ARCH=x64-gpu
+
+RUN apt-get -qq update
+RUN apt-get -qq install -y curl \
+            wget tar git build-essential \
+            libcurl4-openssl-dev libssl-dev \
+            libatlas-base-dev zlib1g-dev python3 python3-pip \
+            python3-dev python3-numpy rsync
+
+RUN pip3 install --upgrade pip setuptools wheel
+
+RUN wget -q https://github.com/Kitware/CMake/releases/download/v3.19.5/cmake-3.19.5-Linux-x86_64.tar.gz -O /tmp/cmake.tgz
+WORKDIR /tmp
+RUN tar -xpf cmake.tgz
+RUN rsync -aqH cmake*/* /usr
+
+
+ENV LANG=en_US.UTF-8
+RUN apt-get install -y locales && \
+    sed -i -e "s/# $LANG.*/$LANG UTF-8/" /etc/locale.gen && \
+    dpkg-reconfigure --frontend=noninteractive locales && \
+    update-locale LANG=$LANG
+
+WORKDIR /build
+
+ARG BUILDTYPE=MinSizeRel
+ARG BUILDARGS="--config ${BUILDTYPE} --parallel"
+
+RUN git clone --single-branch --branch rel-${ONNXRUNTIME_VER} ${ONNXRUNTIME_REPO} onnxruntime
+WORKDIR /build/onnxruntime
+RUN git fetch --recurse-submodules -j4
+RUN ./build.sh ${BUILDARGS} --update --build --use_cuda --cudnn_home /usr/local/cuda --cuda_home /usr/local/cuda --build_shared_lib --parallel
+
+ADD ./pack.sh /build/
+WORKDIR /build
+RUN ./pack.sh ${ONNXRUNTIME_VER} ${ARCH}
diff --git a/opt/build/onnxruntime/Makefile b/opt/build/onnxruntime/Makefile
@@ -1,7 +1,7 @@
 
 ROOT=.
 
-VERSION ?= 1.0.0
+VERSION ?= 1.7.1
 OSNICK ?= buster
 
 #----------------------------------------------------------------------------------------------
@@ -16,7 +16,7 @@ DOCKER_OS.bionic=ubuntu:bionic
 DOCKER_OS.stretch=debian:stretch-slim
 DOCKER_OS.buster=debian:buster-slim
 DOCKER_OS=$(DOCKER_OS.$(OSNICK))
- 
+
 #----------------------------------------------------------------------------------------------
 
 define targets # (1=OP, 2=op)
@@ -40,10 +40,24 @@ IID_$(1)=$(1)_$(VERSION).iid
 CID_$(1)=$(1)_$(VERSION).cid
 
 build_x64:
-	@docker build --iidfile $$(IID_$(1)) -t redisfab/$(STEM)-$(1):$(VERSION) -f Dockerfile.x64 \
+	@docker build --iidfile $$(IID_$(1)) -t redisfab/$(STEM)-x64:$(VERSION) -f Dockerfile.x64 \
 		--build-arg OS=$(DOCKER_OS) $(ROOT)
 	@docker create --cidfile $$(CID_$(1)) `cat $$(IID_$(1))`
-	@docker cp `cat $$(CID_$(1))`:/build/$(STEM)-$(2)-$(VERSION).tgz .
+	@docker cp `cat $$(CID_$(1))`:/build/$(STEM)-x64-$(VERSION).tgz .
+
+.PHONY: build_x64
+endef
+
+# Builds the image from Dockerfile.x64-gpu and copies the packed .tgz out of it.
+define build_x64-gpu # (1=arch, 2=tar-arch)
+IID_$(1)=$(1)_$(VERSION).iid
+CID_$(1)=$(1)_$(VERSION).cid
+
+build_x64-gpu:
+	@docker build --iidfile $$(IID_$(1)) -t redisfab/$(STEM)-$(1):$(VERSION) -f Dockerfile.x64-gpu \
+		--build-arg OS=$(DOCKER_OS) $(ROOT)
+	@docker create --cidfile $$(CID_$(1)) `cat $$(IID_$(1))`
+	@docker cp `cat $$(CID_$(1))`:/build/$(STEM)-$(1)-$(VERSION).tgz .
 
-.PHONY: build_x64
+.PHONY: build_x64-gpu
 endef
@@ -68,11 +81,18 @@ endef
 
 define publish_x64 # (1=arch, 2=tar-arch)
 publish_x64:
-	@aws s3 cp $(STEM)-$(2)-$(VERSION).tar.gz s3://$(S3_URL)/ --acl public-read
+	@aws s3 cp $(STEM)-$(2)-$(VERSION).tgz s3://$(S3_URL)/ --acl public-read
 
 .PHONY: publish_x64
 endef
 
+define publish_x64-gpu # (1=arch, 2=tar-arch)
+publish_x64-gpu:
+	@aws s3 cp $(STEM)-$(2)-$(VERSION).tgz s3://$(S3_URL)/ --acl public-read
+
+.PHONY: publish_x64-gpu
+endef
+
 define publish_arm # (1=arch, 2=tar-arch)
 publish_$(1):
 	@aws s3 cp $(STEM)-$(2)-$(VERSION).tgz s3://$(S3_URL)/ --acl public-read
@@ -90,6 +110,7 @@ all: build publish
 build: $(BUILD_TARGETS)
 
 $(eval $(call build_x64,x64,x86_64))
+$(eval $(call build_x64-gpu,x64-gpu,x86_64-gpu))
 $(eval $(call build_arm,arm64v8,arm64))
 $(eval $(call build_arm,arm32v7,arm))
 
@@ -101,7 +122,8 @@ endif
 
 publish: $(PUBLISH_TARGETS)
 
-$(eval $(call publish_x64,x64,x86_64))
+$(eval $(call publish_x64,x64,x64))
+$(eval $(call publish_x64-gpu,x64-gpu,x64-gpu))
 $(eval $(call publish_arm,arm64v8,arm64))
 $(eval $(call publish_arm,arm32v7,arm))
 

diff --git a/opt/build/onnxruntime/pack.sh b/opt/build/onnxruntime/pack.sh
@@ -2,10 +2,11 @@
 
 set -e
 VER="$1"
+PLATFORM="$2"
 
 mkdir -p pack/include pack/lib
 cp onnxruntime/build/Linux/MinSizeRel/libonnxruntime.so.${VER} pack/lib/
-cp onnxruntime/docs/C_API.md pack/
+cp onnxruntime/docs/C_API_Guidelines.md pack/
 cp onnxruntime/LICENSE pack/
 cp onnxruntime/README.md pack/
 cp onnxruntime/ThirdPartyNotices.txt pack/
@@ -14,8 +15,9 @@ cd onnxruntime/
 git rev-parse HEAD > ../pack/GIT_COMMIT_ID
 cd ..
 cp onnxruntime/include/onnxruntime/core/session/onnxruntime_c_api.h pack/include/
+cp onnxruntime/include/onnxruntime/core/providers/cuda/cuda_provider_factory.h pack/include/
 cd pack/lib/
 ln -s libonnxruntime.so.${VER} libonnxruntime.so
 cd ../..
-mv pack onnxruntime-linux-arm64-${VER}
-tar czf onnxruntime-linux-arm64-${VER}.tgz onnxruntime-linux-arm64-${VER}/
+mv pack onnxruntime-linux-${PLATFORM}-${VER}
+tar czf onnxruntime-linux-${PLATFORM}-${VER}.tgz onnxruntime-linux-${PLATFORM}-${VER}/
diff --git a/opt/redis_valgrind.sup b/opt/redis_valgrind.sup
@@ -12,13 +12,6 @@
    obj:*/libtensorflow_framework.so.*
 }
 
-{
-   ignore_unversioned_libs
-   Memcheck:Leak
-   ...
-   obj:*/libonnxruntime.so.*
-}
-
 {
    ignore_unversioned_libs
    Memcheck:Leak

diff --git a/src/backends.c b/src/backends.c
@@ -451,10 +451,32 @@ int RAI_LoadBackend_ONNXRuntime(RedisModuleCtx *ctx, const char *path) {
         return REDISMODULE_ERR;
     }
 
-    RAI_backends.onnx = backend;
+    // The memory-reporting entry points are mandatory: if either one is missing,
+    // close the library and fail the load instead of registering a backend whose
+    // handle has already been released.
+    backend.get_memory_info =
+        (unsigned long long (*)(void))(unsigned long)dlsym(handle, "RAI_GetMemoryInfoORT");
+    if (backend.get_memory_info == NULL) {
+        dlclose(handle);
+        RedisModule_Log(ctx, "warning",
+                        "Backend does not export RAI_GetMemoryInfoORT. ONNX backend "
+                        "not loaded from %s",
+                        path);
+        return REDISMODULE_ERR;
+    }
+    backend.get_memory_access_num =
+        (unsigned long long (*)(void))(unsigned long)dlsym(handle, "RAI_GetMemoryAccessORT");
+    if (backend.get_memory_access_num == NULL) {
+        dlclose(handle);
+        RedisModule_Log(ctx, "warning",
+                        "Backend does not export RAI_GetMemoryAccessORT. ONNX backend "
+                        "not loaded from %s",
+                        path);
+        return REDISMODULE_ERR;
+    }
 
+    RAI_backends.onnx = backend;
     RedisModule_Log(ctx, "notice", "ONNX backend loaded from %s", path);
-
     return REDISMODULE_OK;
 }
 

diff --git a/src/backends.h b/src/backends.h
@@ -77,6 +77,11 @@ typedef struct RAI_LoadedBackend {
     // Returns the backend version.
     const char *(*get_version)(void);
 
+    // Returns the backend's memory usage for the INFO report.
+    unsigned long long (*get_memory_info)(void);
+
+    // Returns the number of times that Redis accessed the backend allocator.
+    unsigned long long (*get_memory_access_num)(void);
 } RAI_LoadedBackend;
 
 typedef struct RAI_LoadedBackends {