Support bool type for tensors (#775)

alonre24 · web-flow · commit 2f6b4be18624 · 2021-06-06T12:55:08.000+03:00
* Support tensors of type bool. Add validation in TENSORSET that an input value does not overflow the tensor's data type.

* Support bool tensor in backends

* Use a forked dlpack that contains the new type kDLBool; use the v0.5_RAI branch of the fork instead of main
diff --git a/get_deps.sh b/get_deps.sh
@@ -70,13 +70,13 @@ MKL=mkl
 ONNXRUNTIME=onnxruntime
 
 ######################################################################################## DLPACK
-DLPACK_VERSION="v0.4"
+DLPACK_VERSION="v0.5_RAI"
 if [[ $WITH_DLPACK != 0 ]]; then
 	[[ $FORCE == 1 ]] && rm -rf $DLPACK
 
 	if [[ ! -d $DLPACK ]]; then
 		echo "Cloning dlpack ..."
-		git clone --depth 1 --branch $DLPACK_VERSION https://github.com/dmlc/dlpack.git $DLPACK
+		git clone --depth 1 --branch $DLPACK_VERSION https://github.com/RedisAI/dlpack.git $DLPACK
 		echo "Done."
 	else
 		echo "dlpack is in place."
diff --git a/src/backends/libtflite_c/tflite_c.cpp b/src/backends/libtflite_c/tflite_c.cpp
@@ -42,6 +42,10 @@ static DLDataType getDLDataType(const TfLiteTensor *tensor) {
         dtype.bits = 16;
         dtype.code = DLDataTypeCode::kDLFloat;
         break;
+    case kTfLiteBool:
+        dtype.bits = 8;
+        dtype.code = DLDataTypeCode::kDLBool;
+        break;
     default:
         break;
     }
@@ -55,23 +59,6 @@ static DLDevice getDLDevice(const TfLiteTensor *tensor, const int64_t &device_id
     return device;
 }
 
-#if 0
-static at::DeviceType getATenDeviceType(DLDeviceType device_type) {
-  switch (device_type) {
-    case DLDeviceType::kDLCPU:
-      return at::DeviceType::CPU;
-    case DLDeviceType::kDLGPU:
-      return at::DeviceType::CUDA;
-    case DLDeviceType::kDLOpenCL:
-      return at::DeviceType::OPENCL;
-    case DLDeviceType::kDLROCM:
-      return at::DeviceType::HIP;
-    default:
-      throw std::logic_error("Unsupported device_type: " + std::to_string(device_type));
-  }
-  return at::DeviceType::CPU; // impossible
-}
-#endif
 
 size_t dltensorBytes(DLManagedTensor *t) {
     int64_t *shape = t->dl_tensor.shape;
@@ -110,9 +97,11 @@ void copyToTfLiteTensor(std::shared_ptr<tflite::Interpreter> interpreter, int tf
     case kTfLiteFloat32:
         memcpy(interpreter->typed_tensor<float>(tflite_input), input->dl_tensor.data, nbytes);
         break;
+    case kTfLiteBool:
+        memcpy(interpreter->typed_tensor<bool>(tflite_input), input->dl_tensor.data, nbytes);
+        break; // required: otherwise bool input falls through into the Float16 throw below
     case kTfLiteFloat16:
         throw std::logic_error("Float16 not currently supported as input tensor data type");
-        break;
     default:
         throw std::logic_error("Unsupported input data type");
     }
@@ -174,9 +162,11 @@ DLManagedTensor *toManagedDLPack(std::shared_ptr<tflite::Interpreter> interprete
     case kTfLiteFloat32:
         memcpy(dl_tensor.data, interpreter->typed_tensor<float>(tflite_output), tensor->bytes);
         break;
+    case kTfLiteBool:
+        memcpy(dl_tensor.data, interpreter->typed_tensor<bool>(tflite_output), tensor->bytes);
+        break;
     case kTfLiteFloat16:
         throw std::logic_error("Float16 not currently supported as output tensor data type");
-        break;
     default:
         throw std::logic_error("Unsupported output data type");
     }
@@ -231,7 +221,7 @@ extern "C" void *tfliteLoadModel(const char *graph, size_t graphlen, DLDeviceTyp
     }
 
 #if RAI_TFLITE_USE_CUDA
-    if (device == DLDeviceType::kDLGPU) {
+    if (device == DLDeviceType::kDLCUDA) {
         tflite::Interpreter::TfLiteDelegatePtr delegate =
             tflite::evaluation::CreateGPUDelegate(model.get());
         if (interpreter_->ModifyGraphWithDelegate(std::move(delegate)) != kTfLiteOk) {
diff --git a/src/backends/libtorch_c/torch_c.cpp b/src/backends/libtorch_c/torch_c.cpp
@@ -41,7 +41,8 @@ static DLDataType getDLDataType(const at::Tensor &t) {
         dtype.code = DLDataTypeCode::kDLFloat;
         break;
     case at::ScalarType::Bool:
-        throw std::logic_error("Bool is not supported by dlpack");
+        dtype.code = DLDataTypeCode::kDLBool;
+        break;
     case at::ScalarType::BFloat16:
         throw std::logic_error("BFloat16 is not supported by dlpack");
     case at::ScalarType::QInt8:
@@ -68,7 +69,7 @@ static DLDevice getDLDevice(const at::Tensor &tensor, const int64_t &device_id)
     DLDevice device;
     device.device_id = device_id;
     if (tensor.is_cuda()) {
-        device.device_type = DLDeviceType::kDLGPU;
+        device.device_type = DLDeviceType::kDLCUDA;
     } else {
         device.device_type = DLDeviceType::kDLCPU;
     }
@@ -79,7 +80,7 @@ static at::DeviceType getATenDeviceType(DLDeviceType device_type) {
     switch (device_type) {
     case DLDeviceType::kDLCPU:
         return at::DeviceType::CPU;
-    case DLDeviceType::kDLGPU:
+    case DLDeviceType::kDLCUDA:
         return at::DeviceType::CUDA;
     case DLDeviceType::kDLOpenCL:
         return at::DeviceType::OPENCL;
@@ -138,6 +139,15 @@ at::ScalarType toScalarType(const DLDataType &dtype) {
             throw std::logic_error("Unsupported kFloat bits " + std::to_string(dtype.bits));
         }
         break;
+    case DLDataTypeCode::kDLBool:
+        switch (dtype.bits) {
+        case 8: // only 8-bit bool maps to at::ScalarType::Bool
+            stype = at::ScalarType::Bool;
+            break;
+        default:
+            throw std::logic_error("Unsupported kBool bits " + std::to_string(dtype.bits));
+        }
+        break;
     default:
         throw std::logic_error("Unsupported code " + std::to_string(dtype.code));
     }
@@ -310,7 +320,7 @@ static torch::DeviceType getDeviceType(ModuleContext *ctx) {
     switch (ctx->device) {
         case kDLCPU:
             return torch::kCPU;
-        case kDLGPU:
+        case kDLCUDA:
             return torch::kCUDA;
         default:
             throw std::runtime_error(std::string("Unsupported device ") + std::to_string(ctx->device));
diff --git a/src/backends/onnxruntime.c b/src/backends/onnxruntime.c
@@ -2,6 +2,7 @@
 #include <cuda_provider_factory.h>
 #include "backends/util.h"
 #include <stdatomic.h>
+#include <math.h>
 #include "util/arr.h"
 #include "backends/onnxruntime.h"
 #include "redis_ai_objects/tensor.h"
@@ -152,6 +153,14 @@ ONNXTensorElementDataType RAI_GetOrtDataTypeFromDL(DLDataType dtype) {
         default:
             return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
         }
+    } else if (dtype.code == kDLBool) {
+        switch (dtype.bits) {
+        case 8:
+            return ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL;
+            break;
+        default:
+            return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
+        }
     }
     return ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED;
 }
@@ -174,6 +183,8 @@ DLDataType RAI_GetDLDataTypeFromORT(ONNXTensorElementDataType dtype) {
         return (DLDataType){.code = kDLUInt, .bits = 8, .lanes = 1};
     case ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
         return (DLDataType){.code = kDLUInt, .bits = 16, .lanes = 1};
+    case ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
+        return (DLDataType){.code = kDLBool, .bits = 8, .lanes = 1};
     default:
         return (DLDataType){.bits = 0};
     }
@@ -281,7 +292,7 @@ RAI_Tensor *RAI_TensorCreateFromOrtValue(OrtValue *v, size_t batch_offset, long
         size_t elem_count;
         ONNX_VALIDATE_STATUS(ort->GetTensorShapeElementCount(info, &elem_count))
 
-        const size_t len = dtype.bits * elem_count / 8;
+        const size_t len = ceil((double)dtype.bits * elem_count / 8);
         const size_t total_bytesize = len * sizeof(char);
         const size_t sample_bytesize = total_bytesize / total_batch_size;
         const size_t batch_bytesize = sample_bytesize * batch_size;
diff --git a/src/backends/tensorflow.c b/src/backends/tensorflow.c
@@ -24,38 +24,37 @@ TF_DataType RAI_GetTFDataTypeFromDL(DLDataType dtype) {
         switch (dtype.bits) {
         case 32:
             return TF_FLOAT;
-            break;
         case 64:
             return TF_DOUBLE;
-            break;
         default:
             return 0;
         }
     } else if (dtype.code == kDLInt) {
         switch (dtype.bits) {
         case 8:
             return TF_INT8;
-            break;
         case 16:
             return TF_INT16;
-            break;
         case 32:
             return TF_INT32;
-            break;
         case 64:
             return TF_INT64;
-            break;
         default:
             return 0;
         }
     } else if (dtype.code == kDLUInt) {
         switch (dtype.bits) {
         case 8:
             return TF_UINT8;
-            break;
         case 16:
             return TF_UINT16;
-            break;
+        default:
+            return 0;
+        }
+    } else if (dtype.code == kDLBool) {
+        switch (dtype.bits) {
+        case 8:
+            return TF_BOOL;
         default:
             return 0;
         }
@@ -81,6 +80,8 @@ DLDataType RAI_GetDLDataTypeFromTF(TF_DataType dtype) {
         return (DLDataType){.code = kDLUInt, .bits = 8, .lanes = 1};
     case TF_UINT16:
         return (DLDataType){.code = kDLUInt, .bits = 16, .lanes = 1};
+    case TF_BOOL:
+        return (DLDataType){.code = kDLBool, .bits = 8, .lanes = 1};
     default:
         return (DLDataType){.bits = 0};
     }
diff --git a/src/backends/tflite.c b/src/backends/tflite.c
@@ -32,7 +32,7 @@ RAI_Model *RAI_ModelCreateTFLite(RAI_Backend backend, const char *devicestr, RAI
         dl_device = kDLCPU;
         break;
     case RAI_DEVICE_GPU:
-        dl_device = kDLGPU;
+        dl_device = kDLCUDA;
         break;
     default:
         RAI_SetError(error, RAI_EMODELCONFIGURE, "ERR Error configuring model: unsupported device");
diff --git a/src/backends/torch.c b/src/backends/torch.c
@@ -53,7 +53,7 @@ RAI_Model *RAI_ModelCreateTorch(RAI_Backend backend, const char *devicestr, RAI_
         dl_device = kDLCPU;
         break;
     case RAI_DEVICE_GPU:
-        dl_device = kDLGPU;
+        dl_device = kDLCUDA;
         break;
     default:
         RAI_SetError(error, RAI_EMODELCONFIGURE, "ERR Error configuring model: unsupported device");
@@ -304,7 +304,7 @@ RAI_Script *RAI_ScriptCreateTorch(const char *devicestr, const char *scriptdef,
         dl_device = kDLCPU;
         break;
     case RAI_DEVICE_GPU:
-        dl_device = kDLGPU;
+        dl_device = kDLCUDA;
         break;
     default:
         RAI_SetError(error, RAI_ESCRIPTCONFIGURE,
diff --git a/src/redis_ai_objects/tensor.c b/src/redis_ai_objects/tensor.c
@@ -14,6 +14,7 @@
 #include "tensor.h"
 #include "err.h"
 #include "arr.h"
+#include "math.h"
 #include "redisai.h"
 #include "version.h"
 #include "tensor_struct.h"
@@ -24,6 +25,27 @@
 
 extern RedisModuleType *RedisAI_TensorType;
 
+// Return true if val cannot be stored in t's element type without overflow.
+// Only kDLInt, kDLUInt and kDLBool are range-checked; other codes return false.
+bool _ValOverflow(long long val, RAI_Tensor *t) {
+    DLDataType dtype = t->tensor.dl_tensor.dtype;
+    if (dtype.code == kDLInt) {
+        // Any long long fits in 64 bits; the guard also avoids an undefined shift.
+        if (dtype.bits >= 64) {
+            return false;
+        }
+        // Compare as signed, so negative values in range are not rejected.
+        long long limit = 1LL << (dtype.bits - 1);
+        return val >= limit || val < -limit;
+    } else if (dtype.code == kDLUInt) {
+        // A non-negative long long always fits in 63 or more unsigned bits.
+        return val < 0 || (dtype.bits < 63 && val >= (1LL << dtype.bits));
+    } else if (dtype.code == kDLBool) {
+        return val < 0 || val > 1;
+    }
+    return false;
+}
+
 DLDataType RAI_TensorDataTypeFromString(const char *typestr) {
     if (strcasecmp(typestr, RAI_DATATYPE_STR_FLOAT) == 0) {
         return (DLDataType){.code = kDLFloat, .bits = 32, .lanes = 1};
@@ -55,6 +77,9 @@ DLDataType RAI_TensorDataTypeFromString(const char *typestr) {
             return (DLDataType){.code = kDLUInt, .bits = 16, .lanes = 1};
         }
     }
+    if (strcasecmp(typestr, RAI_DATATYPE_STR_BOOL) == 0) {
+        return (DLDataType){.code = kDLBool, .bits = 8, .lanes = 1};
+    }
     return (DLDataType){.bits = 0};
 }
 
@@ -93,6 +118,9 @@ int Tensor_DataTypeStr(DLDataType dtype, char *dtypestr) {
             strcpy(dtypestr, RAI_DATATYPE_STR_UINT16);
             result = REDISMODULE_OK;
         }
+    } else if (dtype.code == kDLBool && dtype.bits == 8) {
+        strcpy(dtypestr, RAI_DATATYPE_STR_BOOL);
+        result = REDISMODULE_OK;
     }
     return result;
 }
@@ -129,7 +157,7 @@ RAI_Tensor *RAI_TensorCreateWithDLDataType(DLDataType dtype, long long *dims, in
     DLDevice device = (DLDevice){.device_type = kDLCPU, .device_id = 0};
 
     // If we return an empty tensor, we initialize the data with zeros to avoid security
-    // issues. Otherwise, we only allocate without initializing (for better performance)
+    // issues. Otherwise, we only allocate without initializing (for better performance).
     void *data;
     if (empty) {
         data = RedisModule_Calloc(len, dtypeSize);
@@ -429,8 +457,12 @@ int RAI_TensorSetValueFromLongLong(RAI_Tensor *t, long long i, long long val) {
         default:
             return 0;
         }
-    } else {
-        return 0;
+    } else if (dtype.code == kDLBool) {
+        if (dtype.bits == 8) {
+            ((uint8_t *)data)[i] = val;
+        } else {
+            return 0;
+        }
     }
     return 1;
 }
@@ -518,8 +550,12 @@ int RAI_TensorGetValueAsLongLong(RAI_Tensor *t, long long i, long long *val) {
         default:
             return 0;
         }
-    } else {
-        return 0;
+    } else if (dtype.code == kDLBool) {
+        if (dtype.bits == 8) {
+            *val = ((uint8_t *)data)[i];
+        } else {
+            return 0;
+        }
     }
     return 1;
 }
@@ -707,7 +743,7 @@ int RAI_parseTensorSetArgs(RedisModuleString **argv, int argc, RAI_Tensor **t, i
             } else {
                 long long val;
                 const int retval = RedisModule_StringToLongLong(argv[argpos], &val);
-                if (retval != REDISMODULE_OK) {
+                if (retval != REDISMODULE_OK || _ValOverflow(val, *t)) {
                     RAI_TensorFree(*t);
                     array_free(dims);
                     RAI_SetError(error, RAI_ETENSORSET, "ERR invalid value");
diff --git a/src/redis_ai_objects/tensor.h b/src/redis_ai_objects/tensor.h
@@ -31,6 +31,7 @@ static const char *RAI_DATATYPE_STR_INT32 = "INT32";
 static const char *RAI_DATATYPE_STR_INT64 = "INT64";
 static const char *RAI_DATATYPE_STR_UINT8 = "UINT8";
 static const char *RAI_DATATYPE_STR_UINT16 = "UINT16";
+static const char *RAI_DATATYPE_STR_BOOL = "BOOL";
 
 #define TENSOR_NONE                0
 #define TENSOR_VALUES              (1 << 0)
diff --git a/tests/flow/tests_common.py b/tests/flow/tests_common.py