diff --git a/.circleci/config.yml b/.circleci/config.yml index ffd00560f..0849ff45c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -37,6 +37,7 @@ commands: command: | mkdir -p ~/workspace/tests make -C opt test SHOW=1 + no_output_timeout: 20m - run: name: Package command: make -C opt pack BRANCH="${CIRCLE_BRANCH//[^A-Za-z0-9._-]/_}" INTO=~/workspace/packages SHOW=1 diff --git a/.gitignore b/.gitignore index dce0540c4..389589f9b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ /build/ /install* /test/venv/ +logs/ /test/logs/ .venv/ venv*/ @@ -13,6 +14,9 @@ venv*/ *.tar.gz /VARIANT +### Cmake auto tools +cmake-build-debug + # Misc .DS_Store *.swp @@ -73,6 +77,10 @@ __pycache__ *.idb *.pdb +# Debug/Profile files +# ignore perf html reports +*.html + # Kernel Module Compile Results *.mod* *.cmd @@ -86,6 +94,10 @@ dkms.conf # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 +.idea +.idea/ +.idea/* + # User-specific stuff: .idea/workspace.xml .idea/tasks.xml diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 144ac3fa5..fee5dbd15 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -2,7 +2,13 @@ ADD_LIBRARY(redisai_obj OBJECT util/dict.c util/queue.c redisai.c + run_info.c + background_workers.c + model_script_run_session.c + config.c + dag.c backends.c + backends/util.c model.c err.c script.c @@ -13,13 +19,14 @@ ADD_LIBRARY(redisai_obj OBJECT rmutil/args.c rmutil/heap.c rmutil/priority_queue.c - rmutil/vector.c) + rmutil/vector.c run_info.c) IF(BUILD_TF) ADD_LIBRARY(redisai_tensorflow_obj OBJECT backends/tensorflow.c backends/util.c err.c + util/dict.c tensor.c) ENDIF() @@ -28,6 +35,7 @@ IF(BUILD_TFLITE) backends/tflite.c backends/util.c err.c + util/dict.c tensor.c) ENDIF() @@ -36,6 +44,7 @@ IF(BUILD_TORCH) backends/torch.c backends/util.c err.c + util/dict.c tensor.c) ENDIF() @@ -44,6 +53,7 @@ 
IF(BUILD_ORT) backends/onnxruntime.c backends/util.c err.c + util/dict.c tensor.c) ENDIF() diff --git a/src/backends/tensorflow.c b/src/backends/tensorflow.c index 8dd9405e5..4444ae267 100644 --- a/src/backends/tensorflow.c +++ b/src/backends/tensorflow.c @@ -2,6 +2,7 @@ #include "backends/util.h" #include "tensor.h" #include "util/arr_rm_alloc.h" +#include "model.h" #include "tensorflow/c/c_api.h" @@ -292,16 +293,44 @@ RAI_Model *RAI_ModelCreateTF(RAI_Backend backend, const char* devicestr, RAI_Mod if (device == RAI_DEVICE_CPU) { // Set number of GPU to 0 with - // config.device_count = {'GPU': 0} - uint8_t config[9] = {0x0a, 0x07, 0x0a, 0x03, 0x47, 0x50, 0x55, 0x10, 0x00}; - TF_SetConfig(sessionOptions, (void *)config, 9, status); - } - else if (device == RAI_DEVICE_GPU) { + // config.device_count = {'GPU': 0} + uint8_t config[] = {0x0a, 0x07, 0x0a, 0x03, 0x47, 0x50, 0x55, 0x10, 0x00}; + TF_SetConfig(sessionOptions, (void *)config, sizeof(config), optionsStatus); + + if (TF_GetCode(optionsStatus) != TF_OK) { + RAI_SetError(error, RAI_EMODELCONFIGURE, + RedisModule_Strdup(TF_Message(optionsStatus))); + // TODO: free memory + return NULL; + } + + if (opts.backends_intra_op_parallelism > 0) { + uint8_t proto[] = {0x10, (uint8_t)opts.backends_intra_op_parallelism}; + TF_SetConfig(sessionOptions, proto, sizeof(proto), optionsStatus); + if (TF_GetCode(optionsStatus) != TF_OK) { + RAI_SetError(error, RAI_EMODELCONFIGURE, + RedisModule_Strdup(TF_Message(optionsStatus))); + // TODO: free memory + return NULL; + } + } + + if (opts.backends_inter_op_parallelism > 0) { + uint8_t proto1[] = {0x28, (uint8_t)opts.backends_inter_op_parallelism}; + TF_SetConfig(sessionOptions, proto1, sizeof(proto1), optionsStatus); + if (TF_GetCode(optionsStatus) != TF_OK) { + RAI_SetError(error, RAI_EMODELCONFIGURE, + RedisModule_Strdup(TF_Message(optionsStatus))); + // TODO: free memory + return NULL; + } + } + } else if (device == RAI_DEVICE_GPU) { if (deviceid == -1) { // Set // 
config.gpu_options.allow_growth = True uint8_t config[4] = {0x32, 0x02, 0x20, 0x01}; - TF_SetConfig(sessionOptions, (void *)config, 4, status); + TF_SetConfig(sessionOptions, (void *)config, 4, optionsStatus); } else { // Set @@ -309,12 +338,12 @@ RAI_Model *RAI_ModelCreateTF(RAI_Backend backend, const char* devicestr, RAI_Mod // config.gpu_options.visible_device_list = '' uint8_t config[7] = {0x32, 0x05, 0x20, 0x01, 0x2a, 0x01, 0x30}; config[6] += (uint8_t)deviceid; - TF_SetConfig(sessionOptions, (void *)config, 7, status); + TF_SetConfig(sessionOptions, (void *)config, 7, optionsStatus); } } if (TF_GetCode(optionsStatus) != TF_OK) { - RAI_SetError(error, RAI_EMODELCONFIGURE, RedisModule_Strdup(TF_Message(status))); + RAI_SetError(error, RAI_EMODELCONFIGURE, RedisModule_Strdup(TF_Message(optionsStatus))); // TODO: free memory return NULL; } @@ -437,7 +466,7 @@ int RAI_ModelRunTF(RAI_ModelRunCtx** mctxs, RAI_Error *error) { size_t batch_sizes[nbatches]; size_t batch_offsets[nbatches]; - if (array_len(mctxs[0]->inputs) > 0) { + if (ninputs > 0) { for (size_t b=0; binputs[0].tensor, 0); } @@ -453,7 +482,6 @@ int RAI_ModelRunTF(RAI_ModelRunCtx** mctxs, RAI_Error *error) { for (size_t b=0; binputs[i].tensor; } - // inputTensorsValues[i] = RAI_TFTensorFromTensor(mctx->inputs[i].tensor); inputTensorsValues[i] = RAI_TFTensorFromTensors(batched_input_tensors, nbatches); TF_Output port; port.oper = TF_GraphOperationByName(mctxs[0]->model->model, mctxs[0]->inputs[i].name); @@ -495,21 +523,11 @@ int RAI_ModelRunTF(RAI_ModelRunCtx** mctxs, RAI_Error *error) { for(size_t i=0; ioutputs[i].tensor = RAI_TensorGetShallowCopy(output_tensor); - RAI_TensorFree(output_tensor); + mctxs[b]->outputs[i].tensor = RAI_TensorCreateFromTFTensor(outputTensorsValues[i], batch_offsets[b], batch_sizes[b]); } TF_DeleteTensor(outputTensorsValues[i]); } - // TODO: add (make sure we deallocate once) - // for (size_t i=0 ; iinputs); ++i) { - // TF_DeleteTensor(inputTensorsValues[i]); - // } - // for 
(size_t i=0 ; ioutputs); ++i) { - // TF_DeleteTensor(outputTensorsValues[i]); - // } - TF_DeleteStatus(status); return 0; diff --git a/src/backends/util.c b/src/backends/util.c index 8babb9a27..3f294e5b6 100644 --- a/src/backends/util.c +++ b/src/backends/util.c @@ -1,21 +1,20 @@ #include "backends/util.h" -int parseDeviceStr(const char* devicestr, RAI_Device* device, int64_t* deviceid) { +int parseDeviceStr(const char* devicestr, RAI_Device* device, + int64_t* deviceid) { if (strcasecmp(devicestr, "CPU") == 0) { *device = RAI_DEVICE_CPU; *deviceid = -1; - } - else if (strcasecmp(devicestr, "GPU") == 0) { + } else if (strcasecmp(devicestr, "GPU") == 0) { *device = RAI_DEVICE_GPU; *deviceid = -1; - } - else if (strncasecmp(devicestr, "GPU:", 4) == 0) { + } else if (strncasecmp(devicestr, "GPU:", 4) == 0) { *device = RAI_DEVICE_GPU; sscanf(devicestr, "GPU:%lld", deviceid); - } - else { + } else { return 0; } return 1; } + diff --git a/src/backends/util.h b/src/backends/util.h index 921dbf525..913ad0a30 100644 --- a/src/backends/util.h +++ b/src/backends/util.h @@ -1,11 +1,13 @@ #ifndef SRC_BACKENDS_UTIL_H_ #define SRC_BACKENDS_UTIL_H_ -#include "config.h" #include #include #include -int parseDeviceStr(const char* devicestr, RAI_Device* device, int64_t* deviceid); +#include "config.h" + +int parseDeviceStr(const char* devicestr, RAI_Device* device, + int64_t* deviceid); #endif /* SRC_BACKENDS_UTIL_H_ */ diff --git a/src/background_workers.c b/src/background_workers.c new file mode 100644 index 000000000..71e6e3625 --- /dev/null +++ b/src/background_workers.c @@ -0,0 +1,197 @@ +#include "background_workers.h" +#include "dag.h" +#include "model_script_run_session.h" +#include "model.h" +#include "redisai.h" +#include "rmutil/alloc.h" +#include "rmutil/args.h" +#include "run_info.h" +#include "script.h" +#include "stats.h" +#include "tensor.h" +#include "util/arr_rm_alloc.h" +#include "util/dict.h" +#include "util/queue.h" +#include +#include +#include +#include 
+#include +#include + +int freeRunQueueInfo(RunQueueInfo *info) { + int result = REDISMODULE_OK; + if (info->run_queue) { + RedisModule_Free(info->run_queue); + } + if (info->threads) { + /* Wait for workers to exit */ + for (int i = 0; i < perqueueThreadPoolSize; i++) { + const int rtn = pthread_join(info->threads[i], NULL); + if (rtn != 0) { + result = REDISMODULE_ERR; + } + } + /* Now free pool structure */ + RedisModule_Free(info->threads); + } + RedisModule_Free(info); + return result; +} + +void *RedisAI_Run_ThreadMain(void *arg); + +/* Ensure that the the run queue for the device exists. + * If not, create it. */ +int ensureRunQueue(const char *devicestr, RunQueueInfo **run_queue_info) { + int result = REDISMODULE_ERR; + if (run_queues == NULL) { + return result; + } + + AI_dictEntry *entry = AI_dictFind(run_queues, devicestr); + if (entry) { + *run_queue_info = AI_dictGetVal(entry); + result = REDISMODULE_OK; + } else { + *run_queue_info = RedisModule_Alloc(sizeof(RunQueueInfo)); + (*run_queue_info)->run_queue = queueCreate(); + pthread_cond_init(&(*run_queue_info)->queue_condition_var, NULL); + pthread_mutex_init(&(*run_queue_info)->run_queue_mutex, NULL); + (*run_queue_info)->threads = (pthread_t *)RedisModule_Alloc( + sizeof(pthread_t) * perqueueThreadPoolSize); + /* create threads */ + for (int i = 0; i < perqueueThreadPoolSize; i++) { + if (pthread_create(&((*run_queue_info)->threads[i]), NULL, + RedisAI_Run_ThreadMain, *run_queue_info) != 0) { + freeRunQueueInfo(*run_queue_info); + return REDISMODULE_ERR; + } + } + AI_dictAdd(run_queues, (void *)devicestr, (void *)*run_queue_info); + result = REDISMODULE_OK; + } + + return result; +} + +void *RedisAI_Run_ThreadMain(void *arg) { + RunQueueInfo *run_queue_info = (RunQueueInfo *)arg; + pthread_t self = pthread_self(); +#ifdef __APPLE__ + int res = pthread_setname_np("redisai_bthread"); +#else + int res = pthread_setname_np(self, "redisai_bthread"); +#endif + 
pthread_mutex_lock(&run_queue_info->run_queue_mutex); + while (true) { + int rc = pthread_cond_wait(&run_queue_info->queue_condition_var, + &run_queue_info->run_queue_mutex); + + long long run_queue_len = queueLength(run_queue_info->run_queue); + + while (run_queue_len > 0) { + queueItem **evicted_items = NULL; + RedisAI_RunInfo **batch_rinfo = NULL; + + queueItem *item = queueFront(run_queue_info->run_queue); + + while (item) { + RedisAI_RunInfo *rinfo = (RedisAI_RunInfo *)item->value; + + if (evicted_items) { + array_free(evicted_items); + array_free(batch_rinfo); + } + evicted_items = array_new(queueItem *, run_queue_len); + batch_rinfo = array_new(RedisAI_RunInfo *, run_queue_len); + + array_append(evicted_items, item); + array_append(batch_rinfo, rinfo); + + if (rinfo->sctx) { + break; + } + + // DAGRUN + if (rinfo->use_local_context==1){ + break; + } + + size_t batchsize = rinfo->mctx->model->opts.batchsize; + + if (batchsize == 0) { + break; + } + + size_t current_batchsize = RAI_RunInfoBatchSize(rinfo); + + if (current_batchsize == 0 || current_batchsize >= batchsize) { + break; + } + + queueItem *next_item = item->next; + + while (next_item != NULL) { + RedisAI_RunInfo *next_rinfo = (RedisAI_RunInfo *)next_item->value; + + if (RAI_RunInfoBatchable(rinfo, next_rinfo) == 0) { + next_item = queueNext(next_item); + continue; + } + + int next_batchsize = RAI_RunInfoBatchSize(next_rinfo); + + if (current_batchsize + next_batchsize > batchsize) { + break; + } + + array_append(evicted_items, next_item); + array_append(batch_rinfo, next_rinfo); + + current_batchsize += next_batchsize; + next_item = queueNext(next_item); + } + + size_t minbatchsize = rinfo->mctx->model->opts.minbatchsize; + + if (minbatchsize == 0 || current_batchsize >= minbatchsize) { + break; + } + + item = item->next; + } + + if (item == NULL) { + array_free(evicted_items); + array_free(batch_rinfo); + pthread_mutex_unlock(&run_queue_info->run_queue_mutex); + break; + } + + for (long long i = 0; 
i < array_len(evicted_items); i++) { + queueEvict(run_queue_info->run_queue, evicted_items[i]); + } + + pthread_mutex_unlock(&run_queue_info->run_queue_mutex); + + if (array_len(batch_rinfo) > 0) { + if (batch_rinfo[0]->use_local_context == 1) { + RedisAI_DagRunSession(batch_rinfo[0]); + } else { + RAI_ModelRunScriptRunSession(batch_rinfo); + } + } + + for (long long i = 0; i < array_len(evicted_items); i++) { + RedisModule_Free(evicted_items[i]); + } + array_free(evicted_items); + array_free(batch_rinfo); + + pthread_mutex_lock(&run_queue_info->run_queue_mutex); + + run_queue_len = queueLength(run_queue_info->run_queue); + } + } +} diff --git a/src/background_workers.h b/src/background_workers.h new file mode 100644 index 000000000..3f449e0b0 --- /dev/null +++ b/src/background_workers.h @@ -0,0 +1,37 @@ +#ifndef SRC_BACKGROUND_WORKERS_H_ +#define SRC_BACKGROUND_WORKERS_H_ + +#include + +#include "config.h" +#include "dag.h" +#include "model.h" +#include "model_script_run_session.h" +#include "redisai.h" +#include "rmutil/alloc.h" +#include "rmutil/args.h" +#include "script.h" +#include "stats.h" +#include "tensor.h" +#include "util/arr_rm_alloc.h" +#include "util/dict.h" +#include "util/queue.h" + +AI_dict *run_queues; +long long perqueueThreadPoolSize; + + +typedef struct RunQueueInfo { + pthread_mutex_t run_queue_mutex; + pthread_cond_t queue_condition_var; + queue *run_queue; + pthread_t *threads; +} RunQueueInfo; + +int freeRunQueueInfo(RunQueueInfo *info); + +/* Ensure that the the run queue for the device exists. + * If not, create it. 
*/ +int ensureRunQueue(const char *devicestr, RunQueueInfo **run_queue_info); + +#endif /* SRC_BACKGROUND_WORKERS_H_ */ diff --git a/src/config.c b/src/config.c new file mode 100644 index 000000000..803f86bd2 --- /dev/null +++ b/src/config.c @@ -0,0 +1,284 @@ +#include "config.h" + +#include +#include +#include +#include + +#include "backends.h" +#include "backends/util.h" +#include "background_workers.h" +#include "err.h" +#include "redismodule.h" +#include "rmutil/alloc.h" +#include "util/arr_rm_alloc.h" +#include "util/dict.h" +#include "util/queue.h" + +long long backends_intra_op_parallelism; // number of threads used within an +// individual op for parallelism. +long long + backends_inter_op_parallelism; // number of threads used for parallelism + // between independent operations. + +/** + * + * @return number of threads used within an individual op for parallelism. + */ +long long getBackendsInterOpParallelism() { + return backends_inter_op_parallelism; +} + +/** + * Set number of threads used for parallelism between independent operations, by + * backend. + * + * @param num_threads + * @return 0 on success, or 1 if failed + */ +int setBackendsInterOpParallelism(long long num_threads) { + int result = 1; + if (num_threads >= 0) { + backends_inter_op_parallelism = num_threads; + result = 0; + } + return result; +} + +/** + * + * @return + */ +long long getBackendsIntraOpParallelism() { + return backends_intra_op_parallelism; +} + +/** + * Set number of threads used within an individual op for parallelism, by + * backend. 
+ * + * @param num_threads + * @return 0 on success, or 1 if failed + */ +int setBackendsIntraOpParallelism(long long num_threads) { + int result = 1; + if (num_threads >= 0) { + backends_intra_op_parallelism = num_threads; + result = 0; + } + return result; +} + +/** + * Helper method for AI.CONFIG LOADBACKEND + * + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the DAGRUN failed + */ +int RedisAI_Config_LoadBackend(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc) { + if (argc < 3) return RedisModule_WrongArity(ctx); + + const char *backend = RedisModule_StringPtrLen(argv[1], NULL); + const char *path = RedisModule_StringPtrLen(argv[2], NULL); + + int result = REDISMODULE_ERR; + if (!strcasecmp(backend, "TF")) { + result = RAI_LoadBackend(ctx, RAI_BACKEND_TENSORFLOW, path); + } else if (!strcasecmp(backend, "TFLITE")) { + result = RAI_LoadBackend(ctx, RAI_BACKEND_TFLITE, path); + } else if (!strcasecmp(backend, "TORCH")) { + result = RAI_LoadBackend(ctx, RAI_BACKEND_TORCH, path); + } else if (!strcasecmp(backend, "ONNX")) { + result = RAI_LoadBackend(ctx, RAI_BACKEND_ONNXRUNTIME, path); + } else { + return RedisModule_ReplyWithError(ctx, "ERR unsupported backend"); + } + + if (result == REDISMODULE_OK) { + return RedisModule_ReplyWithSimpleString(ctx, "OK"); + } + + return RedisModule_ReplyWithError(ctx, "ERR error loading backend"); +} + +/** + * Helper method for AI.CONFIG BACKENDSPATH + * + * + * @param ctx Context in which Redis modules operate + * @param path string containing backend path + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the DAGRUN failed + */ +int RedisAI_Config_BackendsPath(RedisModuleCtx *ctx, const char *path) { + if (RAI_BackendsPath != NULL) { + RedisModule_Free(RAI_BackendsPath); + } + RAI_BackendsPath = RedisModule_Strdup(path); + + 
return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/** + * Set number of threads used for parallelism between RedisAI independent + * blocking commands ( AI.DAGRUN, AI.SCRIPTRUN, AI.MODELRUN ). + * + * @param num_threads_string string containing thread number + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if failed + */ +int RedisAI_Config_QueueThreads(RedisModuleString *num_threads_string) { + int result = + RedisModule_StringToLongLong(num_threads_string, &perqueueThreadPoolSize); + // make sure the number of threads is a positive integer + // if not set the value to the default + if (result == REDISMODULE_OK && perqueueThreadPoolSize < 1) { + perqueueThreadPoolSize = REDISAI_DEFAULT_THREADS_PER_QUEUE; + result = REDISMODULE_ERR; + } + return result; +} + +/** + * Set number of threads used for parallelism between independent operations, by + * backend. + * + * @param num_threads_string string containing thread number + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if failed + */ +int RedisAI_Config_InterOperationParallelism( + RedisModuleString *num_threads_string) { + long long temp; + int result = RedisModule_StringToLongLong(num_threads_string, &temp); + if (result == REDISMODULE_OK) { + result = setBackendsInterOpParallelism(temp); + } + return result; +} + +/** + * Set number of threads used within an individual op for parallelism, by + * backend. 
+ * + * @param num_threads_string string containing thread number + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if failed + */ +int RedisAI_Config_IntraOperationParallelism( + RedisModuleString *num_threads_string) { + long long temp; + int result = RedisModule_StringToLongLong(num_threads_string, &temp); + if (result == REDISMODULE_OK) { + result = setBackendsIntraOpParallelism(temp); + } + return result; +} + +/** + * + * @param ctx Context in which Redis modules operate + * @param key + * @param val + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if failed + */ +int RAI_configParamParse(RedisModuleCtx *ctx, const char *key, + const char *val, RedisModuleString *rsval) { + int ret = REDISMODULE_OK; + if (strcasecmp((key), "TF") == 0) { + ret = RAI_LoadBackend(ctx, RAI_BACKEND_TENSORFLOW, (val)); + } else if (strcasecmp((key), "TFLITE") == 0) { + ret = RAI_LoadBackend(ctx, RAI_BACKEND_TFLITE, (val)); + } else if (strcasecmp((key), "TORCH") == 0) { + ret = RAI_LoadBackend(ctx, RAI_BACKEND_TORCH, (val)); + } else if (strcasecmp((key), "ONNX") == 0) { + ret = RAI_LoadBackend(ctx, RAI_BACKEND_ONNXRUNTIME, (val)); + } + // enable configuring the main thread to create a fixed number of worker + // threads up front per device. 
by default we'll use 1 + else if (strcasecmp((key), "THREADS_PER_QUEUE") == 0) { + ret = RedisAI_Config_QueueThreads(rsval); + if (ret == REDISMODULE_OK) { + char *buffer = RedisModule_Alloc( + (3 + strlen(REDISAI_INFOMSG_THREADS_PER_QUEUE) + strlen((val))) * + sizeof(*buffer)); + sprintf(buffer, "%s: %s", REDISAI_INFOMSG_THREADS_PER_QUEUE, (val)); + RedisModule_Log(ctx, "notice", buffer); + RedisModule_Free(buffer); + } + } else if (strcasecmp((key), "INTRA_OP_PARALLELISM") == 0) { + ret = RedisAI_Config_IntraOperationParallelism(rsval); + if (ret == REDISMODULE_OK) { + char *buffer = RedisModule_Alloc( + (3 + strlen(REDISAI_INFOMSG_INTRA_OP_PARALLELISM) + strlen((val))) * + sizeof(*buffer)); + sprintf(buffer, "%s: %lld", REDISAI_INFOMSG_INTRA_OP_PARALLELISM, + getBackendsIntraOpParallelism()); + RedisModule_Log(ctx, "notice", buffer); + RedisModule_Free(buffer); + } + } else if (strcasecmp((key), "INTER_OP_PARALLELISM") == 0) { + ret = RedisAI_Config_InterOperationParallelism(rsval); + if (ret == REDISMODULE_OK) { + char *buffer = RedisModule_Alloc( + (3 + strlen(REDISAI_INFOMSG_INTER_OP_PARALLELISM) + strlen((val))) * + sizeof(*buffer)); + sprintf(buffer, "%s: %lld", REDISAI_INFOMSG_INTER_OP_PARALLELISM, + getBackendsInterOpParallelism()); + RedisModule_Log(ctx, "notice", buffer); + RedisModule_Free(buffer); + } + } else if (strcasecmp((key), "BACKENDSPATH") == 0) { + // already taken care of + } else { + ret = REDISMODULE_ERR; + } + return ret; +} + +/** + * Load time configuration parser + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the DAGRUN failed + */ +int RAI_loadTimeConfig(RedisModuleCtx *ctx, + RedisModuleString *const *argv, int argc) { + if (argc > 0 && argc % 2 != 0) { + RedisModule_Log(ctx, "warning", + "Even number of arguments provided to module. 
Please " + "provide arguments as KEY VAL pairs"); + } + + // need BACKENDSPATH set up before loading specific backends + for (int i = 0; i < argc / 2; i++) { + const char *key = RedisModule_StringPtrLen(argv[2 * i], NULL); + const char *val = RedisModule_StringPtrLen(argv[2 * i + 1], NULL); + + int ret = REDISMODULE_OK; + if (strcasecmp(key, "BACKENDSPATH") == 0) { + ret = RedisAI_Config_BackendsPath(ctx, val); + } + } + + for (int i = 0; i < argc / 2; i++) { + const char *key = RedisModule_StringPtrLen(argv[2 * i], NULL); + const char *val = RedisModule_StringPtrLen(argv[2 * i + 1], NULL); + int ret = RAI_configParamParse(ctx, key, val, argv[2 * i + 1]); + + if (ret == REDISMODULE_ERR) { + char *buffer = + RedisModule_Alloc((4 + strlen(REDISAI_ERRORMSG_PROCESSING_ARG) + + strlen(key) + strlen(val)) * + sizeof(*buffer)); + sprintf(buffer, "%s: %s %s", REDISAI_ERRORMSG_PROCESSING_ARG, key, val); + RedisModule_Log(ctx, "warning", buffer); + RedisModule_Free(buffer); + return ret; + } + } + + return REDISMODULE_OK; +} diff --git a/src/config.h b/src/config.h index 7c522da4c..0d807611d 100644 --- a/src/config.h +++ b/src/config.h @@ -1,10 +1,9 @@ #ifndef SRC_CONFIG_H_ #define SRC_CONFIG_H_ -typedef enum { - RAI_MODEL, - RAI_SCRIPT -} RAI_RunType; +#include "redismodule.h" + +typedef enum { RAI_MODEL, RAI_SCRIPT } RAI_RunType; typedef enum { RAI_BACKEND_TENSORFLOW = 0, @@ -16,15 +15,135 @@ typedef enum { // NOTE: entry in queue hash is formed by // device * MAX_DEVICE_ID + deviceid -typedef enum { - RAI_DEVICE_CPU = 0, - RAI_DEVICE_GPU = 1 -} RAI_Device; +typedef enum { RAI_DEVICE_CPU = 0, RAI_DEVICE_GPU = 1 } RAI_Device; #define RAI_ENC_VER 900 //#define RAI_COPY_RUN_INPUT #define RAI_COPY_RUN_OUTPUT #define RAI_PRINT_BACKEND_ERRORS +#define REDISAI_DEFAULT_THREADS_PER_QUEUE 1 +#define REDISAI_DEFAULT_INTRA_OP_PARALLELISM 0 +#define REDISAI_DEFAULT_INTER_OP_PARALLELISM 0 +#define REDISAI_ERRORMSG_PROCESSING_ARG "ERR error processing argument" +#define 
REDISAI_ERRORMSG_THREADS_PER_QUEUE \ + "ERR error setting THREADS_PER_QUEUE to" +#define REDISAI_ERRORMSG_INTRA_OP_PARALLELISM \ + "ERR error setting INTRA_OP_PARALLELISM to" +#define REDISAI_ERRORMSG_INTER_OP_PARALLELISM \ + "ERR error setting INTER_OP_PARALLELISM to" + +#define REDISAI_INFOMSG_THREADS_PER_QUEUE \ + "Setting THREADS_PER_QUEUE parameter to" +#define REDISAI_INFOMSG_INTRA_OP_PARALLELISM \ + "Setting INTRA_OP_PARALLELISM parameter to" +#define REDISAI_INFOMSG_INTER_OP_PARALLELISM \ + "Setting INTER_OP_PARALLELISM parameter to" + +/** + * Get number of threads used for parallelism between independent operations, by + * backend. + * @return number of threads used for parallelism between independent + * operations, by backend + */ +long long getBackendsInterOpParallelism(); + +/** + * Set number of threads used for parallelism between independent operations, by + * backend. + * + * @param num_threads + * @return 0 on success, or 1 if failed + */ +int setBackendsInterOpParallelism(long long num_threads); + +/** + * Get number of threads used within an individual op for parallelism, by + * backend. + * @return number of threads used within an individual op for parallelism, by + * backend. + */ +long long getBackendsIntraOpParallelism(); + +/** + * Set number of threads used within an individual op for parallelism, by + * backend. 
+ * + * @param num_threads + * @return 0 on success, or 1 if failed + */ +int setBackendsIntraOpParallelism(long long num_threads); + +/** + * Helper method for AI.CONFIG LOADBACKEND + * + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the DAGRUN failed + */ +int RedisAI_Config_LoadBackend(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc); + +/** + * Helper method for AI.CONFIG BACKENDSPATH + * + * + * @param ctx Context in which Redis modules operate + * @param path string containing backend path + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the DAGRUN failed + */ +int RedisAI_Config_BackendsPath(RedisModuleCtx *ctx, const char *path); + +/** + * Set number of threads used for parallelism between RedisAI independent + * blocking commands ( AI.DAGRUN, AI.SCRIPTRUN, AI.MODELRUN ). + * + * @param num_threads_string string containing thread number + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if failed + */ +int RedisAI_Config_QueueThreads(RedisModuleString *num_threads_string); + +/** + * Set number of threads used for parallelism between independent operations, by + * backend. + * + * @param num_threads_string string containing thread number + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if failed + */ +int RedisAI_Config_InterOperationParallelism( + RedisModuleString *num_threads_string); + +/** + * Set number of threads used within an individual op for parallelism, by + * backend. 
+ * + * @param num_threads_string string containing thread number + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if failed + */ +int RedisAI_Config_IntraOperationParallelism( + RedisModuleString *num_threads_string); + +/** + * + * @param ctx Context in which Redis modules operate + * @param key + * @param val + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if failed + */ +int RAI_configParamParse(RedisModuleCtx *ctx, const char *key, + const char *val, RedisModuleString *rsval); + +/** + * Load time configuration parser + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the DAGRUN failed + */ +int RAI_loadTimeConfig(RedisModuleCtx *ctx, + RedisModuleString *const *argv, int argc); #endif /* SRC_CONFIG_H_ */ diff --git a/src/dag.c b/src/dag.c new file mode 100644 index 000000000..0c242fd2b --- /dev/null +++ b/src/dag.c @@ -0,0 +1,316 @@ +#include "dag.h" + +#include +#include +#include +#include +#include + +#include "model.h" +#include "redisai.h" +#include "rmutil/alloc.h" +#include "rmutil/args.h" +#include "run_info.h" +#include "stats.h" +#include "tensor.h" +#include "util/arr_rm_alloc.h" +#include "util/dict.h" +#include "util/queue.h" + +/** + * Actual method running the DAGRUN Commands in the background + * thread Called within `RedisAI_Run_ThreadMain` + */ +void *RedisAI_DagRunSession(RedisAI_RunInfo *rinfo) { + for (size_t i = 0; i < array_len(rinfo->dagOps); i++) { + RAI_DagOp *currentOp = rinfo->dagOps[i]; + switch (currentOp->commandType) { + case REDISAI_DAG_CMD_TENSORSET: { + RAI_Tensor *t = NULL; + const int parse_result = RAI_parseTensorSetArgs( + NULL, currentOp->argv, currentOp->argc, &t, 0, currentOp->err); + if (parse_result > 0) { + const char *key_string = + RedisModule_StringPtrLen(currentOp->argv[1], NULL); + const char *dictKey = 
RedisModule_Strdup(key_string); + AI_dictReplace(rinfo->dagTensorsContext, (void*)dictKey, t); + currentOp->result = REDISMODULE_OK; + } else { + currentOp->result = REDISMODULE_ERR; + } + break; + } + case REDISAI_DAG_CMD_TENSORGET: { + const char *key_string = + RedisModule_StringPtrLen(currentOp->argv[1], NULL); + RAI_Tensor *t = NULL; + currentOp->result = RAI_getTensorFromLocalContext( + NULL, rinfo->dagTensorsContext, key_string, &t, currentOp->err); + if (currentOp->result == REDISMODULE_OK) { + RAI_Tensor *outTensor = NULL; + // TODO: check tensor copy return value + RAI_TensorCopyTensor(t, &outTensor); + array_append(currentOp->outTensors, outTensor); + currentOp->result = REDISMODULE_OK; + } + break; + } + case REDISAI_DAG_CMD_MODELRUN: { + const int parse_result = RedisAI_Parse_ModelRun_RedisCommand( + NULL, currentOp->argv, currentOp->argc, &(currentOp->mctx), + &(currentOp->outkeys), &(currentOp->mctx->model), 1, + &(rinfo->dagTensorsContext), 0, NULL, currentOp->err); + + if (parse_result > 0) { + RAI_ModelRunCtx **mctxs = NULL; + mctxs = array_new(RAI_ModelRunCtx *, 1); + mctxs = array_append(mctxs, currentOp->mctx); + currentOp->result = REDISMODULE_OK; + const long long start = ustime(); + currentOp->result = RAI_ModelRun(mctxs, currentOp->err); + currentOp->duration_us = ustime() - start; + const size_t noutputs = RAI_ModelRunCtxNumOutputs(currentOp->mctx); + for (size_t outputNumber = 0; outputNumber < noutputs; + outputNumber++) { + RAI_Tensor *tensor = + RAI_ModelRunCtxOutputTensor(currentOp->mctx, outputNumber); + if (tensor) { + const char *key_string = RedisModule_StringPtrLen( + currentOp->outkeys[outputNumber], NULL); + const char *dictKey = RedisModule_Strdup(key_string); + AI_dictReplace(rinfo->dagTensorsContext, (void*)dictKey, tensor); + } else { + RAI_SetError(currentOp->err, RAI_EMODELRUN, + "ERR output tensor on DAG's MODELRUN was null"); + currentOp->result = REDISMODULE_ERR; + } + } + array_free(mctxs); + } else { + 
currentOp->result = REDISMODULE_ERR; + } + break; + } + default: { + /* unsupported DAG's command */ + RAI_SetError(currentOp->err, RAI_EDAGRUN, + "ERR unsupported command within DAG"); + currentOp->result = REDISMODULE_ERR; + break; + } + } + } + if (rinfo->client != NULL) { + RedisModule_UnblockClient(rinfo->client, rinfo); + } + return NULL; +} + +int RedisAI_DagRun_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + RedisAI_RunInfo *rinfo = RedisModule_GetBlockedClientPrivateData(ctx); + RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN); + for (size_t i = 0; i < array_len(rinfo->dagOps); i++) { + RAI_DagOp *currentOp = rinfo->dagOps[i]; + switch (currentOp->commandType) { + case REDISAI_DAG_CMD_TENSORSET: { + rinfo->dagReplyLength++; + if (currentOp->result == REDISMODULE_ERR) { + RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); + } else { + RedisModule_ReplyWithSimpleString(ctx, "OK"); + } + break; + } + + case REDISAI_DAG_CMD_TENSORGET: { + rinfo->dagReplyLength++; + if (currentOp->result == REDISMODULE_ERR) { + RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); + } else { + if (array_len(currentOp->outTensors) > 0) { + RAI_Tensor *tensor = currentOp->outTensors[0]; + RAI_parseTensorGetArgs(ctx, currentOp->argv, currentOp->argc, + tensor); + } else { + RedisModule_ReplyWithError( + ctx, "ERR error getting tensor from local context"); + } + } + break; + } + + case REDISAI_DAG_CMD_MODELRUN: { + rinfo->dagReplyLength++; + if (currentOp->result == REDISMODULE_ERR) { + RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); + } else { + RedisModule_ReplyWithSimpleString(ctx, "OK"); + } + break; + } + + default: + /* no-op */ + break; + } + } + + AI_dictIterator *persist_iter = + AI_dictGetSafeIterator(rinfo->dagTensorsPersistentContext); + AI_dictEntry *persist_entry = AI_dictNext(persist_iter); + while (persist_entry) { + const char 
*persist_key_name = AI_dictGetKey(persist_entry); + AI_dictEntry *tensor_entry = + AI_dictFind(rinfo->dagTensorsContext, persist_key_name); + if (tensor_entry) { + RAI_Tensor *tensor = AI_dictGetVal(tensor_entry); + RedisModuleKey *key; + RedisModuleString *tensor_keyname = RedisModule_CreateString( + ctx, persist_key_name, strlen(persist_key_name)); + const int status = RAI_OpenKey_Tensor( + ctx, tensor_keyname, &key, REDISMODULE_READ | REDISMODULE_WRITE); + if (status == REDISMODULE_ERR) { + RedisModule_ReplyWithError(ctx, "ERR could not save tensor"); + rinfo->dagReplyLength++; + } else { + if (RedisModule_ModuleTypeSetValue(key, RedisAI_TensorType, tensor) != + REDISMODULE_OK) { + RedisModule_ReplyWithError(ctx, "ERR could not save tensor"); + rinfo->dagReplyLength++; + } + } + RedisModule_CloseKey(key); + RedisAI_ReplicateTensorSet(ctx, tensor_keyname, tensor); + // TODO: free Tensor + } else { + RedisModule_ReplyWithError( + ctx, "ERR specified persistent key that was not used on DAG"); + rinfo->dagReplyLength++; + + RedisModule_Log(ctx, "warning", + "on DAGRUN's PERSIST pecified persistent key (%s) that " + "was not used on DAG. 
Logging all local context keys", + persist_key_name); + AI_dictIterator *local_iter = + AI_dictGetSafeIterator(rinfo->dagTensorsContext); + AI_dictEntry *local_entry = AI_dictNext(local_iter); + while (local_entry) { + const char *localcontext_key_name = AI_dictGetKey(local_entry); + RedisModule_Log(ctx, "warning", "DAG's local context key (%s)", + localcontext_key_name); + local_entry = AI_dictNext(local_iter); + } + + for (size_t opN = 0; opN < array_len(rinfo->dagOps); opN++) { + RedisModule_Log( + ctx, "warning", "DAG's op n# %d - cmdType %d ( argc %d )", opN, + rinfo->dagOps[opN]->commandType, rinfo->dagOps[opN]->argc); + } + } + + persist_entry = AI_dictNext(persist_iter); + } + AI_dictReleaseIterator(persist_iter); + RedisModule_ReplySetArrayLength(ctx, rinfo->dagReplyLength); + RAI_FreeRunInfo(ctx, rinfo); + return REDISMODULE_OK; +} + +/** + * DAGRUN Building Block to parse [LOAD key1 key2... ] + */ +int RAI_parseDAGLoadArgs(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc, AI_dict **loadedContextDict, + AI_dict **localContextDict, + const char *chaining_operator) { + if (argc < 3) { + RedisModule_WrongArity(ctx); + return -1; + } + + long long n_keys; + const int retval = RedisModule_StringToLongLong(argv[1], &n_keys); + if (retval != REDISMODULE_OK || n_keys <= 0) { + RedisModule_ReplyWithError( + ctx, "ERR invalid or negative value found in number of keys to LOAD"); + return -1; + } + + int number_loaded_keys = 0; + int separator_flag = 0; + size_t argpos = 2; + for (; (argpos <= argc - 1) && (number_loaded_keys < n_keys); argpos++) { + const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); + if (!strcasecmp(arg_string, chaining_operator)) { + separator_flag = 1; + break; + } else { + RAI_Tensor *t; + RedisModuleKey *key; + const int status = RAI_GetTensorFromKeyspace(ctx, argv[argpos], &key, &t, + REDISMODULE_READ); + if (status == REDISMODULE_ERR) { + RedisModule_Log( + ctx, "warning", + "on DAGRUN's LOAD could not load tensor 
%s from keyspace", + arg_string); + return -1; + } + RedisModule_CloseKey(key); + const char *dictKey = RedisModule_Strdup(arg_string); + AI_dictAdd(*localContextDict, (void*)dictKey, t); + const char *keyspacePersistKey = RedisModule_Strdup(dictKey); + AI_dictAdd(*loadedContextDict, (void*)keyspacePersistKey, (void *)1); + number_loaded_keys++; + } + } + if (number_loaded_keys != n_keys) { + RedisModule_WrongArity(ctx); + return -1; + } + return argpos; +} + +/** + * DAGRUN Building Block to parse [PERSIST key1 key2... ] + */ +int RAI_parseDAGPersistArgs(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc, AI_dict **persistContextDict, + const char *chaining_operator) { + if (argc < 3) { + RedisModule_WrongArity(ctx); + return -1; + } + + long long n_keys; + const int retval = RedisModule_StringToLongLong(argv[1], &n_keys); + if (retval != REDISMODULE_OK || n_keys <= 0) { + RedisModule_ReplyWithError( + ctx, + "ERR invalid or negative value found in number of keys to PERSIST"); + return -1; + } + + int number_loaded_keys = 0; + int separator_flag = 0; + size_t argpos = 2; + for (; (argpos < argc) && (number_loaded_keys < n_keys); argpos++) { + const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); + if (!strcasecmp(arg_string, chaining_operator)) { + separator_flag = 1; + break; + } else { + const char *key = RedisModule_Strdup(arg_string); + AI_dictAdd(*persistContextDict, (void*)key, (void *)1); + number_loaded_keys++; + } + } + if (number_loaded_keys != n_keys) { + RedisModule_WrongArity(ctx); + return -1; + } + return argpos; +} diff --git a/src/dag.h b/src/dag.h new file mode 100644 index 000000000..ee274c158 --- /dev/null +++ b/src/dag.h @@ -0,0 +1,69 @@ +#ifndef SRC_DAG_H_ +#define SRC_DAG_H_ + +#include "model.h" +#include "redisai.h" +#include "run_info.h" +#include "tensor.h" +#include "util/arr_rm_alloc.h" + +/** + * Actual method running the DAGRUN Commands in the background + * thread Called within `RedisAI_Run_ThreadMain` + * 
After all computation is done, this will trigger + * the reply callback to be called in order to reply to the client. + * The 'rinfo' argument will be accessible by the reply callback. + * + * @param rinfo context in which RedisAI blocking commands operate. + * @return + */ +void *RedisAI_DagRunSession(RedisAI_RunInfo *rinfo); + +/** + * Reply Callback called after a successful RedisModule_UnblockClient() within + * RedisAI_DagRunSession() in order to reply to the client and unblock it + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the DAGRUN failed + */ +int RedisAI_DagRun_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc); + +/** + * DAGRUN Building Block to parse [LOAD key1 key2... ] + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @param loadedContextDict local non-blocking hash table containing key names + * loaded from the keyspace tensors + * @param localContextDict local non-blocking hash table containing DAG's + * tensors + * @param chaining_operator operator used to split operations. Any command + * argument after the chaining operator is not considered + * @return processed number of arguments on success, or -1 if the parsing failed + */ +int RAI_parseDAGLoadArgs(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc, AI_dict **loadedContextDict, + AI_dict **localContextDict, + const char *chaining_operator); + +/** + * DAGRUN Building Block to parse [PERSIST key1 key2... 
] + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @param localContextDict local non-blocking hash table containing DAG's + * keynames marked as persistent + * @param chaining_operator operator used to split operations. Any command + * argument after the chaining operator is not considered + * @return processed number of arguments on success, or -1 if the parsing failed + */ +int RAI_parseDAGPersistArgs(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc, AI_dict **localContextDict, + const char *chaining_operator); + +#endif /* SRC_DAG_H_ */ diff --git a/src/err.c b/src/err.c index 901fe3d13..4fbce946a 100644 --- a/src/err.c +++ b/src/err.c @@ -28,10 +28,28 @@ void RAI_SetError(RAI_Error *err, RAI_ErrorCode code, const char *detail) { } else { err->detail = RedisModule_Strdup("ERR Generic error"); } - err->detail_oneline = RAI_Chomp(err->detail); } +/** + * Allocate the memory and initialise the RAI_Error. + * @param result Output parameter to capture allocated RAI_Error. + * @return 0 on success, or 1 if the allocation + * failed. 
+ */ +int RAI_InitError(RAI_Error** result) { + RAI_Error* err; + err = (RAI_Error*)RedisModule_Calloc(1, sizeof(RAI_Error)); + if (!err) { + return 1; + } + err->code=0; + err->detail=NULL; + err->detail_oneline=NULL; + *result = err; + return 0; +} + void RAI_ClearError(RAI_Error *err) { if (err->detail) { RedisModule_Free(err->detail); @@ -43,3 +61,10 @@ void RAI_ClearError(RAI_Error *err) { } err->code = RAI_OK; } + +void RAI_FreeError(RAI_Error *err) { + if (err) { + RAI_ClearError(err); + RedisModule_Free(err); + } +} diff --git a/src/err.h b/src/err.h index 1b442fbca..087b1b6e1 100644 --- a/src/err.h +++ b/src/err.h @@ -15,7 +15,10 @@ typedef enum { RAI_ESCRIPTRUN, RAI_EUNSUPPORTEDBACKEND, RAI_EBACKENDNOTLOADED, - RAI_ESCRIPTFREE + RAI_ESCRIPTFREE, + RAI_ETENSORSET, + RAI_ETENSORGET, + RAI_EDAGRUN, } RAI_ErrorCode; typedef struct RAI_Error { @@ -24,8 +27,18 @@ typedef struct RAI_Error { char* detail_oneline; } RAI_Error; +/** + * Allocate the memory and initialise the RAI_Error. + * @param result Output parameter to capture allocated RAI_Error. + * @return 0 on success, or 1 if the allocation + * failed. 
+ */ +int RAI_InitError(RAI_Error **err); + void RAI_SetError(RAI_Error *err, RAI_ErrorCode code, const char *detail); void RAI_ClearError(RAI_Error *err); +void RAI_FreeError(RAI_Error *err); + #endif \ No newline at end of file diff --git a/src/model.c b/src/model.c index 3028d5ec6..a68248ed3 100644 --- a/src/model.c +++ b/src/model.c @@ -2,9 +2,12 @@ #include "model_struct.h" #include "backends.h" #include "stats.h" +#include "backends/util.h" #include "rmutil/alloc.h" #include "util/arr_rm_alloc.h" +#include "util/dict.h" +#include "run_info.h" RedisModuleType *RedisAI_ModelType = NULL; @@ -40,7 +43,9 @@ static void* RAI_Model_RdbLoad(struct RedisModuleIO *io, int encver) { RAI_ModelOpts opts = { .batchsize = batchsize, - .minbatchsize = minbatchsize + .minbatchsize = minbatchsize, + .backends_intra_op_parallelism = getBackendsIntraOpParallelism(), + .backends_inter_op_parallelism = getBackendsInterOpParallelism(), }; size_t len; @@ -188,6 +193,28 @@ static void RAI_Model_AofRewrite(RedisModuleIO *aof, RedisModuleString *key, voi array_free(outputs_); } + +/* Return REDISMODULE_ERR if there was an error getting the Model. + * Return REDISMODULE_OK if the model value stored at key was correctly + * returned and available at *model variable. */ +int RAI_GetModelFromKeyspace(RedisModuleCtx *ctx, RedisModuleString *keyName, + RedisModuleKey **key, RAI_Model **model, + int mode) { + *key = RedisModule_OpenKey(ctx, keyName, mode); + if (RedisModule_KeyType(*key) == REDISMODULE_KEYTYPE_EMPTY) { + RedisModule_CloseKey(*key); + RedisModule_ReplyWithError(ctx, "ERR model key is empty"); + return REDISMODULE_ERR; + } + if (RedisModule_ModuleTypeGetType(*key) != RedisAI_ModelType) { + RedisModule_CloseKey(*key); + RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); + return REDISMODULE_ERR; + } + *model = RedisModule_ModuleTypeGetValue(*key); + return REDISMODULE_OK; +} + // TODO: pass err in? 
static void RAI_Model_DTFree(void *value) { RAI_Error err = {0}; @@ -463,3 +490,144 @@ int RAI_ModelSerialize(RAI_Model *model, char **buffer, size_t *len, RAI_Error * return ret; } + + +int RedisAI_Parse_ModelRun_RedisCommand(RedisModuleCtx *ctx, + RedisModuleString **argv, int argc, + // RedisAI_RunInfo **rinfo, + RAI_ModelRunCtx **mctx, + RedisModuleString ***outkeys, + RAI_Model **mto, int useLocalContext, + AI_dict **localContextDict, + int use_chaining_operator, + const char *chaining_operator, RAI_Error *error) { + if (argc < 3) { + if (ctx == NULL) { + RAI_SetError(error, RAI_EMODELRUN, + "ERR wrong number of arguments for 'AI.MODELRUN' command"); + } else { + RedisModule_WrongArity(ctx); + } + return -1; + } + + const char *inputstr = RedisModule_StringPtrLen(argv[2], NULL); + if (strcasecmp(inputstr, "INPUTS")) { + if (ctx == NULL) { + RAI_SetError(error, RAI_EMODELRUN, "ERR INPUTS not specified"); + } else { + RedisModule_ReplyWithError(ctx, "ERR INPUTS not specified"); + } + return -1; + } + + // parsing aux vars + int is_input = 0; + size_t ninputs = 0; + size_t noutputs = 0; + int outputs_flag_count = 0; + size_t argpos = 3; + + for (; argpos <= argc - 1; argpos++) { + const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); + if (use_chaining_operator == 1) { + if (!strcasecmp(arg_string, chaining_operator)) { + break; + } + } + if (!strcasecmp(arg_string, "OUTPUTS") && outputs_flag_count == 0) { + is_input = 1; + outputs_flag_count = 1; + const size_t expected_noutputs = argc - argpos - 1; + // if (expected_noutputs > 0) { + // *outkeys = + // RedisModule_Calloc(expected_noutputs, sizeof(RedisModuleString *)); + // } + } else { + RedisModule_RetainString(ctx, argv[argpos]); + if (is_input == 0) { + RAI_Tensor *inputTensor; + if (useLocalContext == 0) { + RedisModuleKey *tensorKey; + const int status = RAI_GetTensorFromKeyspace( + ctx, argv[argpos], &tensorKey, &inputTensor, REDISMODULE_READ); + if (status == REDISMODULE_ERR) { + // 
TODO: free rinfo + return -1; + } + RedisModule_CloseKey(tensorKey); + } else { + const int get_result = RAI_getTensorFromLocalContext( + ctx, *localContextDict, arg_string, &inputTensor,error); + if (get_result == REDISMODULE_ERR) { + return -1; + } + } + + // Opname here is passed without copying + const char *opname = NULL; + if ((*mto)->inputs) { + opname = (*mto)->inputs[ninputs]; + } + if (!RAI_ModelRunCtxAddInput(*mctx, opname, inputTensor)) { + // todo free rinfo + if (ctx == NULL) { + RAI_SetError(error, RAI_EMODELRUN, "ERR Input key not found"); + } else { + RedisModule_ReplyWithError(ctx, "ERR Input key not found"); + } + return -1; + } + ninputs++; + } else { + // Opname here is passed without copying + const char *opname = NULL; + if ((*mto)->outputs) { + opname = (*mto)->outputs[noutputs]; + } + if (!RAI_ModelRunCtxAddOutput(*mctx, opname)) { + // todo free rinfo + if (ctx == NULL) { + RAI_SetError(error, RAI_EMODELRUN, "ERR Output key not found"); + } else { + RedisModule_ReplyWithError(ctx, "ERR Output key not found"); + } + } + *outkeys=array_append(*outkeys,argv[argpos]); + // (*outkeys)[noutputs] = argv[argpos]; + noutputs++; + } + } + } + + if ((*mto)->inputs && array_len((*mto)->inputs) != ninputs) { + if (ctx == NULL) { + RAI_SetError( + error, RAI_EMODELRUN, + "Number of names given as INPUTS during MODELSET and keys given as " + "INPUTS here do not match"); + } else { + RedisModule_ReplyWithError( + ctx, + "Number of names given as INPUTS during MODELSET and keys given as " + "INPUTS here do not match"); + } + return -1; + } + + if ((*mto)->outputs && array_len((*mto)->outputs) != noutputs) { + if (ctx == NULL) { + RAI_SetError( + error, RAI_EMODELRUN, + "Number of names given as OUTPUTS during MODELSET and keys given as " + "INPUTS here do not match"); + } else { + RedisModule_ReplyWithError( + ctx, + "Number of names given as OUTPUTS during MODELSET and keys given as " + "INPUTS here do not match"); + } + return -1; + } + return argpos; +} 
\ No newline at end of file diff --git a/src/model.h b/src/model.h index 87d8d0b3b..d01b9d510 100644 --- a/src/model.h +++ b/src/model.h @@ -5,7 +5,11 @@ #include "model_struct.h" #include "tensor.h" #include "redismodule.h" +#include "run_info.h" #include "err.h" +#include "redisai.h" +#include "util/dict.h" +#include "run_info.h" extern RedisModuleType *RedisAI_ModelType; @@ -30,5 +34,19 @@ int RAI_ModelRun(RAI_ModelRunCtx** mctxs, RAI_Error* err); RAI_Model* RAI_ModelGetShallowCopy(RAI_Model* model); int RAI_ModelSerialize(RAI_Model *model, char **buffer, size_t *len, RAI_Error *err); - +/* Return REDISMODULE_ERR if there was an error getting the Model. + * Return REDISMODULE_OK if the model value stored at key was correctly + * returned and available at *model variable. */ +int RAI_GetModelFromKeyspace(RedisModuleCtx *ctx, RedisModuleString *keyName, + RedisModuleKey **key, RAI_Model **model, + int mode); + +int RedisAI_Parse_ModelRun_RedisCommand(RedisModuleCtx *ctx, + RedisModuleString **argv, int argc, + RAI_ModelRunCtx **mctx, + RedisModuleString ***outkeys, + RAI_Model **mto, int useLocalContext, + AI_dict **localContextDict, + int use_chaining_operator, + const char *chaining_operator, RAI_Error *error); #endif /* SRC_MODEL_H_ */ diff --git a/src/model_script_run_session.c b/src/model_script_run_session.c new file mode 100644 index 000000000..bd925e094 --- /dev/null +++ b/src/model_script_run_session.c @@ -0,0 +1,171 @@ +#include "model_script_run_session.h" + +#include "model.h" +#include "redisai.h" +#include "rmutil/alloc.h" +#include "rmutil/args.h" +#include "run_info.h" +#include "script.h" +#include "stats.h" +#include "tensor.h" +#include "util/arr_rm_alloc.h" +#include "util/dict.h" +#include "util/queue.h" + + +/** + * Actual method running the MODELRUN and SCRIPTRUN Commands in the background + * thread Called within `RedisAI_Run_ThreadMain` + */ +void *RAI_ModelRunScriptRunSession(RedisAI_RunInfo **batch_rinfo) { + const long long batch_size = 
array_len(batch_rinfo); + + if (batch_size == 0) { + return NULL; + } + + RAI_ModelRunCtx **mctxs = NULL; + RAI_ScriptRunCtx *sctx = NULL; + + RAI_Error *err = RedisModule_Calloc(1, sizeof(RAI_Error)); + long long rtime; + int status; + if (batch_rinfo[0]->mctx) { + mctxs = array_new(RAI_ModelRunCtx *, batch_size); + for (long long i = 0; i < batch_size; i++) { + mctxs = array_append(mctxs, batch_rinfo[i]->mctx); + } + } else if (batch_rinfo[0]->sctx) { + sctx = batch_rinfo[0]->sctx; + } + + const long long start = ustime(); + if (mctxs) { + status = RAI_ModelRun(mctxs, err); + } else if (sctx) { + status = RAI_ScriptRun(sctx, err); + } + rtime = ustime() - start; + + for (long long i = 0; i < batch_size; i++) { + struct RedisAI_RunInfo *rinfo = batch_rinfo[i]; + + rinfo->result = status; + rinfo->err = RedisModule_Calloc(1, sizeof(RAI_Error)); + // TODO: add information on whether the call was batched + // and how large the batch was + rinfo->duration_us = rtime; + + rinfo->err->code = err->code; + if (err->code != RAI_OK) { + rinfo->err->detail = RedisModule_Strdup(err->detail); + rinfo->err->detail_oneline = RedisModule_Strdup(err->detail_oneline); + } + if (rinfo->client != NULL) { + RedisModule_UnblockClient(rinfo->client, rinfo); + } + } + + if (mctxs) { + array_free(mctxs); + } else if (sctx) { + // No batching for scripts for now + } + + return NULL; +} + +/** + * Reply Callback called after a successful RedisModule_UnblockClient() within + * RAI_ModelRunScriptRunSession() in order to reply to the client and unblock it + */ +int RAI_ModelRunScriptRunReply(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + struct RedisAI_RunInfo *rinfo = RedisModule_GetBlockedClientPrivateData(ctx); + + const char *runkey = RedisModule_StringPtrLen(rinfo->runkey, NULL); + AI_dictEntry *stats_entry = AI_dictFind(run_stats, runkey); + + struct RedisAI_RunStats *rstats = NULL; + if (stats_entry) { + rstats 
= AI_dictGetVal(stats_entry); + } + + if (rinfo->result == REDISMODULE_ERR) { + RedisModule_Log(ctx, "warning", "ERR %s", rinfo->err->detail); + if (rstats) { + rstats->calls += 1; + rstats->nerrors += 1; + } + int ret = RedisModule_ReplyWithError(ctx, rinfo->err->detail_oneline); + RAI_FreeRunInfo(ctx, rinfo); + return ret; + } + + size_t num_outputs = 0; + if (rinfo->mctx) { + num_outputs = RAI_ModelRunCtxNumOutputs(rinfo->mctx); + } else if (rinfo->sctx) { + num_outputs = RAI_ScriptRunCtxNumOutputs(rinfo->sctx); + } + + int64_t batch_size = 0; + + for (size_t i = 0; i < num_outputs; ++i) { + RedisModuleKey *outkey; + const int status = RAI_OpenKey_Tensor(ctx, rinfo->outkeys[i], &outkey, + REDISMODULE_READ | REDISMODULE_WRITE); + if (status == REDISMODULE_ERR) { + RAI_FreeRunInfo(ctx, rinfo); + if (rstats) { + rstats->calls += 1; + rstats->nerrors += 1; + } + return REDISMODULE_ERR; + } + RAI_Tensor *t = NULL; + if (rinfo->mctx) { + t = RAI_ModelRunCtxOutputTensor(rinfo->mctx, i); + if (t && batch_size == 0) { + batch_size = RAI_TensorDim(t, 0); + } + } else if (rinfo->sctx) { + t = RAI_ScriptRunCtxOutputTensor(rinfo->sctx, i); + } + if (t) { + RedisModule_ModuleTypeSetValue(outkey, RedisAI_TensorType, + RAI_TensorGetShallowCopy(t)); + } + RedisModule_CloseKey(outkey); + + if (t) { + RedisAI_ReplicateTensorSet(ctx, rinfo->outkeys[i], t); + } + } + + if (rstats) { + rstats->duration_us += rinfo->duration_us; + rstats->calls += 1; + + if (rinfo->mctx) { + rstats->samples += batch_size; + } + } + + RAI_FreeRunInfo(ctx, rinfo); + return RedisModule_ReplyWithSimpleString(ctx, "OK"); +} + +/** + * Called in order to free the private data that is passed + * by RedisModule_UnblockClient() call after + * RAI_ModelRunScriptRunSession() + */ +void RedisAI_FreeData(RedisModuleCtx *ctx, void *rinfo) {} + +void RedisAI_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc) { + RedisModule_Log(ctx, "warning", "Blocked client %p disconnected!", + (void *)bc); +} \ No 
newline at end of file diff --git a/src/model_script_run_session.h b/src/model_script_run_session.h new file mode 100644 index 000000000..6c4639133 --- /dev/null +++ b/src/model_script_run_session.h @@ -0,0 +1,61 @@ +#ifndef SRC_MODEL_SCRIPT_RUN_SESSION_H_ +#define SRC_MODEL_SCRIPT_RUN_SESSION_H_ + +#include "model.h" +#include "redisai.h" +#include "rmutil/alloc.h" +#include "rmutil/args.h" +#include "run_info.h" +#include "script.h" +#include "stats.h" +#include "tensor.h" +#include "util/arr_rm_alloc.h" +#include "util/dict.h" +#include "util/queue.h" + + + +/** + * Actual method running the MODELRUN and SCRIPTRUN Commands in the background + * thread Called within `RedisAI_Run_ThreadMain` + * After all computation is done, this will trigger + * the reply callbacks to be called in order to reply to the clients. + * The 'rinfo' argument will be accessible by the reply callback, for each of + * the runinfo present in batch_rinfo + * + * @param batch_rinfo array of `RedisAI_RunInfo *rinfo` contexts in which RedisAI blocking commands operate. 
+ * @return + */ +void *RAI_ModelRunScriptRunSession(RedisAI_RunInfo **batch_rinfo); + +/** + * Reply Callback called after a successful RedisModule_UnblockClient() within + * RAI_ModelRunScriptRunSession() in order to reply to the client and unblock it + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the MODELRUN/SCRIPTRUN failed + */ +int RAI_ModelRunScriptRunReply(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc); + +/** + * Called in order to free the private data that is passed + * by RedisModule_UnblockClient() call after + * RAI_ModelRunScriptRunSession() + * + * @param ctx Context in which Redis modules operate + * @param rinfo + */ +void RedisAI_FreeData(RedisModuleCtx *ctx, void *rinfo); + +/** + * + * @param ctx Context in which Redis modules operate + * @param bc + */ +void RedisAI_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc); + + +#endif /* SRC_MODEL_SCRIPT_RUN_SESSION_H_ */ diff --git a/src/model_struct.h b/src/model_struct.h index 074785e41..1c0381a89 100644 --- a/src/model_struct.h +++ b/src/model_struct.h @@ -7,6 +7,11 @@ typedef struct RAI_ModelOpts { size_t batchsize; size_t minbatchsize; + long long backends_intra_op_parallelism; // number of threads used within an +// individual op for parallelism. +long long + backends_inter_op_parallelism; // number of threads used for parallelism + // between independent operations. 
} RAI_ModelOpts; typedef struct RAI_Model { diff --git a/src/redisai.c b/src/redisai.c index 5bc8876ab..842736afc 100644 --- a/src/redisai.c +++ b/src/redisai.c @@ -1,6 +1,10 @@ #include "redismodule.h" #include "tensor.h" + #include "model.h" +#include "dag.h" +#include "model_script_run_session.h" +#include "background_workers.h" #include "script.h" #include "backends.h" #include "stats.h" @@ -9,557 +13,19 @@ #include #include #include +#include "backends/util.h" #include "rmutil/alloc.h" #include "util/arr_rm_alloc.h" #include "util/dict.h" #include "util/queue.h" #include "rmutil/args.h" +#include "run_info.h" #define REDISAI_H_INCLUDE #include "redisai.h" #undef REDISAI_H_INCLUDE -typedef struct RunQueueInfo { - pthread_mutex_t run_queue_mutex; - pthread_cond_t queue_condition_var; - queue *run_queue; - pthread_t *threads; -} RunQueueInfo; - -static AI_dict *run_queues = NULL; -static long long perqueueThreadPoolSize = REDISAI_DEFAULT_THREADS_PER_QUEUE; - -int freeRunQueueInfo(RunQueueInfo* info) { - int result = REDISMODULE_OK; - if (info->run_queue) { - RedisModule_Free(info->run_queue); - } - if (info->threads){ - /* Wait for workers to exit */ - for (int i = 0; i < perqueueThreadPoolSize; i++){ - const int rtn = pthread_join(info->threads[i], NULL); - if (rtn != 0 ){ - result = REDISMODULE_ERR; - } - } - /* Now free pool structure */ - RedisModule_Free(info->threads); - } - RedisModule_Free(info); - return result; -} - -void *RedisAI_Run_ThreadMain(void *arg); - -/* Ensure that the the run queue for the device exists. - * If not, create it. 
*/ -int ensureRunQueue(const char* devicestr) { - int result = REDISMODULE_ERR; - - AI_dictEntry *entry = AI_dictFind(run_queues, devicestr); - if (entry){ - result = REDISMODULE_OK; - } - else{ - RunQueueInfo *run_queue_info = RedisModule_Alloc(sizeof(RunQueueInfo)); - run_queue_info->run_queue = queueCreate(); - pthread_cond_init(&run_queue_info->queue_condition_var, NULL); - pthread_mutex_init(&run_queue_info->run_queue_mutex, NULL); - run_queue_info->threads = (pthread_t *)RedisModule_Alloc(sizeof(pthread_t) * perqueueThreadPoolSize); - /* create threads */ - for (int i = 0; i < perqueueThreadPoolSize; i++){ - if (pthread_create(&(run_queue_info->threads[i]), NULL, RedisAI_Run_ThreadMain, run_queue_info) != 0){ - freeRunQueueInfo(run_queue_info); - return REDISMODULE_ERR; - } - } - AI_dictAdd(run_queues, (void*)devicestr, (void*)run_queue_info); - result = REDISMODULE_OK; - } - - return result; -} - -long long ustime(void) { - struct timeval tv; - long long ust; - - gettimeofday(&tv, NULL); - ust = ((long long)tv.tv_sec)*1000000; - ust += tv.tv_usec; - return ust; -} - -mstime_t mstime(void) { - return ustime()/1000; -} - -/* Return REDISMODULE_ERR if there was an error getting the Model. - * Return REDISMODULE_OK if the model value stored at key was correctly - * returned and available at *model variable. 
*/ -int RAI_GetModelFromKeyspace(RedisModuleCtx *ctx, RedisModuleString *keyName, - RedisModuleKey **key, RAI_Model **model, - int mode) { - *key = RedisModule_OpenKey(ctx, keyName, mode); - if (RedisModule_KeyType(*key) == REDISMODULE_KEYTYPE_EMPTY) { - RedisModule_CloseKey(*key); - RedisModule_ReplyWithError(ctx, "ERR model key is empty"); - return REDISMODULE_ERR; - } - if (RedisModule_ModuleTypeGetType(*key) != RedisAI_ModelType) { - RedisModule_CloseKey(*key); - RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); - return REDISMODULE_ERR; - } - *model = RedisModule_ModuleTypeGetValue(*key); - return REDISMODULE_OK; -} - -/* Return REDISMODULE_ERR if there was an error getting the Script. - * Return REDISMODULE_OK if the model value stored at key was correctly - * returned and available at *model variable. */ -int RAI_GetScriptFromKeyspace(RedisModuleCtx *ctx, RedisModuleString *keyName, - RedisModuleKey **key, RAI_Script **script, - int mode) { - *key = RedisModule_OpenKey(ctx, keyName, mode); - if (RedisModule_KeyType(*key) == REDISMODULE_KEYTYPE_EMPTY) { - RedisModule_CloseKey(*key); - RedisModule_ReplyWithError(ctx, "ERR script key is empty"); - return REDISMODULE_ERR; - } - if (RedisModule_ModuleTypeGetType(*key) != RedisAI_ScriptType) { - RedisModule_CloseKey(*key); - RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); - return REDISMODULE_ERR; - } - *script = RedisModule_ModuleTypeGetValue(*key); - return REDISMODULE_OK; -} - -/* Return REDISMODULE_ERR if is the key not associated with a tensor type. - * Return REDISMODULE_OK otherwise. 
*/ -int RAI_OpenKey_Tensor(RedisModuleCtx *ctx, RedisModuleString *keyName, - RedisModuleKey **key, - int mode) { - *key = RedisModule_OpenKey(ctx, keyName, mode); - if (RedisModule_KeyType(*key) == REDISMODULE_KEYTYPE_EMPTY) { - return REDISMODULE_OK; - } - if (RedisModule_ModuleTypeGetType(*key) != RedisAI_TensorType) { - RedisModule_CloseKey(*key); - RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); - return REDISMODULE_ERR; - } - return REDISMODULE_OK; -} - -/* Return REDISMODULE_ERR if there was an error getting the Tensor. - * Return REDISMODULE_OK if the tensor value stored at key was correctly - * returned and available at *tensor variable. */ -int RAI_GetTensorFromKeyspace(RedisModuleCtx *ctx, RedisModuleString *keyName, - RedisModuleKey **key, RAI_Tensor **tensor, - int mode) { - *key = RedisModule_OpenKey(ctx, keyName, mode); - if (RedisModule_KeyType(*key) == REDISMODULE_KEYTYPE_EMPTY) { - RedisModule_CloseKey(*key); - RedisModule_ReplyWithError(ctx, "ERR tensor key is empty"); - return REDISMODULE_ERR; - } - if (RedisModule_ModuleTypeGetType(*key) != RedisAI_TensorType) { - RedisModule_CloseKey(*key); - RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); - return REDISMODULE_ERR; - } - *tensor = RedisModule_ModuleTypeGetValue(*key); - return REDISMODULE_OK; -} - -void RedisAI_FreeRunInfo(RedisModuleCtx *ctx, struct RedisAI_RunInfo *rinfo) { - if (rinfo->mctx) { - for(int i = 0 ; i < RAI_ModelRunCtxNumOutputs(rinfo->mctx) ; ++i){ - RedisModule_FreeString(ctx, rinfo->outkeys[i]); - } - RedisModule_Free(rinfo->outkeys); - RAI_ModelRunCtxFree(rinfo->mctx); - } - else if (rinfo->sctx) { - for(int i = 0 ; i < RAI_ScriptRunCtxNumOutputs(rinfo->sctx) ; ++i){ - RedisModule_FreeString(ctx, rinfo->outkeys[i]); - } - RedisModule_Free(rinfo->outkeys); - RAI_ScriptRunCtxFree(rinfo->sctx); - } - - if (rinfo->err) { - RAI_ClearError(rinfo->err); - RedisModule_Free(rinfo->err); - } - - RedisModule_Free(rinfo); -} - -void 
RedisAI_FreeRunStats(RedisModuleCtx *ctx, struct RedisAI_RunStats *rstats) { - RedisModule_FreeString(ctx, rstats->key); - RedisModule_Free(rstats->devicestr); -} - -void *RedisAI_RunSession(struct RedisAI_RunInfo **batch_rinfo) { - - const long long batch_size = array_len(batch_rinfo); - - if (batch_size == 0) { - return NULL; - } - - RAI_ModelRunCtx** mctxs = NULL; - RAI_ScriptRunCtx* sctx = NULL; - - RAI_Error* err = RedisModule_Calloc(1, sizeof(RAI_Error)); - long long rtime; - int status; - if (batch_rinfo[0]->mctx) { - mctxs = array_new(RAI_ModelRunCtx*, batch_size); - for (long long i=0; imctx); - } - } - else if (batch_rinfo[0]->sctx) { - sctx = batch_rinfo[0]->sctx; - } - - const long long start = ustime(); - if (mctxs) { - status = RAI_ModelRun(mctxs, err); - } - else if (sctx) { - status = RAI_ScriptRun(sctx, err); - } - rtime = ustime() - start; - - for (long long i=0; istatus = status; - rinfo->err = RedisModule_Calloc(1, sizeof(RAI_Error)); - // TODO: add information on whether the call was batched - // and how large the batch was - rinfo->duration_us = rtime; - - rinfo->err->code = err->code; - if (err->code != RAI_OK) { - rinfo->err->detail = RedisModule_Strdup(err->detail); - rinfo->err->detail_oneline = RedisModule_Strdup(err->detail_oneline); - } - if (rinfo->client != NULL) { - RedisModule_UnblockClient(rinfo->client, rinfo); - } - } - - if (mctxs) { - array_free(mctxs); - } - else if (sctx) { - // No batching for scripts for now - } - - return NULL; -} - -void RedisAI_FreeData(RedisModuleCtx *ctx, void *rinfo) { -} - -void RedisAI_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc) { - RedisModule_Log(ctx, "warning", "Blocked client %p disconnected!", (void*)bc); -} - -void RedisAI_ReplicateTensorSet(RedisModuleCtx *ctx, RedisModuleString *key, RAI_Tensor *t) { - long long ndims = RAI_TensorNumDims(t); - - char *dtypestr = NULL; - Tensor_DataTypeStr(RAI_TensorDataType(t), &dtypestr); - - assert(dtypestr); - - char *data = 
RAI_TensorData(t); - long long size = RAI_TensorByteSize(t); - - RedisModuleString* dims[ndims]; - - for (long long i=0; irunkey, NULL); - AI_dictEntry *stats_entry = AI_dictFind(run_stats, runkey); - - struct RedisAI_RunStats *rstats = NULL; - if (stats_entry) { - rstats = AI_dictGetVal(stats_entry); - } - - if (rinfo->status) { - RedisModule_Log(ctx, "warning", "ERR %s", rinfo->err->detail); - if (rstats) { - rstats->calls += 1; - rstats->nerrors += 1; - } - int ret = RedisModule_ReplyWithError(ctx, rinfo->err->detail_oneline); - RedisAI_FreeRunInfo(ctx, rinfo); - return ret; - } - - size_t num_outputs = 0; - if (rinfo->mctx) { - num_outputs = RAI_ModelRunCtxNumOutputs(rinfo->mctx); - } - else if (rinfo->sctx) { - num_outputs = RAI_ScriptRunCtxNumOutputs(rinfo->sctx); - } - - int64_t batch_size = 0; - - for (size_t i=0; ioutkeys[i], &outkey, REDISMODULE_READ|REDISMODULE_WRITE); - if(status==REDISMODULE_ERR){ - RedisAI_FreeRunInfo(ctx, rinfo); - if (rstats) { - rstats->calls += 1; - rstats->nerrors += 1; - } - return REDISMODULE_ERR; - } - RAI_Tensor *t = NULL; - if (rinfo->mctx) { - t = RAI_ModelRunCtxOutputTensor(rinfo->mctx, i); - if (t && batch_size == 0) { - batch_size = RAI_TensorDim(t, 0); - } - } - else if (rinfo->sctx) { - t = RAI_ScriptRunCtxOutputTensor(rinfo->sctx, i); - } - if (t) { - RedisModule_ModuleTypeSetValue(outkey, RedisAI_TensorType, RAI_TensorGetShallowCopy(t)); - } - RedisModule_CloseKey(outkey); - - if (t) { - RedisAI_ReplicateTensorSet(ctx, rinfo->outkeys[i], t); - } - } - - if (rstats) { - rstats->duration_us += rinfo->duration_us; - rstats->calls += 1; - - if (rinfo->mctx) { - rstats->samples += batch_size; - } - } - - // FIXME This crashes Redis, we need to investigate. 
- //RedisModule_CloseKey(rinfo->modelkey); - - RedisAI_FreeRunInfo(ctx, rinfo); - - return RedisModule_ReplyWithSimpleString(ctx, "OK"); -} - -size_t RAI_RunInfoBatchSize(struct RedisAI_RunInfo* rinfo) { - if (rinfo->mctx == NULL) { - return -1; - } - - size_t ninputs = RAI_ModelRunCtxNumInputs(rinfo->mctx); - - int batchsize = 0; - - if (ninputs == 0) { - return batchsize; - } - - for (size_t i=0; imctx, i); - - if (i == 0) { - batchsize = RAI_TensorDim(input, 0); - continue; - } - - if (batchsize != RAI_TensorDim(input, 0)) { - batchsize = 0; - break; - } - } - - return batchsize; -} - -int RAI_RunInfoBatchable(struct RedisAI_RunInfo* rinfo1, struct RedisAI_RunInfo* rinfo2) { - if (rinfo1->mctx == NULL || rinfo2->mctx == NULL) { - return 0; - } - - if (rinfo1->mctx->model != rinfo2->mctx->model) { - return 0; - } - - int ninputs1 = RAI_ModelRunCtxNumInputs(rinfo1->mctx); - int ninputs2 = RAI_ModelRunCtxNumInputs(rinfo2->mctx); - - if (ninputs1 != ninputs2) { - return 0; - } - - for (int i=0; imctx, i); - RAI_Tensor* input2 = RAI_ModelRunCtxInputTensor(rinfo2->mctx, i); - - int ndims1 = RAI_TensorNumDims(input1); - int ndims2 = RAI_TensorNumDims(input2); - - if (ndims1 != ndims2) { - return 0; - } - - if (ndims1 == 0) { - continue; - } - - for (int j=1; jrun_queue_mutex); - while (true){ - int rc = pthread_cond_wait(&run_queue_info->queue_condition_var, &run_queue_info->run_queue_mutex); - - long long run_queue_len = queueLength(run_queue_info->run_queue); - - while (run_queue_len > 0) { - queueItem **evicted_items = NULL; - struct RedisAI_RunInfo **batch_rinfo = NULL; - - queueItem *item = queueFront(run_queue_info->run_queue); - - while (item) { - struct RedisAI_RunInfo *rinfo = (struct RedisAI_RunInfo *)item->value; - - if (evicted_items) { - array_free(evicted_items); - array_free(batch_rinfo); - } - evicted_items = array_new(queueItem *, run_queue_len); - batch_rinfo = array_new(struct RedisAI_RunInfo *, run_queue_len); - - evicted_items = 
array_append(evicted_items, item); - batch_rinfo = array_append(batch_rinfo, rinfo); - - if (rinfo->sctx) { - break; - } - - size_t batchsize = rinfo->mctx->model->opts.batchsize; - - if (batchsize == 0) { - break; - } - - size_t current_batchsize = RAI_RunInfoBatchSize(rinfo); - - if (current_batchsize == 0 || - current_batchsize >= batchsize) { - break; - } - - queueItem *next_item = item->next; - - while (next_item != NULL) { - struct RedisAI_RunInfo *next_rinfo = (struct RedisAI_RunInfo *)next_item->value; - - if (RAI_RunInfoBatchable(rinfo, next_rinfo) == 0) { - next_item = queueNext(next_item); - continue; - } - - int next_batchsize = RAI_RunInfoBatchSize(next_rinfo); - - if (current_batchsize + next_batchsize > batchsize) { - break; - } - - evicted_items = array_append(evicted_items, next_item); - batch_rinfo = array_append(batch_rinfo, next_rinfo); - - current_batchsize += next_batchsize; - next_item = queueNext(next_item); - } - - size_t minbatchsize = rinfo->mctx->model->opts.minbatchsize; - - if (minbatchsize == 0 || current_batchsize >= minbatchsize) { - break; - } - - item = item->next; - } - - if (item == NULL) { - array_free(evicted_items); - array_free(batch_rinfo); - pthread_mutex_unlock(&run_queue_info->run_queue_mutex); - break; - } - - for (long long i=0; irun_queue, evicted_items[i]); - } - - pthread_mutex_unlock(&run_queue_info->run_queue_mutex); - - RedisAI_RunSession(batch_rinfo); - - for (long long i=0; irun_queue_mutex); - - run_queue_len = queueLength(run_queue_info->run_queue); - } - } -} /* ----------------------- RedisAI Module Commands ------------------------- */ @@ -575,128 +41,16 @@ int RedisAI_TensorSet_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv return REDISMODULE_ERR; } - // get the tensor datatype - const char* typestr = RedisModule_StringPtrLen(argv[2], NULL); - size_t datasize = RAI_TensorDataSizeFromString(typestr); - if (!datasize){ - return RedisModule_ReplyWithError(ctx, "ERR invalid data type"); - } - - 
const char* fmtstr; - int datafmt = REDISAI_DATA_NONE; - int tensorAllocMode = TENSORALLOC_CALLOC; - size_t ndims = 0; - long long len = 1; - long long* dims = NULL; - size_t argpos = 3; - long long remaining_args = argc-1; - - for (; argpos <= argc-1; argpos++){ - const char *opt = RedisModule_StringPtrLen(argv[argpos], NULL); - remaining_args = argc-1-argpos; - if (!strcasecmp(opt, "BLOB")){ - datafmt = REDISAI_DATA_BLOB; - tensorAllocMode = TENSORALLOC_NONE; - // if we've found the dataformat there are no more dimensions - // check right away if the arity is correct - if (remaining_args != 1 ){ - RedisModule_Free(dims); - RedisModule_CloseKey(key); - return RedisModule_WrongArity(ctx); - } - argpos++; - break; - } - else if (!strcasecmp(opt, "VALUES")){ - datafmt = REDISAI_DATA_VALUES; - tensorAllocMode = TENSORALLOC_ALLOC; - //if we've found the dataformat there are no more dimensions - // check right away if the arity is correct - if (remaining_args != len ){ - RedisModule_Free(dims); - RedisModule_CloseKey(key); - return RedisModule_WrongArity(ctx); - } - argpos++; - break; - } else { - long long dimension = 1; - const int retval = RedisModule_StringToLongLong(argv[argpos],&dimension); - if (retval != REDISMODULE_OK || dimension <= 0) { - RedisModule_Free(dims); - RedisModule_CloseKey(key); - return RedisModule_ReplyWithError(ctx, - "ERR invalid or negative value found in tensor shape"); - } - - ndims++; - dims=RedisModule_Realloc(dims,ndims*sizeof(long long)); - dims[ndims-1]=dimension; - len *= dimension; - } - } + RAI_Tensor *t=NULL; + RAI_Error err; + const int parse_result = RAI_parseTensorSetArgs(ctx,argv,argc,&t,1,&err); - const long long nbytes = len * datasize; - size_t datalen; - const char *data; - DLDataType datatype = RAI_TensorDataTypeFromString(typestr); - RAI_Tensor *t = RAI_TensorCreateWithDLDataType(datatype, dims, ndims, tensorAllocMode); - if (!t){ - RedisModule_Free(dims); + // if the number of parsed args is negative something went wrong 
+ if(parse_result<0){ RedisModule_CloseKey(key); - return RedisModule_ReplyWithError(ctx, "ERR could not create tensor"); - } - size_t i = 0; - switch (datafmt){ - case REDISAI_DATA_BLOB: - RedisModule_StringPtrLen(argv[argpos],&datalen); - if (datalen != nbytes){ - RAI_TensorFree(t); - RedisModule_CloseKey(key); - return RedisModule_ReplyWithError(ctx, "ERR data length does not match tensor shape and type"); - } - RedisModule_RetainString(NULL,argv[argpos]); - RAI_TensorSetDataFromRS(t,argv[argpos]); - break; - case REDISAI_DATA_VALUES: - for (; argpos <= argc-1; argpos++){ - if (datatype.code == kDLFloat){ - double val; - const int retval = RedisModule_StringToDouble(argv[argpos],&val); - if (retval != REDISMODULE_OK) { - RAI_TensorFree(t); - RedisModule_CloseKey(key); - return RedisModule_ReplyWithError(ctx, "ERR invalid value"); - } - const int retset = RAI_TensorSetValueFromDouble(t, i, val); - if (retset == -1){ - RAI_TensorFree(t); - RedisModule_CloseKey(key); - return RedisModule_ReplyWithError(ctx, "ERR cannot specify values for this datatype"); - } - } - else{ - long long val; - const int retval = RedisModule_StringToLongLong(argv[argpos],&val); - if (retval != REDISMODULE_OK) { - RAI_TensorFree(t); - RedisModule_CloseKey(key); - return RedisModule_ReplyWithError(ctx, "ERR invalid value"); - } - const int retset = RAI_TensorSetValueFromLongLong(t, i, val); - if (retset == -1){ - RAI_TensorFree(t); - RedisModule_CloseKey(key); - return RedisModule_ReplyWithError(ctx, "ERR cannot specify values for this datatype"); - } - } - i++; - } - break; - default: - // default does not require tensor data setting since calloc setted it to 0 - break; + return REDISMODULE_ERR; } + if( RedisModule_ModuleTypeSetValue(key, RedisAI_TensorType, t) != REDISMODULE_OK ){ RAI_TensorFree(t); RedisModule_CloseKey(key); @@ -718,91 +72,15 @@ int RedisAI_TensorGet_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv RedisModuleKey *key; const int status = 
RAI_GetTensorFromKeyspace(ctx, argv[1], &key, &t, REDISMODULE_READ); if(status==REDISMODULE_ERR){ - return REDISMODULE_ERR; + return REDISMODULE_ERR; } - int datafmt; - long long resplen = 2; - const char *fmtstr = RedisModule_StringPtrLen(argv[2], NULL); - if (!strcasecmp(fmtstr, "BLOB")) { - datafmt = REDISAI_DATA_BLOB; - resplen = 3; - } else if (!strcasecmp(fmtstr, "VALUES")) { - datafmt = REDISAI_DATA_VALUES; - resplen = 3; - } else if (!strcasecmp(fmtstr, "META")) { - datafmt = REDISAI_DATA_NONE; - } else { - RedisModule_CloseKey(key); - return RedisModule_ReplyWithError(ctx, "ERR unsupported data format"); - } - - const long long ndims = RAI_TensorNumDims(t); - - RedisModule_ReplyWithArray(ctx, resplen); - - char *dtypestr = NULL; - const int dtypestr_result = Tensor_DataTypeStr(RAI_TensorDataType(t), &dtypestr); - if(dtypestr_result==REDISMODULE_ERR){ - RedisModule_CloseKey(key); - return RedisModule_ReplyWithError(ctx, "ERR unsupported dtype"); - } - RedisModule_ReplyWithSimpleString(ctx, dtypestr); - - RedisModule_ReplyWithArray(ctx, ndims); - for (long long i=0; irunkey = argv[1]; rinfo->mctx = RAI_ModelRunCtxCreate(mto); - // rinfo->mctxs = array_new(RAI_ModelRunCtx, 10); - // rinfo->mctxs = array_append(rinfo->mctxs, ); - rinfo->sctx = NULL; - rinfo->outkeys = NULL; - rinfo->err = NULL; - - // parsing aux vars - int is_input = 0; - size_t ninputs = 0; - size_t noutputs = 0; - int outputs_flag_count = 0; - - for (size_t argpos = 3; argpos <= argc - 1; argpos++) { - const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); - if (!strcasecmp(arg_string, "OUTPUTS") && outputs_flag_count == 0) { - is_input = 1; - outputs_flag_count = 1; - const size_t expected_noutputs = argc - argpos - 1; - if(expected_noutputs>0){ - rinfo->outkeys = - RedisModule_Calloc(expected_noutputs, sizeof(RedisModuleString *)); - } - } else { - RedisModule_RetainString(NULL, argv[argpos]); - if (is_input == 0) { - RAI_Tensor *inputTensor; - RedisModuleKey *tensorKey; - 
const int status = RAI_GetTensorFromKeyspace( - ctx, argv[argpos], &tensorKey, &inputTensor, REDISMODULE_READ); - if (status == REDISMODULE_ERR) { - // TODO: free rinfo - RedisModule_CloseKey(modelKey); - return REDISMODULE_ERR; - } - RedisModule_CloseKey(tensorKey); - // Opname here is passed without copying - const char *opname = NULL; - if (mto->inputs) { - opname = mto->inputs[ninputs]; - } - if (!RAI_ModelRunCtxAddInput(rinfo->mctx, opname, inputTensor)) { - // todo free rinfo - return RedisModule_ReplyWithError(ctx, "ERR Input key not found"); - } - ninputs++; - } else { - // Opname here is passed without copying - const char *opname = NULL; - if (mto->outputs) { - opname = mto->outputs[noutputs]; - } - if (!RAI_ModelRunCtxAddOutput(rinfo->mctx, opname)) { - // todo free rinfo - return RedisModule_ReplyWithError(ctx, "ERR Output key not found"); - } - rinfo->outkeys[noutputs] = argv[argpos]; - noutputs++; - } - } - } - if (mto->inputs && array_len(mto->inputs) != ninputs) { - return RedisModule_ReplyWithError( - ctx, - "Number of names given as INPUTS during MODELSET and keys given as INPUTS here do not match"); - } - - if (mto->outputs && array_len(mto->outputs) != noutputs) { - return RedisModule_ReplyWithError( - ctx, - "Number of names given as OUTPUTS during MODELSET and keys given as OUTPUTS here do not match"); + const int parse_result = RedisAI_Parse_ModelRun_RedisCommand(ctx, argv, + argc, &(rinfo->mctx), &(rinfo->outkeys), &mto, 0, NULL, 0, NULL, NULL); + RedisModule_CloseKey(modelKey); + // if the number of parsed args is negative something went wrong + if(parse_result<0){ + return REDISMODULE_ERR; } - AI_dictEntry *entry = AI_dictFind(run_queues, mto->devicestr); RunQueueInfo *run_queue_info = NULL; - if (!entry) { // If the queue does not exist, initialize it - if (ensureRunQueue(mto->devicestr) == REDISMODULE_ERR) { - return RedisModule_ReplyWithError(ctx, "ERR Queue not initialized for device"); - } - entry = AI_dictFind(run_queues, 
mto->devicestr); - run_queue_info = AI_dictGetVal(entry); - } else { - run_queue_info = AI_dictGetVal(entry); + if (ensureRunQueue(mto->devicestr,&run_queue_info) == REDISMODULE_ERR) { + return RedisModule_ReplyWithError(ctx, "ERR Queue not initialized for device"); } - rinfo->client = RedisModule_BlockClient(ctx, RedisAI_Run_Reply, NULL, RedisAI_FreeData, 0); + rinfo->client = RedisModule_BlockClient(ctx, RAI_ModelRunScriptRunReply, NULL, RedisAI_FreeData, 0); // RedisModule_SetDisconnectCallback(rinfo->client, RedisAI_Disconnected); pthread_mutex_lock(&run_queue_info->run_queue_mutex); @@ -1254,6 +461,11 @@ int RedisAI_ModelRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int RedisAI_ScriptRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { if (argc < 4) return RedisModule_WrongArity(ctx); + RedisAI_RunInfo *rinfo = NULL; + if (RAI_InitRunInfo(&rinfo) == REDISMODULE_ERR) { + return RedisModule_ReplyWithError(ctx, "ERR Unable to allocate the memory and initialise the RedisAI_RunInfo structure"); + } + if (RedisModule_IsKeysPositionRequest(ctx)) { RedisModule_KeyAtPos(ctx, 1); for (int i=3; isctx = RAI_ScriptRunCtxCreate(sto, fnname); for (size_t i=0; isctx, t)) { + RAI_FreeRunInfo(ctx,rinfo); RedisModule_CloseKey(key); return RedisModule_ReplyWithError(ctx, "Input key not found"); } } - outkeys = RedisModule_Calloc(noutputs, sizeof(RedisModuleString*)); for (size_t i=0; isctx)) { + RAI_FreeRunInfo(ctx,rinfo); RedisModule_CloseKey(key); return RedisModule_ReplyWithError(ctx, "Output key not found"); } RedisModule_RetainString(ctx, outputs[i]); - outkeys[i] = outputs[i]; + array_append(rinfo->outkeys,outputs[i]); } - - struct RedisAI_RunInfo *rinfo = RedisModule_Calloc(1, sizeof(struct RedisAI_RunInfo)); - rinfo->mctx = NULL; - rinfo->sctx = sctx; + RedisModule_RetainString(ctx, keystr); rinfo->runkey = keystr; - rinfo->outkeys = outkeys; - rinfo->err = NULL; - AI_dictEntry *entry = AI_dictFind(run_queues, sto->devicestr); 
RunQueueInfo *run_queue_info = NULL; - if (!entry){ - RAI_ScriptRunCtxFree(sctx); - return RedisModule_ReplyWithError(ctx, "Queue not initialized for device"); - } - else{ - run_queue_info = AI_dictGetVal(entry); + // If the queue does not exist, initialize it + if (ensureRunQueue(sto->devicestr,&run_queue_info) == REDISMODULE_ERR) { + RAI_FreeRunInfo(ctx,rinfo); + return RedisModule_ReplyWithError(ctx, "ERR Queue not initialized for device"); } - rinfo->client = RedisModule_BlockClient(ctx, RedisAI_Run_Reply, NULL, RedisAI_FreeData, 0); + rinfo->client = RedisModule_BlockClient(ctx, RAI_ModelRunScriptRunReply, NULL, RedisAI_FreeData, 0); // RedisModule_SetDisconnectCallback(rinfo->client, RedisAI_Disconnected); pthread_mutex_lock(&run_queue_info->run_queue_mutex); @@ -1455,10 +657,10 @@ int RedisAI_ScriptSet_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv script = RAI_ScriptCreate(devicestr, tag, scriptdef, &err); if (err.code == RAI_EBACKENDNOTLOADED) { - RedisModule_Log(ctx, "warning", "Backend TORCH not loaded, will try loading default backend\n"); + RedisModule_Log(ctx, "warning", "Backend TORCH not loaded, will try loading default backend"); int ret = RAI_LoadDefaultBackend(ctx, RAI_BACKEND_TORCH); if (ret == REDISMODULE_ERR) { - RedisModule_Log(ctx, "error", "Could not load TORCH default backend\n"); + RedisModule_Log(ctx, "error", "Could not load TORCH default backend"); int ret = RedisModule_ReplyWithError(ctx, "ERR Could not load backend"); RAI_ClearError(&err); return ret; @@ -1476,7 +678,9 @@ int RedisAI_ScriptSet_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv return ret; } - if (ensureRunQueue(devicestr)==REDISMODULE_ERR) { + RunQueueInfo *run_queue_info = NULL; + // If the queue does not exist, initialize it + if (ensureRunQueue(devicestr,&run_queue_info) == REDISMODULE_ERR) { RAI_ScriptFree(script, &err); if (err.code != RAI_OK) { #ifdef RAI_PRINT_BACKEND_ERRORS @@ -1609,94 +813,148 @@ int 
RedisAI_Info_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int return REDISMODULE_OK; } -int RedisAI_Config_LoadBackend(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { - RedisModule_AutoMemory(ctx); - +/** +* AI.CONFIG [BACKENDSPATH | LOADBACKEND ] +*/ +int RedisAI_Config_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { if (argc < 2) return RedisModule_WrongArity(ctx); - ArgsCursor ac; - ArgsCursor_InitRString(&ac, argv+1, argc-1); - - const char* backend; - AC_GetString(&ac, &backend, NULL, 0); - - const char* path; - AC_GetString(&ac, &path, NULL, 0); - - int ret; - if (strcasecmp(backend, "TF") == 0) { - ret = RAI_LoadBackend(ctx, RAI_BACKEND_TENSORFLOW, path); - } - else if (strcasecmp(backend, "TFLITE") == 0) { - ret = RAI_LoadBackend(ctx, RAI_BACKEND_TFLITE, path); - } - else if (strcasecmp(backend, "TORCH") == 0) { - ret = RAI_LoadBackend(ctx, RAI_BACKEND_TORCH, path); - } - else if (strcasecmp(backend, "ONNX") == 0) { - ret = RAI_LoadBackend(ctx, RAI_BACKEND_ONNXRUNTIME, path); - } - else { - return RedisModule_ReplyWithError(ctx, "ERR unsupported backend"); + const char *subcommand = RedisModule_StringPtrLen(argv[1], NULL); + if (!strcasecmp(subcommand, "LOADBACKEND")) { + return RedisAI_Config_LoadBackend(ctx, argv + 1, argc - 1); } - if (ret == REDISMODULE_OK) { - return RedisModule_ReplyWithSimpleString(ctx, "OK"); + if (!strcasecmp(subcommand, "BACKENDSPATH")) { + if (argc > 2) { + return RedisAI_Config_BackendsPath( + ctx, RedisModule_StringPtrLen(argv[2], NULL)); + } else { + return RedisModule_ReplyWithError( + ctx, "ERR BACKENDSPATH: missing path argument"); + } } - return RedisModule_ReplyWithError(ctx, "ERR error loading backend"); + return RedisModule_ReplyWithError(ctx, "ERR unsupported subcommand"); } +/** + * AI.DAGRUN [LOAD key1 key2... ] [PERSIST key1 key2... ] |> + * [COMMAND1] |> [COMMAND2] |> [COMMANDN] + * + * The request is queued and evaded asynchronously from a separate thread. 
The + * client blocks until the computation finishes. + */ +int RedisAI_DagRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc) { + if (argc < 4) return RedisModule_WrongArity(ctx); -int RedisAI_Config_BackendsPath(RedisModuleCtx *ctx, const char *path) { - RedisModule_AutoMemory(ctx); - - if (RAI_BackendsPath != NULL) { - RedisModule_Free(RAI_BackendsPath); + RedisAI_RunInfo *rinfo = NULL; + if (RAI_InitRunInfo(&rinfo) == REDISMODULE_ERR) { + return RedisModule_ReplyWithError(ctx, "ERR Unable to allocate the memory and initialise the RedisAI_RunInfo structure"); } - RAI_BackendsPath = RedisModule_Strdup(path); + rinfo->use_local_context = 1; + RAI_DagOp* currentDagOp = NULL; + RAI_InitDagOp(¤tDagOp); + array_append(rinfo->dagOps,currentDagOp); - return RedisModule_ReplyWithSimpleString(ctx, "OK"); -} + int persistFlag=0; + int loadFlag=0; + int chainingOpCount=0; + const char* deviceStr = NULL; -int RedisAI_Config_QueueThreads(RedisModuleString *queueThreadsString) { - int result = RedisModule_StringToLongLong(queueThreadsString, &perqueueThreadPoolSize); - // make sure the number of threads is a positive integer - // if not set the value to the default - if (result == REDISMODULE_OK && perqueueThreadPoolSize < 1 ){ - perqueueThreadPoolSize = REDISAI_DEFAULT_THREADS_PER_QUEUE; - result = REDISMODULE_ERR; + for (size_t argpos = 1; argpos <= argc - 1; argpos++) { + const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); + if (!strcasecmp(arg_string, "LOAD")) { + loadFlag=1; + const int parse_result = RAI_parseDAGLoadArgs( + ctx, &argv[argpos], argc - argpos,&(rinfo->dagTensorsLoadedContext), &(rinfo->dagTensorsContext), "|>"); + if (parse_result > 0) { + argpos += parse_result - 1; + } else { + RAI_FreeRunInfo(ctx,rinfo); + return REDISMODULE_ERR; + } + } else if (!strcasecmp(arg_string, "PERSIST")) { + persistFlag = 1; + const int parse_result = + RAI_parseDAGPersistArgs(ctx, &argv[argpos], argc - argpos, + 
&(rinfo->dagTensorsPersistentContext), "|>"); + if (parse_result > 0) { + argpos += parse_result - 1; + } else { + RAI_FreeRunInfo(ctx, rinfo); + return REDISMODULE_ERR; + } + } else if (!strcasecmp(arg_string, "|>")) { + // on the first pipe operator, if LOAD or PERSIST were used, we've already + // allocated memory + if (!((persistFlag == 1 || loadFlag == 1) && chainingOpCount == 0)) { + rinfo->dagNumberCommands++; + RAI_DagOp *currentDagOp = NULL; + RAI_InitDagOp(¤tDagOp); + array_append(rinfo->dagOps, currentDagOp); + } + chainingOpCount++; + } else { + if (!strcasecmp(arg_string, "AI.TENSORGET")) { + rinfo->dagOps[rinfo->dagNumberCommands]->commandType = REDISAI_DAG_CMD_TENSORGET; + } + if (!strcasecmp(arg_string, "AI.TENSORSET")) { + rinfo->dagOps[rinfo->dagNumberCommands]->commandType = REDISAI_DAG_CMD_TENSORSET; + } + if (!strcasecmp(arg_string, "AI.MODELRUN")) { + if (argc - 2 < argpos) { + return RedisModule_WrongArity(ctx); + } + rinfo->dagOps[rinfo->dagNumberCommands]->commandType = REDISAI_DAG_CMD_MODELRUN; + RAI_Model *mto; + RedisModuleKey *modelKey; + const int status = RAI_GetModelFromKeyspace(ctx, argv[argpos+1], &modelKey, + &mto, REDISMODULE_READ); + if (status == REDISMODULE_ERR) { + RAI_FreeRunInfo(ctx,rinfo); + return REDISMODULE_ERR; + } + if (deviceStr==NULL){ + deviceStr=mto->devicestr; + }else{ + // If the device strings are not equivalent, reply with error ( for now ) + if(strcasecmp(mto->devicestr, deviceStr)!=0){ + RAI_FreeRunInfo(ctx,rinfo); + return RedisModule_ReplyWithError(ctx,"ERR multi-device DAGs not supported yet");; + } + } + rinfo->dagOps[rinfo->dagNumberCommands]->runkey = argv[argpos]; + rinfo->dagOps[rinfo->dagNumberCommands]->mctx = + RAI_ModelRunCtxCreate(mto); + } + RedisModule_RetainString(NULL, argv[argpos]); + array_append(rinfo->dagOps[rinfo->dagNumberCommands]->argv, argv[argpos]); + rinfo->dagOps[rinfo->dagNumberCommands]->argc++; + } } - return result; -} - -/** -* AI.CONFIG [BACKENDSPATH | LOADBACKEND ] -*/ 
-int RedisAI_Config_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { - RedisModule_AutoMemory(ctx); - - if (argc < 2) return RedisModule_WrongArity(ctx); - - ArgsCursor ac; - ArgsCursor_InitRString(&ac, argv+1, argc-1); - const char* subcommand; - AC_GetString(&ac, &subcommand, NULL, 0); - - if (strcasecmp(subcommand, "LOADBACKEND") == 0) { - return RedisAI_Config_LoadBackend(ctx, argv + 1, argc - 1); + RunQueueInfo *run_queue_info = NULL; + // If there was no MODELRUN on the DAG, we default all ops to CPU + if(deviceStr==NULL){ + deviceStr="CPU"; } - - if (strcasecmp(subcommand, "BACKENDSPATH") == 0) { - if (argc > 2) { - return RedisAI_Config_BackendsPath(ctx, RedisModule_StringPtrLen(argv[2], NULL)); - } else { - return RedisModule_ReplyWithError(ctx, "ERR BACKENDSPATH: missing path argument"); - } + // If the queue does not exist, initialize it + if (ensureRunQueue(deviceStr,&run_queue_info) == REDISMODULE_ERR) { + RAI_FreeRunInfo(ctx,rinfo); + return RedisModule_ReplyWithError( + ctx, "ERR Queue not initialized for device"); } - return RedisModule_ReplyWithError(ctx, "ERR unsupported subcommand"); + rinfo->client = RedisModule_BlockClient(ctx, RedisAI_DagRun_Reply, NULL, + NULL, 0); + + pthread_mutex_lock(&run_queue_info->run_queue_mutex); + queuePush(run_queue_info->run_queue, rinfo); + pthread_cond_signal(&run_queue_info->queue_condition_var); + pthread_mutex_unlock(&run_queue_info->run_queue_mutex); + + return REDISMODULE_OK; } #define EXECUTION_PLAN_FREE_MSG 100 @@ -1768,8 +1026,6 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) int flags = RedisModule_GetContextFlags(ctx); - RAI_BackendsPath = NULL; - if(RedisAI_RegisterApi(ctx) != REDISMODULE_OK){ RedisModule_Log(ctx, "warning", "could not register RedisAI api\r\n"); return REDISMODULE_ERR; @@ -1846,66 +1102,21 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) == REDISMODULE_ERR) return REDISMODULE_ERR; - if (argc > 
0 && argc % 2 != 0) { - RedisModule_Log(ctx, "warning", "Even number of arguments provided to module. Please provide arguments as KEY VAL pairs"); - } - - // need BACKENDSPATH set up before loading specific backends - for (int i=0; i #include "redismodule.h" +#include "util/dict.h" +#include "model_struct.h" +#include "model_script_run_session.h" +#include "background_workers.h" #define REDISAI_LLAPI_VERSION 1 @@ -25,11 +29,6 @@ typedef struct RAI_Error RAI_Error; #define REDISAI_DEVICE_CPU 0 #define REDISAI_DEVICE_GPU 1 -#define REDISAI_DEFAULT_THREADS_PER_QUEUE 1 - -#define REDISAI_ERRORMSG_PROCESSING_ARG "ERR: error processing argument" -#define REDISAI_ERRORMSG_THREADS_PER_QUEUE "ERR: error setting THREADS_PER_QUEUE to" -#define REDISAI_INFOMSG_THREADS_PER_QUEUE "Setting THREADS_PER_QUEUE parameter to" enum RedisAI_DataFmt { REDISAI_DATA_BLOB = 0, @@ -37,17 +36,6 @@ enum RedisAI_DataFmt { REDISAI_DATA_NONE }; -struct RedisAI_RunInfo { - RedisModuleBlockedClient *client; - RedisModuleString *runkey; - RedisModuleString **outkeys; - RAI_ModelRunCtx *mctx; - RAI_ScriptRunCtx *sctx; - int status; - long long duration_us; - RAI_Error* err; -}; - RAI_Tensor* MODULE_API_FUNC(RedisAI_TensorCreate)(const char* dataTypeStr, long long* dims, int ndims); RAI_Tensor* MODULE_API_FUNC(RedisAI_TensorCreateByConcatenatingTensors)(RAI_Tensor** ts, long long n); RAI_Tensor* MODULE_API_FUNC(RedisAI_TensorCreateBySlicingTensor)(RAI_Tensor* t, long long offset, long long len); diff --git a/src/run_info.c b/src/run_info.c new file mode 100644 index 000000000..d35634e76 --- /dev/null +++ b/src/run_info.c @@ -0,0 +1,258 @@ +#include "err.h" +#include "model.h" +#include "model_struct.h" +#include "redismodule.h" +#include "script.h" +#include "tensor.h" +#include "util/arr_rm_alloc.h" +#include "util/dict.h" + +/** + * Allocate the memory and initialise the RAI_DagOp. + * @param result Output parameter to capture allocated RAI_DagOp. 
+ * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the allocation + * failed. + */ +int RAI_InitDagOp(RAI_DagOp **result) { + RAI_DagOp *dagOp; + dagOp = (RAI_DagOp *)RedisModule_Calloc(1, sizeof(RAI_DagOp)); + if (!dagOp) { + return REDISMODULE_ERR; + } + dagOp->commandType = REDISAI_DAG_CMD_NONE; + dagOp->runkey = NULL; + dagOp->outkeys = (RedisModuleString **)array_new(RedisModuleString *, 1); + if (!(dagOp->outkeys)) { + return REDISMODULE_ERR; + } + dagOp->outTensors = (RAI_Tensor **)array_new(RAI_Tensor*, 1); + if (!(dagOp->outTensors)) { + return REDISMODULE_ERR; + } + dagOp->mctx = NULL; + dagOp->sctx = NULL; + dagOp->duration_us = 0; + RAI_InitError(&dagOp->err); + if (!(dagOp->err)) { + return REDISMODULE_ERR; + } + dagOp->argv = (RedisModuleString **)array_new(RedisModuleString *, 1); + if (!(dagOp->argv)) { + return REDISMODULE_ERR; + } + dagOp->argc = 0; + *result = dagOp; + return REDISMODULE_OK; +} + +/** + * Allocate the memory and initialise the RedisAI_RunInfo. + * @param result Output parameter to capture allocated RedisAI_RunInfo. + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the allocation + * failed. 
+ */ +int RAI_InitRunInfo(RedisAI_RunInfo **result) { + RedisAI_RunInfo *rinfo; + rinfo = (RedisAI_RunInfo *)RedisModule_Calloc(1, sizeof(RedisAI_RunInfo)); + if (!rinfo) { + return REDISMODULE_ERR; + } + rinfo->runkey = NULL; + rinfo->outkeys = (RedisModuleString **)array_new(RedisModuleString *, 1); + rinfo->mctx = NULL; + rinfo->sctx = NULL; + rinfo->duration_us = 0; + RAI_InitError(&rinfo->err); + if (!(rinfo->err)) { + return REDISMODULE_ERR; + } + rinfo->use_local_context = 0; + rinfo->dagTensorsContext = AI_dictCreate(&AI_dictTypeHeapStrings, NULL); + if (!(rinfo->dagTensorsContext)) { + return REDISMODULE_ERR; + } + rinfo->dagTensorsLoadedContext = AI_dictCreate(&AI_dictTypeHeapStrings, NULL); + if (!(rinfo->dagTensorsLoadedContext)) { + return REDISMODULE_ERR; + } + rinfo->dagTensorsPersistentContext = + AI_dictCreate(&AI_dictTypeHeapStrings, NULL); + if (!(rinfo->dagTensorsPersistentContext)) { + return REDISMODULE_ERR; + } + rinfo->dagOps = (RAI_DagOp **)array_new(RAI_DagOp *, 1); + if (!(rinfo->dagOps)) { + return REDISMODULE_ERR; + } + rinfo->dagReplyLength = 0; + rinfo->dagNumberCommands = 0; + *result = rinfo; + return REDISMODULE_OK; +} + +void RAI_FreeDagOp(RedisModuleCtx *ctx, RAI_DagOp *dagOp) { + if (dagOp) { + RAI_FreeError(dagOp->err); + if (dagOp->argv) { + for (size_t i = 0; i < array_len(dagOp->argv); i++) { + RedisModule_FreeString(ctx, dagOp->argv[i]); + } + array_free(dagOp->argv); + } + // dagOp->outkeys is released on all argv release above + // dagOp->outTensors is released on RunInfo after checking what tensors to + // persist + for (size_t i = 0; i < array_len(dagOp->outTensors); i++) { + RAI_TensorFree(dagOp->outTensors[i]); + } + array_free(dagOp->outTensors); + + RedisModule_Free(dagOp); + } +} + +void RAI_FreeRunInfo(RedisModuleCtx *ctx, struct RedisAI_RunInfo *rinfo) { + if (rinfo->mctx) { + RAI_ModelRunCtxFree(rinfo->mctx); + } + if (rinfo->sctx) { + RAI_ScriptRunCtxFree(rinfo->sctx); + } + RAI_FreeError(rinfo->err); + + if 
(rinfo->dagTensorsContext) { + AI_dictIterator *iter = AI_dictGetSafeIterator(rinfo->dagTensorsContext); + AI_dictEntry *stats_entry = AI_dictNext(iter); + RAI_Tensor *tensor = NULL; + + while (stats_entry) { + tensor = AI_dictGetVal(stats_entry); + char *key = (char *)AI_dictGetKey(stats_entry); + + if (tensor) { + // if the key is persistent then we should not delete it + AI_dictEntry *persistent_entry = + AI_dictFind(rinfo->dagTensorsPersistentContext, key); + // if the key was loaded from the keyspace then we should not delete it + AI_dictEntry *loaded_entry = + AI_dictFind(rinfo->dagTensorsLoadedContext, key); + if (persistent_entry == NULL && loaded_entry == NULL) { + RAI_TensorFree(tensor); + } + } + RedisModule_Free(key); + stats_entry = AI_dictNext(iter); + } + AI_dictReleaseIterator(iter); + RedisModule_Free(rinfo->dagTensorsContext); + } + + if (rinfo->dagTensorsPersistentContext) { + AI_dictIterator *iter = + AI_dictGetSafeIterator(rinfo->dagTensorsPersistentContext); + AI_dictEntry *stats_entry = AI_dictNext(iter); + while (stats_entry) { + char *key = (char *)AI_dictGetKey(stats_entry); + RedisModule_Free(key); + stats_entry = AI_dictNext(iter); + } + AI_dictReleaseIterator(iter); + RedisModule_Free(rinfo->dagTensorsPersistentContext); + } + + if (rinfo->dagOps) { + for (size_t i = 0; i < array_len(rinfo->dagOps); i++) { + RAI_FreeDagOp(ctx, rinfo->dagOps[i]); + } + array_free(rinfo->dagOps); + } + + if (rinfo->outkeys) { + for (size_t i = 0; i < array_len(rinfo->outkeys); i++) { + RedisModule_FreeString(ctx, rinfo->outkeys[i]); + } + array_free(rinfo->outkeys); + } + + RedisModule_Free(rinfo); +} + +size_t RAI_RunInfoBatchSize(struct RedisAI_RunInfo *rinfo) { + if (rinfo->mctx == NULL) { + return -1; + } + + size_t ninputs = RAI_ModelRunCtxNumInputs(rinfo->mctx); + + int batchsize = 0; + + if (ninputs == 0) { + return batchsize; + } + + for (size_t i = 0; i < ninputs; i++) { + RAI_Tensor *input = RAI_ModelRunCtxInputTensor(rinfo->mctx, i); + + if (i 
== 0) { + batchsize = RAI_TensorDim(input, 0); + continue; + } + + if (batchsize != RAI_TensorDim(input, 0)) { + batchsize = 0; + break; + } + } + + return batchsize; +} + +int RAI_RunInfoBatchable(struct RedisAI_RunInfo *rinfo1, + struct RedisAI_RunInfo *rinfo2) { + + // DAG case + if (rinfo1->use_local_context == 1 || rinfo2->use_local_context == 1) { + return 0; + } + + if (rinfo1->mctx == NULL || rinfo2->mctx == NULL) { + return 0; + } + + if (rinfo1->mctx->model != rinfo2->mctx->model) { + return 0; + } + + const int ninputs1 = RAI_ModelRunCtxNumInputs(rinfo1->mctx); + const int ninputs2 = RAI_ModelRunCtxNumInputs(rinfo2->mctx); + + if (ninputs1 != ninputs2) { + return 0; + } + + for (int i = 0; i < ninputs1; i++) { + RAI_Tensor *input1 = RAI_ModelRunCtxInputTensor(rinfo1->mctx, i); + RAI_Tensor *input2 = RAI_ModelRunCtxInputTensor(rinfo2->mctx, i); + + int ndims1 = RAI_TensorNumDims(input1); + int ndims2 = RAI_TensorNumDims(input2); + + if (ndims1 != ndims2) { + return 0; + } + + if (ndims1 == 0) { + continue; + } + + for (int j = 1; j < ndims1; j++) { + int dim1 = RAI_TensorDim(input1, j); + int dim2 = RAI_TensorDim(input2, j); + if (dim1 != dim2) { + return 0; + } + } + } + + return 1; +} diff --git a/src/run_info.h b/src/run_info.h new file mode 100644 index 000000000..0b4f15958 --- /dev/null +++ b/src/run_info.h @@ -0,0 +1,109 @@ + + +#ifndef SRC_RUN_INFO_H_ +#define SRC_RUN_INFO_H_ + +#include "err.h" +#include "model.h" +#include "model_struct.h" +#include "redismodule.h" +#include "script.h" +#include "util/arr_rm_alloc.h" +#include "util/dict.h" + +enum RedisAI_DAGCommands { + REDISAI_DAG_CMD_NONE = 0, + REDISAI_DAG_CMD_TENSORSET, + REDISAI_DAG_CMD_TENSORGET, + REDISAI_DAG_CMD_MODELRUN +}; + +typedef struct RAI_DagOp { + int commandType; + RedisModuleString *runkey; + RedisModuleString **outkeys; + RAI_Tensor **outTensors; + RAI_ModelRunCtx *mctx; + RAI_ScriptRunCtx *sctx; + int result; // REDISMODULE_OK or REDISMODULE_ERR + long long duration_us; + 
RAI_Error *err; + RedisModuleString **argv; + int argc; +} RAI_DagOp; + +/** + * Allocate the memory and initialise the RAI_DagOp. + * @param result Output parameter to capture allocated RAI_DagOp. + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the allocation + * failed. + */ +int RAI_InitDagOp(RAI_DagOp **result); + +/** + * Frees the memory allocated of RAI_DagOp + * @param ctx Context in which Redis modules operate + * @param RAI_DagOp context in which RedisAI command operates. + */ +void RAI_FreeDagOp(RedisModuleCtx *ctx, RAI_DagOp *dagOp); + +/** + * This structure represents the context in which RedisAI blocking commands + * operate. + * + * Note that not all the context structure is always filled with actual values + * but only the fields needed in a given operation. + */ +typedef struct RedisAI_RunInfo { + RedisModuleBlockedClient *client; + // TODO: completly move modelrun and scriptrun to dagOps + RedisModuleString *runkey; + RedisModuleString **outkeys; + RAI_ModelRunCtx *mctx; + RAI_ScriptRunCtx *sctx; + int result; // REDISMODULE_OK or REDISMODULE_ERR + long long duration_us; + RAI_Error *err; + // DAG + int use_local_context; + AI_dict *dagTensorsContext; + AI_dict *dagTensorsPersistentContext; // dict to flag tensors to persist + AI_dict * + dagTensorsLoadedContext; // dict to flag tensors loaded from the keyspace + RAI_DagOp **dagOps; + int dagReplyLength; + int dagNumberCommands; +} RedisAI_RunInfo; + +/** + * Allocate the memory and initialise the RedisAI_RunInfo. + * @param result Output parameter to capture allocated RedisAI_RunInfo. + * @return REDISMODULE_OK on success, or REDISMODULE_ERR if the allocation + * failed. + */ +int RAI_InitRunInfo(RedisAI_RunInfo **result); + +/** + * Frees the memory allocated on RedisAI_RunInfo + * @param ctx Context in which Redis modules operate + * @param rinfo context in which RedisAI blocking command operate. 
+ */ +void RAI_FreeRunInfo(RedisModuleCtx *ctx, RedisAI_RunInfo *rinfo); + +/** + * + * @param rinfo context in which RedisAI blocking command operate. + * @return + */ +size_t RAI_RunInfoBatchSize(struct RedisAI_RunInfo *rinfo); + +/** + * + * @param rinfo1 rinfo context 1 in which RedisAI blocking command 1 operates. + * @param rinfo2 rinfo context 2 in which RedisAI blocking command 2 operates. + * @return + */ +int RAI_RunInfoBatchable(struct RedisAI_RunInfo *rinfo1, + struct RedisAI_RunInfo *rinfo2); + +#endif /* SRC_RUN_INFO_H_ */ diff --git a/src/script.c b/src/script.c index 8d1a785ad..c06be1e83 100644 --- a/src/script.c +++ b/src/script.c @@ -207,3 +207,25 @@ RAI_Script* RAI_ScriptGetShallowCopy(RAI_Script* script) { ++script->refCount; return script; } + + +/* Return REDISMODULE_ERR if there was an error getting the Script. + * Return REDISMODULE_OK if the model value stored at key was correctly + * returned and available at *model variable. */ +int RAI_GetScriptFromKeyspace(RedisModuleCtx *ctx, RedisModuleString *keyName, + RedisModuleKey **key, RAI_Script **script, + int mode) { + *key = RedisModule_OpenKey(ctx, keyName, mode); + if (RedisModule_KeyType(*key) == REDISMODULE_KEYTYPE_EMPTY) { + RedisModule_CloseKey(*key); + RedisModule_ReplyWithError(ctx, "ERR script key is empty"); + return REDISMODULE_ERR; + } + if (RedisModule_ModuleTypeGetType(*key) != RedisAI_ScriptType) { + RedisModule_CloseKey(*key); + RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); + return REDISMODULE_ERR; + } + *script = RedisModule_ModuleTypeGetValue(*key); + return REDISMODULE_OK; +} diff --git a/src/script.h b/src/script.h index fdc0985fa..c9ee90ad6 100644 --- a/src/script.h +++ b/src/script.h @@ -22,5 +22,11 @@ void RAI_ScriptRunCtxFree(RAI_ScriptRunCtx* sctx); int RAI_ScriptRun(RAI_ScriptRunCtx* sctx, RAI_Error* err); RAI_Script* RAI_ScriptGetShallowCopy(RAI_Script* script); +/* Return REDISMODULE_ERR if there was an error getting the Script. 
+ * Return REDISMODULE_OK if the model value stored at key was correctly + * returned and available at *model variable. */ +int RAI_GetScriptFromKeyspace(RedisModuleCtx *ctx, RedisModuleString *keyName, + RedisModuleKey **key, RAI_Script **script, + int mode); #endif /* SRC_SCRIPT_H_ */ diff --git a/src/stats.c b/src/stats.c index 9d63c9e16..a2f942650 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1,10 +1,25 @@ #include "stats.h" -void* RAI_AddStatsEntry(RedisModuleCtx* ctx, RedisModuleString* key, RAI_RunType runtype, - RAI_Backend backend, const char* devicestr, const char* tag) { +#include + +long long ustime(void) { + struct timeval tv; + long long ust; + + gettimeofday(&tv, NULL); + ust = ((long long)tv.tv_sec) * 1000000; + ust += tv.tv_usec; + return ust; +} + +mstime_t mstime(void) { return ustime() / 1000; } + +void* RAI_AddStatsEntry(RedisModuleCtx* ctx, RedisModuleString* key, + RAI_RunType runtype, RAI_Backend backend, + const char* devicestr, const char* tag) { const char* infokey = RedisModule_StringPtrLen(key, NULL); - struct RedisAI_RunStats *rstats = NULL; + struct RedisAI_RunStats* rstats = NULL; rstats = RedisModule_Calloc(1, sizeof(struct RedisAI_RunStats)); RedisModule_RetainString(ctx, key); rstats->key = key; @@ -18,9 +33,9 @@ void* RAI_AddStatsEntry(RedisModuleCtx* ctx, RedisModuleString* key, RAI_RunType return (void*)infokey; } -void RAI_ListStatsEntries(RAI_RunType type, long long* nkeys, RedisModuleString*** keys, - const char*** tags) { - AI_dictIterator *stats_iter = AI_dictGetSafeIterator(run_stats); +void RAI_ListStatsEntries(RAI_RunType type, long long* nkeys, + RedisModuleString*** keys, const char*** tags) { + AI_dictIterator* stats_iter = AI_dictGetSafeIterator(run_stats); long long stats_size = AI_dictSize(run_stats); @@ -29,8 +44,8 @@ void RAI_ListStatsEntries(RAI_RunType type, long long* nkeys, RedisModuleString* *nkeys = 0; - AI_dictEntry *stats_entry = AI_dictNext(stats_iter); - struct RedisAI_RunStats *rstats = NULL; + 
AI_dictEntry* stats_entry = AI_dictNext(stats_iter); + struct RedisAI_RunStats* rstats = NULL; while (stats_entry) { rstats = AI_dictGetVal(stats_entry); @@ -48,18 +63,23 @@ void RAI_ListStatsEntries(RAI_RunType type, long long* nkeys, RedisModuleString* } void RAI_RemoveStatsEntry(void* infokey) { - AI_dictEntry *stats_entry = AI_dictFind(run_stats, infokey); + AI_dictEntry* stats_entry = AI_dictFind(run_stats, infokey); if (stats_entry) { - struct RedisAI_RunStats *rstats = AI_dictGetVal(stats_entry); + struct RedisAI_RunStats* rstats = AI_dictGetVal(stats_entry); AI_dictDelete(run_stats, infokey); RAI_FreeRunStats(rstats); RedisModule_Free(rstats); } } -void RAI_FreeRunStats(struct RedisAI_RunStats *rstats) { +void RAI_FreeRunStats(struct RedisAI_RunStats* rstats) { RedisModule_Free(rstats->devicestr); RedisModule_Free(rstats->tag); } +void RedisAI_FreeRunStats(RedisModuleCtx* ctx, + struct RedisAI_RunStats* rstats) { + RedisModule_FreeString(ctx, rstats->key); + RedisModule_Free(rstats->devicestr); +} \ No newline at end of file diff --git a/src/stats.h b/src/stats.h index 117f9d3ec..76ee7f122 100644 --- a/src/stats.h +++ b/src/stats.h @@ -1,12 +1,14 @@ #ifndef SRC_STATS_H_ #define SRC_STATS_H_ -#include "redismodule.h" +#include + #include "config.h" +#include "redismodule.h" #include "util/dict.h" struct RedisAI_RunStats { - RedisModuleString *key; + RedisModuleString* key; RAI_RunType type; RAI_Backend backend; char* devicestr; @@ -17,16 +19,22 @@ struct RedisAI_RunStats { long long nerrors; }; -void* RAI_AddStatsEntry(RedisModuleCtx* ctx, RedisModuleString* key, RAI_RunType type, - RAI_Backend backend, const char* devicestr, const char* tag); +AI_dict* run_stats; + +long long ustime(void); +mstime_t mstime(void); + +void* RAI_AddStatsEntry(RedisModuleCtx* ctx, RedisModuleString* key, + RAI_RunType type, RAI_Backend backend, + const char* devicestr, const char* tag); void RAI_RemoveStatsEntry(void* infokey); -void RAI_ListStatsEntries(RAI_RunType type, long 
long* nkeys, RedisModuleString*** keys, - const char*** tags); +void RAI_ListStatsEntries(RAI_RunType type, long long* nkeys, + RedisModuleString*** keys, const char*** tags); -void RAI_FreeRunStats(struct RedisAI_RunStats *rstats); +void RAI_FreeRunStats(struct RedisAI_RunStats* rstats); -AI_dict *run_stats; +void RedisAI_FreeRunStats(RedisModuleCtx* ctx, struct RedisAI_RunStats* rstats); #endif /* SRC_SATTS_H_ */ diff --git a/src/tensor.c b/src/tensor.c index 7d79b110e..5d5176dbb 100644 --- a/src/tensor.c +++ b/src/tensor.c @@ -1,10 +1,13 @@ #include "tensor.h" +#include "err.h" #include "tensor_struct.h" #include #include #include #include "rmutil/alloc.h" +#include "util/dict.h" #include +#include "redisai.h" RedisModuleType *RedisAI_TensorType = NULL; @@ -294,7 +297,6 @@ RAI_Tensor* RAI_TensorCreateWithDLDataType(DLDataType dtype, long long* dims, in }; ret->refCount = 1; - ret->tensorRS = NULL; return ret; } @@ -390,6 +392,34 @@ RAI_Tensor* RAI_TensorCreateBySlicingTensor(RAI_Tensor* t, long long offset, lon return ret; } +/** + * Allocate the memory for a new Tensor and copy data fom a tensor to it. + * @param t Source tensor to copy. + * @param result Destination tensor to copy. + * @return 0 on success, or 1 if the copy failed + * failed. 
+ */ +int RAI_TensorCopyTensor(RAI_Tensor* t, RAI_Tensor** dest) { + const long long ndims = RAI_TensorNumDims(t); + long long dims[ndims]; + + const long long dtype_size = RAI_TensorDataSize(t); + long long sample_size = 1; + + for (long long i=0; irefCount = 1; - ret->tensorRS = NULL; return ret; } @@ -440,24 +469,24 @@ size_t RAI_TensorDataSizeFromDLDataType(DLDataType dtype) { return Tensor_DataTypeSize(dtype); } -void RAI_TensorFree(RAI_Tensor* t){ - if (--t->refCount <= 0){ - if (t->tensor.deleter) { - t->tensor.deleter(&t->tensor); - } - else { - RedisModule_Free(t->tensor.dl_tensor.shape); - if (t->tensor.dl_tensor.strides) { - RedisModule_Free(t->tensor.dl_tensor.strides); - } - if ( t->tensorRS != NULL ){ - RedisModule_FreeString(NULL,t->tensorRS); - } - else{ - RedisModule_Free(t->tensor.dl_tensor.data); +void RAI_TensorFree(RAI_Tensor *t) { + if (t) { + if (--t->refCount <= 0) { + if (t->tensor.deleter) { + t->tensor.deleter(&t->tensor); + } else { + if (t->tensor.dl_tensor.shape) { + RedisModule_Free(t->tensor.dl_tensor.shape); + } + if (t->tensor.dl_tensor.strides) { + RedisModule_Free(t->tensor.dl_tensor.strides); + } + if (t->tensor.dl_tensor.data) { + RedisModule_Free(t->tensor.dl_tensor.data); + } + RedisModule_Free(t); } } - RedisModule_Free(t); } } @@ -466,12 +495,6 @@ int RAI_TensorSetData(RAI_Tensor* t, const char* data, size_t len){ return 1; } -int RAI_TensorSetDataFromRS(RAI_Tensor* t, RedisModuleString* rs){ - t->tensorRS = rs; - t->tensor.dl_tensor.data = (void*)RedisModule_StringPtrLen(rs,NULL); - return 1; -} - int RAI_TensorSetValueFromLongLong(RAI_Tensor* t, long long i, long long val){ DLDataType dtype = t->tensor.dl_tensor.dtype; void* data = t->tensor.dl_tensor.data; @@ -621,3 +644,366 @@ size_t RAI_TensorByteSize(RAI_Tensor* t){ char* RAI_TensorData(RAI_Tensor* t){ return t->tensor.dl_tensor.data; } + +/* Return REDISMODULE_ERR if is the key not associated with a tensor type. + * Return REDISMODULE_OK otherwise. 
*/ +int RAI_OpenKey_Tensor(RedisModuleCtx *ctx, RedisModuleString *keyName, + RedisModuleKey **key, + int mode) { + *key = RedisModule_OpenKey(ctx, keyName, mode); + if (RedisModule_KeyType(*key) == REDISMODULE_KEYTYPE_EMPTY) { + return REDISMODULE_OK; + } + if (RedisModule_ModuleTypeGetType(*key) != RedisAI_TensorType) { + RedisModule_CloseKey(*key); + RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); + return REDISMODULE_ERR; + } + return REDISMODULE_OK; +} + +/* Return REDISMODULE_ERR if there was an error getting the Tensor. + * Return REDISMODULE_OK if the tensor value stored at key was correctly + * returned and available at *tensor variable. */ +int RAI_GetTensorFromKeyspace(RedisModuleCtx *ctx, RedisModuleString *keyName, + RedisModuleKey **key, RAI_Tensor **tensor, + int mode) { + *key = RedisModule_OpenKey(ctx, keyName, mode); + if (RedisModule_KeyType(*key) == REDISMODULE_KEYTYPE_EMPTY) { + RedisModule_CloseKey(*key); + RedisModule_ReplyWithError(ctx, "ERR tensor key is empty"); + return REDISMODULE_ERR; + } + if (RedisModule_ModuleTypeGetType(*key) != RedisAI_TensorType) { + RedisModule_CloseKey(*key); + RedisModule_ReplyWithError(ctx, REDISMODULE_ERRORMSG_WRONGTYPE); + return REDISMODULE_ERR; + } + *tensor = RedisModule_ModuleTypeGetValue(*key); + return REDISMODULE_OK; +} + +/* Return REDISMODULE_ERR if there was an error getting the Tensor. + * Return REDISMODULE_OK if the tensor value is present at the localContextDict. 
*/ +int RAI_getTensorFromLocalContext(RedisModuleCtx *ctx, + AI_dict *localContextDict, + const char *localContextKey, + RAI_Tensor **tensor, RAI_Error *error) { + int result = REDISMODULE_ERR; + AI_dictEntry *tensor_entry = AI_dictFind(localContextDict, localContextKey); + if (tensor_entry) { + *tensor = AI_dictGetVal(tensor_entry); + result = REDISMODULE_OK; + } else{ + if (ctx == NULL) { + RAI_SetError(error, RAI_ETENSORGET, + "ERR tensor key is empty"); + } else { + RedisModule_ReplyWithError( + ctx, "ERR tensor key is empty"); + } + } + return result; +} + +void RedisAI_ReplicateTensorSet(RedisModuleCtx *ctx, RedisModuleString *key, RAI_Tensor *t) { + long long ndims = RAI_TensorNumDims(t); + + char *dtypestr = NULL; + Tensor_DataTypeStr(RAI_TensorDataType(t), &dtypestr); + + assert(dtypestr); + + char *data = RAI_TensorData(t); + long long size = RAI_TensorByteSize(t); + + RedisModuleString* dims[ndims]; + + for (long long i=0; iiterators == 0) AI_dictRehash(d,1); } -/* Add an element to the target hash table */ +/** + * Add an element to the target hash table + * @return 0 on success, or 1 if the insertion failed + * failed. + */ int AI_dictAdd(AI_dict *d, void *key, void *val) { AI_dictEntry *entry = AI_dictAddRaw(d,key,NULL); diff --git a/src/util/siphash.c.inc b/src/util/siphash.c.inc new file mode 100644 index 000000000..6b9419031 --- /dev/null +++ b/src/util/siphash.c.inc @@ -0,0 +1,360 @@ +/* + SipHash reference C implementation + + Copyright (c) 2012-2016 Jean-Philippe Aumasson + + Copyright (c) 2012-2014 Daniel J. Bernstein + Copyright (c) 2017 Salvatore Sanfilippo + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along + with this software. If not, see + . 
+ + ---------------------------------------------------------------------------- + + This version was modified by Salvatore Sanfilippo + in the following ways: + + 1. We use SipHash 1-2. This is not believed to be as strong as the + suggested 2-4 variant, but AFAIK there are not trivial attacks + against this reduced-rounds version, and it runs at the same speed + as Murmurhash2 that we used previously, why the 2-4 variant slowed + down Redis by a 4% figure more or less. + 2. Hard-code rounds in the hope the compiler can optimize it more + in this raw from. Anyway we always want the standard 2-4 variant. + 3. Modify the prototype and implementation so that the function directly + returns an uint64_t value, the hash itself, instead of receiving an + output buffer. This also means that the output size is set to 8 bytes + and the 16 bytes output code handling was removed. + 4. Provide a case insensitive variant to be used when hashing strings that + must be considered identical by the hash table regardless of the case. + If we don't have directly a case insensitive hash function, we need to + perform a text transformation in some temporary buffer, which is costly. + 5. Remove debugging code. + 6. Modified the original test.c file to be a stand-alone function testing + the function in the new form (returing an uint64_t) using just the + relevant test vector. + */ +#include +#include +#include +#include +#include + +/* Fast tolower() alike function that does not care about locale + * but just returns a-z insetad of A-Z. */ +int siptlw(int c) { + if (c >= 'A' && c <= 'Z') { + return c+('a'-'A'); + } else { + return c; + } +} + +/* Test of the CPU is Little Endian and supports not aligned accesses. + * Two interesting conditions to speedup the function that happen to be + * in most of x86 servers. 
*/ +#if defined(__X86_64__) || defined(__x86_64__) || defined (__i386__) +#define UNALIGNED_LE_CPU +#endif + +#define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b)))) + +#define U32TO8_LE(p, v) \ + (p)[0] = (uint8_t)((v)); \ + (p)[1] = (uint8_t)((v) >> 8); \ + (p)[2] = (uint8_t)((v) >> 16); \ + (p)[3] = (uint8_t)((v) >> 24); + +#define U64TO8_LE(p, v) \ + U32TO8_LE((p), (uint32_t)((v))); \ + U32TO8_LE((p) + 4, (uint32_t)((v) >> 32)); + +#ifdef UNALIGNED_LE_CPU +#define U8TO64_LE(p) (*((uint64_t*)(p))) +#else +#define U8TO64_LE(p) \ + (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) | \ + ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) | \ + ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) | \ + ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56)) +#endif + +#define U8TO64_LE_NOCASE(p) \ + (((uint64_t)(siptlw((p)[0]))) | \ + ((uint64_t)(siptlw((p)[1])) << 8) | \ + ((uint64_t)(siptlw((p)[2])) << 16) | \ + ((uint64_t)(siptlw((p)[3])) << 24) | \ + ((uint64_t)(siptlw((p)[4])) << 32) | \ + ((uint64_t)(siptlw((p)[5])) << 40) | \ + ((uint64_t)(siptlw((p)[6])) << 48) | \ + ((uint64_t)(siptlw((p)[7])) << 56)) + +#define SIPROUND \ + do { \ + v0 += v1; \ + v1 = ROTL(v1, 13); \ + v1 ^= v0; \ + v0 = ROTL(v0, 32); \ + v2 += v3; \ + v3 = ROTL(v3, 16); \ + v3 ^= v2; \ + v0 += v3; \ + v3 = ROTL(v3, 21); \ + v3 ^= v0; \ + v2 += v1; \ + v1 = ROTL(v1, 17); \ + v1 ^= v2; \ + v2 = ROTL(v2, 32); \ + } while (0) + +uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) { +#ifndef UNALIGNED_LE_CPU + uint64_t hash; + uint8_t *out = (uint8_t*) &hash; +#endif + uint64_t v0 = 0x736f6d6570736575ULL; + uint64_t v1 = 0x646f72616e646f6dULL; + uint64_t v2 = 0x6c7967656e657261ULL; + uint64_t v3 = 0x7465646279746573ULL; + uint64_t k0 = U8TO64_LE(k); + uint64_t k1 = U8TO64_LE(k + 8); + uint64_t m; + const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); + const int left = inlen & 7; + uint64_t b = ((uint64_t)inlen) << 56; + v3 ^= k1; + v2 ^= k0; + 
v1 ^= k1; + v0 ^= k0; + + for (; in != end; in += 8) { + m = U8TO64_LE(in); + v3 ^= m; + + SIPROUND; + + v0 ^= m; + } + + switch (left) { + case 7: b |= ((uint64_t)in[6]) << 48; /* fall-thru */ + case 6: b |= ((uint64_t)in[5]) << 40; /* fall-thru */ + case 5: b |= ((uint64_t)in[4]) << 32; /* fall-thru */ + case 4: b |= ((uint64_t)in[3]) << 24; /* fall-thru */ + case 3: b |= ((uint64_t)in[2]) << 16; /* fall-thru */ + case 2: b |= ((uint64_t)in[1]) << 8; /* fall-thru */ + case 1: b |= ((uint64_t)in[0]); break; + case 0: break; + } + + v3 ^= b; + + SIPROUND; + + v0 ^= b; + v2 ^= 0xff; + + SIPROUND; + SIPROUND; + + b = v0 ^ v1 ^ v2 ^ v3; +#ifndef UNALIGNED_LE_CPU + U64TO8_LE(out, b); + return hash; +#else + return b; +#endif +} + +uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k) +{ +#ifndef UNALIGNED_LE_CPU + uint64_t hash; + uint8_t *out = (uint8_t*) &hash; +#endif + uint64_t v0 = 0x736f6d6570736575ULL; + uint64_t v1 = 0x646f72616e646f6dULL; + uint64_t v2 = 0x6c7967656e657261ULL; + uint64_t v3 = 0x7465646279746573ULL; + uint64_t k0 = U8TO64_LE(k); + uint64_t k1 = U8TO64_LE(k + 8); + uint64_t m; + const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t)); + const int left = inlen & 7; + uint64_t b = ((uint64_t)inlen) << 56; + v3 ^= k1; + v2 ^= k0; + v1 ^= k1; + v0 ^= k0; + + for (; in != end; in += 8) { + m = U8TO64_LE_NOCASE(in); + v3 ^= m; + + SIPROUND; + + v0 ^= m; + } + + switch (left) { + case 7: b |= ((uint64_t)siptlw(in[6])) << 48; /* fall-thru */ + case 6: b |= ((uint64_t)siptlw(in[5])) << 40; /* fall-thru */ + case 5: b |= ((uint64_t)siptlw(in[4])) << 32; /* fall-thru */ + case 4: b |= ((uint64_t)siptlw(in[3])) << 24; /* fall-thru */ + case 3: b |= ((uint64_t)siptlw(in[2])) << 16; /* fall-thru */ + case 2: b |= ((uint64_t)siptlw(in[1])) << 8; /* fall-thru */ + case 1: b |= ((uint64_t)siptlw(in[0])); break; + case 0: break; + } + + v3 ^= b; + + SIPROUND; + + v0 ^= b; + v2 ^= 0xff; + + SIPROUND; + SIPROUND; + + b = v0 ^ v1 ^ v2 
^ v3; +#ifndef UNALIGNED_LE_CPU + U64TO8_LE(out, b); + return hash; +#else + return b; +#endif +} + + +/* --------------------------------- TEST ------------------------------------ */ + +#ifdef SIPHASH_TEST + +const uint8_t vectors_sip64[64][8] = { + { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, }, + { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, }, + { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, }, + { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, }, + { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, }, + { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, }, + { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, }, + { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, }, + { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, }, + { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, }, + { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, }, + { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, }, + { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, }, + { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, }, + { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, }, + { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, }, + { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, }, + { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, }, + { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, }, + { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, }, + { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, }, + { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, }, + { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, }, + { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, }, + { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, }, + { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, }, + { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, }, + { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, }, + { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, }, + { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, }, + { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, }, + { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 
0xd8, 0x32, }, + { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, }, + { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, }, + { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, }, + { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, }, + { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, }, + { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, }, + { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, }, + { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, }, + { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, }, + { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, }, + { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, }, + { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, }, + { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, }, + { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, }, + { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, }, + { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, }, + { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, }, + { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, }, + { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, }, + { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, }, + { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, }, + { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, }, + { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, }, + { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, }, + { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, }, + { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, }, + { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, }, + { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, }, + { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, }, + { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, }, + { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, }, + { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, }, +}; + + +/* Test siphash using a test vector. Returns 0 if the function passed + * all the tests, otherwise 1 is returned. + * + * IMPORTANT: The test vector is for SipHash 2-4. 
Before running + * the test revert back the siphash() function to 2-4 rounds since + * now it uses 1-2 rounds. */ +int siphash_test(void) { + uint8_t in[64], k[16]; + int i; + int fails = 0; + + for (i = 0; i < 16; ++i) + k[i] = i; + + for (i = 0; i < 64; ++i) { + in[i] = i; + uint64_t hash = siphash(in, i, k); + const uint8_t *v = NULL; + v = (uint8_t *)vectors_sip64; + if (memcmp(&hash, v + (i * 8), 8)) { + /* printf("fail for %d bytes\n", i); */ + fails++; + } + } + + /* Run a few basic tests with the case insensitive version. */ + uint64_t h1, h2; + h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678"); + h2 = siphash_nocase((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678"); + if (h1 != h2) fails++; + + h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678"); + h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678"); + if (h1 != h2) fails++; + + h1 = siphash((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678"); + h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678"); + if (h1 == h2) fails++; + + if (!fails) return 0; + return 1; +} + +int main(void) { + if (siphash_test() == 0) { + printf("SipHash test: OK\n"); + return 0; + } else { + printf("SipHash test: FAILED\n"); + return 1; + } +} + +#endif diff --git a/test/test_requirements.txt b/test/test_requirements.txt index 441f4e7f3..674878911 100644 --- a/test/test_requirements.txt +++ b/test/test_requirements.txt @@ -1,3 +1,4 @@ numpy>=1.17.1 scikit-image -redis-py-cluster \ No newline at end of file +redis-py-cluster +redisai \ No newline at end of file diff --git a/test/tests_dag.py b/test/tests_dag.py new file mode 100644 index 000000000..e7abe7064 --- /dev/null +++ b/test/tests_dag.py @@ -0,0 +1,476 @@ +import redis + +from includes import * + +''' +python -m RLTest --test tests_dag.py --module path/to/redisai.so +''' + + +def test_dag_load(env): + con = env.getConnection() + ret = con.execute_command( + "AI.TENSORSET 
persisted_tensor_1 FLOAT 1 2 VALUES 5 10") + env.assertEqual(ret, b'OK') + command = "AI.DAGRUN "\ + "LOAD 1 persisted_tensor_1 "\ + "PERSIST 1 tensor1 |> "\ + "AI.TENSORSET tensor1 FLOAT 1 2 VALUES 5 10" + + ret = con.execute_command(command) + env.assertEqual(ret, [b'OK']) + +def test_dag_load_errors(env): + con = env.getConnection() + + # ERR tensor key is empty + try: + command = "AI.DAGRUN "\ + "LOAD 1 persisted_tensor_1 "\ + "PERSIST 1 tensor1 |> "\ + "AI.TENSORSET tensor1 FLOAT 1 2 VALUES 5 10" + + ret = con.execute_command(command) + except Exception as e: + exception = e + env.assertEqual(type(exception), redis.exceptions.ResponseError) + env.assertEqual("tensor key is empty",exception.__str__()) + + # WRONGTYPE Operation against a key holding the wrong kind of value + try: + con.execute_command('SET', 'non-tensor', 'value') + command = "AI.DAGRUN "\ + "LOAD 1 non-tensor "\ + "PERSIST 1 tensor1 |> "\ + "AI.TENSORSET tensor1 FLOAT 1 2 VALUES 5 10" + + ret = con.execute_command(command) + except Exception as e: + exception = e + env.assertEqual(type(exception), redis.exceptions.ResponseError) + env.assertEqual("WRONGTYPE Operation against a key holding the wrong kind of value",exception.__str__()) + + +def test_dag_common_errors(env): + con = env.getConnection() + + # ERR unsupported command within DAG + try: + command = "AI.DAGRUN |> "\ + "AI.DONTEXIST tensor1 FLOAT 1 2 VALUES 5 10" + + ret = con.execute_command(command) + except Exception as e: + exception = e + env.assertEqual(type(exception), redis.exceptions.ResponseError) + env.assertEqual("ERR unsupported command within DAG",exception.__str__()) + + # ERR wrong number of arguments for 'AI.DAGRUN' command + try: + command = "AI.DAGRUN " + + ret = con.execute_command(command) + except Exception as e: + exception = e + env.assertEqual(type(exception), redis.exceptions.ResponseError) + env.assertEqual("wrong number of arguments for 'AI.DAGRUN' command",exception.__str__()) + + # ERR invalid or negative 
value found in number of keys to PERSIST + try: + command = "AI.DAGRUN PERSIST notnumber |> "\ + "AI.TENSORSET tensor1 FLOAT 1 2 VALUES 5 10" + + ret = con.execute_command(command) + except Exception as e: + exception = e + env.assertEqual(type(exception), redis.exceptions.ResponseError) + env.assertEqual("invalid or negative value found in number of keys to PERSIST",exception.__str__()) + + # ERR invalid or negative value found in number of keys to LOAD + try: + command = "AI.DAGRUN LOAD notnumber |> "\ + "AI.TENSORSET tensor1 FLOAT 1 2 VALUES 5 10" + + ret = con.execute_command(command) + except Exception as e: + exception = e + env.assertEqual(type(exception), redis.exceptions.ResponseError) + env.assertEqual("invalid or negative value found in number of keys to LOAD",exception.__str__()) + + +def test_dag_modelrun_financialNet_errors(env): + con = env.getConnection() + + model_pb, creditcard_transactions, creditcard_referencedata = load_creditcardfraud_data( + env) + ret = con.execute_command('AI.MODELSET', 'financialNet', 'TF', "CPU", + 'INPUTS', 'transaction', 'reference', 'OUTPUTS', 'output', model_pb) + env.assertEqual(ret, b'OK') + + tensor_number=1 + ret = con.execute_command( 'AI.TENSORSET', 'referenceTensor:{0}'.format(tensor_number), + 'FLOAT', 1, 256, + 'BLOB', creditcard_referencedata[0].tobytes()) + env.assertEqual(ret, b'OK') + + + # ERR wrong number of inputs + try: + tensor_number=1 + ret = con.execute_command( + 'AI.DAGRUN', 'LOAD', '1', 'referenceTensor:{}'.format(tensor_number), + 'PERSIST', '1', 'classificationTensor:{}'.format(tensor_number), '|>', + 'AI.TENSORSET', 'transactionTensor:{}'.format(tensor_number), 'FLOAT', 1, 30, '|>', + 'AI.MODELRUN', 'financialNet', + 'INPUTS', 'transactionTensor:{}'.format(tensor_number), + 'OUTPUTS', 'classificationTensor:{}'.format(tensor_number), '|>', + 'AI.TENSORGET', 'classificationTensor:{}'.format(tensor_number), 'META', + ) + except Exception as e: + exception = e + env.assertEqual(type(exception), 
redis.exceptions.ResponseError) + env.assertEqual("ERR unsupported command within DAG",exception.__str__()) + + + +def test_dag_local_tensorset(env): + con = env.getConnection() + + command = "AI.DAGRUN "\ + "AI.TENSORSET volatile_tensor1 FLOAT 1 2 VALUES 5 10 |> "\ + "AI.TENSORSET volatile_tensor2 FLOAT 1 2 VALUES 5 10 " + + ret = con.execute_command(command) + env.assertEqual(ret, [b'OK',b'OK']) + + # assert that transaction tensor does not exist + ret = con.execute_command("EXISTS volatile_tensor") + env.assertEqual(ret, 0 ) + +def test_dag_local_tensorset_persist(env): + con = env.getConnection() + + command = "AI.DAGRUN "\ + "PERSIST 1 tensor1 |> "\ + "AI.TENSORSET tensor1 FLOAT 1 2 VALUES 5 10" + + ret = con.execute_command(command) + env.assertEqual(ret, [b'OK']) + + # assert that transaction tensor exists + ret = con.execute_command("EXISTS tensor1") + env.assertEqual(ret, 1 ) + + ret = con.execute_command("AI.TENSORGET tensor1 VALUES") + env.assertEqual(ret, [b'FLOAT', [1, 2], [b'5', b'10']]) + + +def test_dag_multilocal_tensorset_persist(env): + con = env.getConnection() + + command = "AI.DAGRUN "\ + "PERSIST 1 tensor3 |> "\ + "AI.TENSORSET tensor1 FLOAT 1 2 VALUES 5 10 |> "\ + "AI.TENSORSET tensor2 FLOAT 1 2 VALUES 5 10 |> "\ + "AI.TENSORSET tensor3 FLOAT 1 2 VALUES 5 10 |> "\ + "AI.TENSORSET tensor4 FLOAT 1 2 VALUES 5 10 " + + ret = con.execute_command(command)! + env.assertEqual([b'OK',b'OK',b'OK',b'OK'],ret) + + # assert that transaction tensor does not exist (not listed in PERSIST) + ret = con.execute_command("EXISTS tensor1") + env.assertEqual(ret, 0 ) + + # assert that transaction tensor does not exist (not listed in PERSIST) + ret = con.execute_command("EXISTS tensor2") + env.assertEqual(ret, 0 ) + + # assert that transaction tensor exists + ret = con.execute_command("EXISTS tensor3") + env.assertEqual(ret, 1 ) + + # assert that transaction tensor does not exist (not listed in PERSIST) + ret = con.execute_command("EXISTS tensor4") + env.assertEqual(ret, 0 ) + + ret = con.execute_command("AI.TENSORGET tensor3 VALUES") + env.assertEqual(ret, 
[b'FLOAT', [1, 2], [b'5', b'10']]) + +def test_dag_local_tensorset_tensorget_persist(env): + con = env.getConnection() + + command = "AI.DAGRUN PERSIST 1 tensor1 |> "\ + "AI.TENSORSET tensor1 FLOAT 1 2 VALUES 5 10 |> "\ + "AI.TENSORGET tensor1 VALUES" + + ret = con.execute_command(command) + env.assertEqual(ret, [b'OK', [b'FLOAT', [1, 2], [b'5', b'10']]]) + + ret = con.execute_command("AI.TENSORGET tensor1 VALUES") + env.assertEqual(ret, [b'FLOAT', [1, 2], [b'5', b'10']]) + + +def test_dag_local_multiple_tensorset_on_same_tensor(env): + con = env.getConnection() + + command = "AI.DAGRUN "\ + "PERSIST 1 tensor1 |> "\ + "AI.TENSORSET tensor1 FLOAT 1 2 VALUES 5 10 |> "\ + "AI.TENSORGET tensor1 VALUES |> "\ + "AI.TENSORSET tensor1 FLOAT 1 4 VALUES 20 40 60 80 |> "\ + "AI.TENSORGET tensor1 VALUES" + + ret = con.execute_command(command) + env.assertEqual([ + b'OK', + [b'FLOAT', [1, 2], [b'5', b'10']], + b'OK', + [b'FLOAT', [1, 4], [b'20', b'40', b'60', b'80']] + ], ret) + + ret = con.execute_command("AI.TENSORGET tensor1 VALUES") + env.assertEqual([b'FLOAT', [1, 4], [b'20', b'40',b'60',b'80']],ret) + + +def test_dag_load_persist_tensorset_tensorget(env): + con = env.getConnection() + + ret = con.execute_command( + "AI.TENSORSET persisted_tensor_1 FLOAT 1 2 VALUES 5 10") + env.assertEqual(ret, b'OK') + + ret = con.execute_command( + "AI.TENSORSET persisted_tensor_2 FLOAT 1 3 VALUES 0 0 0") + env.assertEqual(ret, b'OK') + + command = "AI.DAGRUN LOAD 2 persisted_tensor_1 persisted_tensor_2 PERSIST 1 volatile_tensor_persisted |> "\ + "AI.TENSORSET volatile_tensor_persisted FLOAT 1 2 VALUES 5 10 |> "\ + "AI.TENSORGET persisted_tensor_1 VALUES |> "\ + "AI.TENSORGET persisted_tensor_2 VALUES " + + ret = con.execute_command(command) + env.assertEqual(ret, [b'OK', [b'FLOAT', [1, 2], [b'5', b'10']], [ + b'FLOAT', [1, 3], [b'0', b'0', b'0']]]) + + ret = con.execute_command("AI.TENSORGET volatile_tensor_persisted VALUES") + env.assertEqual(ret, [b'FLOAT', [1, 2], [b'5', b'10']]) + + 
+def test_dag_local_tensorset_tensorget(env): + con = env.getConnection() + + command = "AI.DAGRUN "\ + "AI.TENSORSET volatile_tensor FLOAT 1 2 VALUES 5 10 |> "\ + "AI.TENSORGET volatile_tensor VALUES" + + ret = con.execute_command(command) + env.assertEqual(ret, [b'OK', [b'FLOAT', [1, 2], [b'5', b'10']]]) + + +def test_dag_keyspace_tensorget(env): + con = env.getConnection() + + ret = con.execute_command( + "AI.TENSORSET persisted_tensor FLOAT 1 2 VALUES 5 10") + env.assertEqual(ret, b'OK') + + command = "AI.DAGRUN LOAD 1 persisted_tensor |> "\ + "AI.TENSORGET persisted_tensor VALUES" + + ret = con.execute_command(command) + env.assertEqual(ret, [[b'FLOAT', [1, 2], [b'5', b'10']]]) + + +def test_dag_keyspace_and_localcontext_tensorget(env): + con = env.getConnection() + + ret = con.execute_command( + "AI.TENSORSET persisted_tensor FLOAT 1 2 VALUES 5 10") + env.assertEqual(ret, b'OK') + + command = "AI.DAGRUN LOAD 1 persisted_tensor |> "\ + "AI.TENSORSET volatile_tensor FLOAT 1 2 VALUES 5 10 |> "\ + "AI.TENSORGET persisted_tensor VALUES |> "\ + "AI.TENSORGET volatile_tensor VALUES" + + ret = con.execute_command(command) + env.assertEqual(ret, [b'OK', [b'FLOAT', [1, 2], [b'5', b'10']], [ + b'FLOAT', [1, 2], [b'5', b'10']]]) + + +def test_dag_modelrun_financialNet_separate_tensorget(env): + con = env.getConnection() + + model_pb, creditcard_transactions, creditcard_referencedata = load_creditcardfraud_data( + env) + ret = con.execute_command('AI.MODELSET', 'financialNet', 'TF', "CPU", + 'INPUTS', 'transaction', 'reference', 'OUTPUTS', 'output', model_pb) + env.assertEqual(ret, b'OK') + + tensor_number = 1 + for reference_tensor in creditcard_referencedata[:5]: + ret = con.execute_command( 'AI.TENSORSET', 'referenceTensor:{0}'.format(tensor_number), + 'FLOAT', 1, 256, + 'BLOB', reference_tensor.tobytes()) + env.assertEqual(ret, b'OK') + tensor_number = tensor_number + 1 + + tensor_number = 1 + for transaction_tensor in creditcard_transactions[:5]: + ret = 
con.execute_command( + 'AI.DAGRUN', 'LOAD', '1', 'referenceTensor:{}'.format(tensor_number), + 'PERSIST', '1', 'classificationTensor:{}'.format(tensor_number), '|>', + 'AI.TENSORSET', 'transactionTensor:{}'.format(tensor_number), 'FLOAT', 1, 30,'BLOB', transaction_tensor.tobytes(), '|>', + 'AI.MODELRUN', 'financialNet', + 'INPUTS', 'transactionTensor:{}'.format(tensor_number), 'referenceTensor:{}'.format(tensor_number), + 'OUTPUTS', 'classificationTensor:{}'.format(tensor_number), + ) + env.assertEqual([b'OK',b'OK'],ret) + + ret = con.execute_command("AI.TENSORGET classificationTensor:{} META".format( + tensor_number)) + env.assertEqual([b'FLOAT', [1, 2]],ret) + + # assert that transaction tensor does not exist + ret = con.execute_command("EXISTS transactionTensor:{}".format( + tensor_number)) + env.assertEqual(ret, 0 ) + tensor_number = tensor_number + 1 + +def test_dag_modelrun_financialNet(env): + con = env.getConnection() + + model_pb, creditcard_transactions, creditcard_referencedata = load_creditcardfraud_data( + env) + ret = con.execute_command('AI.MODELSET', 'financialNet', 'TF', "CPU", + 'INPUTS', 'transaction', 'reference', 'OUTPUTS', 'output', model_pb) + env.assertEqual(ret, b'OK') + + tensor_number = 1 + for reference_tensor in creditcard_referencedata[:5]: + ret = con.execute_command( 'AI.TENSORSET', 'referenceTensor:{0}'.format(tensor_number), + 'FLOAT', 1, 256, + 'BLOB', reference_tensor.tobytes()) + env.assertEqual(ret, b'OK') + tensor_number = tensor_number + 1 + + tensor_number = 1 + for transaction_tensor in creditcard_transactions[:5]: + ret = con.execute_command( + 'AI.DAGRUN', 'LOAD', '1', 'referenceTensor:{}'.format(tensor_number), + 'PERSIST', '1', 'classificationTensor:{}'.format(tensor_number), '|>', + 'AI.TENSORSET', 'transactionTensor:{}'.format(tensor_number), 'FLOAT', 1, 30,'BLOB', transaction_tensor.tobytes(), '|>', + 'AI.MODELRUN', 'financialNet', + 'INPUTS', 'transactionTensor:{}'.format(tensor_number), 
'referenceTensor:{}'.format(tensor_number), + 'OUTPUTS', 'classificationTensor:{}'.format(tensor_number), '|>', + 'AI.TENSORGET', 'classificationTensor:{}'.format(tensor_number), 'META', + ) + env.assertEqual([b'OK',b'OK',[b'FLOAT', [1, 2]]],ret) + + # assert that transaction tensor does not exist + ret = con.execute_command("EXISTS transactionTensor:{}".format( + tensor_number)) + env.assertEqual(ret, 0 ) + tensor_number = tensor_number + 1 + +def test_dag_modelrun_financialNet_no_writes(env): + con = env.getConnection() + + model_pb, creditcard_transactions, creditcard_referencedata = load_creditcardfraud_data( + env) + ret = con.execute_command('AI.MODELSET', 'financialNet', 'TF', "CPU", + 'INPUTS', 'transaction', 'reference', 'OUTPUTS', 'output', model_pb) + env.assertEqual(ret, b'OK') + + tensor_number = 1 + for reference_tensor in creditcard_referencedata: + ret = con.execute_command( 'AI.TENSORSET', 'referenceTensor:{0}'.format(tensor_number), + 'FLOAT', 1, 256, + 'BLOB', reference_tensor.tobytes()) + env.assertEqual(ret, b'OK') + tensor_number = tensor_number + 1 + + tensor_number = 1 + for transaction_tensor in creditcard_transactions: + for run_number in range(1,10): + ret = con.execute_command( + 'AI.DAGRUN', 'LOAD', '1', 'referenceTensor:{}'.format(tensor_number), '|>', + 'AI.TENSORSET', 'transactionTensor:{}'.format(tensor_number), 'FLOAT', 1, 30,'BLOB', transaction_tensor.tobytes(), '|>', + 'AI.MODELRUN', 'financialNet', + 'INPUTS', 'transactionTensor:{}'.format(tensor_number), 'referenceTensor:{}'.format(tensor_number), + 'OUTPUTS', 'classificationTensor:{}'.format(tensor_number), '|>', + 'AI.TENSORGET', 'classificationTensor:{}'.format(tensor_number), 'META', '|>', + 'AI.TENSORGET', 'classificationTensor:{}'.format(tensor_number), 'VALUES' + ) + env.assertEqual(4, len(ret)) + env.assertEqual([b'OK',b'OK'], ret[:2]) + env.assertEqual([b'FLOAT', [1, 2]], ret[2]) + dtype, shape, values = ret[3] + # Assert that resulting classification is within [0,1] + 
env.assertEqual(True, 0 <= float(values[0]) <= 1) + env.assertEqual(True, 0 <= float(values[1]) <= 1) + + # assert that transactionTensor does not exist + ret = con.execute_command("EXISTS transactionTensor:{}".format( + tensor_number)) + env.assertEqual(ret, 0 ) + + # assert that classificationTensor does not exist + ret = con.execute_command("EXISTS classificationTensor:{}".format( + tensor_number)) + env.assertEqual(ret, 0 ) + tensor_number = tensor_number + 1 + + +def test_dag_modelrun_financialNet_no_writes_multiple_modelruns(env): + con = env.getConnection() + + model_pb, creditcard_transactions, creditcard_referencedata = load_creditcardfraud_data( + env) + ret = con.execute_command('AI.MODELSET', 'financialNet', 'TF', "CPU", + 'INPUTS', 'transaction', 'reference', 'OUTPUTS', 'output', model_pb) + env.assertEqual(ret, b'OK') + + tensor_number = 1 + for reference_tensor in creditcard_referencedata: + ret = con.execute_command( 'AI.TENSORSET', 'referenceTensor:{0}'.format(tensor_number), + 'FLOAT', 1, 256, + 'BLOB', reference_tensor.tobytes()) + env.assertEqual(ret, b'OK') + tensor_number = tensor_number + 1 + + tensor_number = 1 + for transaction_tensor in creditcard_transactions: + ret = con.execute_command( + 'AI.DAGRUN', 'LOAD', '1', 'referenceTensor:{}'.format(tensor_number), '|>', + 'AI.TENSORSET', 'transactionTensor:{}'.format(tensor_number), 'FLOAT', 1, 30,'BLOB', transaction_tensor.tobytes(), '|>', + 'AI.MODELRUN', 'financialNet', + 'INPUTS', 'transactionTensor:{}'.format(tensor_number), 'referenceTensor:{}'.format(tensor_number), + 'OUTPUTS', 'classificationTensor:{}'.format(tensor_number), '|>', + 'AI.TENSORGET', 'classificationTensor:{}'.format(tensor_number), 'VALUES', '|>', + 'AI.MODELRUN', 'financialNet', + 'INPUTS', 'transactionTensor:{}'.format(tensor_number), 'referenceTensor:{}'.format(tensor_number), + 'OUTPUTS', 'classificationTensor:{}'.format(tensor_number), '|>', + 'AI.TENSORGET', 'classificationTensor:{}'.format(tensor_number), 
'VALUES', + ) + env.assertEqual(5, len(ret)) + env.assertEqual([b'OK',b'OK'],ret[:2]) + env.assertEqual([b'FLOAT', [1, 2]],ret[2][:2]) + env.assertEqual(b'OK',ret[3]) + env.assertEqual([b'FLOAT', [1, 2]],ret[4][:2]) + for dtype, shape, values in [ret[2], ret[4]]: + # Assert that resulting classification is within [0,1] + env.assertEqual(True, 0 <= float(values[0]) <= 1) + env.assertEqual(True, 0 <= float(values[1]) <= 1) + + # assert that transactionTensor does not exist + ret = con.execute_command("EXISTS transactionTensor:{}".format( + tensor_number)) + env.assertEqual(ret, 0 ) + + # assert that classificationTensor does not exist + ret = con.execute_command("EXISTS classificationTensor:{}".format( + tensor_number)) + env.assertEqual(ret, 0 ) + tensor_number = tensor_number + 1 diff --git a/test/tests_onnx.py b/test/tests_onnx.py index 75c4c7a8e..67aaf9280 100644 --- a/test/tests_onnx.py +++ b/test/tests_onnx.py @@ -346,6 +346,7 @@ def test_onnx_model_rdb_save_load(env): model_serialized_memory = con.execute_command('AI.MODELGET', 'linear', 'BLOB') + ensureSlaveSynced(con, env) ret = con.execute_command('SAVE') env.assertEqual(ret, True)