Merged
53 commits
040043d
Introduce kill switch mechanism for onnxruntime sessions (not ready yet)
alonre24 May 26, 2021
0961e66
Putting logic in onnx backend (not ready yet)
alonre24 May 27, 2021
9d5fbf8
WIP
alonre24 May 27, 2021
22662bd
Refactor background workers + add support to kill switch in onnx (for…
alonre24 May 29, 2021
c3a45e9
Refactor - do not use rax, extend onnxRunSessions array whenever a ne…
alonre24 May 30, 2021
2684852
Refactor backends loading
alonre24 May 30, 2021
cd9baa1
Start testing - not finished
alonre24 May 31, 2021
5c09106
Support bool type tensor
alonre24 May 31, 2021
5d3dd2c
Support tensors of type bool. Add validation that an input value doesn…
alonre24 May 31, 2021
fa14217
Merge branch 'Support_BOOL_type_for_tensors' into ONNX_kill_switch
alonre24 May 31, 2021
04dac08
Support tensor of type bool in ONNX, Add tests for kill switch
alonre24 Jun 1, 2021
1d6b3ed
Add load time config for ONNX_TIMEOUT. Parallel tests seem not to work.
alonre24 Jun 1, 2021
ea3c174
Some fixes
alonre24 Jun 2, 2021
05c2a39
Merge master (resolve conflicts in backends.c)
alonre24 Jun 6, 2021
4bbfbcd
Remove debug print
alonre24 Jun 6, 2021
cd2936c
Merge master with updated changes of supporting tensor of type bool
alonre24 Jun 6, 2021
4aed8ca
Some fixes and documentation complement.
alonre24 Jun 6, 2021
6cd9652
Refactor load time config
alonre24 Jun 6, 2021
42059b8
Remove redundant include
alonre24 Jun 6, 2021
6c906aa
Merge branch 'master' into ONNX_kill_switch
alonre24 Jun 7, 2021
23749c4
PR fixes part 1: refactor config and run queue info files (and all pl…
alonre24 Jun 10, 2021
342afbb
Merge branch 'ONNX_kill_switch' of https://github.com/RedisAI/RedisAI…
alonre24 Jun 10, 2021
697faf9
linter...
alonre24 Jun 10, 2021
ee02cc0
Merge branch 'master' into ONNX_kill_switch
alonre24 Jun 10, 2021
1201cb2
linter...
alonre24 Jun 10, 2021
4360679
Merge branch 'master' into ONNX_kill_switch
alonre24 Jun 10, 2021
21737e6
More PR fixes, add the option to get the global run sessions array fr…
alonre24 Jun 10, 2021
73f2a91
Minor fixes
alonre24 Jun 10, 2021
6babc11
Add the ability to execute model through torch script - WIP
alonre24 Jun 13, 2021
3942d23
More PR fixes, among them:
alonre24 Jun 13, 2021
e9fed4f
Merge branch 'master' into ONNX_kill_switch
alonre24 Jun 13, 2021
349653c
Fix tests for the case that we run on GPU - since CPU queue always cr…
alonre24 Jun 13, 2021
af423a7
Update readies
alonre24 Jun 13, 2021
8231c53
Update readies + minor fixes
alonre24 Jun 14, 2021
fd8c672
PR fixes
alonre24 Jun 14, 2021
78da23e
Return error if onnx is executed in a non async manner (via gears for…
alonre24 Jun 14, 2021
a67da55
Merge branch 'master' into run_model_from_script
alonre24 Jun 14, 2021
3545f7e
Merge branch 'ONNX_kill_switch' into run_model_from_script
alonre24 Jun 14, 2021
ab505ba
basic test passes - running torch model from torch script is enabled.
alonre24 Jun 15, 2021
c4d8b55
Extend tests to include onnx and tf as well.
alonre24 Jun 15, 2021
fedc508
Fix device id - always use -1 when creating tensors for default CPU.
alonre24 Jun 16, 2021
dd0d797
Resolve conflicts after merging master
alonre24 Jun 16, 2021
56f815f
Remove test added for debug
alonre24 Jun 16, 2021
fbee927
Remove debug additions and add comments and documentation.
alonre24 Jun 16, 2021
d57e598
Change device id of default CPU to -1 in RDB loading as well.
alonre24 Jun 17, 2021
525acc4
Fix and test error raising when a redis torch script operation fails.
alonre24 Jun 17, 2021
e1b3745
Some PR fixes.
alonre24 Jun 17, 2021
4869acf
Update device_id to -1 in older rdb versions
alonre24 Jun 20, 2021
f02e0b5
Merge branch 'master' into run_model_from_script
alonre24 Jun 20, 2021
d47d04a
- Move ownership on the output tensor to torch instead of copying it.
alonre24 Jun 21, 2021
9d6b409
Merge branch 'run_model_from_script' of https://github.com/RedisAI/Re…
alonre24 Jun 21, 2021
8a7cac9
Added comment
alonre24 Jun 22, 2021
10988d3
Merge branch 'master' into run_model_from_script
alonre24 Jun 24, 2021
26 changes: 25 additions & 1 deletion docs/commands.md
@@ -606,7 +606,7 @@ redis> AI.TENSORGET result{tag} VALUES
```

### Redis Commands support.
In RedisAI TorchScript now supports simple (non-blocking) Redis commnands via the `redis.execute` API. The following (usless) script gets a key name (`x{1}`), and an `int` value (3). First, the script `SET`s the value in the key. Next, the script `GET`s the value back from the key, and sets it in a tensor which is eventually stored under the key 'y{1}'. Note that the inputs are `str` and `int`. The script sets and gets the value and set it into a tensor.
RedisAI TorchScript now supports simple (non-blocking) Redis commands via the `redis.execute` API. The following (useless) script gets a key name (`x{1}`) and an `int` value (3). First, the script `SET`s the value in the key. Next, the script `GET`s the value back from the key and stores it in a tensor, which is eventually stored under the key 'y{1}'. Note that the inputs are `str` and `int`.

```
def redis_int_to_tensor(redis_value: int):
@@ -624,6 +624,30 @@ redis> AI.TENSORGET y{1} VALUES
1) (integer) 3
```

### RedisAI model execution support.
RedisAI TorchScript also supports executing models that are stored in RedisAI, via the `redisAI.model_execute` API.
The call receives 3 inputs:
1. model name (string)
2. model inputs (List of torch.Tensor)
3. number of model outputs (int)
Return value: the model's output tensors (List of torch.Tensor).
The following script creates two tensors and executes the (TensorFlow) model stored under the key 'tf_mul{1}', with these two tensors as inputs.
```
def test_model_execute(keys: List[str]):
a = torch.tensor([[2.0, 3.0], [2.0, 3.0]])
b = torch.tensor([[2.0, 3.0], [2.0, 3.0]])
return redisAI.model_execute(keys[0], [a, b], 1) # assume keys[0] is the model name stored in RedisAI.
```
```
redis> AI.SCRIPTEXECUTE redis_scripts{1} test_model_execute KEYS 1 {1} LIST_INPUTS 1 tf_mul{1} OUTPUTS 1 y{1}
OK
redis> AI.TENSORGET y{1} VALUES
1) (float) 4
2) (float) 9
3) (float) 4
4) (float) 9
```

!!! warning "Intermediate memory overhead"
The execution of scripts may generate intermediate tensors that are not allocated by the Redis allocator, but by whatever allocator is used in the backends (which may act on main memory or GPU memory, depending on the device), thus not being limited by `maxmemory` configuration settings of Redis.

30 changes: 0 additions & 30 deletions src/backends/backedns_api.h

This file was deleted.

46 changes: 41 additions & 5 deletions src/backends/backends.c
@@ -17,6 +17,7 @@
#include "redismodule.h"
#include "config/config.h"
#include "execution/background_workers.h"
#include "execution/execution_contexts/modelRun_ctx.h"

static bool _ValidateFuncExists(RedisModuleCtx *ctx, void *func_ptr, const char *func_name,
const char *backend_name, const char *path) {
@@ -40,6 +41,7 @@ static bool _ValidateFuncExists(RedisModuleCtx *ctx, void *func_ptr, const char
*/
int RAI_ExportFunc(const char *func_name, void **targetFuncPtr) {

// Retrieve info from RedisAI internals.
if (strcmp("GetThreadId", func_name) == 0) {
*targetFuncPtr = BGWorker_GetThreadId;
} else if (strcmp("GetNumThreadsPerQueue", func_name) == 0) {
@@ -48,6 +50,40 @@ int RAI_ExportFunc(const char *func_name, void **targetFuncPtr) {
*targetFuncPtr = Config_GetModelExecutionTimeout;
} else if (strcmp("GetThreadsCount", func_name) == 0) {
*targetFuncPtr = BGWorker_GetThreadsCount;

// Export RedisAI low level API functions.
} else if (strcmp("RedisAI_InitError", func_name) == 0) {
*targetFuncPtr = RAI_InitError;
} else if (strcmp("RedisAI_FreeError", func_name) == 0) {
*targetFuncPtr = RAI_FreeError;
} else if (strcmp("RedisAI_GetError", func_name) == 0) {
*targetFuncPtr = RAI_GetError;
} else if (strcmp("RedisAI_TensorCreateFromDLTensor", func_name) == 0) {
*targetFuncPtr = RAI_TensorCreateFromDLTensor;
} else if (strcmp("RedisAI_TensorGetDLTensor", func_name) == 0) {
*targetFuncPtr = RAI_TensorGetDLTensor;
} else if (strcmp("RedisAI_TensorGetShallowCopy", func_name) == 0) {
*targetFuncPtr = RAI_TensorGetShallowCopy;
} else if (strcmp("RedisAI_TensorFree", func_name) == 0) {
*targetFuncPtr = RAI_TensorFree;
} else if (strcmp("RedisAI_GetModelFromKeyspace", func_name) == 0) {
*targetFuncPtr = RAI_GetModelFromKeyspace;
} else if (strcmp("RedisAI_ModelRunCtxCreate", func_name) == 0) {
*targetFuncPtr = RAI_ModelRunCtxCreate;
} else if (strcmp("RedisAI_ModelRunCtxAddInput", func_name) == 0) {
*targetFuncPtr = RAI_ModelRunCtxAddInput;
} else if (strcmp("RedisAI_ModelRunCtxNumOutputs", func_name) == 0) {
*targetFuncPtr = RAI_ModelRunCtxNumOutputs;
} else if (strcmp("RedisAI_ModelRunCtxAddOutput", func_name) == 0) {
*targetFuncPtr = RAI_ModelRunCtxAddOutput;
} else if (strcmp("RedisAI_ModelRunCtxOutputTensor", func_name) == 0) {
*targetFuncPtr = RAI_ModelRunCtxOutputTensor;
} else if (strcmp("RedisAI_ModelRunCtxFree", func_name) == 0) {
*targetFuncPtr = RAI_ModelRunCtxFree;
} else if (strcmp("RedisAI_ModelRun", func_name) == 0) {
*targetFuncPtr = RAI_ModelRun;

// Export RedisModule API functions.
} else {
return RedisModule_GetApi(func_name, targetFuncPtr);
}
@@ -244,15 +280,15 @@ int RAI_LoadBackend_Torch(RedisModuleCtx *ctx, const char *path) {

RAI_LoadedBackend backend = {0}; // Initialize all the callbacks to NULL.

int (*init_backend)(int (*)(const char *, void *));
init_backend = (int (*)(int (*)(const char *, void *)))(unsigned long)dlsym(
int (*init_backend)(int (*)(const char *, void **));
init_backend = (int (*)(int (*)(const char *, void **)))(unsigned long)dlsym(
handle, "RAI_InitBackendTorch");
if (!_ValidateFuncExists(ctx, init_backend, "RAI_InitBackendTorch", "TORCH", path)) {
goto error;
}
// Here we use the input callback to export functions from Redis to the backend,
// by setting the backend's function pointers to the corresponding functions in Redis.
init_backend(RedisModule_GetApi);
// Here we use the input callback to export functions from Redis and Redis AI to the backend,
// by setting the backend's function pointers to the corresponding functions in Redis/RedisAI.
init_backend(RAI_ExportFunc);

backend.model_create =
(RAI_Model * (*)(RAI_Backend, const char *, RAI_ModelOpts, const char *, size_t,
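For illustration, here is a minimal sketch of the backend side of this handshake — not the actual torch backend source — assuming the backend resolves its pointers through the callback and that `RAI_ExportFunc` follows the `REDISMODULE_OK`/`REDISMODULE_ERR` convention of `RedisModule_GetApi`. The pointers being filled are the ones declared in the new `backends_api.h` (next file):

```
// A minimal sketch, not the actual backend source: resolving the exported
// pointers inside the backend. get_api_fn is RAI_ExportFunc, handed over
// by RAI_LoadBackend_Torch above.
#include "backends/backends_api.h" // BACKENDS_API_EXTERN not defined: pointers are defined here

#define RESOLVE(name)                                                      \
    if (get_api_fn(#name, (void **)&name) != REDISMODULE_OK) {             \
        return REDISMODULE_ERR;                                            \
    }

int RAI_InitBackendTorch(int (*get_api_fn)(const char *, void **)) {
    // Known names hit RAI_ExportFunc's if/else chain; unknown names fall
    // through to RedisModule_GetApi.
    RESOLVE(RedisAI_InitError);
    RESOLVE(RedisAI_TensorFree);
    RESOLVE(RedisAI_GetModelFromKeyspace);
    RESOLVE(RedisAI_ModelRun);
    // ... resolve the remaining RedisAI_* pointers the same way.
    return REDISMODULE_OK;
}
```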
70 changes: 70 additions & 0 deletions src/backends/backends_api.h
@@ -0,0 +1,70 @@
#pragma once

#include <stdint.h>
#include "redismodule.h"

#ifdef BACKENDS_API_EXTERN
#define BACKENDS_API extern
#endif

#ifndef BACKENDS_API
#define BACKENDS_API
#endif

typedef struct RAI_Tensor RAI_Tensor;
typedef struct RAI_Model RAI_Model;
typedef struct RAI_ModelRunCtx RAI_ModelRunCtx;
typedef struct RAI_Error RAI_Error;

/**
* @return The internal id of the current RedisAI working thread.
* The id range is {0, ..., <threads_count>-1}. If this is called from a
* non-RedisAI BG thread, return -1.
*/
BACKENDS_API long (*RedisAI_GetThreadId)(void);

/**
* @return The number of working threads in RedisAI. This number should be
* equal to the number of threads per queue (load time config) * number of devices
* registered in RedisAI (a new device is registered if a model is set to run on
* this device in the AI.MODELSTORE command).
*/
BACKENDS_API uintptr_t (*RedisAI_GetThreadsCount)(void);

/**
* @return The number of working threads per device queue (load time config).
*/
BACKENDS_API long long (*RedisAI_GetNumThreadsPerQueue)(void);

/**
* @return The maximal number of milliseconds that a model run session should run
* before it is terminated forcefully (load time config).
* Currently supported only for the onnxruntime backend.
*/
BACKENDS_API long long (*RedisAI_GetModelExecutionTimeout)(void);

/**
* The following functions are part of RedisAI low level API (the full low level
* API is defined in redisai.h). For every function below named "RedisAI_X", its
* implementation can be found under the name "RAI_X" in RedisAI header files.
*/

BACKENDS_API int (*RedisAI_InitError)(RAI_Error **err);
BACKENDS_API void (*RedisAI_FreeError)(RAI_Error *err);
BACKENDS_API const char *(*RedisAI_GetError)(RAI_Error *err);

BACKENDS_API RAI_Tensor *(*RedisAI_TensorCreateFromDLTensor)(DLManagedTensor *dl_tensor);
BACKENDS_API DLTensor *(*RedisAI_TensorGetDLTensor)(RAI_Tensor *tensor);
BACKENDS_API RAI_Tensor *(*RedisAI_TensorGetShallowCopy)(RAI_Tensor *t);
BACKENDS_API void (*RedisAI_TensorFree)(RAI_Tensor *tensor);

BACKENDS_API RAI_ModelRunCtx *(*RedisAI_ModelRunCtxCreate)(RAI_Model *model);
BACKENDS_API int (*RedisAI_GetModelFromKeyspace)(RedisModuleCtx *ctx, RedisModuleString *keyName,
RAI_Model **model, int mode, RAI_Error *err);
BACKENDS_API int (*RedisAI_ModelRunCtxAddInput)(RAI_ModelRunCtx *mctx, const char *inputName,
RAI_Tensor *inputTensor);
BACKENDS_API int (*RedisAI_ModelRunCtxAddOutput)(RAI_ModelRunCtx *mctx, const char *outputName);
BACKENDS_API size_t (*RedisAI_ModelRunCtxNumOutputs)(RAI_ModelRunCtx *mctx);
BACKENDS_API RAI_Tensor *(*RedisAI_ModelRunCtxOutputTensor)(RAI_ModelRunCtx *mctx, size_t index);
BACKENDS_API void (*RedisAI_ModelRunCtxFree)(RAI_ModelRunCtx *mctx);
BACKENDS_API int (*RedisAI_ModelRun)(RAI_ModelRunCtx **mctx, long long n, RAI_Error *err);
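Taken together, these pointers let a backend drive a complete model run. Below is a minimal sketch of that flow; the input/output names ('a', 'b', 'mul'), the `REDISMODULE_READ` mode flag, and the `REDISMODULE_OK` return convention are assumptions for illustration, not the call path RedisAI itself uses:

```
// A minimal sketch of a model run driven through the pointers above.
static RAI_Tensor *run_model_sketch(RedisModuleCtx *ctx, RedisModuleString *model_key,
                                    RAI_Tensor *a, RAI_Tensor *b, RAI_Error *err) {
    RAI_Model *model = NULL;
    if (RedisAI_GetModelFromKeyspace(ctx, model_key, &model, REDISMODULE_READ, err) !=
        REDISMODULE_OK) {
        return NULL; // err carries the reason
    }
    RAI_ModelRunCtx *mctx = RedisAI_ModelRunCtxCreate(model);
    RedisAI_ModelRunCtxAddInput(mctx, "a", a); // names must match the model's inputs
    RedisAI_ModelRunCtxAddInput(mctx, "b", b);
    RedisAI_ModelRunCtxAddOutput(mctx, "mul");
    RAI_Tensor *out = NULL;
    if (RedisAI_ModelRun(&mctx, 1, err) == REDISMODULE_OK &&
        RedisAI_ModelRunCtxNumOutputs(mctx) == 1) {
        // Shallow-copy the output so it survives freeing the run context.
        out = RedisAI_TensorGetShallowCopy(RedisAI_ModelRunCtxOutputTensor(mctx, 0));
    }
    RedisAI_ModelRunCtxFree(mctx);
    return out;
}
```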
34 changes: 30 additions & 4 deletions src/backends/libtorch_c/torch_c.cpp
@@ -1,5 +1,7 @@
#define BACKENDS_API_EXTERN
#include "torch_c.h"
#include "torch/torch.h"
#include "backends/backends_api.h"
#include "redismodule.h"
#include "ATen/Functions.h"
#include "torch/csrc/jit/serialization/import.h"
@@ -157,14 +159,34 @@ at::ScalarType toScalarType(const DLDataType &dtype) {
torch::Tensor fromDLPack(const DLTensor *src) {
at::DeviceType device_type = getATenDeviceType(src->device.device_type);
at::ScalarType stype = toScalarType(src->dtype);
// torch::Device device(device_type, src->ctx.device_id);
torch::Device device(device_type, -1);
// torch::DeviceType device = device_type;
torch::Device device(device_type, src->device.device_id);
return torch::from_blob(src->data, at::IntArrayRef(src->shape, src->ndim),
at::IntArrayRef(src->strides, src->ndim),
torch::device(device).dtype(stype));
}

extern "C" void torchTensorFromRAITensor(RAI_Tensor *src, void *torch_tensor) {
DLTensor *dl_tensor = RedisAI_TensorGetDLTensor(src);
at::DeviceType device_type = getATenDeviceType(dl_tensor->device.device_type);
at::ScalarType stype = toScalarType(dl_tensor->dtype);
torch::Device device(device_type, dl_tensor->device.device_id);

// Capture the RAI_Tensor to be able to release it once torch is done with
// the tensor that we are about to create (to avoid copying of the blob).
auto free_tensor = [src](void *data) {
RedisAI_TensorFree(src);
};

// Create torch tensor with the tensor's blob, and send a deleter callback
// for torch to use to release the RAI_Tensor when it finishes.
*static_cast<torch::Tensor *>(torch_tensor) =
torch::Tensor(torch::from_blob(dl_tensor->data,
at::IntArrayRef(dl_tensor->shape, dl_tensor->ndim),
at::IntArrayRef(dl_tensor->strides, dl_tensor->ndim),
free_tensor,
torch::device(device).dtype(stype)));
}

struct ATenDLMTensor {
torch::Tensor handle;
DLManagedTensor tensor;
@@ -182,7 +204,7 @@ DLManagedTensor *toManagedDLPack(const torch::Tensor &src_) {
atDLMTensor->tensor.manager_ctx = atDLMTensor;
atDLMTensor->tensor.deleter = &deleter;
atDLMTensor->tensor.dl_tensor.data = src.data_ptr();
int64_t device_id = 0;
int64_t device_id = -1; // This should be used for the default 'CPU' device.
if (src.is_cuda()) {
device_id = src.get_device();
}
@@ -195,6 +217,10 @@ DLManagedTensor *toManagedDLPack(const torch::Tensor &src_) {
return &(atDLMTensor->tensor);
}

extern "C" DLManagedTensor *torchTensorPtrToManagedDLPack(const void *src) {
return toManagedDLPack(*static_cast<const torch::Tensor *>(src));
}

struct ModuleContext {
std::shared_ptr<torch::jit::script::Module> module;
std::shared_ptr<torch::jit::script::CompilationUnit> cu;
19 changes: 19 additions & 0 deletions src/backends/libtorch_c/torch_c.h
@@ -186,6 +186,25 @@ size_t torchScript_FunctionArgumentCount(void *scriptCtx, size_t fn_index);
TorchScriptFunctionArgumentType torchScript_FunctionArgumentype(void *scriptCtx, size_t fn_index,
size_t arg_index);

/**
* @brief Creates a new DLManagedTensor representation of a torch tensor, taking
* ownership of the tensor and keeping it in the manager_ctx field. The tensor
* data will be freed by calling the deleter function on the manager_ctx field.
* @param src - A pointer to a torch tensor.
* @returns The newly created DLManagedTensor.
*/
DLManagedTensor *torchTensorPtrToManagedDLPack(const void *src);

/**
* @brief Creates a new torch tensor from a RedisAI tensor, using its data,
* and stores it in the torch_tensor pointer. Note that ownership of the tensor
* is transferred to the torch tensor; it will be released via the created
* deleter function, which calls RAI_TensorFree.
* @param src - the input RAI tensor.
* @param torch_tensor - placeholder for the newly created torch tensor.
void torchTensorFromRAITensor(RAI_Tensor *src, void *torch_tensor);

#ifdef __cplusplus
}
#endif
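The two helpers above form the zero-copy ownership round-trip behind `redisAI.model_execute`: a RAI_Tensor is wrapped as a torch tensor without copying its blob, and a torch result is handed back as a DLManagedTensor. A minimal C++ sketch of how a caller might pair them — the wrapper function, the include paths, and the `t * 2` computation are illustrative assumptions, not the actual RedisAI call site:

```
#include "torch/torch.h"
#include "torch_c.h"               // torchTensorFromRAITensor, torchTensorPtrToManagedDLPack
#include "backends/backends_api.h" // RedisAI_TensorCreateFromDLTensor (assumed include path)

// A minimal sketch of the ownership round-trip (illustrative only).
RAI_Tensor *round_trip_sketch(RAI_Tensor *input) {
    torch::Tensor t;
    // Wrap the RAI_Tensor's blob without copying; the deleter installed by
    // torchTensorFromRAITensor calls RedisAI_TensorFree on `input` when
    // torch is done with the data.
    torchTensorFromRAITensor(input, &t);

    torch::Tensor result = t * 2; // some torch computation (illustrative)

    // Hand the result back: ownership moves into the DLManagedTensor's
    // manager_ctx and is released later through its deleter.
    DLManagedTensor *dlm = torchTensorPtrToManagedDLPack(&result);
    return RedisAI_TensorCreateFromDLTensor(dlm);
}
```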