diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index cf135fdc3..945141c9a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,10 +7,13 @@ ADD_LIBRARY(redisai_obj OBJECT util/queue.c util/string_utils.c redisai.c + command_parser.c run_info.c background_workers.c config.c - dag.c + DAG/dag.c + DAG/dag_parser.c + modelRun_ctx.c backends.c backends/util.c model.c diff --git a/src/DAG/dag.c b/src/DAG/dag.c new file mode 100644 index 000000000..ad7b8380f --- /dev/null +++ b/src/DAG/dag.c @@ -0,0 +1,893 @@ +/** + * dag.c + * + * Contains the helper methods for both parsing, running the command in the + * background, and replying DAG structured commands. + * + * The way we allow DAG operations to run on different devices in parallel + * (when possible) is the following: instead of running the whole DAG in one + * swoop, the DAG run info is created on one + * queue/device and shallow copied (appropriately) across other queues/devices + * as indicated by the DAG specification. A DAG mutex is shared across all + * copies. + * The DAG run info is placed on the queue for each device and evicted for + * execution (in background_workers). Execution happens one DAG op at a time: + * once the individual op has executed, it is marked as such and the DAG run + * info is placed back on the queue. The current un-executed op is checked for + * its inputs. If all inputs are found in the tensor context, then the DAG op + * can be executed. If not, the execution quits and control is given back to + * the worker. If there are other items in the queue the op is placed after the + * next item. When all ops for a device have been executed, the DAG is not + * placed back on the queue. When all ops in a DAG have been executed or an + * error occurs, the client is unblocked. + * + * See background_workers.c for the queue logic, everything else DAG is here. + */ + +#include "dag.h" + +#include +#include +#include +#include +#include + +#include "model.h" +#include "modelRun_ctx.h" +#include "redisai.h" +#include "background_workers.h" +#include "rmutil/alloc.h" +#include "rmutil/args.h" +#include "run_info.h" +#include "stats.h" +#include "tensor.h" +#include "util/arr_rm_alloc.h" +#include "util/dict.h" +#include "util/queue.h" +#include "dag_parser.h" +#include "util/string_utils.h" + +/** + * Execution of a TENSORSET DAG step. + * If an error occurs, it is recorded in the DagOp struct. + * + * @param rinfo context in which RedisAI blocking commands operate. + * @param currentOp TENSORSET DagOp to be executed + * @return + */ +void RedisAI_DagRunSession_TensorSet_Step(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) { + RAI_Tensor *t = NULL; + const int parse_result = + RAI_parseTensorSetArgs(NULL, currentOp->argv, currentOp->argc, &t, 0, currentOp->err); + if (parse_result > 0) { + RedisModuleString *key_string = currentOp->outkeys[0]; + RAI_ContextWriteLock(rinfo); + AI_dictReplace(rinfo->dagTensorsContext, (void *)key_string, t); + RAI_ContextUnlock(rinfo); + currentOp->result = REDISMODULE_OK; + } else { + currentOp->result = REDISMODULE_ERR; + } +} + +/** + * Execution of a TENSORGET DAG step. + * If an error occurs, it is recorded in the DagOp struct. + * + * @param rinfo context in which RedisAI blocking commands operate. 
+ * @param currentOp TENSORGET DagOp to be executed + * @return + */ +void RedisAI_DagRunSession_TensorGet_Step(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) { + RedisModuleString *key_string = currentOp->inkeys[0]; + RAI_Tensor *t = NULL; + RAI_ContextReadLock(rinfo); + currentOp->result = RAI_getTensorFromLocalContext(NULL, rinfo->dagTensorsContext, key_string, + &t, currentOp->err); + RAI_ContextUnlock(rinfo); + if (currentOp->result == REDISMODULE_OK) { + RAI_Tensor *outTensor = NULL; + // TODO: check tensor copy return value + RAI_TensorDeepCopy(t, &outTensor); + currentOp->outTensors = array_append(currentOp->outTensors, outTensor); + } +} + +static void Dag_LoadInputsToModelRunCtx(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) { + uint n_inkeys = array_len(currentOp->inkeys); + uint n_outkeys = array_len(currentOp->outkeys); + + RAI_ContextReadLock(rinfo); + + RAI_Tensor *inputTensors[n_inkeys]; + for (uint i = 0; i < n_inkeys; i++) { + RAI_Tensor *inputTensor; + const int get_result = RAI_getTensorFromLocalContext( + NULL, rinfo->dagTensorsContext, currentOp->inkeys[i], &inputTensor, currentOp->err); + if (get_result == REDISMODULE_ERR) { + // We check for this outside the function + // this check cannot be covered by tests + currentOp->result = REDISMODULE_ERR; + RAI_ContextUnlock(rinfo); + return; + } + inputTensors[i] = inputTensor; + } + + RAI_ContextUnlock(rinfo); + + // Input and output names should match to the one specified when the model was set, only in TF. + // For other backends, model->inputs and model->outputs is null. + for (uint i = 0; i < n_inkeys; i++) { + const char *opname = NULL; + if (currentOp->mctx->model->inputs) { + opname = currentOp->mctx->model->inputs[i]; + } + RAI_ModelRunCtxAddInput(currentOp->mctx, opname, inputTensors[i]); + } + + for (uint i = 0; i < n_outkeys; i++) { + const char *opname = NULL; + if (currentOp->mctx->model->outputs) { + opname = currentOp->mctx->model->outputs[i]; + } + RAI_ModelRunCtxAddOutput(currentOp->mctx, opname); + } +} + +static void Dag_StoreOutputsFromModelRunCtx(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) { + + RAI_ContextReadLock(rinfo); + const size_t noutputs = RAI_ModelRunCtxNumOutputs(currentOp->mctx); + for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) { + RAI_Tensor *tensor = RAI_ModelRunCtxOutputTensor(currentOp->mctx, outputNumber); + tensor = tensor ? RAI_TensorGetShallowCopy(tensor) : NULL; + AI_dictReplace(rinfo->dagTensorsContext, (void *)currentOp->outkeys[outputNumber], tensor); + } + RAI_ContextUnlock(rinfo); +} + +/** + * Execution of a MODELRUN DAG step. + * If an error occurs, it is recorded in the DagOp struct. + * + * @param rinfo context in which RedisAI blocking commands operate. + * @param currentOp MODELRUN DagOp to be executed + * @return + */ +void RedisAI_DagRunSession_ModelRun_Step(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) { + + // Get the needed tensors from the DAG local context. If the DAG originated + // from a model run command, we are ready to execute. 
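+    // For a single-op DAG (a standalone AI.MODELRUN call), the input tensors were
+    // already attached to the ModelRunCtx from the keyspace at parse time, so there
+    // is no DAG-local tensor context to read from here.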
+ if (rinfo->single_op_dag == 0) + Dag_LoadInputsToModelRunCtx(rinfo, currentOp); + + RAI_ModelRunCtx *mctxs[1]; + mctxs[0] = currentOp->mctx; + const long long start = ustime(); + int result = RAI_ModelRun(mctxs, 1, currentOp->err); + const long long end = ustime(); + + currentOp->duration_us = end - start; + currentOp->result = result; + if (result == REDISMODULE_ERR) + return; + if (rinfo->single_op_dag == 0) + Dag_StoreOutputsFromModelRunCtx(rinfo, currentOp); +} + +/** + * Execution of a batched (MODELRUN) DAG step. + * If an error occurs, it is recorded in all DagOp structs. + * + * @param batched_rinfo array of contexts in which RedisAI blocking commands operate. + * @param currentOps MODELRUN DagOps to be executed + * @return + */ +void RedisAI_BatchedDagRunSession_ModelRun_Step(RedisAI_RunInfo **batched_rinfo, + RAI_DagOp **currentOps) { + + int n_rinfo = array_len(batched_rinfo); + RAI_ModelRunCtx *mctxs[n_rinfo]; + + for (int i = 0; i < n_rinfo; i++) { + RedisAI_RunInfo *rinfo = batched_rinfo[i]; + RAI_DagOp *currentOp = currentOps[i]; + + // Get the needed tensors from the DAG local context. If the DAG originated + // from a model run command, we are ready to execute. + if (rinfo->single_op_dag == 0) + Dag_LoadInputsToModelRunCtx(rinfo, currentOp); + mctxs[i] = currentOp->mctx; + } + + RAI_Error err = {0}; + const long long start = ustime(); + int result = RAI_ModelRun(mctxs, n_rinfo, &err); + const long long end = ustime(); + + long long duration = end - start; + + for (int i = 0; i < n_rinfo; i++) { + RedisAI_RunInfo *rinfo = batched_rinfo[i]; + RAI_DagOp *currentOp = currentOps[i]; + + if (result == REDISMODULE_ERR) { + currentOp->result = result; + RAI_SetError(currentOp->err, err.code, err.detail); + continue; + } + + currentOp->duration_us = duration; + currentOp->result = result; + if (rinfo->single_op_dag == 0) + Dag_StoreOutputsFromModelRunCtx(rinfo, currentOp); + } +} + +/** + * Execution of a SCRIPTRUN DAG step. + * If an error occurs, it is recorded in the DagOp struct. + * + * @param rinfo context in which RedisAI blocking commands operate. 
+ * @param currentOp SCRIPTRUN DagOp to be executed + * @return + */ +void RedisAI_DagRunSession_ScriptRun_Step(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) { + uint n_inkeys = array_len(currentOp->inkeys); + uint n_outkeys = array_len(currentOp->outkeys); + + RAI_ContextReadLock(rinfo); + + RAI_Tensor *inputTensors[n_inkeys]; + for (uint i = 0; i < n_inkeys; i++) { + RAI_Tensor *inputTensor; + const int get_result = RAI_getTensorFromLocalContext( + NULL, rinfo->dagTensorsContext, currentOp->inkeys[i], &inputTensor, currentOp->err); + if (get_result == REDISMODULE_ERR) { + // We check for this outside the function + // this check cannot be covered by tests + currentOp->result = REDISMODULE_ERR; + RAI_ContextUnlock(rinfo); + return; + } + inputTensors[i] = inputTensor; + } + + RAI_ContextUnlock(rinfo); + + for (uint i = 0; i < n_inkeys; i++) { + RAI_ScriptRunCtxAddInput(currentOp->sctx, inputTensors[i], currentOp->err); + } + + for (uint i = 0; i < n_outkeys; i++) { + RAI_ScriptRunCtxAddOutput(currentOp->sctx); + } + + const long long start = ustime(); + int result = RAI_ScriptRun(currentOp->sctx, currentOp->err); + const long long end = ustime(); + + RAI_ContextWriteLock(rinfo); + + const size_t noutputs = RAI_ScriptRunCtxNumOutputs(currentOp->sctx); + for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) { + RAI_Tensor *tensor = RAI_ScriptRunCtxOutputTensor(currentOp->sctx, outputNumber); + RedisModuleString *key_string = currentOp->outkeys[outputNumber]; + tensor = tensor ? RAI_TensorGetShallowCopy(tensor) : NULL; + AI_dictReplace(rinfo->dagTensorsContext, (void *)key_string, tensor); + } + + currentOp->result = result; + currentOp->duration_us = end - start; + + RAI_ContextUnlock(rinfo); + + return; +} + +size_t RAI_DagOpBatchSize(RAI_DagOp *op, RedisAI_RunInfo *rinfo) { + if (op->mctx == NULL) { + return -1; + } + + size_t ninputs = array_len(op->inkeys); + int batchsize = 0; + + if (!rinfo->single_device_dag) { + RAI_ContextReadLock(rinfo); + } + for (size_t i = 0; i < ninputs; i++) { + RAI_Tensor *input; + if (rinfo->single_op_dag) { + input = op->mctx->inputs[i].tensor; + } else { + RAI_getTensorFromLocalContext(NULL, rinfo->dagTensorsContext, op->inkeys[i], &input, + op->err); + } + // We are expecting input != NULL, because we only reach this function if all inputs + // are available in context for the current dagOp. We could be more defensive + // eventually. 
+ assert(input != NULL); + + if (i == 0) { + batchsize = RAI_TensorDim(input, 0); + continue; + } + if (batchsize != RAI_TensorDim(input, 0)) { + batchsize = 0; + break; + } + } + if (!rinfo->single_device_dag) { + RAI_ContextUnlock(rinfo); + } + return batchsize; +} + +int RAI_DagOpBatchable(RAI_DagOp *op1, RedisAI_RunInfo *rinfo1, RAI_DagOp *op2, + RedisAI_RunInfo *rinfo2) { + + if (op1->mctx == NULL || op2->mctx == NULL) { + return 0; + } + if (op1->mctx->model != op2->mctx->model) { + return 0; + } + const int ninputs1 = array_len(op1->inkeys); + const int ninputs2 = array_len(op2->inkeys); + + if (ninputs1 != ninputs2) { + return 0; + } + if (!rinfo1->single_device_dag) { + RAI_ContextReadLock(rinfo1); + } + if (!rinfo2->single_device_dag) { + RAI_ContextReadLock(rinfo2); + } + for (int i = 0; i < ninputs1; i++) { + RAI_Tensor *input1; + if (rinfo1->single_op_dag == 1) { + input1 = op1->mctx->inputs[i].tensor; + } else { + RAI_getTensorFromLocalContext(NULL, rinfo1->dagTensorsContext, op1->inkeys[i], &input1, + op1->err); + } + RAI_Tensor *input2; + if (rinfo2->single_op_dag == 1) { + input2 = op2->mctx->inputs[i].tensor; + } else { + RAI_getTensorFromLocalContext(NULL, rinfo2->dagTensorsContext, op2->inkeys[i], &input2, + op2->err); + } + if (input1 == NULL || input2 == NULL) { + return 0; + } + + int ndims1 = RAI_TensorNumDims(input1); + int ndims2 = RAI_TensorNumDims(input2); + + if (ndims1 != ndims2) { + return 0; + } + + if (ndims1 == 0) { + continue; + } + + for (int j = 1; j < ndims1; j++) { + long long dim1 = RAI_TensorDim(input1, j); + long long dim2 = RAI_TensorDim(input2, j); + if (dim1 != dim2) { + return 0; + } + } + } + if (!rinfo1->single_device_dag) { + RAI_ContextUnlock(rinfo1); + } + if (!rinfo2->single_device_dag) { + RAI_ContextUnlock(rinfo2); + } + return 1; +} + +int RedisAI_DagDeviceComplete(RedisAI_RunInfo *rinfo) { + return rinfo->dagDeviceCompleteOpCount == rinfo->dagDeviceOpCount; +} + +int RedisAI_DagComplete(RedisAI_RunInfo *rinfo) { + int completeOpCount = __atomic_load_n(rinfo->dagCompleteOpCount, __ATOMIC_RELAXED); + + return completeOpCount == rinfo->dagOpCount; +} + +RAI_DagOp *RedisAI_DagCurrentOp(RedisAI_RunInfo *rinfo) { + if (rinfo->dagDeviceCompleteOpCount == rinfo->dagDeviceOpCount) { + return NULL; + } + + return rinfo->dagDeviceOps[rinfo->dagDeviceCompleteOpCount]; +} + +void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, int *currentOpReady, + int *currentOpBatchable) { + RAI_DagOp *currentOp_ = RedisAI_DagCurrentOp(rinfo); + + *currentOpReady = 0; + *currentOpBatchable = 0; + + if (currentOp_ == NULL) { + return; + } + + if (currentOp_->mctx && currentOp_->mctx->model->opts.batchsize > 0) { + *currentOpBatchable = 1; + } + *currentOpReady = 1; + // If this is a single op dag, the op is definitely ready. 
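+    // Otherwise, the op is ready only once every one of its input keys is already
+    // present in the DAG-local tensor context (i.e. it was LOADed or produced by a
+    // previously executed op).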
+ if (rinfo->single_op_dag == 1) + return; + + uint n_inkeys = array_len(currentOp_->inkeys); + RAI_ContextReadLock(rinfo); + + for (int i = 0; i < n_inkeys; i++) { + if (AI_dictFind(rinfo->dagTensorsContext, currentOp_->inkeys[i]) == NULL) { + RAI_ContextUnlock(rinfo); + *currentOpReady = 0; + return; + } + } + RAI_ContextUnlock(rinfo); +} + +void RedisAI_DagOpBatchInfo(RedisAI_RunInfo *rinfo, RAI_DagOp *op, size_t *batchsize, + size_t *minbatchsize, size_t *minbatchtimeout, size_t *inbatchsize) { + *batchsize = 0; + *minbatchsize = 0; + *minbatchtimeout = 0; + *inbatchsize = 0; + if (!op->mctx) + return; + + *batchsize = op->mctx->model->opts.batchsize; + *minbatchsize = op->mctx->model->opts.minbatchsize; + *minbatchtimeout = op->mctx->model->opts.minbatchtimeout; + *inbatchsize = RAI_DagOpBatchSize(op, rinfo); +} + +void RedisAI_DagOpBatchingMatch(RedisAI_RunInfo *rinfo1, RAI_DagOp *op1, RedisAI_RunInfo *rinfo2, + RAI_DagOp *op2, int *batched, size_t *inbatchsize) { + *batched = 0; + *inbatchsize = 0; + + if (op2->mctx) { + int match = RAI_DagOpBatchable(op1, rinfo1, op2, rinfo2); + + if (match) { + *batched = 1; + *inbatchsize = RAI_DagOpBatchSize(op2, rinfo2); + } + } +} + +void RedisAI_DagRunSessionStep(RedisAI_RunInfo *rinfo, const char *devicestr) { + RAI_DagOp *currentOp = RedisAI_DagCurrentOp(rinfo); + + switch (currentOp->commandType) { + case REDISAI_DAG_CMD_TENSORSET: { + RedisAI_DagRunSession_TensorSet_Step(rinfo, currentOp); + break; + } + case REDISAI_DAG_CMD_TENSORGET: { + RedisAI_DagRunSession_TensorGet_Step(rinfo, currentOp); + break; + } + case REDISAI_DAG_CMD_MODELRUN: { + RedisAI_DagRunSession_ModelRun_Step(rinfo, currentOp); + break; + } + case REDISAI_DAG_CMD_SCRIPTRUN: { + RedisAI_DagRunSession_ScriptRun_Step(rinfo, currentOp); + break; + } + default: { + /* unsupported DAG's command */ + RAI_SetError(currentOp->err, RAI_EDAGRUN, "ERR unsupported command within DAG"); + currentOp->result = REDISMODULE_ERR; + break; + } + } + + if (currentOp->result != REDISMODULE_OK) { + __atomic_store_n(rinfo->dagError, 1, __ATOMIC_RELAXED); + } +} + +void RedisAI_BatchedDagRunSessionStep(RedisAI_RunInfo **batched_rinfo, const char *devicestr) { + // Assumption: ops are guaranteed to be all MODELRUN + + int n_ops = array_len(batched_rinfo); + + assert(n_ops > 1); + + RAI_DagOp *currentOps[n_ops]; + + for (int i = 0; i < n_ops; i++) { + RedisAI_RunInfo *rinfo = batched_rinfo[i]; + + RAI_DagOp *currentOp = RedisAI_DagCurrentOp(rinfo); + + currentOps[i] = currentOp; + } + + RedisAI_BatchedDagRunSession_ModelRun_Step(batched_rinfo, currentOps); + + for (int i = 0; i < n_ops; i++) { + RedisAI_RunInfo *rinfo = batched_rinfo[i]; + RAI_DagOp *currentOp = currentOps[i]; + + if (currentOp->result != REDISMODULE_OK) { + __atomic_store_n(rinfo->dagError, 1, __ATOMIC_RELAXED); + } + } + return; +} + +static int _StoreTensorInKeySpace(RedisModuleCtx *ctx, RAI_Tensor *tensor, + RedisModuleString *persist_key_name, bool mangled_name) { + + int ret = REDISMODULE_ERR; + RedisModuleKey *key; + size_t persist_key_len; + const char *persist_key_str = RedisModule_StringPtrLen(persist_key_name, &persist_key_len); + + RedisModuleString *demangled_key_name; + if (mangled_name) { + demangled_key_name = RedisModule_CreateString(NULL, persist_key_str, persist_key_len - 4); + } else { + demangled_key_name = RedisModule_CreateString(NULL, persist_key_str, persist_key_len); + } + + const int status = + RAI_OpenKey_Tensor(ctx, demangled_key_name, &key, REDISMODULE_READ | REDISMODULE_WRITE); + if (status == 
REDISMODULE_ERR) { + RedisModule_ReplyWithError(ctx, "ERR could not save tensor"); + goto clean_up; + } else { + if (RedisModule_ModuleTypeSetValue(key, RedisAI_TensorType, + RAI_TensorGetShallowCopy(tensor)) != REDISMODULE_OK) { + RedisModule_ReplyWithError(ctx, "ERR could not save tensor"); + goto clean_up; + } + } + ret = REDISMODULE_OK; + +clean_up: + RedisModule_CloseKey(key); + RedisAI_ReplicateTensorSet(ctx, demangled_key_name, tensor); + RedisModule_FreeString(NULL, demangled_key_name); + return ret; +} + +static void PersistTensors(RedisModuleCtx *ctx, RedisAI_RunInfo *rinfo) { + AI_dictIterator *persist_iter = AI_dictGetSafeIterator(rinfo->dagTensorsPersistedContext); + AI_dictEntry *persist_entry = AI_dictNext(persist_iter); + while (persist_entry) { + RedisModuleString *persist_key_name = AI_dictGetKey(persist_entry); + + AI_dictEntry *tensor_entry = AI_dictFind(rinfo->dagTensorsContext, persist_key_name); + + if (tensor_entry) { + RAI_Tensor *tensor = AI_dictGetVal(tensor_entry); + + if (tensor == NULL) { + persist_entry = AI_dictNext(persist_iter); + continue; + } + if (_StoreTensorInKeySpace(ctx, tensor, persist_key_name, true) == REDISMODULE_ERR) + rinfo->dagReplyLength++; + + } else { + RedisModule_ReplyWithError(ctx, + "ERR specified persistent key that was not used in DAG"); + rinfo->dagReplyLength++; + + RedisModule_Log(ctx, "warning", + "on DAGRUN's PERSIST pecified persistent key (%s) that " + "was not used on DAG. Logging all local context keys", + persist_key_name); + AI_dictIterator *local_iter = AI_dictGetSafeIterator(rinfo->dagTensorsContext); + AI_dictEntry *local_entry = AI_dictNext(local_iter); + while (local_entry) { + RedisModuleString *localcontext_key_name = AI_dictGetKey(local_entry); + RedisModule_Log(ctx, "warning", "DAG's local context key (%s)", + localcontext_key_name); + local_entry = AI_dictNext(local_iter); + } + AI_dictReleaseIterator(local_iter); + + for (size_t opN = 0; opN < array_len(rinfo->dagOps); opN++) { + RedisModule_Log(ctx, "warning", "DAG's op n# %zu - cmdType %d ( argc %d )", opN, + rinfo->dagOps[opN]->commandType, rinfo->dagOps[opN]->argc); + } + } + persist_entry = AI_dictNext(persist_iter); + } + AI_dictReleaseIterator(persist_iter); +} + +static void ModelSingleOp_PersistTensors(RedisModuleCtx *ctx, RAI_DagOp *op) { + const size_t noutputs = RAI_ModelRunCtxNumOutputs(op->mctx); + for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) { + RAI_Tensor *tensor = RAI_ModelRunCtxOutputTensor(op->mctx, outputNumber); + tensor = tensor ? 
RAI_TensorGetShallowCopy(tensor) : NULL; + if (tensor) + _StoreTensorInKeySpace(ctx, tensor, op->outkeys[outputNumber], false); + } +} + +int RedisAI_DagRun_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + REDISMODULE_NOT_USED(argc); + RedisAI_RunInfo *rinfo = RedisModule_GetBlockedClientPrivateData(ctx); + + if (RAI_GetErrorCode(rinfo->err) == RAI_EDAGRUN) { + RedisModule_ReplyWithError(ctx, RAI_GetErrorOneLine(rinfo->err)); + RAI_FreeRunInfo(rinfo); + return REDISMODULE_ERR; + } + int dag_error = 0; + char *detail_oneline; + + size_t n_dagOps = array_len(rinfo->dagOps); + + if (*rinfo->timedOut) { + RedisModule_ReplyWithSimpleString(ctx, "TIMEDOUT"); + RAI_FreeRunInfo(rinfo); + return REDISMODULE_OK; + } + + if (rinfo->single_op_dag == 0) { + RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN); + } + + for (size_t i = 0; i < n_dagOps; i++) { + RAI_DagOp *currentOp = rinfo->dagOps[i]; + switch (currentOp->commandType) { + case REDISAI_DAG_CMD_TENSORSET: { + rinfo->dagReplyLength++; + if (currentOp->result == REDISMODULE_ERR) { + RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); + dag_error = 1; + } else if (currentOp->result == -1) { + RedisModule_ReplyWithSimpleString(ctx, "NA"); + } else { + RedisModule_ReplyWithSimpleString(ctx, "OK"); + } + break; + } + + case REDISAI_DAG_CMD_TENSORGET: { + rinfo->dagReplyLength++; + if (currentOp->result == REDISMODULE_ERR) { + RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); + dag_error = 1; + } else { + if (array_len(currentOp->outTensors) > 0) { + RAI_Tensor *tensor = currentOp->outTensors[0]; + RAI_parseTensorGetArgs(ctx, currentOp->argv, currentOp->argc, tensor); + } else if (currentOp->result == -1) { + RedisModule_ReplyWithSimpleString(ctx, "NA"); + } else { + RedisModule_ReplyWithError(ctx, "ERR error getting tensor from local context"); + } + } + break; + } + + case REDISAI_DAG_CMD_MODELRUN: { + rinfo->dagReplyLength++; + struct RedisAI_RunStats *rstats = NULL; + RAI_GetRunStats(currentOp->runkey, &rstats); + if (currentOp->result == REDISMODULE_ERR) { + RAI_SafeAddDataPoint(rstats, 0, 1, 1, 0); + RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); + dag_error = 1; + } else if (currentOp->result == -1) { + RedisModule_ReplyWithSimpleString(ctx, "NA"); + } else { + RAI_Tensor *t = NULL; + if (array_len(currentOp->mctx->outputs) > 0) { + t = currentOp->mctx->outputs[0].tensor; + } + int batch_size = 0; + if (t) { + batch_size = RAI_TensorDim(t, 0); + } + RAI_SafeAddDataPoint(rstats, currentOp->duration_us, 1, 0, batch_size); + RedisModule_ReplyWithSimpleString(ctx, "OK"); + } + break; + } + + case REDISAI_DAG_CMD_SCRIPTRUN: { + rinfo->dagReplyLength++; + struct RedisAI_RunStats *rstats = NULL; + RAI_GetRunStats(currentOp->runkey, &rstats); + if (currentOp->result == REDISMODULE_ERR) { + RAI_SafeAddDataPoint(rstats, 0, 1, 1, 0); + RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); + dag_error = 1; + } else if (currentOp->result == -1) { + RedisModule_ReplyWithSimpleString(ctx, "NA"); + } else { + int batch_size = 1; + RAI_SafeAddDataPoint(rstats, currentOp->duration_us, 1, 0, batch_size); + RedisModule_ReplyWithSimpleString(ctx, "OK"); + } + break; + } + + default: + /* no-op */ + break; + } + } + + if (dag_error) { + if (rinfo->single_op_dag == 0) { + RedisModule_ReplySetArrayLength(ctx, rinfo->dagReplyLength); + } + RAI_FreeRunInfo(rinfo); + return REDISMODULE_ERR; + } + + // TODO: Take care of script single op + if 
(rinfo->single_op_dag == 0 || rinfo->dagOps[0]->commandType == REDISAI_DAG_CMD_SCRIPTRUN) { + // Save the required tensors in redis key space. + PersistTensors(ctx, rinfo); + if (rinfo->single_op_dag == 0) + RedisModule_ReplySetArrayLength(ctx, rinfo->dagReplyLength); + } else { + ModelSingleOp_PersistTensors(ctx, rinfo->dagOps[0]); + } + + RAI_FreeRunInfo(rinfo); + + return REDISMODULE_OK; +} + +int RedisAI_DagRun_IsKeysPositionRequest_ReportKeys(RedisModuleCtx *ctx, RedisModuleString **argv, + int argc) { + for (size_t argpos = 1; argpos < argc; argpos++) { + const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); + if ((!strcasecmp(arg_string, "LOAD") || !strcasecmp(arg_string, "PERSIST")) && + (argpos + 1 < argc)) { + long long n_keys; + argpos++; + const int retval = RedisModule_StringToLongLong(argv[argpos], &n_keys); + if (retval != REDISMODULE_OK) { + return REDISMODULE_ERR; + } + argpos++; + if (n_keys > 0) { + size_t last_persist_argpos = n_keys + argpos; + for (; argpos < last_persist_argpos && argpos < argc; argpos++) { + RedisModule_KeyAtPos(ctx, argpos); + } + } + } + } + return REDISMODULE_OK; +} + +void RunInfo_FreeData(RedisModuleCtx *ctx, void *rinfo) {} + +void RedisAI_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc) { + RedisModule_Log(ctx, "warning", "Blocked client %p disconnected!", (void *)bc); +} + +// Add Shallow copies of the DAG run info to the devices' queues. +// Return REDISMODULE_OK in case of success, REDISMODULE_ERR if (at least) one insert op had +// failed. +int DAG_InsertDAGToQueue(RedisAI_RunInfo *rinfo) { + const char **devices = array_new(const char *, 10); + + for (long long i = 0; i < array_len(rinfo->dagOps); i++) { + const char *devicestr = rinfo->dagOps[i]->devicestr; + bool found = false; + for (long long j = 0; j < array_len(devices); j++) { + if (strcasecmp(devicestr, devices[j]) == 0) { + found = true; + break; + } + } + if (!found) { + devices = array_append(devices, devicestr); + } + } + + size_t ndevices = array_len(devices); + if (ndevices == 1) + rinfo->single_device_dag = 1; + RedisAI_RunInfo **rinfo_copies = array_new(RedisAI_RunInfo *, ndevices); + + for (long long i = 0; i < ndevices; i++) { + RedisAI_RunInfo *rinfo_copy; + RAI_ShallowCopyDagRunInfo(&rinfo_copy, rinfo); + rinfo_copies = array_append(rinfo_copies, rinfo_copy); + } + + for (long long i = 0; i < ndevices; i++) { + RedisAI_RunInfo *rinfo_copy = rinfo_copies[i]; + for (long long j = 0; j < rinfo_copy->dagOpCount; j++) { + if (strcasecmp(rinfo_copy->dagOps[j]->devicestr, devices[i]) == 0) { + rinfo_copy->dagDeviceOps = + array_append(rinfo_copy->dagDeviceOps, rinfo_copy->dagOps[j]); + } + } + rinfo_copy->dagDeviceOpCount = array_len(rinfo_copy->dagDeviceOps); + } + + RunQueueInfo **run_queues_info = array_new(RunQueueInfo *, ndevices); + for (long long i = 0; i < ndevices; i++) { + const char *devicestr = devices[i]; + RunQueueInfo *run_queue_info = NULL; + if (ensureRunQueue(devicestr, &run_queue_info) == REDISMODULE_ERR) { + // A device run queue was not created properly, so we free everything, + // set an error and finish. 
+ array_free(devices); + for (int j = 0; j < ndevices; j++) { + RAI_DagRunInfoFreeShallowCopy(rinfo_copies[j]); + } + array_free(rinfo_copies); + array_free(run_queues_info); + RAI_SetError(rinfo->err, RAI_EDAGRUN, "ERR Queue not initialized for device"); + rinfo->OnFinish((RedisAI_OnFinishCtx *)rinfo, rinfo->private_data); + return REDISMODULE_ERR; + } + run_queues_info = array_append(run_queues_info, run_queue_info); + } + for (long long i = 0; i < ndevices; i++) { + RedisAI_RunInfo *rinfo_copy = rinfo_copies[i]; + RunQueueInfo *run_queue_info = run_queues_info[i]; + gettimeofday(&rinfo_copy->queuingTime, NULL); + + pthread_mutex_lock(&run_queue_info->run_queue_mutex); + queuePush(run_queue_info->run_queue, rinfo_copy); + pthread_cond_signal(&run_queue_info->queue_condition_var); + pthread_mutex_unlock(&run_queue_info->run_queue_mutex); + } + + array_free(devices); + array_free(rinfo_copies); + array_free(run_queues_info); + return REDISMODULE_OK; +} + +void DAG_ReplyAndUnblock(RedisAI_OnFinishCtx *ctx, void *private_data) { + + RedisAI_RunInfo *rinfo = (RedisAI_RunInfo *)ctx; + if (rinfo->client) + RedisModule_UnblockClient(rinfo->client, rinfo); +} + +void Dag_PopulateOp(RAI_DagOp *currentOp, void *rctx, RedisModuleString **inkeys, + RedisModuleString **outkeys, RedisModuleString *runkey) { + + if (currentOp->commandType == REDISAI_DAG_CMD_MODELRUN) { + currentOp->mctx = (RAI_ModelRunCtx *)rctx; + currentOp->devicestr = currentOp->mctx->model->devicestr; + } else { + assert(currentOp->commandType == REDISAI_DAG_CMD_SCRIPTRUN); + currentOp->sctx = (RAI_ScriptRunCtx *)rctx; + currentOp->devicestr = currentOp->sctx->script->devicestr; + } + + currentOp->inkeys = inkeys; + currentOp->outkeys = outkeys; + currentOp->runkey = runkey; +} diff --git a/src/dag.h b/src/DAG/dag.h similarity index 71% rename from src/dag.h rename to src/DAG/dag.h index 67b3f9cf0..5ec0dd0f3 100644 --- a/src/dag.h +++ b/src/DAG/dag.h @@ -107,39 +107,6 @@ void RedisAI_BatchedDagRunSessionStep(RedisAI_RunInfo **rinfo, const char *devic */ int RedisAI_DagRun_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); -/** - * DAGRUN Building Block to parse [LOAD key1 key2... ] - * - * @param ctx Context in which Redis modules operate - * @param argv Redis command arguments, as an array of strings - * @param argc Redis command number of arguments - * @param loadedContextDict local non-blocking hash table containing key names - * loaded from the keyspace tensors - * @param localContextDict local non-blocking hash table containing DAG's - * tensors - * @param chaining_operator operator used to split operations. Any command - * argument after the chaining operator is not considered - * @return processed number of arguments on success, or -1 if the parsing failed - */ -int RAI_parseDAGLoadArgs(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, - AI_dict **loadedContextDict, AI_dict **localContextDict, - const char *chaining_operator); - -/** - * DAGRUN Building Block to parse [PERSIST key1 key2... ] - * - * @param ctx Context in which Redis modules operate - * @param argv Redis command arguments, as an array of strings - * @param argc Redis command number of arguments - * @param localContextDict local non-blocking hash table containing DAG's - * keynames marked as persistent - * @param chaining_operator operator used to split operations. 
Any command - * argument after the chaining operator is not considered - * @return processed number of arguments on success, or -1 if the parsing failed - */ -int RAI_parseDAGPersistArgs(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, - AI_dict **localContextDict, const char *chaining_operator); - /** * When a module command is called in order to obtain the position of * keys, since it was flagged as "getkeys-api" during the registration, @@ -155,20 +122,6 @@ int RAI_parseDAGPersistArgs(RedisModuleCtx *ctx, RedisModuleString **argv, int a int RedisAI_DagRun_IsKeysPositionRequest_ReportKeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); -/** - * DAGRUN and DAGRUN_RO parser, which reads the the sequence of - * arguments and decides whether the sequence conforms to the syntax - * specified by the DAG grammar. - * - * @param ctx Context in which Redis modules operate - * @param argv Redis command arguments, as an array of strings - * @param argc Redis command number of arguments - * @param dagMode access mode, for now REDISAI_DAG_READONLY_MODE or REDISAI_DAG_WRITE_MODE - * @return - */ -int RedisAI_ProcessDagRunCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, - int dagMode); - /** * @brief This callback is called at the end of a DAG run and performs unblock client and reply. * This is the callback of RedisAI AI.MODELRUN, AI.SCRIPTRUN, AI.DAGRUN @@ -177,4 +130,34 @@ int RedisAI_ProcessDagRunCommand(RedisModuleCtx *ctx, RedisModuleString **argv, */ void DAG_ReplyAndUnblock(RedisAI_OnFinishCtx *ctx, void *private_data); +/** + * @brief Insert DAG runInfo to the worker queues + * @param RunInfo object to insert. + */ +int DAG_InsertDAGToQueue(RedisAI_RunInfo *rinfo); + +/** + * @brief A callback to send to BlockClient (we only send this function but we + * don't use it for freeing the runInfo object, we use RAI_FreeRunInfo) + */ +void RunInfo_FreeData(RedisModuleCtx *ctx, void *rinfo); + +/** + * @brief A callback to send to BlockClient. + */ +void RedisAI_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc); + +/** + * @brief Populate a DAG modelrun/scriptrun op with its params . + * @param rinfo An existing DAG to populate. + * @param rctx ModelRunCtx or ScriptRunCtx that represents the single MODELRUN op. + * @param inkeys The DAG operation inkeys (the input tensors). + * @param outkeys The DAG operation outkeys (the output tensors). + * @param runkey The model key. + * @param cmd The DAG command (modelrun/scriptrun). + */ + +void Dag_PopulateOp(RAI_DagOp *currentOp, void *rctx, RedisModuleString **inkeys, + RedisModuleString **outkeys, RedisModuleString *runkey); + #endif /* SRC_DAG_H_ */ diff --git a/src/DAG/dag_parser.c b/src/DAG/dag_parser.c new file mode 100644 index 000000000..f81f59ce6 --- /dev/null +++ b/src/DAG/dag_parser.c @@ -0,0 +1,465 @@ +#include "util/dict.h" +#include "redismodule.h" +#include "tensor.h" +#include "dag_parser.h" +#include "modelRun_ctx.h" +#include +#include "util/string_utils.h" + +/** + * DAGRUN Building Block to parse [LOAD key1 key2... ] + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @param loadedContextDict local non-blocking hash table containing key names + * loaded from the keyspace tensors + * @param localContextDict local non-blocking hash table containing DAG's + * tensors + * @param chaining_operator operator used to split operations. 
Any command + * argument after the chaining operator is not considered + * @return processed number of arguments on success, or -1 if the parsing failed + */ +static int DAG_ParseLoadArgs(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, + AI_dict **loadedContextDict, AI_dict **localContextDict, + const char *chaining_operator) { + if (argc < 3) { + RedisModule_WrongArity(ctx); + return -1; + } + + long long n_keys; + const int retval = RedisModule_StringToLongLong(argv[1], &n_keys); + if (retval != REDISMODULE_OK || n_keys <= 0) { + RedisModule_ReplyWithError(ctx, + "ERR invalid or negative value found in number of keys to LOAD"); + return -1; + } + int number_loaded_keys = 0; + int separator_flag = 0; + size_t argpos = 2; + for (; (argpos <= argc - 1) && (number_loaded_keys < n_keys); argpos++) { + size_t arg_len; + const char *arg_string = RedisModule_StringPtrLen(argv[argpos], &arg_len); + if (!strcasecmp(arg_string, chaining_operator)) { + separator_flag = 1; + break; + } else { + RAI_Tensor *t; + RedisModuleKey *key; + const int status = + RAI_GetTensorFromKeyspace(ctx, argv[argpos], &key, &t, REDISMODULE_READ); + if (status == REDISMODULE_ERR) { + RedisModule_Log(ctx, "warning", + "on DAGRUN's LOAD could not load tensor %s from keyspace", + arg_string); + return -1; + } + char buf[16]; + sprintf(buf, "%04d", 1); + RedisModuleString *dictKey = RedisModule_CreateStringFromString(NULL, argv[argpos]); + RedisModule_StringAppendBuffer(NULL, dictKey, buf, strlen(buf)); + + AI_dictAdd(*localContextDict, (void *)dictKey, (void *)RAI_TensorGetShallowCopy(t)); + AI_dictAdd(*loadedContextDict, (void *)dictKey, (void *)1); + RedisModule_FreeString(NULL, dictKey); + number_loaded_keys++; + } + } + if (number_loaded_keys != n_keys) { + RedisModule_WrongArity(ctx); + return -1; + } + return argpos; +} + +/** + * DAGRUN Building Block to parse [PERSIST key1 key2... ] + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @param localContextDict local non-blocking hash table containing DAG's + * keynames marked as persistent + * @param chaining_operator operator used to split operations. Any command + * argument after the chaining operator is not considered + * @return processed number of arguments on success, or -1 if the parsing failed + */ +static int DAG_ParsePersistArgs(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, + AI_dict **persistContextDict, const char *chaining_operator) { + if (argc < 3) { + RedisModule_WrongArity(ctx); + return -1; + } + + long long n_keys; + const int retval = RedisModule_StringToLongLong(argv[1], &n_keys); + if (retval != REDISMODULE_OK || n_keys <= 0) { + RedisModule_ReplyWithError( + ctx, "ERR invalid or negative value found in number of keys to PERSIST"); + return -1; + } + + int number_loaded_keys = 0; + int separator_flag = 0; + size_t argpos = 2; + for (; (argpos < argc) && (number_loaded_keys < n_keys); argpos++) { + const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); + if (!strcasecmp(arg_string, chaining_operator)) { + separator_flag = 1; + break; + } else { + AI_dictAdd(*persistContextDict, (void *)argv[argpos], (void *)1); + number_loaded_keys++; + } + } + if (number_loaded_keys != n_keys) { + RedisModule_WrongArity(ctx); + return -1; + } + return argpos; +} + +// Parse the DAG run command and return REDISMODULE_OK only if it is a valid command to execute. 
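+// The parser consumes the optional LOAD, PERSIST and TIMEOUT prefixes, splits the
+// remaining arguments on the "|>" chaining operator into one argv slice per op,
+// resolves MODELRUN/SCRIPTRUN keys from the keyspace, and finally mangles tensor
+// key names so that ops never alias each other's keys (see the comment further down).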
+int DAG_CommandParser(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, bool dag_ro, + RedisAI_RunInfo **rinfo_ptr) { + + if (argc < 4) { + RedisModule_WrongArity(ctx); + return REDISMODULE_ERR; + } + RedisAI_RunInfo *rinfo = *rinfo_ptr; + RAI_DagOp *currentDagOp = NULL; + RAI_InitDagOp(¤tDagOp); + rinfo->dagOps = array_append(rinfo->dagOps, currentDagOp); + + int chainingOpCount = 0; + bool load_complete = false; + bool persist_complete = false; + int arg_pos = 1; + + // If we're parsing a AI.SCRIPTRUN command, we don't expect there to be a chaining |> operator + if (!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL), "AI.SCRIPTRUN")) { + arg_pos = 0; + chainingOpCount++; + rinfo->single_op_dag = 1; + rinfo->single_device_dag = 1; + } + + // The first arg is "AI.DAGRUN", so we go over from the next arg. + for (; arg_pos < argc; arg_pos++) { + const char *arg_string = RedisModule_StringPtrLen(argv[arg_pos], NULL); + + if (!strcasecmp(arg_string, "LOAD") && !load_complete) { + /* Load the required tensors from key space and store them in both + dagTensorsLoadedContext and dagTensorsContext dicts. */ + const int parse_result = DAG_ParseLoadArgs(ctx, &argv[arg_pos], argc - arg_pos, + &(rinfo->dagTensorsLoadedContext), + &(rinfo->dagTensorsContext), "|>"); + if (parse_result > 0) { + arg_pos += parse_result - 1; + load_complete = true; + } else { + RAI_FreeRunInfo(rinfo); + return REDISMODULE_ERR; + } + } else if (!strcasecmp(arg_string, "PERSIST") && !persist_complete) { + if (dag_ro) { + RAI_FreeRunInfo(rinfo); + RedisModule_ReplyWithError(ctx, + "ERR PERSIST cannot be specified in a read-only DAG"); + return REDISMODULE_ERR; + } + /* Store the keys to persist in dagTensorsPersistedContext dict. + These keys will be populated late on with actual tensors. 
*/ + const int parse_result = DAG_ParsePersistArgs( + ctx, &argv[arg_pos], argc - arg_pos, &(rinfo->dagTensorsPersistedContext), "|>"); + if (parse_result > 0) { + arg_pos += parse_result - 1; + persist_complete = true; + } else { + RAI_FreeRunInfo(rinfo); + return REDISMODULE_ERR; + } + } else if (!strcasecmp(arg_string, "TIMEOUT")) { + if (!((chainingOpCount == 0) || (chainingOpCount == 1 && rinfo->single_op_dag == 1))) { + RAI_FreeRunInfo(rinfo); + RedisModule_ReplyWithError(ctx, "ERR TIMEOUT not allowed within a DAG command"); + return REDISMODULE_ERR; + } + if (arg_pos == argc - 1) { + RAI_FreeRunInfo(rinfo); + RedisModule_ReplyWithError(ctx, "ERR No value provided for TIMEOUT"); + return REDISMODULE_ERR; + } + long long timeout; + const int retval = RedisModule_StringToLongLong(argv[arg_pos + 1], &timeout); + if (retval != REDISMODULE_OK || timeout <= 0) { + RAI_FreeRunInfo(rinfo); + RedisModule_ReplyWithError(ctx, "ERR Invalid value for TIMEOUT"); + return REDISMODULE_ERR; + } + rinfo->timeout = timeout; + arg_pos += 1; + continue; + } else if (!strcasecmp(arg_string, "|>") && arg_pos < argc - 1) { + // on the first pipe operator, if LOAD or PERSIST were used, we've already + // allocated memory + if (chainingOpCount > 0) { + rinfo->dagOpCount++; + RAI_DagOp *currentDagOp = NULL; + RAI_InitDagOp(¤tDagOp); + rinfo->dagOps = array_append(rinfo->dagOps, currentDagOp); + } + chainingOpCount++; + } else { + if (!strcasecmp(arg_string, "AI.TENSORGET")) { + rinfo->dagOps[rinfo->dagOpCount]->commandType = REDISAI_DAG_CMD_TENSORGET; + rinfo->dagOps[rinfo->dagOpCount]->devicestr = "CPU"; + } + if (!strcasecmp(arg_string, "AI.TENSORSET")) { + rinfo->dagOps[rinfo->dagOpCount]->commandType = REDISAI_DAG_CMD_TENSORSET; + rinfo->dagOps[rinfo->dagOpCount]->devicestr = "CPU"; + } + if (!strcasecmp(arg_string, "AI.MODELRUN")) { + if (argc - 2 < arg_pos) { + RedisModule_WrongArity(ctx); + return REDISMODULE_ERR; + } + RAI_DagOp *currentOp = rinfo->dagOps[rinfo->dagOpCount]; + currentOp->commandType = REDISAI_DAG_CMD_MODELRUN; + RAI_Model *mto; + RedisModuleKey *modelKey; + const int status = RAI_GetModelFromKeyspace(ctx, argv[arg_pos + 1], &modelKey, &mto, + REDISMODULE_READ); + if (status == REDISMODULE_ERR) { + RAI_FreeRunInfo(rinfo); + RedisModule_ReplyWithError(ctx, "ERR Model not found"); + return REDISMODULE_ERR; + } + currentOp->devicestr = mto->devicestr; + currentOp->runkey = argv[arg_pos + 1]; + currentOp->mctx = RAI_ModelRunCtxCreate(mto); + } + if (!strcasecmp(arg_string, "AI.SCRIPTRUN")) { + if (argc - 3 < arg_pos) { + RedisModule_WrongArity(ctx); + return REDISMODULE_ERR; + } + RAI_DagOp *currentOp = rinfo->dagOps[rinfo->dagOpCount]; + currentOp->commandType = REDISAI_DAG_CMD_SCRIPTRUN; + RAI_Script *sto; + RedisModuleKey *scriptKey; + const int status = RAI_GetScriptFromKeyspace(ctx, argv[arg_pos + 1], &scriptKey, + &sto, REDISMODULE_READ); + if (status == REDISMODULE_ERR) { + RAI_FreeRunInfo(rinfo); + return REDISMODULE_ERR; + } + currentOp->devicestr = sto->devicestr; + const char *functionName = RedisModule_StringPtrLen(argv[arg_pos + 2], NULL); + currentOp->runkey = argv[arg_pos + 1]; + currentOp->sctx = RAI_ScriptRunCtxCreate(sto, functionName); + } + RAI_HoldString(NULL, argv[arg_pos]); + RAI_DagOp *currentOp = rinfo->dagOps[rinfo->dagOpCount]; + currentOp->argv = array_append(currentOp->argv, argv[arg_pos]); + currentOp->argc++; + } + } + + rinfo->dagOpCount = array_len(rinfo->dagOps); + + for (long long i = 0; i < array_len(rinfo->dagOps); i++) { + RAI_DagOp *currentOp = 
rinfo->dagOps[i]; + if (currentOp == NULL) + continue; + int parse_result; + switch (currentOp->commandType) { + case REDISAI_DAG_CMD_TENSORSET: + currentOp->outkeys = array_append(currentOp->outkeys, currentOp->argv[1]); + break; + case REDISAI_DAG_CMD_TENSORGET: + currentOp->inkeys = array_append(currentOp->inkeys, currentOp->argv[1]); + break; + case REDISAI_DAG_CMD_MODELRUN: + parse_result = RedisAI_Parse_ModelRun_RedisCommand( + NULL, currentOp->argv, currentOp->argc, &(currentOp->mctx), &(currentOp->inkeys), + &(currentOp->outkeys), &(currentOp->mctx->model), currentOp->err); + if (parse_result < 0) { + RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); + return REDISMODULE_ERR; + } + break; + case REDISAI_DAG_CMD_SCRIPTRUN: + parse_result = RedisAI_Parse_ScriptRun_RedisCommand( + NULL, currentOp->argv, currentOp->argc, &(currentOp->inkeys), &(currentOp->outkeys), + &(currentOp->sctx->variadic), currentOp->err); + if (parse_result < 0) { + RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); + return REDISMODULE_ERR; + } + break; + } + } + + if (rinfo->single_op_dag && rinfo->dagOps[0]->commandType == REDISAI_DAG_CMD_SCRIPTRUN) { + RAI_DagOp *op = rinfo->dagOps[0]; + RAI_Tensor *t; + RedisModuleKey *key; + for (size_t i = 0; i < array_len(op->inkeys); i++) { + RedisModuleString *inkey = op->inkeys[i]; + const int status = RAI_GetTensorFromKeyspace(ctx, inkey, &key, &t, REDISMODULE_READ); + if (status == REDISMODULE_ERR) { + RedisModule_Log(ctx, "warning", + "on DAGRUN's LOAD could not load tensor %s from keyspace", + RedisModule_StringPtrLen(inkey, NULL)); + return REDISMODULE_ERR; + } + char buf[16]; + sprintf(buf, "%04d", 1); + RedisModuleString *dictKey = RedisModule_CreateStringFromString(NULL, inkey); + RedisModule_StringAppendBuffer(NULL, dictKey, buf, strlen(buf)); + AI_dictAdd(rinfo->dagTensorsContext, (void *)dictKey, + (void *)RAI_TensorGetShallowCopy(t)); + AI_dictAdd(rinfo->dagTensorsLoadedContext, (void *)dictKey, (void *)1); + RedisModule_Free(dictKey); + } + + for (size_t i = 0; i < array_len(op->outkeys); i++) { + RedisModuleString *outkey = op->outkeys[i]; + AI_dictAdd(rinfo->dagTensorsPersistedContext, (void *)outkey, (void *)1); + } + } + + // At this point, we have built a sequence of DAG operations, each with its own + // input and output keys. The names of the keys will be used to look whether the + // inputs to a DAG operation have all been realized by previous operations (or if + // they are available as part of LOADed keys from keyspace). + // This strategy is fine if keys are not aliased, that is, if a command's output + // overwrites the key of a previous command. This would trick DAG operations into + // thinking that their input is ready when it's not. + // To overcome this, we make key names unique, so that names are not aliased. We + // mangle the names by appending a numerical suffix ":0001". After computing, we + // demangle the keys in order to persist them. 
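+    // Example of the mangling scheme: if op A writes key "t" and op B then reads "t"
+    // and writes it again, A's output is tracked as "t0001" (B reads "t0001") while
+    // B's output becomes "t0002"; PERSIST entries are remapped below to the last
+    // written version of each key.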
+ + AI_dict *mangled_tensors = AI_dictCreate(&AI_dictTypeHeapRStrings, NULL); + if (!mangled_tensors) { + return REDISMODULE_ERR; + } + + { + AI_dictIterator *iter = AI_dictGetSafeIterator(rinfo->dagTensorsLoadedContext); + AI_dictEntry *entry = AI_dictNext(iter); + while (entry) { + RedisModuleString *key = (RedisModuleString *)AI_dictGetKey(entry); + size_t key_len; + const char *key_str = RedisModule_StringPtrLen(key, &key_len); + RedisModuleString *demangled_key = RedisModule_CreateString(NULL, key_str, key_len - 4); + int *instance = RedisModule_Alloc(sizeof(int)); + *instance = 1; + AI_dictAdd(mangled_tensors, (void *)demangled_key, (void *)instance); + RedisModule_FreeString(NULL, demangled_key); + entry = AI_dictNext(iter); + } + AI_dictReleaseIterator(iter); + } + + for (long long i = 0; i < array_len(rinfo->dagOps); i++) { + RAI_DagOp *currentOp = rinfo->dagOps[i]; + + RedisModuleString **mangled_inkeys = + array_new(RedisModuleString *, array_len(currentOp->inkeys)); + for (long long j = 0; j < array_len(currentOp->inkeys); j++) { + RedisModuleString *key = currentOp->inkeys[j]; + AI_dictEntry *entry = AI_dictFind(mangled_tensors, key); + if (!entry) { + AI_dictRelease(mangled_tensors); + RedisModule_ReplyWithError(ctx, "ERR INPUT key cannot be found in DAG"); + return REDISMODULE_ERR; + } + int *instance = AI_dictGetVal(entry); + char buf[16]; + sprintf(buf, "%04d", *instance); + RedisModuleString *mangled_key = RedisModule_CreateStringFromString(NULL, key); + RedisModule_StringAppendBuffer(NULL, mangled_key, buf, strlen(buf)); + mangled_inkeys = array_append(mangled_inkeys, mangled_key); + } + + RedisModuleString **mangled_outkeys = + array_new(RedisModuleString *, array_len(currentOp->outkeys)); + for (long long j = 0; j < array_len(currentOp->outkeys); j++) { + RedisModuleString *key = currentOp->outkeys[j]; + AI_dictEntry *entry = AI_dictFind(mangled_tensors, key); + int *instance = NULL; + if (entry) { + instance = AI_dictGetVal(entry); + *instance += 1; + } else { + instance = RedisModule_Alloc(sizeof(int)); + *instance = 1; + AI_dictAdd(mangled_tensors, (void *)key, (void *)instance); + } + char buf[16]; + sprintf(buf, "%04d", *instance); + RedisModuleString *mangled_key = RedisModule_CreateStringFromString(NULL, key); + RedisModule_StringAppendBuffer(NULL, mangled_key, buf, strlen(buf)); + mangled_outkeys = array_append(mangled_outkeys, mangled_key); + } + + array_free(currentOp->inkeys); + array_free(currentOp->outkeys); + + currentOp->inkeys = mangled_inkeys; + currentOp->outkeys = mangled_outkeys; + } + + AI_dict *mangled_persisted = AI_dictCreate(&AI_dictTypeHeapRStrings, NULL); + { + AI_dictIterator *iter = AI_dictGetSafeIterator(rinfo->dagTensorsPersistedContext); + AI_dictEntry *entry = AI_dictNext(iter); + while (entry) { + RedisModuleString *key = (RedisModuleString *)AI_dictGetKey(entry); + AI_dictEntry *mangled_entry = AI_dictFind(mangled_tensors, key); + if (!mangled_entry) { + AI_dictRelease(mangled_tensors); + AI_dictRelease(mangled_persisted); + AI_dictReleaseIterator(iter); + RedisModule_ReplyWithError(ctx, "ERR PERSIST key cannot be found in DAG"); + return REDISMODULE_ERR; + } + int *instance = AI_dictGetVal(mangled_entry); + char buf[16]; + sprintf(buf, "%04d", *instance); + RedisModuleString *mangled_key = RedisModule_CreateStringFromString(NULL, key); + RedisModule_StringAppendBuffer(NULL, mangled_key, buf, strlen(buf)); + + AI_dictAdd(mangled_persisted, (void *)mangled_key, (void *)1); + entry = AI_dictNext(iter); + } + 
AI_dictReleaseIterator(iter); + } + + AI_dictRelease(rinfo->dagTensorsPersistedContext); + rinfo->dagTensorsPersistedContext = mangled_persisted; + + { + AI_dictIterator *iter = AI_dictGetSafeIterator(mangled_tensors); + AI_dictEntry *entry = AI_dictNext(iter); + while (entry) { + int *val = (int *)AI_dictGetVal(entry); + RedisModule_Free(val); + entry = AI_dictNext(iter); + } + AI_dictReleaseIterator(iter); + } + AI_dictRelease(mangled_tensors); + mangled_tensors = NULL; + + for (long long i = 0; i < array_len(rinfo->dagOps); i++) { + if (rinfo->dagOps[i]->devicestr == NULL) { + rinfo->dagOps[i]->devicestr = "CPU"; + } + } + return REDISMODULE_OK; +} \ No newline at end of file diff --git a/src/DAG/dag_parser.h b/src/DAG/dag_parser.h new file mode 100644 index 000000000..8ede62070 --- /dev/null +++ b/src/DAG/dag_parser.h @@ -0,0 +1,6 @@ +#pragma once + +#include "run_info.h" + +int DAG_CommandParser(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, bool dag_ro, + RedisAI_RunInfo **rinfo_ptr); diff --git a/src/background_workers.c b/src/background_workers.c index 946c81975..d8cd7788e 100644 --- a/src/background_workers.c +++ b/src/background_workers.c @@ -9,18 +9,8 @@ */ #include "background_workers.h" -#include "dag.h" -#include "model.h" #include "redisai.h" -#include "rmutil/alloc.h" -#include "rmutil/args.h" #include "run_info.h" -#include "script.h" -#include "stats.h" -#include "tensor.h" -#include "util/arr_rm_alloc.h" -#include "util/dict.h" -#include "util/queue.h" #include #include #include diff --git a/src/background_workers.h b/src/background_workers.h index 6634106d7..5d87a4f60 100644 --- a/src/background_workers.h +++ b/src/background_workers.h @@ -19,7 +19,7 @@ #include #include "config.h" -#include "dag.h" +#include "DAG/dag.h" #include "model.h" #include "redisai.h" #include "rmutil/alloc.h" diff --git a/src/command_parser.c b/src/command_parser.c new file mode 100644 index 000000000..c3aadf2f8 --- /dev/null +++ b/src/command_parser.c @@ -0,0 +1,221 @@ + +#include "command_parser.h" +#include "redismodule.h" +#include "run_info.h" +#include "modelRun_ctx.h" +#include "DAG/dag.h" +#include "DAG/dag_parser.h" +#include "util/string_utils.h" + +static int _parseTimeout(RedisModuleString *timeout_arg, RAI_Error *error, long long *timeout) { + + const int retval = RedisModule_StringToLongLong(timeout_arg, timeout); + if (retval != REDISMODULE_OK || timeout <= 0) { + RAI_SetError(error, RAI_EMODELRUN, "ERR Invalid value for TIMEOUT"); + return REDISMODULE_ERR; + } + return REDISMODULE_OK; +} + +static int _ModelRunCommand_ParseArgs(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, + RAI_Model **model, RAI_Error *error, + RedisModuleString ***inkeys, RedisModuleString ***outkeys, + RedisModuleString **runkey, long long *timeout) { + + if (argc < 4) { + RAI_SetError(error, RAI_EMODELRUN, + "ERR wrong number of arguments for 'AI.MODELRUN' command"); + return REDISMODULE_ERR; + } + size_t argpos = 1; + RedisModuleKey *modelKey; + const int status = + RAI_GetModelFromKeyspace(ctx, argv[argpos], &modelKey, model, REDISMODULE_READ); + if (status == REDISMODULE_ERR) { + RAI_SetError(error, RAI_EMODELRUN, "ERR Model not found"); + return REDISMODULE_ERR; + } + RAI_HoldString(NULL, argv[argpos]); + *runkey = argv[argpos]; + const char *arg_string = RedisModule_StringPtrLen(argv[++argpos], NULL); + + // Parse timeout arg if given and store it in timeout + if (!strcasecmp(arg_string, "TIMEOUT")) { + if (_parseTimeout(argv[++argpos], error, timeout) == REDISMODULE_ERR) + return 
REDISMODULE_ERR; + arg_string = RedisModule_StringPtrLen(argv[++argpos], NULL); + } + if (strcasecmp(arg_string, "INPUTS") != 0) { + RAI_SetError(error, RAI_EMODELRUN, "ERR INPUTS not specified"); + return REDISMODULE_ERR; + } + + bool is_input = true, is_output = false; + size_t ninputs = 0, noutputs = 0; + + while (++argpos < argc) { + arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); + if (!strcasecmp(arg_string, "OUTPUTS") && !is_output) { + is_input = false; + is_output = true; + } else { + RAI_HoldString(NULL, argv[argpos]); + if (is_input) { + ninputs++; + *inkeys = array_append(*inkeys, argv[argpos]); + } else { + noutputs++; + *outkeys = array_append(*outkeys, argv[argpos]); + } + } + } + if ((*model)->inputs && (*model)->ninputs != ninputs) { + RAI_SetError(error, RAI_EMODELRUN, + "Number of names given as INPUTS during MODELSET and keys given as " + "INPUTS here do not match"); + return REDISMODULE_ERR; + } + + if ((*model)->outputs && (*model)->noutputs != noutputs) { + RAI_SetError(error, RAI_EMODELRUN, + "Number of names given as OUTPUTS during MODELSET and keys given as " + "OUTPUTS here do not match"); + return REDISMODULE_ERR; + } + return REDISMODULE_OK; +} + +/** + * Extract the params for the ModelCtxRun object from AI.MODELRUN arguments. + * + * @param ctx Context in which Redis modules operate + * @param inkeys Model input tensors keys, as an array of strings + * @param outkeys Model output tensors keys, as an array of strings + * @param mctx Destination Model context to store the parsed data + * @return REDISMODULE_OK in case of success, REDISMODULE_ERR otherwise + */ + +static int _ModelRunCtx_SetParams(RedisModuleCtx *ctx, RedisModuleString **inkeys, + RedisModuleString **outkeys, RAI_ModelRunCtx *mctx) { + + RAI_Model *model = mctx->model; + RAI_Tensor *t; + RedisModuleKey *key; + char *opname = NULL; + size_t ninputs = array_len(inkeys), noutputs = array_len(outkeys); + for (size_t i = 0; i < ninputs; i++) { + const int status = RAI_GetTensorFromKeyspace(ctx, inkeys[i], &key, &t, REDISMODULE_READ); + if (status == REDISMODULE_ERR) { + RedisModule_Log(ctx, "warning", "could not load tensor %s from keyspace", + RedisModule_StringPtrLen(inkeys[i], NULL)); + return REDISMODULE_ERR; + } + if (model->inputs) + opname = model->inputs[i]; + RAI_ModelRunCtxAddInput(mctx, opname, t); + } + for (size_t i = 0; i < noutputs; i++) { + if (model->outputs) + opname = model->outputs[i]; + RAI_ModelRunCtxAddOutput(mctx, opname); + } + return REDISMODULE_OK; +} + +int ParseModelRunCommand(RedisAI_RunInfo *rinfo, RedisModuleCtx *ctx, RedisModuleString **argv, + int argc) { + + // Build a ModelRunCtx from command. + RAI_Error error = {0}; + RAI_Model *model; + RedisModuleString **inkeys = array_new(RedisModuleString *, 1); + RedisModuleString **outkeys = array_new(RedisModuleString *, 1); + RedisModuleString *runkey = NULL; + RAI_ModelRunCtx *mctx = NULL; + RAI_DagOp *currentOp; + + long long timeout = 0; + if (_ModelRunCommand_ParseArgs(ctx, argv, argc, &model, &error, &inkeys, &outkeys, &runkey, + &timeout) == REDISMODULE_ERR) { + RedisModule_ReplyWithError(ctx, RAI_GetErrorOneLine(&error)); + goto cleanup; + } + mctx = RAI_ModelRunCtxCreate(model); + + if (rinfo->single_op_dag) { + rinfo->timeout = timeout; + // Set params in ModelRunCtx, bring inputs from key space. 
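+        // In the single-op case there is no DAG-local tensor context: inputs are read
+        // directly from the keyspace and attached to the ModelRunCtx now, and the DAG
+        // execution step will skip Dag_LoadInputsToModelRunCtx accordingly.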
+ if (_ModelRunCtx_SetParams(ctx, inkeys, outkeys, mctx) == REDISMODULE_ERR) + goto cleanup; + } + if (RAI_InitDagOp(¤tOp) == REDISMODULE_ERR) { + RedisModule_ReplyWithError( + ctx, "ERR Unable to allocate the memory and initialise the RAI_dagOp structure"); + goto cleanup; + } + currentOp->commandType = REDISAI_DAG_CMD_MODELRUN; + Dag_PopulateOp(currentOp, mctx, inkeys, outkeys, runkey); + rinfo->dagOps = array_append(rinfo->dagOps, currentOp); + return REDISMODULE_OK; + +cleanup: + for (size_t i = 0; i < array_len(inkeys); i++) { + RedisModule_FreeString(NULL, inkeys[i]); + } + array_free(inkeys); + for (size_t i = 0; i < array_len(outkeys); i++) { + RedisModule_FreeString(NULL, outkeys[i]); + } + array_free(outkeys); + if (runkey) + RedisModule_FreeString(NULL, runkey); + if (mctx) + RAI_ModelRunCtxFree(mctx); + RAI_FreeRunInfo(rinfo); + return REDISMODULE_ERR; +} + +int RedisAI_ExecuteCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, + RunCommand command, bool ro_dag) { + + int flags = RedisModule_GetContextFlags(ctx); + bool blocking_not_allowed = (flags & (REDISMODULE_CTX_FLAGS_MULTI | REDISMODULE_CTX_FLAGS_LUA)); + if (blocking_not_allowed) + return RedisModule_ReplyWithError( + ctx, "ERR Cannot run RedisAI command within a transaction or a LUA script"); + + RedisAI_RunInfo *rinfo = NULL; + if (RAI_InitRunInfo(&rinfo) != REDISMODULE_OK) { + RedisModule_ReplyWithError( + ctx, "ERR Unable to allocate the memory and initialize the RedisAI_RunInfo structure"); + return REDISMODULE_ERR; + } + + int status = REDISMODULE_ERR; + switch (command) { + case CMD_MODELRUN: + rinfo->single_op_dag = 1; + status = ParseModelRunCommand(rinfo, ctx, argv, argc); + break; + case CMD_SCRIPTRUN: + rinfo->single_op_dag = 1; + status = DAG_CommandParser(ctx, argv, argc, ro_dag, &rinfo); + break; + case CMD_DAGRUN: + status = DAG_CommandParser(ctx, argv, argc, ro_dag, &rinfo); + break; + default: + status = REDISMODULE_ERR; + } + if (status == REDISMODULE_ERR) { + return REDISMODULE_OK; + } + + rinfo->dagOpCount = array_len(rinfo->dagOps); + + // Block the client before adding rinfo to the run queues (sync call). + rinfo->client = RedisModule_BlockClient(ctx, RedisAI_DagRun_Reply, NULL, RunInfo_FreeData, 0); + RedisModule_SetDisconnectCallback(rinfo->client, RedisAI_Disconnected); + rinfo->OnFinish = DAG_ReplyAndUnblock; + return DAG_InsertDAGToQueue(rinfo); +} diff --git a/src/command_parser.h b/src/command_parser.h new file mode 100644 index 000000000..5565047b8 --- /dev/null +++ b/src/command_parser.h @@ -0,0 +1,19 @@ +#pragma once + +#include "redismodule.h" +#include "model.h" + +typedef enum RunCommand { CMD_MODELRUN = 0, CMD_SCRIPTRUN, CMD_DAGRUN } RunCommand; + +/** + * @brief Validates MODELRUN command and write the model obtained from + * the key space to the model pointer. The keys of the input and output tensord + * are stored in the inkeys and outkeys arrays, the model key is saved in runkey, + * and the given timeout is saved as well (if given, otherwise it is zero). + * @return Returns REDISMODULE_OK if the command is valid, REDISMODULE_ERR otherwise. 
+ */ +int ParseModelRunCommand(RedisAI_RunInfo *rinfo, RedisModuleCtx *ctx, RedisModuleString **argv, + int argc); + +int RedisAI_ExecuteCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, + RunCommand command, bool ro_dag); diff --git a/src/dag.c b/src/dag.c deleted file mode 100644 index a46c7b575..000000000 --- a/src/dag.c +++ /dev/null @@ -1,1338 +0,0 @@ -/** - * dag.c - * - * Contains the helper methods for both parsing, running the command in the - * background, and replying DAG structured commands. - * - * The way we allow DAG operations to run on different devices in parallel - * (when possible) is the following: instead of running the whole DAG in one - * swoop, the DAG run info is created on one - * queue/device and shallow copied (appropriately) across other queues/devices - * as indicated by the DAG specification. A DAG mutex is shared across all - * copies. - * The DAG run info is placed on the queue for each device and evicted for - * execution (in background_workers). Execution happens one DAG op at a time: - * once the individual op has executed, it is marked as such and the DAG run - * info is placed back on the queue. The current un-executed op is checked for - * its inputs. If all inputs are found in the tensor context, then the DAG op - * can be executed. If not, the execution quits and control is given back to - * the worker. If there are other items in the queue the op is placed after the - * next item. When all ops for a device have been executed, the DAG is not - * placed back on the queue. When all ops in a DAG have been executed or an - * error occurs, the client is unblocked. - * - * See background_workers.c for the queue logic, everything else DAG is here. - */ - -#include "dag.h" - -#include -#include -#include -#include -#include - -#include "model.h" -#include "redisai.h" -#include "background_workers.h" -#include "rmutil/alloc.h" -#include "rmutil/args.h" -#include "run_info.h" -#include "stats.h" -#include "tensor.h" -#include "util/arr_rm_alloc.h" -#include "util/dict.h" -#include "util/queue.h" -#include "util/string_utils.h" - -/** - * Execution of a TENSORSET DAG step. - * If an error occurs, it is recorded in the DagOp struct. - * - * @param rinfo context in which RedisAI blocking commands operate. - * @param currentOp TENSORSET DagOp to be executed - * @return - */ -void RedisAI_DagRunSession_TensorSet_Step(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) { - RAI_Tensor *t = NULL; - const int parse_result = - RAI_parseTensorSetArgs(NULL, currentOp->argv, currentOp->argc, &t, 0, currentOp->err); - if (parse_result > 0) { - RedisModuleString *key_string = currentOp->outkeys[0]; - RAI_ContextWriteLock(rinfo); - AI_dictReplace(rinfo->dagTensorsContext, (void *)key_string, t); - RAI_ContextUnlock(rinfo); - currentOp->result = REDISMODULE_OK; - } else { - currentOp->result = REDISMODULE_ERR; - } -} - -/** - * Execution of a TENSORGET DAG step. - * If an error occurs, it is recorded in the DagOp struct. - * - * @param rinfo context in which RedisAI blocking commands operate. 
- * @param currentOp TENSORGET DagOp to be executed - * @return - */ -void RedisAI_DagRunSession_TensorGet_Step(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) { - RedisModuleString *key_string = currentOp->inkeys[0]; - RAI_Tensor *t = NULL; - RAI_ContextReadLock(rinfo); - currentOp->result = RAI_getTensorFromLocalContext(NULL, rinfo->dagTensorsContext, key_string, - &t, currentOp->err); - RAI_ContextUnlock(rinfo); - if (currentOp->result == REDISMODULE_OK) { - RAI_Tensor *outTensor = NULL; - // TODO: check tensor copy return value - RAI_TensorDeepCopy(t, &outTensor); - currentOp->outTensors = array_append(currentOp->outTensors, outTensor); - } -} - -/** - * Execution of a MODELRUN DAG step. - * If an error occurs, it is recorded in the DagOp struct. - * - * @param rinfo context in which RedisAI blocking commands operate. - * @param currentOp MODELRUN DagOp to be executed - * @return - */ -void RedisAI_DagRunSession_ModelRun_Step(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) { - uint n_inkeys = array_len(currentOp->inkeys); - uint n_outkeys = array_len(currentOp->outkeys); - - RAI_ContextReadLock(rinfo); - - RAI_Tensor *inputTensors[n_inkeys]; - for (uint i = 0; i < n_inkeys; i++) { - RAI_Tensor *inputTensor; - const int get_result = RAI_getTensorFromLocalContext( - NULL, rinfo->dagTensorsContext, currentOp->inkeys[i], &inputTensor, currentOp->err); - if (get_result == REDISMODULE_ERR) { - // We check for this outside the function - // this check cannot be covered by tests - currentOp->result = REDISMODULE_ERR; - RAI_ContextUnlock(rinfo); - return; - } - inputTensors[i] = inputTensor; - } - - RAI_ContextUnlock(rinfo); - - for (uint i = 0; i < n_inkeys; i++) { - const char *opname = NULL; - if (currentOp->mctx->model->inputs) { - opname = currentOp->mctx->model->inputs[i]; - } - RAI_ModelRunCtxAddInput(currentOp->mctx, opname, inputTensors[i]); - } - - for (uint i = 0; i < n_outkeys; i++) { - const char *opname = NULL; - if (currentOp->mctx->model->inputs) { - opname = currentOp->mctx->model->outputs[i]; - } - RAI_ModelRunCtxAddOutput(currentOp->mctx, opname); - } - - RAI_ModelRunCtx *mctxs[1]; - mctxs[0] = currentOp->mctx; - const long long start = ustime(); - int result = RAI_ModelRun(mctxs, 1, currentOp->err); - const long long end = ustime(); - - if (result == REDISMODULE_ERR) { - currentOp->result = result; - return; - } - - RAI_ContextWriteLock(rinfo); - - currentOp->duration_us = end - start; - - const size_t noutputs = RAI_ModelRunCtxNumOutputs(currentOp->mctx); - for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) { - RAI_Tensor *tensor = RAI_ModelRunCtxOutputTensor(currentOp->mctx, outputNumber); - RedisModuleString *key_string = currentOp->outkeys[outputNumber]; - tensor = tensor ? RAI_TensorGetShallowCopy(tensor) : NULL; - AI_dictReplace(rinfo->dagTensorsContext, (void *)key_string, tensor); - } - - currentOp->result = result; - - RAI_ContextUnlock(rinfo); - - return; -} - -/** - * Execution of a batched (MODELRUN) DAG step. - * If an error occurs, it is recorded in all DagOp structs. - * - * @param batched_rinfo array of contexts in which RedisAI blocking commands operate. 
- * @param currentOps MODELRUN DagOps to be executed - * @return - */ -void RedisAI_BatchedDagRunSession_ModelRun_Step(RedisAI_RunInfo **batched_rinfo, - RAI_DagOp **currentOps) { - - int n_rinfo = array_len(batched_rinfo); - - RAI_ModelRunCtx *mctxs[n_rinfo]; - - for (int i = 0; i < n_rinfo; i++) { - RedisAI_RunInfo *rinfo = batched_rinfo[i]; - RAI_DagOp *currentOp = currentOps[i]; - - uint n_inkeys = array_len(currentOp->inkeys); - uint n_outkeys = array_len(currentOp->outkeys); - - RAI_ContextReadLock(rinfo); - - RAI_Tensor *inputTensors[n_inkeys]; - for (uint i = 0; i < n_inkeys; i++) { - RAI_Tensor *inputTensor; - const int get_result = RAI_getTensorFromLocalContext( - NULL, rinfo->dagTensorsContext, currentOp->inkeys[i], &inputTensor, currentOp->err); - if (get_result == REDISMODULE_ERR) { - // We check for this outside the function - // this check cannot be covered by tests - currentOp->result = REDISMODULE_ERR; - RAI_ContextUnlock(rinfo); - return; - } - inputTensors[i] = inputTensor; - } - - RAI_ContextUnlock(rinfo); - - for (uint i = 0; i < n_inkeys; i++) { - const char *input_name = NULL; - if (currentOp->mctx->model->inputs) { - input_name = currentOp->mctx->model->inputs[i]; - } - RAI_ModelRunCtxAddInput(currentOp->mctx, input_name, inputTensors[i]); - } - - for (uint i = 0; i < n_outkeys; i++) { - const char *output_name = NULL; - if (currentOp->mctx->model->outputs) { - output_name = currentOp->mctx->model->outputs[i]; - } - RAI_ModelRunCtxAddOutput(currentOp->mctx, output_name); - } - - mctxs[i] = currentOp->mctx; - } - - RAI_Error err = {0}; - const long long start = ustime(); - int result = RAI_ModelRun(mctxs, n_rinfo, &err); - const long long end = ustime(); - - long long duration = end - start; - - for (int i = 0; i < n_rinfo; i++) { - RedisAI_RunInfo *rinfo = batched_rinfo[i]; - RAI_DagOp *currentOp = currentOps[i]; - - if (result == REDISMODULE_ERR) { - currentOp->result = result; - RAI_SetError(currentOp->err, err.code, err.detail); - continue; - } - - RAI_ContextWriteLock(rinfo); - - currentOp->duration_us = duration; - - const size_t noutputs = RAI_ModelRunCtxNumOutputs(currentOp->mctx); - for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) { - RAI_Tensor *tensor = RAI_ModelRunCtxOutputTensor(currentOp->mctx, outputNumber); - RedisModuleString *key_string = currentOp->outkeys[outputNumber]; - tensor = tensor ? RAI_TensorGetShallowCopy(tensor) : NULL; - AI_dictReplace(rinfo->dagTensorsContext, (void *)key_string, tensor); - } - - currentOp->result = result; - - RAI_ContextUnlock(rinfo); - } - - if (result == REDISMODULE_ERR) { - RAI_ClearError(&err); - } - - return; -} - -/** - * Execution of a SCRIPTRUN DAG step. - * If an error occurs, it is recorded in the DagOp struct. - * - * @param rinfo context in which RedisAI blocking commands operate. 
- * @param currentOp SCRIPTRUN DagOp to be executed - * @return - */ -void RedisAI_DagRunSession_ScriptRun_Step(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) { - uint n_inkeys = array_len(currentOp->inkeys); - uint n_outkeys = array_len(currentOp->outkeys); - - RAI_ContextReadLock(rinfo); - - RAI_Tensor *inputTensors[n_inkeys]; - for (uint i = 0; i < n_inkeys; i++) { - RAI_Tensor *inputTensor; - const int get_result = RAI_getTensorFromLocalContext( - NULL, rinfo->dagTensorsContext, currentOp->inkeys[i], &inputTensor, currentOp->err); - if (get_result == REDISMODULE_ERR) { - // We check for this outside the function - // this check cannot be covered by tests - currentOp->result = REDISMODULE_ERR; - RAI_ContextUnlock(rinfo); - return; - } - inputTensors[i] = inputTensor; - } - - RAI_ContextUnlock(rinfo); - - for (uint i = 0; i < n_inkeys; i++) { - RAI_ScriptRunCtxAddInput(currentOp->sctx, inputTensors[i], currentOp->err); - } - - for (uint i = 0; i < n_outkeys; i++) { - RAI_ScriptRunCtxAddOutput(currentOp->sctx); - } - - const long long start = ustime(); - int result = RAI_ScriptRun(currentOp->sctx, currentOp->err); - const long long end = ustime(); - - RAI_ContextWriteLock(rinfo); - - const size_t noutputs = RAI_ScriptRunCtxNumOutputs(currentOp->sctx); - for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) { - RAI_Tensor *tensor = RAI_ScriptRunCtxOutputTensor(currentOp->sctx, outputNumber); - RedisModuleString *key_string = currentOp->outkeys[outputNumber]; - tensor = tensor ? RAI_TensorGetShallowCopy(tensor) : NULL; - AI_dictReplace(rinfo->dagTensorsContext, (void *)key_string, tensor); - } - - currentOp->result = result; - currentOp->duration_us = end - start; - - RAI_ContextUnlock(rinfo); - - return; -} - -size_t RAI_DagOpBatchSize(RAI_DagOp *op, AI_dict *opTensorsContext) { - if (op->mctx == NULL) { - return -1; - } - - // size_t ninputs = RAI_ModelRunCtxNumInputs(op->mctx); - size_t ninputs = array_len(op->inkeys); - - int batchsize = 0; - - if (ninputs == 0) { - return batchsize; - } - - for (size_t i = 0; i < ninputs; i++) { - RAI_Tensor *input; - RAI_getTensorFromLocalContext(NULL, opTensorsContext, op->inkeys[i], &input, op->err); - // We are expecting input != NULL, because we only reach this function if all inputs - // are available in context for the current dagOp. We could be more defensive eventually. 
- - if (i == 0) { - batchsize = RAI_TensorDim(input, 0); - continue; - } - - if (batchsize != RAI_TensorDim(input, 0)) { - batchsize = 0; - break; - } - } - - return batchsize; -} - -int RAI_DagOpBatchable(RAI_DagOp *op1, AI_dict *op1TensorsContext, RAI_DagOp *op2, - AI_dict *op2TensorsContext) { - - if (op1->mctx == NULL || op2->mctx == NULL) { - return 0; - } - - if (op1->mctx->model != op2->mctx->model) { - return 0; - } - - const int ninputs1 = RAI_ModelRunCtxNumInputs(op1->mctx); - const int ninputs2 = RAI_ModelRunCtxNumInputs(op2->mctx); - - if (ninputs1 != ninputs2) { - return 0; - } - - for (int i = 0; i < ninputs1; i++) { - RAI_Tensor *input1; - RAI_getTensorFromLocalContext(NULL, op1TensorsContext, op1->inkeys[i], &input1, op1->err); - - RAI_Tensor *input2; - RAI_getTensorFromLocalContext(NULL, op2TensorsContext, op2->inkeys[i], &input2, op2->err); - - if (input1 == NULL || input2 == NULL) { - return 0; - } - - int ndims1 = RAI_TensorNumDims(input1); - int ndims2 = RAI_TensorNumDims(input2); - - if (ndims1 != ndims2) { - return 0; - } - - if (ndims1 == 0) { - continue; - } - - for (int j = 1; j < ndims1; j++) { - int dim1 = RAI_TensorDim(input1, j); - int dim2 = RAI_TensorDim(input2, j); - if (dim1 != dim2) { - return 0; - } - } - } - - return 1; -} - -int RedisAI_DagDeviceComplete(RedisAI_RunInfo *rinfo) { - return rinfo->dagDeviceCompleteOpCount == rinfo->dagDeviceOpCount; -} - -int RedisAI_DagComplete(RedisAI_RunInfo *rinfo) { - int completeOpCount = __atomic_load_n(rinfo->dagCompleteOpCount, __ATOMIC_RELAXED); - - return completeOpCount == rinfo->dagOpCount; -} - -RAI_DagOp *RedisAI_DagCurrentOp(RedisAI_RunInfo *rinfo) { - if (rinfo->dagDeviceCompleteOpCount == rinfo->dagDeviceOpCount) { - return NULL; - } - - return rinfo->dagDeviceOps[rinfo->dagDeviceCompleteOpCount]; -} - -void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, int *currentOpReady, - int *currentOpBatchable) { - RAI_DagOp *currentOp_ = RedisAI_DagCurrentOp(rinfo); - - *currentOpReady = 0; - *currentOpBatchable = 0; - - if (currentOp_ == NULL) { - return; - } - - if (currentOp_->mctx && currentOp_->mctx->model->opts.batchsize > 0) { - *currentOpBatchable = 1; - } - - uint n_inkeys = array_len(currentOp_->inkeys); - - RAI_ContextReadLock(rinfo); - - *currentOpReady = 1; - for (int i = 0; i < n_inkeys; i++) { - if (AI_dictFind(rinfo->dagTensorsContext, currentOp_->inkeys[i]) == NULL) { - RAI_ContextUnlock(rinfo); - *currentOpReady = 0; - return; - } - } - - RAI_ContextUnlock(rinfo); -} - -void RedisAI_DagOpBatchInfo(RedisAI_RunInfo *rinfo, RAI_DagOp *op, size_t *batchsize, - size_t *minbatchsize, size_t *minbatchtimeout, size_t *inbatchsize) { - *batchsize = 0; - *minbatchsize = 0; - *minbatchtimeout = 0; - *inbatchsize = 0; - - RAI_ContextReadLock(rinfo); - - if (op->mctx) { - *batchsize = op->mctx->model->opts.batchsize; - *minbatchsize = op->mctx->model->opts.minbatchsize; - *minbatchtimeout = op->mctx->model->opts.minbatchtimeout; - *inbatchsize = RAI_DagOpBatchSize(op, rinfo->dagTensorsContext); - } - - RAI_ContextUnlock(rinfo); -} - -void RedisAI_DagOpBatchingMatch(RedisAI_RunInfo *rinfo1, RAI_DagOp *op1, RedisAI_RunInfo *rinfo2, - RAI_DagOp *op2, int *batched, size_t *inbatchsize) { - *batched = 0; - *inbatchsize = 0; - - RAI_ContextReadLock(rinfo2); - - if (op2->mctx) { - int match = - RAI_DagOpBatchable(op1, rinfo1->dagTensorsContext, op2, rinfo2->dagTensorsContext); - - if (match) { - *batched = 1; - *inbatchsize = RAI_DagOpBatchSize(op2, rinfo2->dagTensorsContext); - } - } - - 
RAI_ContextUnlock(rinfo2); -} - -void RedisAI_DagRunSessionStep(RedisAI_RunInfo *rinfo, const char *devicestr) { - RAI_DagOp *currentOp = RedisAI_DagCurrentOp(rinfo); - - switch (currentOp->commandType) { - case REDISAI_DAG_CMD_TENSORSET: { - RedisAI_DagRunSession_TensorSet_Step(rinfo, currentOp); - break; - } - case REDISAI_DAG_CMD_TENSORGET: { - RedisAI_DagRunSession_TensorGet_Step(rinfo, currentOp); - break; - } - case REDISAI_DAG_CMD_MODELRUN: { - RedisAI_DagRunSession_ModelRun_Step(rinfo, currentOp); - break; - } - case REDISAI_DAG_CMD_SCRIPTRUN: { - RedisAI_DagRunSession_ScriptRun_Step(rinfo, currentOp); - break; - } - default: { - /* unsupported DAG's command */ - RAI_SetError(currentOp->err, RAI_EDAGRUN, "ERR unsupported command within DAG"); - currentOp->result = REDISMODULE_ERR; - break; - } - } - - if (currentOp->result != REDISMODULE_OK) { - __atomic_store_n(rinfo->dagError, 1, __ATOMIC_RELAXED); - } -} - -void RedisAI_BatchedDagRunSessionStep(RedisAI_RunInfo **batched_rinfo, const char *devicestr) { - // Assumption: ops are guaranteed to be all MODELRUN - - int n_ops = array_len(batched_rinfo); - - assert(n_ops > 1); - - RAI_DagOp *currentOps[n_ops]; - - for (int i = 0; i < n_ops; i++) { - RedisAI_RunInfo *rinfo = batched_rinfo[i]; - - RAI_DagOp *currentOp = RedisAI_DagCurrentOp(rinfo); - - currentOps[i] = currentOp; - } - - RedisAI_BatchedDagRunSession_ModelRun_Step(batched_rinfo, currentOps); - - for (int i = 0; i < n_ops; i++) { - RedisAI_RunInfo *rinfo = batched_rinfo[i]; - RAI_DagOp *currentOp = currentOps[i]; - - if (currentOp->result != REDISMODULE_OK) { - __atomic_store_n(rinfo->dagError, 1, __ATOMIC_RELAXED); - } - } - - return; -} - -int RedisAI_DagRun_Reply(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { - REDISMODULE_NOT_USED(argv); - REDISMODULE_NOT_USED(argc); - RedisAI_RunInfo *rinfo = RedisModule_GetBlockedClientPrivateData(ctx); - - if (RAI_GetErrorCode(rinfo->err) == RAI_EDAGRUN) { - RedisModule_ReplyWithError(ctx, RAI_GetErrorOneLine(rinfo->err)); - RAI_FreeRunInfo(rinfo); - return REDISMODULE_ERR; - } - int dag_error = 0; - char *detail_oneline; - - size_t n_dagOps = array_len(rinfo->dagOps); - - if (*rinfo->timedOut) { - RedisModule_ReplyWithSimpleString(ctx, "TIMEDOUT"); - RAI_FreeRunInfo(rinfo); - return REDISMODULE_OK; - } - - if (rinfo->single_op_dag == 0) { - RedisModule_ReplyWithArray(ctx, REDISMODULE_POSTPONED_ARRAY_LEN); - } - - for (size_t i = 0; i < n_dagOps; i++) { - RAI_DagOp *currentOp = rinfo->dagOps[i]; - switch (currentOp->commandType) { - case REDISAI_DAG_CMD_TENSORSET: { - rinfo->dagReplyLength++; - if (currentOp->result == REDISMODULE_ERR) { - RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); - dag_error = 1; - } else if (currentOp->result == -1) { - RedisModule_ReplyWithSimpleString(ctx, "NA"); - } else { - RedisModule_ReplyWithSimpleString(ctx, "OK"); - } - break; - } - - case REDISAI_DAG_CMD_TENSORGET: { - rinfo->dagReplyLength++; - if (currentOp->result == REDISMODULE_ERR) { - RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); - dag_error = 1; - } else { - if (array_len(currentOp->outTensors) > 0) { - RAI_Tensor *tensor = currentOp->outTensors[0]; - RAI_parseTensorGetArgs(ctx, currentOp->argv, currentOp->argc, tensor); - } else if (currentOp->result == -1) { - RedisModule_ReplyWithSimpleString(ctx, "NA"); - } else { - RedisModule_ReplyWithError(ctx, "ERR error getting tensor from local context"); - } - } - break; - } - - case REDISAI_DAG_CMD_MODELRUN: { - rinfo->dagReplyLength++; - struct 
RedisAI_RunStats *rstats = NULL; - RAI_GetRunStats(currentOp->runkey, &rstats); - if (currentOp->result == REDISMODULE_ERR) { - RAI_SafeAddDataPoint(rstats, 0, 1, 1, 0); - RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); - dag_error = 1; - } else if (currentOp->result == -1) { - RedisModule_ReplyWithSimpleString(ctx, "NA"); - } else { - RAI_Tensor *t = NULL; - if (array_len(currentOp->mctx->outputs) > 0) { - t = currentOp->mctx->outputs[0].tensor; - } - int batch_size = 0; - if (t) { - batch_size = RAI_TensorDim(t, 0); - } - RAI_SafeAddDataPoint(rstats, currentOp->duration_us, 1, 0, batch_size); - RedisModule_ReplyWithSimpleString(ctx, "OK"); - } - break; - } - - case REDISAI_DAG_CMD_SCRIPTRUN: { - rinfo->dagReplyLength++; - struct RedisAI_RunStats *rstats = NULL; - RAI_GetRunStats(currentOp->runkey, &rstats); - if (currentOp->result == REDISMODULE_ERR) { - RAI_SafeAddDataPoint(rstats, 0, 1, 1, 0); - RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); - dag_error = 1; - } else if (currentOp->result == -1) { - RedisModule_ReplyWithSimpleString(ctx, "NA"); - } else { - int batch_size = 1; - RAI_SafeAddDataPoint(rstats, currentOp->duration_us, 1, 0, batch_size); - RedisModule_ReplyWithSimpleString(ctx, "OK"); - } - break; - } - - default: - /* no-op */ - break; - } - } - - if (dag_error) { - if (rinfo->single_op_dag == 0) { - RedisModule_ReplySetArrayLength(ctx, rinfo->dagReplyLength); - } - RAI_FreeRunInfo(rinfo); - return REDISMODULE_ERR; - } - - AI_dictIterator *persist_iter = AI_dictGetSafeIterator(rinfo->dagTensorsPersistedContext); - AI_dictEntry *persist_entry = AI_dictNext(persist_iter); - while (persist_entry) { - RedisModuleString *persist_key_name = AI_dictGetKey(persist_entry); - - AI_dictEntry *tensor_entry = AI_dictFind(rinfo->dagTensorsContext, persist_key_name); - - if (tensor_entry) { - RAI_Tensor *tensor = AI_dictGetVal(tensor_entry); - - if (tensor == NULL) { - persist_entry = AI_dictNext(persist_iter); - continue; - } - RedisModuleKey *key; - size_t persist_key_len; - const char *persist_key_str = - RedisModule_StringPtrLen(persist_key_name, &persist_key_len); - RedisModuleString *demangled_key_name = - RedisModule_CreateString(NULL, persist_key_str, persist_key_len - 4); - const int status = RAI_OpenKey_Tensor(ctx, demangled_key_name, &key, - REDISMODULE_READ | REDISMODULE_WRITE); - if (status == REDISMODULE_ERR) { - RedisModule_ReplyWithError(ctx, "ERR could not save tensor"); - rinfo->dagReplyLength++; - } else { - if (RedisModule_ModuleTypeSetValue(key, RedisAI_TensorType, - RAI_TensorGetShallowCopy(tensor)) != - REDISMODULE_OK) { - RedisModule_ReplyWithError(ctx, "ERR could not save tensor"); - rinfo->dagReplyLength++; - } - } - RedisModule_CloseKey(key); - RedisAI_ReplicateTensorSet(ctx, demangled_key_name, tensor); - RedisModule_FreeString(NULL, demangled_key_name); - } else { - RedisModule_ReplyWithError(ctx, - "ERR specified persistent key that was not used in DAG"); - rinfo->dagReplyLength++; - - RedisModule_Log(ctx, "warning", - "on DAGRUN's PERSIST pecified persistent key (%s) that " - "was not used on DAG. 
Logging all local context keys", - RedisModule_StringPtrLen(persist_key_name, NULL)); - AI_dictIterator *local_iter = AI_dictGetSafeIterator(rinfo->dagTensorsContext); - AI_dictEntry *local_entry = AI_dictNext(local_iter); - while (local_entry) { - RedisModuleString *localcontext_key_name = AI_dictGetKey(local_entry); - RedisModule_Log(ctx, "warning", "DAG's local context key (%s)", - RedisModule_StringPtrLen(localcontext_key_name, NULL)); - local_entry = AI_dictNext(local_iter); - } - AI_dictReleaseIterator(local_iter); - - for (size_t opN = 0; opN < array_len(rinfo->dagOps); opN++) { - RedisModule_Log(ctx, "warning", "DAG's op n# %zu - cmdType %d ( argc %d )", opN, - rinfo->dagOps[opN]->commandType, rinfo->dagOps[opN]->argc); - } - } - - persist_entry = AI_dictNext(persist_iter); - } - - AI_dictReleaseIterator(persist_iter); - - if (rinfo->single_op_dag == 0) { - RedisModule_ReplySetArrayLength(ctx, rinfo->dagReplyLength); - } - - RAI_FreeRunInfo(rinfo); - - return REDISMODULE_OK; -} - -/** - * DAGRUN Building Block to parse [LOAD key1 key2... ] - */ -int RAI_parseDAGLoadArgs(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, - AI_dict **loadedContextDict, AI_dict **localContextDict, - const char *chaining_operator) { - if (argc < 3) { - RedisModule_WrongArity(ctx); - return -1; - } - - long long n_keys; - const int retval = RedisModule_StringToLongLong(argv[1], &n_keys); - if (retval != REDISMODULE_OK || n_keys <= 0) { - RedisModule_ReplyWithError(ctx, - "ERR invalid or negative value found in number of keys to LOAD"); - return -1; - } - int number_loaded_keys = 0; - int separator_flag = 0; - size_t argpos = 2; - for (; (argpos <= argc - 1) && (number_loaded_keys < n_keys); argpos++) { - size_t arg_len; - const char *arg_string = RedisModule_StringPtrLen(argv[argpos], &arg_len); - if (!strcasecmp(arg_string, chaining_operator)) { - separator_flag = 1; - break; - } else { - RAI_Tensor *t; - RedisModuleKey *key; - const int status = - RAI_GetTensorFromKeyspace(ctx, argv[argpos], &key, &t, REDISMODULE_READ); - if (status == REDISMODULE_ERR) { - RedisModule_Log(ctx, "warning", - "on DAGRUN's LOAD could not load tensor %s from keyspace", - arg_string); - return -1; - } - RedisModule_CloseKey(key); - char buf[16]; - sprintf(buf, "%04d", 1); - RedisModuleString *dictKey = RedisModule_CreateStringFromString(NULL, argv[argpos]); - RedisModule_StringAppendBuffer(NULL, dictKey, buf, strlen(buf)); - - AI_dictAdd(*localContextDict, (void *)dictKey, (void *)RAI_TensorGetShallowCopy(t)); - AI_dictAdd(*loadedContextDict, (void *)dictKey, (void *)1); - RedisModule_FreeString(NULL, dictKey); - number_loaded_keys++; - } - } - if (number_loaded_keys != n_keys) { - RedisModule_WrongArity(ctx); - return -1; - } - return argpos; -} - -/** - * DAGRUN Building Block to parse [PERSIST key1 key2... 
] - */ -int RAI_parseDAGPersistArgs(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, - AI_dict **persistContextDict, const char *chaining_operator) { - if (argc < 3) { - RedisModule_WrongArity(ctx); - return -1; - } - - long long n_keys; - const int retval = RedisModule_StringToLongLong(argv[1], &n_keys); - if (retval != REDISMODULE_OK || n_keys <= 0) { - RedisModule_ReplyWithError( - ctx, "ERR invalid or negative value found in number of keys to PERSIST"); - return -1; - } - - int number_loaded_keys = 0; - int separator_flag = 0; - size_t argpos = 2; - for (; (argpos < argc) && (number_loaded_keys < n_keys); argpos++) { - const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); - if (!strcasecmp(arg_string, chaining_operator)) { - separator_flag = 1; - break; - } else { - AI_dictAdd(*persistContextDict, (void *)argv[argpos], (void *)1); - number_loaded_keys++; - } - } - if (number_loaded_keys != n_keys) { - RedisModule_WrongArity(ctx); - return -1; - } - return argpos; -} - -int RedisAI_DagRun_IsKeysPositionRequest_ReportKeys(RedisModuleCtx *ctx, RedisModuleString **argv, - int argc) { - for (size_t argpos = 1; argpos < argc; argpos++) { - const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); - if ((!strcasecmp(arg_string, "LOAD") || !strcasecmp(arg_string, "PERSIST")) && - (argpos + 1 < argc)) { - long long n_keys; - argpos++; - const int retval = RedisModule_StringToLongLong(argv[argpos], &n_keys); - if (retval != REDISMODULE_OK) { - return REDISMODULE_ERR; - } - argpos++; - if (n_keys > 0) { - size_t last_persist_argpos = n_keys + argpos; - for (; argpos < last_persist_argpos && argpos < argc; argpos++) { - RedisModule_KeyAtPos(ctx, argpos); - } - } - } - } - return REDISMODULE_OK; -} - -void RedisAI_FreeData(RedisModuleCtx *ctx, void *rinfo) {} - -void RedisAI_Disconnected(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc) { - RedisModule_Log(ctx, "warning", "Blocked client %p disconnected!", (void *)bc); -} - -// Parse the DAG run command and return REDISMODULE_OK only if it is a valid command to execute. 
-static int DAG_CommandParser(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, int dagMode, - RedisAI_RunInfo **rinfo_ptr) { - - if (argc < 4) { - RedisModule_WrongArity(ctx); - return REDISMODULE_ERR; - } - RedisAI_RunInfo *rinfo = *rinfo_ptr; - RAI_DagOp *currentDagOp = NULL; - RAI_InitDagOp(¤tDagOp); - rinfo->dagOps = array_append(rinfo->dagOps, currentDagOp); - - int persistFlag = 0; - int loadFlag = 0; - int chainingOpCount = 0; - - int argstart = 1; - - // If we're parsing a AI.MODELRUN or AI.SCRIPTRUN command, we don't - // expect there to be a chaining |> operator - if (!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL), "AI.MODELRUN") || - !strcasecmp(RedisModule_StringPtrLen(argv[0], NULL), "AI.SCRIPTRUN")) { - argstart = 0; - chainingOpCount++; - rinfo->single_op_dag = 1; - rinfo->single_device_dag = 1; - } - - for (size_t argpos = argstart; argpos <= argc - 1; argpos++) { - const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); - if (!strcasecmp(arg_string, "LOAD")) { - loadFlag = 1; - const int parse_result = RAI_parseDAGLoadArgs(ctx, &argv[argpos], argc - argpos, - &(rinfo->dagTensorsLoadedContext), - &(rinfo->dagTensorsContext), "|>"); - if (parse_result > 0) { - argpos += parse_result - 1; - } else { - RAI_FreeRunInfo(rinfo); - return REDISMODULE_ERR; - } - } else if (!strcasecmp(arg_string, "PERSIST")) { - if (dagMode == REDISAI_DAG_READONLY_MODE) { - RAI_FreeRunInfo(rinfo); - RedisModule_ReplyWithError(ctx, - "ERR PERSIST cannot be specified in a read-only DAG"); - return REDISMODULE_ERR; - } - persistFlag = 1; - const int parse_result = RAI_parseDAGPersistArgs( - ctx, &argv[argpos], argc - argpos, &(rinfo->dagTensorsPersistedContext), "|>"); - if (parse_result > 0) { - argpos += parse_result - 1; - } else { - RAI_FreeRunInfo(rinfo); - return REDISMODULE_ERR; - } - } else if (!strcasecmp(arg_string, "TIMEOUT")) { - if (!((chainingOpCount == 0) || (chainingOpCount == 1 && rinfo->single_op_dag == 1))) { - RAI_FreeRunInfo(rinfo); - RedisModule_ReplyWithError(ctx, "ERR TIMEOUT not allowed within a DAG command"); - return REDISMODULE_ERR; - } - if (argpos == argc - 1) { - RAI_FreeRunInfo(rinfo); - RedisModule_ReplyWithError(ctx, "ERR No value provided for TIMEOUT"); - return REDISMODULE_ERR; - } - long long timeout; - const int retval = RedisModule_StringToLongLong(argv[argpos + 1], &timeout); - if (retval != REDISMODULE_OK || timeout <= 0) { - RAI_FreeRunInfo(rinfo); - RedisModule_ReplyWithError(ctx, "ERR Invalid value for TIMEOUT"); - return REDISMODULE_ERR; - } - rinfo->timeout = timeout; - argpos += 1; - continue; - } else if (!strcasecmp(arg_string, "|>") && argpos < argc - 1) { - // on the first pipe operator, if LOAD or PERSIST were used, we've already - // allocated memory - if (chainingOpCount > 0) { - rinfo->dagOpCount++; - RAI_DagOp *currentDagOp = NULL; - RAI_InitDagOp(¤tDagOp); - rinfo->dagOps = array_append(rinfo->dagOps, currentDagOp); - } - chainingOpCount++; - } else { - if (!strcasecmp(arg_string, "AI.TENSORGET")) { - rinfo->dagOps[rinfo->dagOpCount]->commandType = REDISAI_DAG_CMD_TENSORGET; - rinfo->dagOps[rinfo->dagOpCount]->devicestr = "CPU"; - } - if (!strcasecmp(arg_string, "AI.TENSORSET")) { - rinfo->dagOps[rinfo->dagOpCount]->commandType = REDISAI_DAG_CMD_TENSORSET; - rinfo->dagOps[rinfo->dagOpCount]->devicestr = "CPU"; - } - if (!strcasecmp(arg_string, "AI.MODELRUN")) { - if (argc - 2 < argpos) { - RedisModule_WrongArity(ctx); - return REDISMODULE_ERR; - } - RAI_DagOp *currentOp = rinfo->dagOps[rinfo->dagOpCount]; - 
currentOp->commandType = REDISAI_DAG_CMD_MODELRUN; - RAI_Model *mto; - RedisModuleKey *modelKey; - const int status = RAI_GetModelFromKeyspace(ctx, argv[argpos + 1], &modelKey, &mto, - REDISMODULE_READ); - if (status == REDISMODULE_ERR) { - RAI_FreeRunInfo(rinfo); - return REDISMODULE_ERR; - } - currentOp->devicestr = mto->devicestr; - currentOp->runkey = argv[argpos + 1]; - currentOp->mctx = RAI_ModelRunCtxCreate(mto); - } - if (!strcasecmp(arg_string, "AI.SCRIPTRUN")) { - if (argc - 3 < argpos) { - RedisModule_WrongArity(ctx); - return REDISMODULE_ERR; - } - RAI_DagOp *currentOp = rinfo->dagOps[rinfo->dagOpCount]; - currentOp->commandType = REDISAI_DAG_CMD_SCRIPTRUN; - RAI_Script *sto; - RedisModuleKey *scriptKey; - const int status = RAI_GetScriptFromKeyspace(ctx, argv[argpos + 1], &scriptKey, - &sto, REDISMODULE_READ); - if (status == REDISMODULE_ERR) { - RAI_FreeRunInfo(rinfo); - return REDISMODULE_ERR; - } - currentOp->devicestr = sto->devicestr; - const char *functionName = RedisModule_StringPtrLen(argv[argpos + 2], NULL); - currentOp->runkey = argv[argpos + 1]; - currentOp->sctx = RAI_ScriptRunCtxCreate(sto, functionName); - } - if (RMAPI_FUNC_SUPPORTED(RedisModule_HoldString)) { - RedisModule_HoldString(NULL, argv[argpos]); - } else { - RedisModule_RetainString(NULL, argv[argpos]); - } - RAI_DagOp *currentOp = rinfo->dagOps[rinfo->dagOpCount]; - currentOp->argv = array_append(currentOp->argv, argv[argpos]); - currentOp->argc++; - } - } - - rinfo->dagOpCount = array_len(rinfo->dagOps); - - for (long long i = 0; i < array_len(rinfo->dagOps); i++) { - RAI_DagOp *currentOp = rinfo->dagOps[i]; - if (currentOp == NULL) - continue; - int parse_result; - switch (currentOp->commandType) { - case REDISAI_DAG_CMD_TENSORSET: - currentOp->outkeys = array_append(currentOp->outkeys, currentOp->argv[1]); - break; - case REDISAI_DAG_CMD_TENSORGET: - currentOp->inkeys = array_append(currentOp->inkeys, currentOp->argv[1]); - break; - case REDISAI_DAG_CMD_MODELRUN: - parse_result = RedisAI_Parse_ModelRun_RedisCommand( - NULL, currentOp->argv, currentOp->argc, &(currentOp->mctx), &(currentOp->inkeys), - &(currentOp->outkeys), &(currentOp->mctx->model), currentOp->err); - if (parse_result < 0) { - RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); - return REDISMODULE_ERR; - } - break; - case REDISAI_DAG_CMD_SCRIPTRUN: - parse_result = RedisAI_Parse_ScriptRun_RedisCommand( - NULL, currentOp->argv, currentOp->argc, &(currentOp->inkeys), &(currentOp->outkeys), - &(currentOp->sctx->variadic), currentOp->err); - if (parse_result < 0) { - RedisModule_ReplyWithError(ctx, currentOp->err->detail_oneline); - return REDISMODULE_ERR; - } - break; - } - } - - if (rinfo->single_op_dag) { - RAI_DagOp *op = rinfo->dagOps[0]; - RAI_Tensor *t; - RedisModuleKey *key; - for (size_t i = 0; i < array_len(op->inkeys); i++) { - RedisModuleString *inkey = op->inkeys[i]; - const int status = - RAI_GetTensorFromKeyspace(ctx, op->inkeys[i], &key, &t, REDISMODULE_READ); - if (status == REDISMODULE_ERR) { - RedisModule_Log(ctx, "warning", - "on DAGRUN's LOAD could not load tensor %s from keyspace", - RedisModule_StringPtrLen(op->inkeys[i], NULL)); - return REDISMODULE_ERR; - } - RedisModule_CloseKey(key); - char buf[16]; - sprintf(buf, "%04d", 1); - RedisModuleString *dictKey = RedisModule_CreateStringFromString(NULL, inkey); - RedisModule_StringAppendBuffer(NULL, dictKey, buf, strlen(buf)); - AI_dictAdd(rinfo->dagTensorsContext, (void *)dictKey, - (void *)RAI_TensorGetShallowCopy(t)); - 
AI_dictAdd(rinfo->dagTensorsLoadedContext, (void *)dictKey, (void *)1); - RedisModule_Free(dictKey); - } - - for (size_t i = 0; i < array_len(op->outkeys); i++) { - RedisModuleString *outkey = op->outkeys[i]; - AI_dictAdd(rinfo->dagTensorsPersistedContext, (void *)outkey, (void *)1); - } - } - - // At this point, we have built a sequence of DAG operations, each with its own - // input and output keys. The names of the keys will be used to look whether the - // inputs to a DAG operation have all been realized by previous operations (or if - // they are available as part of LOADed keys from keyspace). - // This strategy is fine if keys are not aliased, that is, if a command's output - // overwrites the key of a previous command. This would trick DAG operations into - // thinking that their input is ready when it's not. - // To overcome this, we make key names unique, so that names are not aliased. We - // mangle the names by appending a numerical suffix ":0001". After computing, we - // demangle the keys in order to persist them. - - AI_dict *mangled_tensors = AI_dictCreate(&AI_dictTypeHeapRStrings, NULL); - if (!mangled_tensors) { - return REDISMODULE_ERR; - } - - { - AI_dictIterator *iter = AI_dictGetSafeIterator(rinfo->dagTensorsLoadedContext); - AI_dictEntry *entry = AI_dictNext(iter); - while (entry) { - RedisModuleString *key = (RedisModuleString *)AI_dictGetKey(entry); - size_t key_len; - const char *key_str = RedisModule_StringPtrLen(key, &key_len); - RedisModuleString *demangled_key = RedisModule_CreateString(NULL, key_str, key_len - 4); - int *instance = RedisModule_Alloc(sizeof(int)); - *instance = 1; - AI_dictAdd(mangled_tensors, (void *)demangled_key, (void *)instance); - RedisModule_FreeString(NULL, demangled_key); - entry = AI_dictNext(iter); - } - AI_dictReleaseIterator(iter); - } - - for (long long i = 0; i < array_len(rinfo->dagOps); i++) { - RAI_DagOp *currentOp = rinfo->dagOps[i]; - - RedisModuleString **mangled_inkeys = - array_new(RedisModuleString *, array_len(currentOp->inkeys)); - for (long long j = 0; j < array_len(currentOp->inkeys); j++) { - RedisModuleString *key = currentOp->inkeys[j]; - AI_dictEntry *entry = AI_dictFind(mangled_tensors, key); - if (!entry) { - AI_dictRelease(mangled_tensors); - RedisModule_ReplyWithError(ctx, "ERR INPUT key cannot be found in DAG"); - return REDISMODULE_ERR; - } - int *instance = AI_dictGetVal(entry); - char buf[16]; - sprintf(buf, "%04d", *instance); - RedisModuleString *mangled_key = RedisModule_CreateStringFromString(NULL, key); - RedisModule_StringAppendBuffer(NULL, mangled_key, buf, strlen(buf)); - mangled_inkeys = array_append(mangled_inkeys, mangled_key); - } - - RedisModuleString **mangled_outkeys = - array_new(RedisModuleString *, array_len(currentOp->outkeys)); - for (long long j = 0; j < array_len(currentOp->outkeys); j++) { - RedisModuleString *key = currentOp->outkeys[j]; - AI_dictEntry *entry = AI_dictFind(mangled_tensors, key); - int *instance = NULL; - if (entry) { - instance = AI_dictGetVal(entry); - *instance += 1; - } else { - instance = RedisModule_Alloc(sizeof(int)); - *instance = 1; - AI_dictAdd(mangled_tensors, (void *)key, (void *)instance); - } - char buf[16]; - sprintf(buf, "%04d", *instance); - RedisModuleString *mangled_key = RedisModule_CreateStringFromString(NULL, key); - RedisModule_StringAppendBuffer(NULL, mangled_key, buf, strlen(buf)); - mangled_outkeys = array_append(mangled_outkeys, mangled_key); - } - - array_free(currentOp->inkeys); - array_free(currentOp->outkeys); - - currentOp->inkeys = 
mangled_inkeys; - currentOp->outkeys = mangled_outkeys; - } - - AI_dict *mangled_persisted = AI_dictCreate(&AI_dictTypeHeapRStrings, NULL); - { - AI_dictIterator *iter = AI_dictGetSafeIterator(rinfo->dagTensorsPersistedContext); - AI_dictEntry *entry = AI_dictNext(iter); - while (entry) { - RedisModuleString *key = (RedisModuleString *)AI_dictGetKey(entry); - AI_dictEntry *mangled_entry = AI_dictFind(mangled_tensors, key); - if (!mangled_entry) { - AI_dictRelease(mangled_tensors); - AI_dictRelease(mangled_persisted); - AI_dictReleaseIterator(iter); - RedisModule_ReplyWithError(ctx, "ERR PERSIST key cannot be found in DAG"); - return REDISMODULE_ERR; - } - int *instance = AI_dictGetVal(mangled_entry); - char buf[16]; - sprintf(buf, "%04d", *instance); - RedisModuleString *mangled_key = RedisModule_CreateStringFromString(NULL, key); - RedisModule_StringAppendBuffer(NULL, mangled_key, buf, strlen(buf)); - - AI_dictAdd(mangled_persisted, (void *)mangled_key, (void *)1); - entry = AI_dictNext(iter); - } - AI_dictReleaseIterator(iter); - } - - AI_dictRelease(rinfo->dagTensorsPersistedContext); - rinfo->dagTensorsPersistedContext = mangled_persisted; - - { - AI_dictIterator *iter = AI_dictGetSafeIterator(mangled_tensors); - AI_dictEntry *entry = AI_dictNext(iter); - while (entry) { - int *val = (int *)AI_dictGetVal(entry); - RedisModule_Free(val); - entry = AI_dictNext(iter); - } - AI_dictReleaseIterator(iter); - } - AI_dictRelease(mangled_tensors); - mangled_tensors = NULL; - - for (long long i = 0; i < array_len(rinfo->dagOps); i++) { - if (rinfo->dagOps[i]->devicestr == NULL) { - rinfo->dagOps[i]->devicestr = "CPU"; - } - } - return REDISMODULE_OK; -} - -// Add Shallow copies of the DAG run info to the devices' queues. -// Return REDISMODULE_OK in case of success, REDISMODULE_ERR if (at least) one insert op had failed. -static int DAG_InsertDAGToQueue(RedisAI_RunInfo *rinfo) { - const char **devices = array_new(const char *, 10); - - for (long long i = 0; i < array_len(rinfo->dagOps); i++) { - const char *devicestr = rinfo->dagOps[i]->devicestr; - bool found = false; - for (long long j = 0; j < array_len(devices); j++) { - if (strcasecmp(devicestr, devices[j]) == 0) { - found = true; - break; - } - } - if (!found) { - devices = array_append(devices, devicestr); - } - } - - size_t ndevices = array_len(devices); - RedisAI_RunInfo **rinfo_copies = array_new(RedisAI_RunInfo *, ndevices); - - for (long long i = 0; i < ndevices; i++) { - RedisAI_RunInfo *rinfo_copy; - RAI_ShallowCopyDagRunInfo(&rinfo_copy, rinfo); - rinfo_copies = array_append(rinfo_copies, rinfo_copy); - } - - for (long long i = 0; i < ndevices; i++) { - RedisAI_RunInfo *rinfo_copy = rinfo_copies[i]; - for (long long j = 0; j < rinfo_copy->dagOpCount; j++) { - if (strcasecmp(rinfo_copy->dagOps[j]->devicestr, devices[i]) == 0) { - rinfo_copy->dagDeviceOps = - array_append(rinfo_copy->dagDeviceOps, rinfo_copy->dagOps[j]); - } - } - rinfo_copy->dagDeviceOpCount = array_len(rinfo_copy->dagDeviceOps); - } - - RunQueueInfo **run_queues_info = array_new(RunQueueInfo *, ndevices); - for (long long i = 0; i < ndevices; i++) { - const char *devicestr = devices[i]; - RunQueueInfo *run_queue_info = NULL; - if (ensureRunQueue(devicestr, &run_queue_info) == REDISMODULE_ERR) { - // A device run queue was not created properly, so we free everything, - // set an error and finish. 
- array_free(devices); - for (int j = 0; j < ndevices; j++) { - RAI_DagRunInfoFreeShallowCopy(rinfo_copies[j]); - } - array_free(rinfo_copies); - array_free(run_queues_info); - RAI_SetError(rinfo->err, RAI_EDAGRUN, "ERR Queue not initialized for device"); - rinfo->OnFinish((RedisAI_OnFinishCtx *)rinfo, rinfo->private_data); - return REDISMODULE_ERR; - } - run_queues_info = array_append(run_queues_info, run_queue_info); - } - for (long long i = 0; i < ndevices; i++) { - RedisAI_RunInfo *rinfo_copy = rinfo_copies[i]; - RunQueueInfo *run_queue_info = run_queues_info[i]; - gettimeofday(&rinfo_copy->queuingTime, NULL); - - pthread_mutex_lock(&run_queue_info->run_queue_mutex); - queuePush(run_queue_info->run_queue, rinfo_copy); - pthread_cond_signal(&run_queue_info->queue_condition_var); - pthread_mutex_unlock(&run_queue_info->run_queue_mutex); - } - - array_free(devices); - array_free(rinfo_copies); - array_free(run_queues_info); - return REDISMODULE_OK; -} - -void DAG_ReplyAndUnblock(RedisAI_OnFinishCtx *ctx, void *private_data) { - - RedisAI_RunInfo *rinfo = (RedisAI_RunInfo *)ctx; - if (rinfo->client) - RedisModule_UnblockClient(rinfo->client, rinfo); -} - -int RedisAI_ProcessDagRunCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, - int dagMode) { - - int flags = RedisModule_GetContextFlags(ctx); - bool blocking_not_allowed = (flags & (REDISMODULE_CTX_FLAGS_MULTI | REDISMODULE_CTX_FLAGS_LUA)); - if (blocking_not_allowed) - return RedisModule_ReplyWithError( - ctx, "ERR Cannot run RedisAI command within a transaction or a LUA script"); - RedisAI_RunInfo *rinfo = NULL; - if (RAI_InitRunInfo(&rinfo) == REDISMODULE_ERR) { - RedisModule_ReplyWithError( - ctx, "ERR Unable to allocate the memory and initialise the RedisAI_RunInfo structure"); - return REDISMODULE_ERR; - } - // Parse DAG string command and store the data in rinfo obj. - int status = DAG_CommandParser(ctx, argv, argc, dagMode, &rinfo); - if (status == REDISMODULE_ERR) - return REDISMODULE_OK; - // Block the client before adding rinfo to the run queues (sync call). 
- rinfo->client = RedisModule_BlockClient(ctx, RedisAI_DagRun_Reply, NULL, RedisAI_FreeData, 0); - RedisModule_SetDisconnectCallback(rinfo->client, RedisAI_Disconnected); - rinfo->OnFinish = DAG_ReplyAndUnblock; - return DAG_InsertDAGToQueue(rinfo); -} diff --git a/src/err.c b/src/err.c index b0a5bb176..f911eafb1 100644 --- a/src/err.c +++ b/src/err.c @@ -57,13 +57,13 @@ int RAI_InitError(RAI_Error **result) { RAI_Error *err; err = (RAI_Error *)RedisModule_Calloc(1, sizeof(RAI_Error)); if (!err) { - return 1; + return REDISMODULE_ERR; } err->code = 0; err->detail = NULL; err->detail_oneline = NULL; *result = err; - return 0; + return REDISMODULE_OK; } void RAI_ClearError(RAI_Error *err) { diff --git a/src/model.c b/src/model.c index cc33fa8f4..481fb59c2 100644 --- a/src/model.c +++ b/src/model.c @@ -19,6 +19,7 @@ #include "util/dict.h" #include "util/string_utils.h" #include +#include "DAG/dag.h" RedisModuleType *RedisAI_ModelType = NULL; @@ -327,6 +328,8 @@ RAI_Model *RAI_ModelCreate(RAI_Backend backend, const char *devicestr, RedisModu } else { model->tag = RedisModule_CreateString(NULL, "", 0); } + model->ninputs = ninputs; + model->noutputs = noutputs; } return model; @@ -373,76 +376,6 @@ void RAI_ModelFree(RAI_Model *model, RAI_Error *err) { RedisModule_Free(model); } -RAI_ModelRunCtx *RAI_ModelRunCtxCreate(RAI_Model *model) { -#define PARAM_INITIAL_SIZE 10 - RAI_ModelRunCtx *mctx = RedisModule_Calloc(1, sizeof(*mctx)); - mctx->model = RAI_ModelGetShallowCopy(model); - mctx->inputs = array_new(RAI_ModelCtxParam, PARAM_INITIAL_SIZE); - mctx->outputs = array_new(RAI_ModelCtxParam, PARAM_INITIAL_SIZE); - return mctx; -#undef PARAM_INITIAL_SIZE -} - -static int Model_RunCtxAddParam(RAI_ModelRunCtx *mctx, RAI_ModelCtxParam **paramArr, - const char *name, RAI_Tensor *tensor) { - - RAI_ModelCtxParam param = { - .name = name, - .tensor = tensor ? 
RAI_TensorGetShallowCopy(tensor) : NULL, - }; - *paramArr = array_append(*paramArr, param); - return 1; -} - -int RAI_ModelRunCtxAddInput(RAI_ModelRunCtx *mctx, const char *inputName, RAI_Tensor *inputTensor) { - return Model_RunCtxAddParam(mctx, &mctx->inputs, inputName, inputTensor); -} - -int RAI_ModelRunCtxAddOutput(RAI_ModelRunCtx *mctx, const char *outputName) { - return Model_RunCtxAddParam(mctx, &mctx->outputs, outputName, NULL); -} - -size_t RAI_ModelRunCtxNumInputs(RAI_ModelRunCtx *mctx) { return array_len(mctx->inputs); } - -size_t RAI_ModelRunCtxNumOutputs(RAI_ModelRunCtx *mctx) { return array_len(mctx->outputs); } - -RAI_Tensor *RAI_ModelRunCtxInputTensor(RAI_ModelRunCtx *mctx, size_t index) { - assert(RAI_ModelRunCtxNumInputs(mctx) > index && index >= 0); - return mctx->inputs[index].tensor; -} - -RAI_Tensor *RAI_ModelRunCtxOutputTensor(RAI_ModelRunCtx *mctx, size_t index) { - assert(RAI_ModelRunCtxNumOutputs(mctx) > index && index >= 0); - return mctx->outputs[index].tensor; -} - -void RAI_ModelRunCtxFree(RAI_ModelRunCtx *mctx, int freeTensors) { - if (freeTensors) { - for (size_t i = 0; i < array_len(mctx->inputs); ++i) { - RAI_TensorFree(mctx->inputs[i].tensor); - } - - for (size_t i = 0; i < array_len(mctx->outputs); ++i) { - if (mctx->outputs[i].tensor) { - RAI_TensorFree(mctx->outputs[i].tensor); - } - } - } - - array_free(mctx->inputs); - array_free(mctx->outputs); - - RAI_Error err = {0}; - RAI_ModelFree(mctx->model, &err); - - if (err.code != RAI_OK) { - // TODO: take it to client somehow - RAI_ClearError(&err); - } - - RedisModule_Free(mctx); -} - int RAI_ModelRun(RAI_ModelRunCtx **mctxs, long long n, RAI_Error *err) { int ret; @@ -565,58 +498,26 @@ int RedisAI_ModelRun_IsKeysPositionRequest_ReportKeys(RedisModuleCtx *ctx, Redis return REDISMODULE_OK; } -int RedisAI_Parse_ModelRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, - RAI_ModelRunCtx **mctx, RedisModuleString ***inkeys, - RedisModuleString ***outkeys, RAI_Model **mto, - RAI_Error *error) { - if (argc < 3) { - RAI_SetError(error, RAI_EMODELRUN, - "ERR wrong number of arguments for 'AI.MODELRUN' command"); - return -1; - } - - const char *inputstr = RedisModule_StringPtrLen(argv[2], NULL); - if (strcasecmp(inputstr, "INPUTS")) { - RAI_SetError(error, RAI_EMODELRUN, "ERR INPUTS not specified"); - return -1; - } - - int is_input = 0; - size_t ninputs = 0; - size_t noutputs = 0; - int outputs_flag_count = 0; - size_t argpos = 3; - - for (; argpos <= argc - 1; argpos++) { - const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); - if (!strcasecmp(arg_string, "OUTPUTS") && outputs_flag_count == 0) { - is_input = 1; - outputs_flag_count = 1; - } else { - RedisModuleString *arg = RAI_HoldString(ctx, argv[argpos]); - if (is_input == 0) { - *inkeys = array_append(*inkeys, arg); - ninputs++; - } else { - *outkeys = array_append(*outkeys, arg); - noutputs++; - } - } - } - if ((*mto)->inputs && array_len((*mto)->inputs) != ninputs) { - RAI_SetError(error, RAI_EMODELRUN, - "Number of names given as INPUTS during MODELSET and keys given as " - "INPUTS here do not match"); - return -1; +RedisModuleType *RAI_ModelRedisType(void) { return RedisAI_ModelType; } + +int RAI_ModelRunAsync(RAI_ModelRunCtx *mctx, RAI_OnFinishCB ModelAsyncFinish, void *private_data) { + + RedisAI_RunInfo *rinfo = NULL; + if (RAI_InitRunInfo(&rinfo) == REDISMODULE_ERR) { + return REDISMODULE_ERR; } + rinfo->single_op_dag = 1; + rinfo->OnFinish = (RedisAI_OnFinishCB)ModelAsyncFinish; + rinfo->private_data = 
private_data; - if ((*mto)->outputs && array_len((*mto)->outputs) != noutputs) { - RAI_SetError(error, RAI_EMODELRUN, - "Number of names given as OUTPUTS during MODELSET and keys given as " - "OUTPUTS here do not match"); - return -1; + RAI_DagOp *op; + if (RAI_InitDagOp(&op) == REDISMODULE_ERR) { + return REDISMODULE_ERR; } - return argpos; -} + op->commandType = REDISAI_DAG_CMD_MODELRUN; + Dag_PopulateOp(op, mctx, NULL, NULL, NULL); -RedisModuleType *RAI_ModelRedisType(void) { return RedisAI_ModelType; } + rinfo->dagOps = array_append(rinfo->dagOps, op); + rinfo->dagOpCount = 1; + return DAG_InsertDAGToQueue(rinfo); +} diff --git a/src/model.h b/src/model.h index 51cc992c6..703528826 100644 --- a/src/model.h +++ b/src/model.h @@ -64,81 +64,6 @@ RAI_Model *RAI_ModelCreate(RAI_Backend backend, const char *devicestr, RedisModu */ void RAI_ModelFree(RAI_Model *model, RAI_Error *err); -/** - * Allocates the RAI_ModelRunCtx data structure required for async background - * work within `RedisAI_RunInfo` structure on RedisAI blocking commands - * - * @param model input model - * @return RAI_ModelRunCtx to be used within - */ -RAI_ModelRunCtx *RAI_ModelRunCtxCreate(RAI_Model *model); - -/** - * Frees the RAI_ModelRunCtx data structure used within for async background - * work - * - * @param mctx - * @param freeTensors free input and output tensors or leave them allocated - */ -void RAI_ModelRunCtxFree(RAI_ModelRunCtx *mctx, int freeTensors); - -/** - * Allocates a RAI_ModelCtxParam data structure, and enforces a shallow copy of - * the provided input tensor, adding it to the input tensors array of the - * RAI_ModelRunCtx. - * - * @param mctx input RAI_ModelRunCtx to add the input tensor - * @param inputName input tensor name - * @param inputTensor input tensor structure - * @return returns 1 on success ( always returns success ) - */ -int RAI_ModelRunCtxAddInput(RAI_ModelRunCtx *mctx, const char *inputName, RAI_Tensor *inputTensor); - -/** - * Allocates a RAI_ModelCtxParam data structure, and sets the tensor reference - * to NULL ( will be set after MODELRUN ), adding it to the outputs tensors - * array of the RAI_ModelRunCtx. - * - * @param mctx RAI_ModelRunCtx to add the output tensor - * @param outputName output tensor name - * @return returns 1 on success ( always returns success ) - */ -int RAI_ModelRunCtxAddOutput(RAI_ModelRunCtx *mctx, const char *outputName); - -/** - * Returns the total number of input tensors of the RAI_ModelRunCtx - * - * @param mctx RAI_ModelRunCtx - * @return the total number of input tensors of the RAI_ModelRunCtx - */ -size_t RAI_ModelRunCtxNumInputs(RAI_ModelRunCtx *mctx); - -/** - * Returns the total number of output tensors of the RAI_ModelCtxParam - * - * @param mctx RAI_ModelRunCtx - * @return the total number of output tensors of the RAI_ModelCtxParam - */ -size_t RAI_ModelRunCtxNumOutputs(RAI_ModelRunCtx *mctx); - -/** - * Get the RAI_Tensor at the input array index position - * - * @param mctx RAI_ModelRunCtx - * @param index input array index position - * @return RAI_Tensor - */ -RAI_Tensor *RAI_ModelRunCtxInputTensor(RAI_ModelRunCtx *mctx, size_t index); - -/** - * Get the RAI_Tensor at the output array index position - * - * @param mctx RAI_ModelRunCtx - * @param index input array index position - * @return RAI_Tensor - */ -RAI_Tensor *RAI_ModelRunCtxOutputTensor(RAI_ModelRunCtx *mctx, size_t index); - /** * Given the input array of mctxs, run the associated backend * session. 
If the input array of model context runs is larger than one, then @@ -213,28 +138,22 @@ int RAI_GetModelFromKeyspace(RedisModuleCtx *ctx, RedisModuleString *keyName, Re int RedisAI_ModelRun_IsKeysPositionRequest_ReportKeys(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); -/** - * Helper method to parse AI.MODELRUN arguments - * - * @param ctx Context in which Redis modules operate - * @param argv Redis command arguments, as an array of strings - * @param argc Redis command number of arguments - * @param mctx Destination Model context to store the parsed data - * @param outkeys array to store the parsed output keys - * @param mto model to run the session from - * @param error error data structure to store error message in the case of - * parsing failures - * @return processed number of arguments on success, or -1 if the parsing failed - */ -int RedisAI_Parse_ModelRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, - RAI_ModelRunCtx **mctx, RedisModuleString ***inkeys, - RedisModuleString ***outkeys, RAI_Model **mto, - RAI_Error *error); - /** * @brief Returns the redis module type representing a model. * @return redis module type representing a model. */ RedisModuleType *RAI_ModelRedisType(void); +/** + * Insert the ModelRunCtx to the run queues so it will run asynchronously. + * + * @param mctx ModelRunCtx to execute + * @param ModelAsyncFinish A callback that will be called when the execution is finished. + * @param private_data This is going to be sent to to the ModelAsyncFinish. + * @return REDISMODULE_OK if the mctx was insert to the queues successfully, REDISMODULE_ERR + * otherwise. + */ + +int RAI_ModelRunAsync(RAI_ModelRunCtx *mctx, RAI_OnFinishCB ModelAsyncFinish, void *private_data); + #endif /* SRC_MODEL_H_ */ diff --git a/src/modelRun_ctx.c b/src/modelRun_ctx.c new file mode 100644 index 000000000..37d8e697f --- /dev/null +++ b/src/modelRun_ctx.c @@ -0,0 +1,127 @@ + +#include "modelRun_ctx.h" + +static int _Model_RunCtxAddParam(RAI_ModelCtxParam **paramArr, const char *name, + RAI_Tensor *tensor) { + + RAI_ModelCtxParam param = { + .name = name, + .tensor = tensor ? 
RAI_TensorGetShallowCopy(tensor) : NULL, + }; + *paramArr = array_append(*paramArr, param); + return 1; +} + +RAI_ModelRunCtx *RAI_ModelRunCtxCreate(RAI_Model *model) { +#define PARAM_INITIAL_SIZE 10 + RAI_ModelRunCtx *mctx = RedisModule_Calloc(1, sizeof(*mctx)); + mctx->model = RAI_ModelGetShallowCopy(model); + mctx->inputs = array_new(RAI_ModelCtxParam, PARAM_INITIAL_SIZE); + mctx->outputs = array_new(RAI_ModelCtxParam, PARAM_INITIAL_SIZE); + return mctx; +#undef PARAM_INITIAL_SIZE +} + +int RAI_ModelRunCtxAddInput(RAI_ModelRunCtx *mctx, const char *inputName, RAI_Tensor *inputTensor) { + return _Model_RunCtxAddParam(&mctx->inputs, inputName, inputTensor); +} + +int RAI_ModelRunCtxAddOutput(RAI_ModelRunCtx *mctx, const char *outputName) { + return _Model_RunCtxAddParam(&mctx->outputs, outputName, NULL); +} + +size_t RAI_ModelRunCtxNumInputs(RAI_ModelRunCtx *mctx) { return array_len(mctx->inputs); } + +size_t RAI_ModelRunCtxNumOutputs(RAI_ModelRunCtx *mctx) { return array_len(mctx->outputs); } + +RAI_Tensor *RAI_ModelRunCtxInputTensor(RAI_ModelRunCtx *mctx, size_t index) { + assert(RAI_ModelRunCtxNumInputs(mctx) > index && index >= 0); + return mctx->inputs[index].tensor; +} + +RAI_Tensor *RAI_ModelRunCtxOutputTensor(RAI_ModelRunCtx *mctx, size_t index) { + assert(RAI_ModelRunCtxNumOutputs(mctx) > index && index >= 0); + return mctx->outputs[index].tensor; +} + +void RAI_ModelRunCtxFree(RAI_ModelRunCtx *mctx) { + for (size_t i = 0; i < array_len(mctx->inputs); ++i) { + RAI_TensorFree(mctx->inputs[i].tensor); + } + + for (size_t i = 0; i < array_len(mctx->outputs); ++i) { + if (mctx->outputs[i].tensor) { + RAI_TensorFree(mctx->outputs[i].tensor); + } + } + + array_free(mctx->inputs); + array_free(mctx->outputs); + + RAI_Error err = {0}; + RAI_ModelFree(mctx->model, &err); + + if (err.code != RAI_OK) { + // TODO: take it to client somehow + RAI_ClearError(&err); + } + RedisModule_Free(mctx); +} + +int RedisAI_Parse_ModelRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, + RAI_ModelRunCtx **mctx, RedisModuleString ***inkeys, + RedisModuleString ***outkeys, RAI_Model **mto, + RAI_Error *error) { + if (argc < 3) { + RAI_SetError(error, RAI_EMODELRUN, + "ERR wrong number of arguments for 'AI.MODELRUN' command"); + return -1; + } + + const char *inputstr = RedisModule_StringPtrLen(argv[2], NULL); + if (strcasecmp(inputstr, "INPUTS") != 0) { + RAI_SetError(error, RAI_EMODELRUN, "ERR INPUTS not specified"); + return -1; + } + + int is_input = 0; + size_t ninputs = 0; + size_t noutputs = 0; + int outputs_flag_count = 0; + size_t argpos = 3; + + for (; argpos <= argc - 1; argpos++) { + const char *arg_string = RedisModule_StringPtrLen(argv[argpos], NULL); + if (!strcasecmp(arg_string, "OUTPUTS") && outputs_flag_count == 0) { + is_input = 1; + outputs_flag_count = 1; + } else { + if (RMAPI_FUNC_SUPPORTED(RedisModule_HoldString)) { + RedisModule_HoldString(NULL, argv[argpos]); + } else { + RedisModule_RetainString(NULL, argv[argpos]); + } + if (is_input == 0) { + *inkeys = array_append(*inkeys, argv[argpos]); + ninputs++; + } else { + *outkeys = array_append(*outkeys, argv[argpos]); + noutputs++; + } + } + } + if ((*mto)->inputs && array_len((*mto)->inputs) != ninputs) { + RAI_SetError(error, RAI_EMODELRUN, + "Number of names given as INPUTS during MODELSET and keys given as " + "INPUTS here do not match"); + return -1; + } + + if ((*mto)->outputs && array_len((*mto)->outputs) != noutputs) { + RAI_SetError(error, RAI_EMODELRUN, + "Number of names given as OUTPUTS during MODELSET 
and keys given as " + "OUTPUTS here do not match"); + return -1; + } + return argpos; +} diff --git a/src/modelRun_ctx.h b/src/modelRun_ctx.h new file mode 100644 index 000000000..3fc62c75b --- /dev/null +++ b/src/modelRun_ctx.h @@ -0,0 +1,97 @@ +#pragma once + +#include "model.h" + +/** + * Allocates the RAI_ModelRunCtx data structure required for async background + * work within `RedisAI_RunInfo` structure on RedisAI blocking commands + * + * @param model input model + * @return RAI_ModelRunCtx to be used within + */ +RAI_ModelRunCtx *RAI_ModelRunCtxCreate(RAI_Model *model); + +/** + * Frees the RAI_ModelRunCtx data structure used within for async background + * work + * + * @param mctx + * @param freeTensors free input and output tensors or leave them allocated + */ +void RAI_ModelRunCtxFree(RAI_ModelRunCtx *mctxs); + +/** + * Allocates a RAI_ModelCtxParam data structure, and enforces a shallow copy of + * the provided input tensor, adding it to the input tensors array of the + * RAI_ModelRunCtx. + * + * @param mctx input RAI_ModelRunCtx to add the input tensor + * @param inputName input tensor name + * @param inputTensor input tensor structure + * @return returns 1 on success ( always returns success ) + */ +int RAI_ModelRunCtxAddInput(RAI_ModelRunCtx *mctx, const char *inputName, RAI_Tensor *inputTensor); + +/** + * Allocates a RAI_ModelCtxParam data structure, and sets the tensor reference + * to NULL ( will be set after MODELRUN ), adding it to the outputs tensors + * array of the RAI_ModelRunCtx. + * + * @param mctx RAI_ModelRunCtx to add the output tensor + * @param outputName output tensor name + * @return returns 1 on success ( always returns success ) + */ +int RAI_ModelRunCtxAddOutput(RAI_ModelRunCtx *mctx, const char *outputName); + +/** + * Returns the total number of input tensors of the RAI_ModelRunCtx + * + * @param mctx RAI_ModelRunCtx + * @return the total number of input tensors of the RAI_ModelRunCtx + */ +size_t RAI_ModelRunCtxNumInputs(RAI_ModelRunCtx *mctx); + +/** + * Returns the total number of output tensors of the RAI_ModelCtxParam + * + * @param mctx RAI_ModelRunCtx + * @return the total number of output tensors of the RAI_ModelCtxParam + */ +size_t RAI_ModelRunCtxNumOutputs(RAI_ModelRunCtx *mctx); + +/** + * Get the RAI_Tensor at the input array index position + * + * @param mctx RAI_ModelRunCtx + * @param index input array index position + * @return RAI_Tensor + */ +RAI_Tensor *RAI_ModelRunCtxInputTensor(RAI_ModelRunCtx *mctx, size_t index); + +/** + * Get the RAI_Tensor at the output array index position + * + * @param mctx RAI_ModelRunCtx + * @param index input array index position + * @return RAI_Tensor + */ +RAI_Tensor *RAI_ModelRunCtxOutputTensor(RAI_ModelRunCtx *mctx, size_t index); + +/** + * Helper method to parse AI.MODELRUN arguments + * + * @param ctx Context in which Redis modules operate + * @param argv Redis command arguments, as an array of strings + * @param argc Redis command number of arguments + * @param mctx Destination Model context to store the parsed data + * @param outkeys array to store the parsed output keys + * @param mto model to run the session from + * @param error error data structure to store error message in the case of + * parsing failures + * @return processed number of arguments on success, or -1 if the parsing failed + */ +// todo: remove this after DAG LLAPI is done. 
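For reference (not part of the patch), a minimal sketch of how a caller is expected to drive the RAI_ModelRunCtx API declared above. The model and tensors are assumed to have been fetched from the keyspace already, and the input/output names "a", "b" and "mul" are the ones the flow tests use for the TF graph:

#include "modelRun_ctx.h"

/* Illustrative only; `model`, `a` and `b` are assumed to be valid objects obtained elsewhere. */
static void example_build_model_run_ctx(RAI_Model *model, RAI_Tensor *a, RAI_Tensor *b) {
    RAI_ModelRunCtx *mctx = RAI_ModelRunCtxCreate(model); /* shallow-copies the model */
    RAI_ModelRunCtxAddInput(mctx, "a", a);                 /* shallow-copies each input tensor */
    RAI_ModelRunCtxAddInput(mctx, "b", b);
    RAI_ModelRunCtxAddOutput(mctx, "mul");                 /* output slot, filled in after the run */
    size_t ninputs = RAI_ModelRunCtxNumInputs(mctx);       /* == 2 */
    size_t noutputs = RAI_ModelRunCtxNumOutputs(mctx);     /* == 1 */
    (void)ninputs; (void)noutputs;
    RAI_ModelRunCtxFree(mctx);                             /* releases the tensors and the model shallow copy */
}
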
+int RedisAI_Parse_ModelRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, + RAI_ModelRunCtx **mctx, RedisModuleString ***inkeys, + RedisModuleString ***outkeys, RAI_Model **mto, + RAI_Error *error); diff --git a/src/redisai.c b/src/redisai.c index 5402537f6..f24999173 100644 --- a/src/redisai.c +++ b/src/redisai.c @@ -6,12 +6,13 @@ #define REDISMODULE_MAIN #include "redismodule.h" #include "tensor.h" - +#include "command_parser.h" #include "backends.h" #include "backends/util.h" #include "background_workers.h" -#include "dag.h" +#include "DAG/dag.h" #include "model.h" +#include "modelRun_ctx.h" #include "script.h" #include "stats.h" #include @@ -136,7 +137,6 @@ int RedisAI_TensorGet_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv const int parse_result = RAI_parseTensorGetArgs(ctx, argv, argc, t); - RedisModule_CloseKey(key); // if the number of parsed args is negative something went wrong if (parse_result < 0) { return REDISMODULE_ERR; @@ -572,12 +572,7 @@ int RedisAI_ModelRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, if (RedisModule_IsKeysPositionRequest(ctx)) { return RedisAI_ModelRun_IsKeysPositionRequest_ReportKeys(ctx, argv, argc); } - - if (argc < 3) - return RedisModule_WrongArity(ctx); - - // Convert The model run command into A DAG command that contains a single op. - return RedisAI_ProcessDagRunCommand(ctx, argv, argc, REDISAI_DAG_WRITE_MODE); + return RedisAI_ExecuteCommand(ctx, argv, argc, CMD_MODELRUN, false); } /** @@ -592,8 +587,8 @@ int RedisAI_ScriptRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv if (argc < 6) return RedisModule_WrongArity(ctx); - // Convert The script run command into A DAG command that contains a single op. - return RedisAI_ProcessDagRunCommand(ctx, argv, argc, REDISAI_DAG_WRITE_MODE); + // Convert The script run command into a DAG command that contains a single op. 
+ return RedisAI_ExecuteCommand(ctx, argv, argc, CMD_SCRIPTRUN, false); } /** @@ -906,7 +901,7 @@ int RedisAI_DagRun_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, i if (RedisModule_IsKeysPositionRequest(ctx)) { return RedisAI_DagRun_IsKeysPositionRequest_ReportKeys(ctx, argv, argc); } - return RedisAI_ProcessDagRunCommand(ctx, argv, argc, REDISAI_DAG_WRITE_MODE); + return RedisAI_ExecuteCommand(ctx, argv, argc, CMD_DAGRUN, false); } /** @@ -921,7 +916,7 @@ int RedisAI_DagRunRO_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv, if (RedisModule_IsKeysPositionRequest(ctx)) { return RedisAI_DagRun_IsKeysPositionRequest_ReportKeys(ctx, argv, argc); } - return RedisAI_ProcessDagRunCommand(ctx, argv, argc, REDISAI_DAG_READONLY_MODE); + return RedisAI_ExecuteCommand(ctx, argv, argc, CMD_DAGRUN, true); } #define EXECUTION_PLAN_FREE_MSG 100 @@ -983,6 +978,8 @@ static int RedisAI_RegisterApi(RedisModuleCtx *ctx) { REGISTER_API(ModelSerialize, ctx); REGISTER_API(ModelGetShallowCopy, ctx); REGISTER_API(ModelRedisType, ctx); + REGISTER_API(ModelRunAsync, ctx); + REGISTER_API(GetAsModelRunCtx, ctx) REGISTER_API(ScriptCreate, ctx); REGISTER_API(ScriptFree, ctx); diff --git a/src/redisai.h b/src/redisai.h index 0f613ce3c..44c6614d8 100644 --- a/src/redisai.h +++ b/src/redisai.h @@ -14,9 +14,12 @@ typedef struct RAI_Script RAI_Script; typedef struct RAI_ModelRunCtx RAI_ModelRunCtx; typedef struct RAI_ScriptRunCtx RAI_ScriptRunCtx; +typedef struct RAI_DAGRunCtx RAI_DAGRunCtx; typedef struct RAI_Error RAI_Error; typedef struct RAI_ModelOpts RAI_ModelOpts; typedef struct RAI_OnFinishCtx RAI_OnFinishCtx; + +typedef void (*RAI_OnFinishCB)(RAI_OnFinishCtx *ctx, void *private_data); #endif #define REDISAI_BACKEND_TENSORFLOW 0 @@ -44,7 +47,8 @@ typedef struct RAI_OnFinishCtx RAI_OnFinishCtx; #define RedisAI_ErrorCode_ESCRIPTFREE 13 #define RedisAI_ErrorCode_ETENSORSET 14 #define RedisAI_ErrorCode_ETENSORGET 15 -#define RedisAI_ErrorCode_EDAGRUN 17 +#define RedisAI_ErrorCode_EDAGRUN 16 +#define RedisAI_ErrorCode_EFINISHCTX 17 enum RedisAI_DataFmt { REDISAI_DATA_BLOB = 0, REDISAI_DATA_VALUES, REDISAI_DATA_NONE }; @@ -96,6 +100,9 @@ RAI_Model *MODULE_API_FUNC(RedisAI_ModelGetShallowCopy)(RAI_Model *model); int MODULE_API_FUNC(RedisAI_ModelSerialize)(RAI_Model *model, char **buffer, size_t *len, RAI_Error *err); RedisModuleType *MODULE_API_FUNC(RedisAI_ModelRedisType)(void); +int MODULE_API_FUNC(RedisAI_ModelRunAsync)(RAI_ModelRunCtx *mctxs, RAI_OnFinishCB DAGAsyncFinish, + void *private_data); +RAI_ModelRunCtx *MODULE_API_FUNC(RedisAI_GetAsModelRunCtx)(RAI_OnFinishCtx *ctx, RAI_Error *err); RAI_Script *MODULE_API_FUNC(RedisAI_ScriptCreate)(char *devicestr, char *tag, const char *scriptdef, RAI_Error *err); @@ -182,6 +189,8 @@ static int RedisAI_Initialize(RedisModuleCtx *ctx) { REDISAI_MODULE_INIT_FUNCTION(ctx, ModelGetShallowCopy); REDISAI_MODULE_INIT_FUNCTION(ctx, ModelSerialize); REDISAI_MODULE_INIT_FUNCTION(ctx, ModelRedisType); + REDISAI_MODULE_INIT_FUNCTION(ctx, ModelRunAsync); + REDISAI_MODULE_INIT_FUNCTION(ctx, GetAsModelRunCtx); REDISAI_MODULE_INIT_FUNCTION(ctx, ScriptCreate); REDISAI_MODULE_INIT_FUNCTION(ctx, ScriptFree); diff --git a/src/run_info.c b/src/run_info.c index 9dbb96ab5..579d956cf 100644 --- a/src/run_info.c +++ b/src/run_info.c @@ -9,6 +9,7 @@ #include "err.h" #include "model.h" +#include "modelRun_ctx.h" #include "model_struct.h" #include "redismodule.h" #include "script.h" @@ -163,7 +164,7 @@ void RAI_FreeDagOp(RAI_DagOp *dagOp) { array_free(dagOp->outTensors); if 
(dagOp->mctx) { - RAI_ModelRunCtxFree(dagOp->mctx, true); + RAI_ModelRunCtxFree(dagOp->mctx); } if (dagOp->sctx) { RAI_ScriptRunCtxFree(dagOp->sctx, true); @@ -328,3 +329,16 @@ int RAI_RunInfoBatchable(struct RAI_DagOp *op1, struct RAI_DagOp *op2) { return 1; } +RAI_ModelRunCtx *RAI_GetAsModelRunCtx(RedisAI_RunInfo *rinfo, RAI_Error *err) { + + RAI_DagOp *op = rinfo->dagOps[0]; + if (!rinfo->single_op_dag || !op->mctx) { + RAI_SetError(err, RedisAI_ErrorCode_EFINISHCTX, "Finish ctx is not a model run ctx"); + return NULL; + } + RAI_SetError(err, RAI_GetErrorCode(op->err), RAI_GetError(op->err)); + RAI_ModelRunCtx *mctx = op->mctx; + rinfo->dagOps[0]->mctx = NULL; + RAI_FreeRunInfo(rinfo); + return mctx; +} diff --git a/src/run_info.h b/src/run_info.h index 65aa315db..a53a40b1a 100644 --- a/src/run_info.h +++ b/src/run_info.h @@ -18,13 +18,13 @@ #include "util/arr_rm_alloc.h" #include "util/dict.h" -enum RedisAI_DAGCommands { +typedef enum DAGCommand { REDISAI_DAG_CMD_NONE = 0, REDISAI_DAG_CMD_TENSORSET, REDISAI_DAG_CMD_TENSORGET, REDISAI_DAG_CMD_MODELRUN, REDISAI_DAG_CMD_SCRIPTRUN -}; +} DAGCommand; enum RedisAI_DAGMode { REDISAI_DAG_READONLY_MODE = 0, REDISAI_DAG_WRITE_MODE }; @@ -76,7 +76,7 @@ typedef RedisAI_RunInfo RedisAI_OnFinishCtx; * @param ctx parameter includes the running results and errors. * @param private_data is an optional pointer to the user's private data. */ -typedef void (*RAI_OnFinishCB)(RedisAI_OnFinishCtx *ctx, void *private_data); +typedef void (*RedisAI_OnFinishCB)(RedisAI_OnFinishCtx *ctx, void *private_data); /** * This structure represents the context in which RedisAI blocking commands @@ -112,7 +112,7 @@ struct RedisAI_RunInfo { long long timeout; int *timedOut; struct timeval queuingTime; - RAI_OnFinishCB OnFinish; + RedisAI_OnFinishCB OnFinish; RedisAI_RunInfo *orig_copy; void *private_data; // This is going to be sent to the OnFinish callback. }; @@ -180,6 +180,14 @@ size_t RAI_RunInfoBatchSize(struct RAI_DagOp *op); */ int RAI_RunInfoBatchable(struct RAI_DagOp *op1, struct RAI_DagOp *op2); +/** + * Retreive the ModelRunCtx of a DAG runInfo that contains a single op of type + * MODELRUN. + * @param DAG runInfo. + * @return Pointer to the ModelRunCtx in DAG's single op. 
+ */ +RAI_ModelRunCtx *RAI_GetAsModelRunCtx(RedisAI_RunInfo *rinfo, RAI_Error *err); + #ifdef __cplusplus } // extern "C" #endif diff --git a/src/tensor.c b/src/tensor.c index e8dc0300b..d46554ed3 100644 --- a/src/tensor.c +++ b/src/tensor.c @@ -570,12 +570,12 @@ int RAI_TensorGetValueAsDouble(RAI_Tensor *t, long long i, double *val) { *val = ((double *)data)[i]; break; default: - return 0; + return 1; } } else { - return 0; + return 1; } - return 1; + return 0; } int RAI_TensorGetValueAsLongLong(RAI_Tensor *t, long long i, long long *val) { @@ -674,6 +674,7 @@ int RAI_GetTensorFromKeyspace(RedisModuleCtx *ctx, RedisModuleString *keyName, R return REDISMODULE_ERR; } *tensor = RedisModule_ModuleTypeGetValue(*key); + RedisModule_CloseKey(*key); return REDISMODULE_OK; } @@ -924,7 +925,7 @@ int RAI_TensorReplyWithValues(RedisModuleCtx *ctx, RAI_Tensor *t) { double val; for (i = 0; i < len; i++) { int ret = RAI_TensorGetValueAsDouble(t, i, &val); - if (!ret) { + if (ret == 1) { RedisModule_ReplyWithError(ctx, "ERR cannot get values for this datatype"); return -1; } diff --git a/tests/flow/tests_llapi.py b/tests/flow/tests_llapi.py index 0497ab72e..fe443e8e3 100644 --- a/tests/flow/tests_llapi.py +++ b/tests/flow/tests_llapi.py @@ -6,9 +6,11 @@ ''' python -m RLTest --test tests_llapi.py --module path/to/redisai.so ''' + goal_dir = os.path.join(os.getcwd(), "../module/LLAPI.so") TEST_MODULE_PATH = os.path.abspath(goal_dir) + def test_basic_check(env): con = env.getConnection() @@ -16,3 +18,24 @@ def test_basic_check(env): env.assertEqual(ret, b'OK') ret = con.execute_command("RAI_llapi.basic_check") env.assertEqual(ret, b'OK') + + +def test_model_run_async(env): + + con = env.getConnection() + ret = con.execute_command("MODULE", "LOAD", TEST_MODULE_PATH) + env.assertEqual(ret, b'OK') + + test_data_path = os.path.join(os.path.dirname(__file__), 'test_data') + model_filename = os.path.join(test_data_path, 'graph.pb') + + with open(model_filename, 'rb') as f: + model_pb = f.read() + + ret = con.execute_command('AI.MODELSET', 'm{1}', 'TF', DEVICE, + 'INPUTS', 'a', 'b', 'OUTPUTS', 'mul', 'BLOB', model_pb) + env.assertEqual(ret, b'OK') + con.execute_command('AI.TENSORSET', 'a{1}', 'FLOAT', 2, 2, 'VALUES', 2, 3, 2, 3) + con.execute_command('AI.TENSORSET', 'b{1}', 'FLOAT', 2, 2, 'VALUES', 2, 3, 2, 3) + ret = con.execute_command("RAI_llapi.modelRun") + env.assertEqual(ret, b'Async run success') diff --git a/tests/module/LLAPI.c b/tests/module/LLAPI.c index ad8a9f76c..7e38f9e0e 100644 --- a/tests/module/LLAPI.c +++ b/tests/module/LLAPI.c @@ -4,6 +4,18 @@ #include "../../src/redisai.h" #include #include +#include +#include + +pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t global_cond = PTHREAD_COND_INITIALIZER; + +typedef enum LLAPI_status {LLAPI_RUN_NONE = 0, + LLAPI_RUN_SUCCESS, + LLAPI_RUN_ERROR, + LLAPI_NUM_OUTPUTS_ERROR +} LLAPI_status; + int RAI_llapi_basic_check(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { REDISMODULE_NOT_USED(argv); @@ -20,6 +32,106 @@ int RAI_llapi_basic_check(RedisModuleCtx *ctx, RedisModuleString **argv, int arg return RedisModule_ReplyWithError(ctx, "ERROR"); } +static void ModelFinishFunc(RAI_OnFinishCtx *onFinishCtx, void *private_data) { + + RAI_Error *err; + if (RedisAI_InitError(&err) != REDISMODULE_OK) goto finish; + RAI_ModelRunCtx* mctx = RedisAI_GetAsModelRunCtx(onFinishCtx, err); + if(RedisAI_GetErrorCode(err) != RedisAI_ErrorCode_OK) { + *(int *) private_data = LLAPI_RUN_ERROR; + goto finish; + } + 
if(RedisAI_ModelRunCtxNumOutputs(mctx) != 1) { + *(int *) private_data = LLAPI_NUM_OUTPUTS_ERROR; + goto finish; + } + RAI_Tensor *tensor = RedisAI_ModelRunCtxOutputTensor(mctx, 0); + double expceted[4] = {4, 9, 4, 9}; + double val[4]; + + // Verify that we received the expected tensor at the end of the run. + for (long long i = 0; i < 4; i++) { + if(RedisAI_TensorGetValueAsDouble(tensor, i, &val[i]) != 0) { + goto finish; + } + if (expceted[i] != val[i]) { + goto finish; + } + } + *(int *)private_data = LLAPI_RUN_SUCCESS; + + finish: + RedisAI_FreeError(err); + pthread_cond_signal(&global_cond); +} + +static int _ExecuteModelRunAsync(RedisModuleCtx *ctx, RAI_ModelRunCtx* mctx) { + LLAPI_status status = LLAPI_RUN_NONE; + pthread_mutex_lock(&global_lock); + if (RedisAI_ModelRunAsync(mctx, ModelFinishFunc, &status) != REDISMODULE_OK) { + pthread_mutex_unlock(&global_lock); + RedisAI_ModelRunCtxFree(mctx); + RedisModule_ReplyWithError(ctx, "Async run could not start"); + return LLAPI_RUN_NONE; + } + + // Wait until the onFinish callback returns. + pthread_cond_wait(&global_cond, &global_lock); + pthread_mutex_unlock(&global_lock); + RedisAI_ModelRunCtxFree(mctx); + return status; +} + +int RAI_llapi_modelRun(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + REDISMODULE_NOT_USED(argv); + + if (argc>1) { + RedisModule_WrongArity(ctx); + return REDISMODULE_OK; + } + // The model m{1} should exist in key space. + const char *keyNameStr = "m{1}"; + RedisModuleString *keyRedisStr = RedisModule_CreateString(ctx, keyNameStr, strlen(keyNameStr)); + RedisModuleKey *key = RedisModule_OpenKey(ctx, keyRedisStr, REDISMODULE_READ); + RAI_Model *model = RedisModule_ModuleTypeGetValue(key); + RAI_ModelRunCtx* mctx = RedisAI_ModelRunCtxCreate(model); + RedisModule_FreeString(ctx, keyRedisStr); + RedisModule_CloseKey(key); + + // Test the case of a failure in the model run execution (no inputs specified). + if(_ExecuteModelRunAsync(ctx, mctx) != LLAPI_RUN_ERROR) { + return RedisModule_ReplyWithSimpleString(ctx, "Async run should end with an error"); + } + + mctx = RedisAI_ModelRunCtxCreate(model); + // The tensors a{1} and b{1} should exist in key space. + // Load the tensors a{1} and b{1} and add them as inputs for m{1}. + keyNameStr = "a{1}"; + keyRedisStr = RedisModule_CreateString(ctx, keyNameStr, + strlen(keyNameStr)); + key = RedisModule_OpenKey(ctx, keyRedisStr, REDISMODULE_READ); + RAI_Tensor *input1 = RedisModule_ModuleTypeGetValue(key); + RedisAI_ModelRunCtxAddInput(mctx, "a", input1); + RedisModule_FreeString(ctx, keyRedisStr); + RedisModule_CloseKey(key); + + keyNameStr = "b{1}"; + keyRedisStr = RedisModule_CreateString(ctx, keyNameStr, + strlen(keyNameStr)); + key = RedisModule_OpenKey(ctx, keyRedisStr, REDISMODULE_READ); + RAI_Tensor *input2 = RedisModule_ModuleTypeGetValue(key); + RedisAI_ModelRunCtxAddInput(mctx, "b", input2); + RedisModule_FreeString(ctx, keyRedisStr); + RedisModule_CloseKey(key); + + // Add the expected output tensor. 
+ RedisAI_ModelRunCtxAddOutput(mctx, "mul"); + + if (_ExecuteModelRunAsync(ctx, mctx) != LLAPI_RUN_SUCCESS) + return RedisModule_ReplyWithSimpleString(ctx, "Async run failed"); + return RedisModule_ReplyWithSimpleString(ctx, "Async run success"); +} + int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { REDISMODULE_NOT_USED(argv); REDISMODULE_NOT_USED(argc); @@ -35,5 +147,9 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) if(RedisModule_CreateCommand(ctx, "RAI_llapi.basic_check", RAI_llapi_basic_check, "", 0, 0, 0) == REDISMODULE_ERR) return REDISMODULE_ERR; + + if(RedisModule_CreateCommand(ctx, "RAI_llapi.modelRun", RAI_llapi_modelRun, "", + 0, 0, 0) == REDISMODULE_ERR) + return REDISMODULE_ERR; return REDISMODULE_OK; } diff --git a/tests/module/Makefile b/tests/module/Makefile index 7ea54830c..22f57ee62 100644 --- a/tests/module/Makefile +++ b/tests/module/Makefile @@ -2,6 +2,12 @@ # find the OS uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') +# if DEBUG env var is set, we compile with "debug" cflags +DEBUGFLAGS = -g -ggdb -O3 +ifeq ($(DEBUG), 1) + DEBUGFLAGS = -fno-omit-frame-pointer -g -ggdb -O0 +endif + # Compile flags for linux / osx ifeq ($(uname_S),Linux) SHOBJ_CFLAGS ?= -W -Wall -fno-common -g -ggdb -std=c99 -O2 @@ -21,7 +27,7 @@ all: $(TEST_MODULES) $(MAKE) CFLAGS="-m32" LDFLAGS="-melf_i386" %.o: %.c - $(CC) -I../../src -DREDIS_MODULE_TARGET -DREDISMODULE_EXPERIMENTAL_API $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ + $(CC) $(DEBUGFLAGS) -I../../src -DREDIS_MODULE_TARGET -DREDISMODULE_EXPERIMENTAL_API $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ %.so: %.o $(CC) -o $@ LLAPI.o $(SHOBJ_LDFLAGS) -lc -lm
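Taken together, the new low-level API boils down to the following pattern for an external module; this is a condensed sketch of what tests/module/LLAPI.c exercises, with the locking/condition-variable plumbing omitted. The names OnModelDone and my_status, and the populated `mctx`, are illustrative assumptions:

#include "redismodule.h"
#include "redisai.h"   /* the module API header, as included by the test module */

static void OnModelDone(RAI_OnFinishCtx *onFinishCtx, void *private_data) {
    RAI_Error *err;
    if (RedisAI_InitError(&err) != REDISMODULE_OK)
        return;
    /* Detach the run ctx from the finished (single-op) run info; the run info itself is freed here. */
    RAI_ModelRunCtx *mctx = RedisAI_GetAsModelRunCtx(onFinishCtx, err);
    if (RedisAI_GetErrorCode(err) == RedisAI_ErrorCode_OK) {
        RAI_Tensor *out = RedisAI_ModelRunCtxOutputTensor(mctx, 0);
        (void)out; /* ... consume the output tensor and record the result in *private_data ... */
    }
    RedisAI_FreeError(err);
    /* ... wake up the issuing thread (the test uses a condition variable); that thread
     * still owns mctx and frees it with RedisAI_ModelRunCtxFree once the callback is done. */
}

/* On a module thread, with a fully populated `mctx`: */
static void example_run_async(RAI_ModelRunCtx *mctx, int *my_status) {
    if (RedisAI_ModelRunAsync(mctx, OnModelDone, my_status) != REDISMODULE_OK) {
        RedisAI_ModelRunCtxFree(mctx); /* the run never started; the caller still owns mctx */
        return;
    }
    /* ... wait for OnModelDone to signal, then free mctx ... */
}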