3 changes: 2 additions & 1 deletion ffi/CMakeLists.txt
@@ -73,7 +73,7 @@ set(tvm_ffi_extra_objs_sources
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/library_module.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/library_module_system_lib.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/library_module_dynamic_lib.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/stream_context.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/env_context.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/env_c_api.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/extra/testing.cc"
)
@@ -249,6 +249,7 @@ endif()

install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/tvm/ffi/ DESTINATION include/tvm/ffi/)
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/3rdparty/dlpack/include/ DESTINATION include/)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/python/tvm_ffi/cython/tvm_ffi_python_helpers.h DESTINATION include/)
install(TARGETS tvm_ffi_shared DESTINATION lib)
# ship additional dSYM files for debugging symbols if available
if (APPLE)
4 changes: 2 additions & 2 deletions ffi/docs/get_started/quick_start.md
@@ -125,7 +125,7 @@ void AddOneCUDA(DLTensor* x, DLTensor* y) {

// Get current CUDA stream from environment
cudaStream_t stream = static_cast<cudaStream_t>(
TVMFFIEnvGetCurrentStream(x->device.device_type, x->device.device_id));
TVMFFIEnvGetStream(x->device.device_type, x->device.device_id));

// Launch kernel
AddOneKernel<<<nblock, nthread_per_block, 0, stream>>>(
@@ -136,7 +136,7 @@ TVM_FFI_DLL_EXPORT_TYPED_FUNC(add_one_cuda, tvm_ffi_example::AddOneCUDA);
```

**Key Points:**
- We use `TVMFFIEnvGetCurrentStream` to obtain the current stream from the environment
- We use `TVMFFIEnvGetStream` to obtain the current stream from the environment
- When invoking an ffi Function from the Python end with a PyTorch tensor as an argument,
  the stream will be populated with torch's current stream.

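The renamed stream APIs are symmetric: the host side installs a stream with `TVMFFIEnvSetStream` and the kernel wrapper reads it back with `TVMFFIEnvGetStream`. Below is a minimal C++ sketch of that round trip, assuming only the declarations added in `ffi/include/tvm/ffi/extra/c_env_api.h` by this change; the `RunWithStream` helper and the restore-on-exit pattern are illustrative, not part of the PR.

```cpp
#include <tvm/ffi/extra/c_env_api.h>

#include <cuda_runtime.h>

// Illustrative helper: make `stream` the ambient stream for a CUDA device,
// call into an FFI function, then restore whatever stream was set before.
void RunWithStream(cudaStream_t stream, int device_id) {
  TVMFFIStreamHandle previous = nullptr;
  TVMFFIEnvSetStream(kDLCUDA, device_id, static_cast<TVMFFIStreamHandle>(stream), &previous);
  // ... invoke the FFI function here; inside it, TVMFFIEnvGetStream(kDLCUDA, device_id)
  // returns `stream`, as in the AddOneCUDA example above ...
  TVMFFIEnvSetStream(kDLCUDA, device_id, previous, nullptr);
}
```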
2 changes: 1 addition & 1 deletion ffi/examples/inline_module/main.py
@@ -63,7 +63,7 @@ def main():
// it will be set to torch.cuda.current_stream() when calling the function
// with torch.Tensors
cudaStream_t stream = static_cast<cudaStream_t>(
TVMFFIEnvGetCurrentStream(x->device.device_type, x->device.device_id));
TVMFFIEnvGetStream(x->device.device_type, x->device.device_id));
// launch the kernel
AddOneKernel<<<nblock, nthread_per_block, 0, stream>>>(static_cast<float*>(x->data),
static_cast<float*>(y->data), n);
2 changes: 1 addition & 1 deletion ffi/examples/quick_start/run_example.py
@@ -64,7 +64,7 @@ def run_add_one_cuda():
with torch.cuda.stream(stream):
# tvm-ffi automatically handles DLPack compatible tensors
# it also handles interactions with torch runtime
# torch.cuda.current_stream() will be set and available via TVMFFIEnvGetCurrentStream
# torch.cuda.current_stream() will be set and available via TVMFFIEnvGetStream
# when calling the function
mod.add_one_cuda(x, y)
stream.synchronize()
4 changes: 2 additions & 2 deletions ffi/examples/quick_start/src/add_one_cuda.cu
@@ -46,8 +46,8 @@ void AddOneCUDA(tvm::ffi::Tensor x, tvm::ffi::Tensor y) {
// Obtain the current stream from the environment
// it will be set to torch.cuda.current_stream() when calling the function
// with torch.Tensors
cudaStream_t stream = static_cast<cudaStream_t>(
TVMFFIEnvGetCurrentStream(x->device.device_type, x->device.device_id));
cudaStream_t stream =
static_cast<cudaStream_t>(TVMFFIEnvGetStream(x->device.device_type, x->device.device_id));
// launch the kernel
AddOneKernel<<<nblock, nthread_per_block, 0, stream>>>(static_cast<float*>(x->data),
static_cast<float*>(y->data), n);
15 changes: 15 additions & 0 deletions ffi/include/tvm/ffi/c_api.h
@@ -27,6 +27,21 @@
#include <dlpack/dlpack.h>
#include <stdint.h>

/*!
* \brief C-style Allocator that allocates memory for a DLPack tensor.
* \param prototype The prototype DLTensor to offer details about device and shape.
* \param out The output DLManagedTensorVersioned.
* \param error_ctx The context to set the error.
* \param SetError The function to set the error.
* \return 0 on success, -1 on failure.
* call SetError(error_ctx, kind, message) to set the error kind and message.
* \note Error propagation via SetError.
*/
typedef int (*DLPackTensorAllocator)( //
DLTensor* prototype, DLManagedTensorVersioned** out, void* error_ctx, //
void (*SetError)(void* error_ctx, const char* kind, const char* message) //
);

// Macros to do weak linking
#ifdef _MSC_VER
#define TVM_FFI_WEAK __declspec(selectany)
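To make the callback contract concrete, here is a hedged sketch of a host-memory allocator matching the `DLPackTensorAllocator` signature declared above. Everything in it (`HostAllocation`, `HostTensorAllocator`, the CPU-only restriction) is illustrative; real allocators are supplied by the embedding runtime.

```cpp
#include <dlpack/dlpack.h>

#include <cstdlib>
#include <vector>

namespace {

// Illustrative only: owns one allocation plus the shape copy it hands out.
struct HostAllocation {
  DLManagedTensorVersioned managed;
  std::vector<int64_t> shape;
  void* data = nullptr;
};

void DeleteHostAllocation(DLManagedTensorVersioned* self) {
  auto* alloc = static_cast<HostAllocation*>(self->manager_ctx);
  std::free(alloc->data);
  delete alloc;
}

// Matches the DLPackTensorAllocator typedef declared above.
int HostTensorAllocator(DLTensor* prototype, DLManagedTensorVersioned** out, void* error_ctx,
                        void (*SetError)(void* error_ctx, const char* kind, const char* message)) {
  if (prototype->device.device_type != kDLCPU) {
    SetError(error_ctx, "RuntimeError", "HostTensorAllocator only handles CPU tensors");
    return -1;
  }
  int64_t numel = 1;
  for (int i = 0; i < prototype->ndim; ++i) numel *= prototype->shape[i];
  size_t nbytes = static_cast<size_t>(numel) *
                  ((prototype->dtype.bits * prototype->dtype.lanes + 7) / 8);
  auto* alloc = new HostAllocation();
  alloc->shape.assign(prototype->shape, prototype->shape + prototype->ndim);
  alloc->data = std::malloc(nbytes == 0 ? 1 : nbytes);
  if (alloc->data == nullptr) {
    delete alloc;
    SetError(error_ctx, "RuntimeError", "host allocation failed");
    return -1;
  }
  alloc->managed.version = {DLPACK_MAJOR_VERSION, DLPACK_MINOR_VERSION};
  alloc->managed.manager_ctx = alloc;
  alloc->managed.deleter = DeleteHostAllocation;
  alloc->managed.flags = 0;
  alloc->managed.dl_tensor = *prototype;  // copies device, dtype, ndim, byte_offset
  alloc->managed.dl_tensor.data = alloc->data;
  alloc->managed.dl_tensor.shape = alloc->shape.data();
  alloc->managed.dl_tensor.strides = nullptr;  // compact row-major layout
  *out = &alloc->managed;
  return 0;
}

}  // namespace
```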
56 changes: 55 additions & 1 deletion ffi/include/tvm/ffi/container/tensor.h
@@ -32,6 +32,7 @@

#include <atomic>
#include <memory>
#include <string>
#include <utility>

namespace tvm {
@@ -341,7 +342,60 @@ class Tensor : public ObjectRef {
return Tensor(make_object<details::TensorObjFromNDAlloc<TNDAlloc>>(
alloc, shape, dtype, device, std::forward<ExtraArgs>(extra_args)...));
}

/*!
* \brief Create a Tensor from a DLPackTensorAllocator
*
* This function can be used together with TVMFFIEnvSetTensorAllocator
* in extra/c_env_api.h to create a Tensor from the thread-local
* environment allocator.
*
* \code
*
* ffi::Tensor tensor = ffi::Tensor::FromDLPackAlloc(
* TVMFFIEnvGetTensorAllocator(), shape, dtype, device
* );
* \endcode
*
* \param allocator The DLPack allocator.
* \param shape The shape of the Tensor.
* \param dtype The data type of the Tensor.
* \param device The device of the Tensor.
* \return The created Tensor.
*/
static Tensor FromDLPackAlloc(DLPackTensorAllocator allocator, ffi::Shape shape, DLDataType dtype,
DLDevice device) {
if (allocator == nullptr) {
TVM_FFI_THROW(RuntimeError)
<< "FromDLPackAlloc: allocator is nullptr, "
<< "likely because TVMFFIEnvSetTensorAllocator has not been called.";
}
DLTensor prototype;
prototype.device = device;
prototype.dtype = dtype;
prototype.shape = const_cast<int64_t*>(shape.data());
prototype.ndim = static_cast<int>(shape.size());
prototype.strides = nullptr;
prototype.byte_offset = 0;
prototype.data = nullptr;
DLManagedTensorVersioned* tensor = nullptr;
// error context to be used to propagate error
struct ErrorContext {
std::string kind;
std::string message;
static void SetError(void* error_ctx, const char* kind, const char* message) {
ErrorContext* error_context = static_cast<ErrorContext*>(error_ctx);
error_context->kind = kind;
error_context->message = message;
}
};
ErrorContext error_context;
int ret = (*allocator)(&prototype, &tensor, &error_context, ErrorContext::SetError);
if (ret != 0) {
throw ffi::Error(error_context.kind, error_context.message,
TVMFFITraceback(__FILE__, __LINE__, __func__, 0));
}
return Tensor(make_object<details::TensorObjFromDLPack<DLManagedTensorVersioned>>(tensor));
}
/*!
* \brief Create a Tensor from a DLPack managed tensor, pre v1.0 API.
* \param tensor The input DLPack managed tensor.
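For completeness, a short usage sketch of `FromDLPackAlloc` with an allocator passed in directly rather than fetched from the environment. It assumes the illustrative `HostTensorAllocator` from the `c_api.h` example earlier in this diff, and that `ffi::Shape` can be built from an initializer list; when the allocator reports a failure through `SetError`, `FromDLPackAlloc` rethrows it as an `ffi::Error`, as the implementation above shows.

```cpp
#include <tvm/ffi/container/tensor.h>

// Illustrative: allocate a 2x3 float32 CPU tensor through the sketched allocator.
tvm::ffi::Tensor MakeHostTensor() {
  namespace ffi = tvm::ffi;
  DLDataType f32;
  f32.code = kDLFloat;
  f32.bits = 32;
  f32.lanes = 1;
  DLDevice cpu{kDLCPU, 0};
  return ffi::Tensor::FromDLPackAlloc(HostTensorAllocator, ffi::Shape({2, 3}), f32, cpu);
}
```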
31 changes: 26 additions & 5 deletions ffi/include/tvm/ffi/extra/c_env_api.h
@@ -46,12 +46,11 @@ typedef void* TVMFFIStreamHandle;
* \param device_id The id of the device.
* \param stream The stream to set.
* \param opt_out_original_stream Output original stream if the address is not nullptr.
* \note The stream is a weak reference that is cached/owned by the module.
* \return 0 when success, nonzero when failure happens
*/
TVM_FFI_DLL int TVMFFIEnvSetCurrentStream(int32_t device_type, int32_t device_id,
TVMFFIStreamHandle stream,
TVMFFIStreamHandle* opt_out_original_stream);
TVM_FFI_DLL int TVMFFIEnvSetStream(int32_t device_type, int32_t device_id,
TVMFFIStreamHandle stream,
TVMFFIStreamHandle* opt_out_original_stream);

/*!
* \brief FFI function to get the current stream for a device
@@ -60,7 +59,29 @@ TVM_FFI_DLL int TVMFFIEnvSetCurrentStream(int32_t device_type, int32_t device_id
* \param device_id The id of the device.
* \return The current stream of the device.
*/
TVM_FFI_DLL TVMFFIStreamHandle TVMFFIEnvGetCurrentStream(int32_t device_type, int32_t device_id);
TVM_FFI_DLL TVMFFIStreamHandle TVMFFIEnvGetStream(int32_t device_type, int32_t device_id);

/*!
* \brief FFI function to set the current DLPack allocator in the thread-local (TLS) context
*
* \param allocator The allocator to set.
* \param write_to_global_context Whether to also set the allocator to the global context.
* \param opt_out_original_allocator Output original TLS allocator if the address is not nullptr.
* \return 0 when success, nonzero when failure happens
*/
TVM_FFI_DLL int TVMFFIEnvSetTensorAllocator(DLPackTensorAllocator allocator,
int write_to_global_context,
DLPackTensorAllocator* opt_out_original_allocator);

/*!
* \brief FFI function to get the current DLPack allocator stored in the context.
*
* This function first queries the global context, and if not found,
* queries the thread-local context.
*
* \return The current DLPack allocator.
*/
TVM_FFI_DLL DLPackTensorAllocator TVMFFIEnvGetTensorAllocator();

/*!
* \brief Check if there are any signals raised in the surrounding env.
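Putting the two new allocator entry points together, here is a minimal sketch of how an embedder might install a thread-local allocator and allocate through it. It reuses the illustrative `HostTensorAllocator` from the `c_api.h` example, and it assumes that passing the previous allocator back to `TVMFFIEnvSetTensorAllocator` is the intended way to restore it.

```cpp
#include <tvm/ffi/container/tensor.h>
#include <tvm/ffi/extra/c_env_api.h>

// Illustrative: install an allocator for this thread only, allocate, restore.
tvm::ffi::Tensor AllocThroughEnv(tvm::ffi::Shape shape, DLDataType dtype, DLDevice device) {
  DLPackTensorAllocator previous = nullptr;
  // write_to_global_context = 0: only the calling thread sees the new allocator.
  TVMFFIEnvSetTensorAllocator(HostTensorAllocator, /*write_to_global_context=*/0, &previous);
  tvm::ffi::Tensor t = tvm::ffi::Tensor::FromDLPackAlloc(
      TVMFFIEnvGetTensorAllocator(), shape, dtype, device);
  // Put back whatever allocator was active before (may be nullptr).
  TVMFFIEnvSetTensorAllocator(previous, /*write_to_global_context=*/0, nullptr);
  return t;
}
```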
84 changes: 84 additions & 0 deletions ffi/licenses/LICENSE.pytorch.txt
@@ -0,0 +1,84 @@
From PyTorch:

Copyright (c) 2016- Facebook, Inc (Adam Paszke)
Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
Copyright (c) 2011-2013 NYU (Clement Farabet)
Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)

From Caffe2:

Copyright (c) 2016-present, Facebook Inc. All rights reserved.

All contributions by Facebook:
Copyright (c) 2016 Facebook Inc.

All contributions by Google:
Copyright (c) 2015 Google Inc.
All rights reserved.

All contributions by Yangqing Jia:
Copyright (c) 2015 Yangqing Jia
All rights reserved.

All contributions by Kakao Brain:
Copyright 2019-2020 Kakao Brain

All contributions by Cruise LLC:
Copyright (c) 2022 Cruise LLC.
All rights reserved.

All contributions by Tri Dao:
Copyright (c) 2024 Tri Dao.
All rights reserved.

All contributions by Arm:
Copyright (c) 2021, 2023-2024 Arm Limited and/or its affiliates

All contributions from Caffe:
Copyright(c) 2013, 2014, 2015, the respective contributors
All rights reserved.

All other contributions:
Copyright(c) 2015, 2016 the respective contributors
All rights reserved.

Caffe2 uses a copyright model similar to Caffe: each contributor holds
copyright over their contributions to Caffe2. The project versioning records
all such contribution and copyright details. If a contributor wants to further
mark their specific copyright on a particular contribution, they should
indicate their copyright solely in the commit message of the change when it is
committed.

All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories America
and IDIAP Research Institute nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.