From 9e2e0aeddef0cd3e22055f7d4ceb0b1506e24369 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Sat, 31 May 2025 01:07:24 +0000 Subject: [PATCH] bump dlpack header to 1.1 --- cuda_core/cuda/core/experimental/dlpack.h | 42 ++++++++++++++++++++--- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/experimental/dlpack.h b/cuda_core/cuda/core/experimental/dlpack.h index bcb77949a..0b41961b4 100644 --- a/cuda_core/cuda/core/experimental/dlpack.h +++ b/cuda_core/cuda/core/experimental/dlpack.h @@ -19,7 +19,7 @@ #define DLPACK_MAJOR_VERSION 1 /*! \brief The current minor version of dlpack */ -#define DLPACK_MINOR_VERSION 0 +#define DLPACK_MINOR_VERSION 1 /*! \brief DLPACK_DLL prefix for windows */ #ifdef _WIN32 @@ -157,6 +157,26 @@ typedef enum { kDLComplex = 5U, /*! \brief boolean */ kDLBool = 6U, + /*! \brief FP8 data types */ + kDLFloat8_e3m4 = 7U, + kDLFloat8_e4m3 = 8U, + kDLFloat8_e4m3b11fnuz = 9U, + kDLFloat8_e4m3fn = 10U, + kDLFloat8_e4m3fnuz = 11U, + kDLFloat8_e5m2 = 12U, + kDLFloat8_e5m2fnuz = 13U, + kDLFloat8_e8m0fnu = 14U, + /*! \brief FP6 data types + * Setting bits != 6 is currently unspecified, and the producer must ensure bits is set to 6 + * while the consumer must stop importing if the value is unexpected. + */ + kDLFloat6_e2m3fn = 15U, + kDLFloat6_e3m2fn = 16U, + /*! \brief FP4 data types + * Setting bits != 4 is currently unspecified, and the producer must ensure bits is set to 4 + * while the consumer must stop importing if the value is unexpected. + */ + kDLFloat4_e2m1fn = 17U, } DLDataTypeCode; /*! 
@@ -170,6 +190,12 @@ typedef enum { * - int8: type_code = 0, bits = 8, lanes = 1 * - std::complex: type_code = 5, bits = 64, lanes = 1 * - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits) + * - float8_e4m3: type_code = 8, bits = 8, lanes = 1 (packed in memory) + * - float6_e3m2fn: type_code = 16, bits = 6, lanes = 1 (packed in memory) + * - float4_e2m1fn: type_code = 17, bits = 4, lanes = 1 (packed in memory) + * + * When a sub-byte type is packed, DLPack requires the data to be in little bit-endian, i.e., + * for a packed data set D ((D >> (i * bits)) & bit_mask) stores the i-th element. */ typedef struct { /*! @@ -196,8 +222,8 @@ typedef struct { * types. This pointer is always aligned to 256 bytes as in CUDA. The * `byte_offset` field should be used to point to the beginning of the data. * - * Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow, - * TVM, perhaps others) do not adhere to this 256 byte aligment requirement + * Note that as of Nov 2021, multiple libraries (CuPy, PyTorch, TensorFlow, + * TVM, perhaps others) do not adhere to this 256 byte alignment requirement * on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed * (after which this note will be updated); at the moment it is recommended * to not rely on the data pointer being correctly aligned. @@ -267,7 +293,7 @@ typedef struct DLManagedTensor { void (*deleter)(struct DLManagedTensor * self); } DLManagedTensor; -// bit masks used in in the DLManagedTensorVersioned +// bit masks used in the DLManagedTensorVersioned /*! \brief bit mask to indicate that the tensor is read only. */ #define DLPACK_FLAG_BITMASK_READ_ONLY (1UL << 0UL) @@ -280,6 +306,14 @@ typedef struct DLManagedTensor { */ #define DLPACK_FLAG_BITMASK_IS_COPIED (1UL << 1UL) +/*! + * \brief bit mask to indicate whether a sub-byte type is packed or padded. 
+ * + * The default for sub-byte types (ex: fp4/fp6) is assumed packed. This flag can + * be set by the producer to signal that a tensor of sub-byte type is padded. + */ +#define DLPACK_FLAG_BITMASK_IS_SUBBYTE_TYPE_PADDED (1UL << 2UL) + /*! * \brief A versioned and managed C Tensor object, manage memory of DLTensor. *