@@ -19,13 +19,15 @@ extern "C" {
 
 struct ggml_backend_buffer_type_i {
     const char *          (*GGML_CALL get_name)      (ggml_backend_buffer_type_t buft);
+    // allocate a buffer of this type
     ggml_backend_buffer_t (*GGML_CALL alloc_buffer)  (ggml_backend_buffer_type_t buft, size_t size);
-    size_t                (*GGML_CALL get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment
-    size_t                (*GGML_CALL get_max_size)  (ggml_backend_buffer_type_t buft); // allocation max size
-    size_t                (*GGML_CALL get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding
-    bool                  (*GGML_CALL supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend
+    // tensor alignment
+    size_t                (*GGML_CALL get_alignment) (ggml_backend_buffer_type_t buft);
+    // max buffer size that can be allocated
+    size_t                (*GGML_CALL get_max_size)  (ggml_backend_buffer_type_t buft);
+    // data size needed to allocate the tensor, including padding
+    size_t                (*GGML_CALL get_alloc_size)(ggml_backend_buffer_type_t buft, const struct ggml_tensor * tensor);
     // check if tensor data is in host memory
-    // should be equivalent to supports_backend(buft, ggml_backend_cpu_init())
     bool                  (*GGML_CALL is_host)       (ggml_backend_buffer_type_t buft);
 };
 
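To make the reorganized buffer-type interface concrete, here is a minimal sketch of a host-memory buffer type implemented against it. Everything prefixed `my_host_*` is hypothetical; `alloc_buffer` is left NULL because it depends on the backend-specific buffer vtable, and the initializer style simply mirrors ggml's comment-designator convention.

```c
// Hypothetical host buffer type against the interface above.
// Assumes ggml-backend-impl.h declares the struct and GGML_CALL.
#include "ggml-backend-impl.h"
#include <stdint.h>

static const char * GGML_CALL my_host_buft_get_name(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return "MyHost";
}

// tensor alignment: a cache-line-friendly value for host memory
static size_t GGML_CALL my_host_buft_get_alignment(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return 64;
}

// max buffer size that can be allocated: host memory has no device cap
static size_t GGML_CALL my_host_buft_get_max_size(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return SIZE_MAX;
}

// data size needed for a tensor: plain host buffers need no extra padding
static size_t GGML_CALL my_host_buft_get_alloc_size(ggml_backend_buffer_type_t buft,
                                                    const struct ggml_tensor * tensor) {
    (void) buft;
    return ggml_nbytes(tensor);
}

static bool GGML_CALL my_host_buft_is_host(ggml_backend_buffer_type_t buft) {
    (void) buft;
    return true;
}

// alloc_buffer is NULL only for brevity; a real buffer type must return a
// ggml_backend_buffer_t wrapping suitably aligned host memory.
static const struct ggml_backend_buffer_type_i my_host_buft_iface = {
    /* .get_name       = */ my_host_buft_get_name,
    /* .alloc_buffer   = */ NULL,
    /* .get_alignment  = */ my_host_buft_get_alignment,
    /* .get_max_size   = */ my_host_buft_get_max_size,
    /* .get_alloc_size = */ my_host_buft_get_alloc_size,
    /* .is_host        = */ my_host_buft_is_host,
};
```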
@@ -94,27 +96,37 @@ extern "C" {
     void (*GGML_CALL synchronize)(ggml_backend_t backend);
 
     // compute graph with a plan (not used currently)
+    // create a new plan for a graph
     ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
     void                      (*GGML_CALL graph_plan_free)   (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
+    // update the plan with a new graph - this should be faster than creating a new plan when the graph has the same topology
+    void                      (*GGML_CALL graph_plan_update) (ggml_backend_t backend, ggml_backend_graph_plan_t plan, const struct ggml_cgraph * cgraph);
+    // compute the graph with the plan
+    enum ggml_status          (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
 
-    // compute graph with a plan
-    enum ggml_status (*GGML_CALL graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
     // compute graph without a plan (async)
     enum ggml_status (*GGML_CALL graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph);
 
-    // check if the backend supports an operation
+    // check if the backend can compute an operation
     bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
 
+    // check if the backend can use tensors allocated in a buffer type
+    bool (*GGML_CALL supports_buft)(ggml_backend_t backend, ggml_backend_buffer_type_t buft);
+
     // check if the backend wants to run an operation, even if the weights are allocated in a CPU buffer
     // these should be expensive operations with large batch sizes that may benefit from running on this backend
     // even if the weight has to be copied from the CPU temporarily
     bool (*GGML_CALL offload_op)(ggml_backend_t backend, const struct ggml_tensor * op);
 
     // (optional) event synchronization
+    // create a new event that can be recorded on this backend instance
     ggml_backend_event_t (*GGML_CALL event_new)        (ggml_backend_t backend);
     void                 (*GGML_CALL event_free)       (ggml_backend_event_t event);
+    // record an event on the backend instance that created it
     void                 (*GGML_CALL event_record)     (ggml_backend_event_t event);
+    // wait for an event on a different backend instance
     void                 (*GGML_CALL event_wait)       (ggml_backend_t backend, ggml_backend_event_t event);
+    // block until an event is recorded
     void                 (*GGML_CALL event_synchronize)(ggml_backend_event_t event);
 };
 
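The new comments pin down the event contract: an event is recorded on the backend instance that created it and waited on from another. Below is a sketch of the cross-backend pipeline this enables, dispatched through the vtable the way ggml-backend.c does; `backend_a`, `backend_b`, the two graphs, and `shared_buft` are all assumed inputs, and both backends are assumed to implement the optional event callbacks.

```c
#include "ggml-backend-impl.h"

// Illustrative only: backend_a produces a result asynchronously and
// backend_b consumes it without a full host-side synchronize.
static void pipeline_graphs(ggml_backend_t backend_a, struct ggml_cgraph * graph_a,
                            ggml_backend_t backend_b, struct ggml_cgraph * graph_b,
                            ggml_backend_buffer_type_t shared_buft) {
    // check that backend_b can use tensors allocated in the shared buffer type
    if (!backend_b->iface.supports_buft(backend_b, shared_buft)) {
        return; // a real caller would fall back to a copy through host memory
    }

    ggml_backend_event_t ev = backend_a->iface.event_new(backend_a);

    backend_a->iface.graph_compute(backend_a, graph_a); // async on backend_a
    backend_a->iface.event_record(ev);                  // record on the creating backend

    // backend_b's queue stalls until ev is recorded; the host thread does not
    backend_b->iface.event_wait(backend_b, ev);
    backend_b->iface.graph_compute(backend_b, graph_b);

    backend_a->iface.event_synchronize(ev);             // host blocks until recorded
    backend_a->iface.event_free(ev);
}
```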
@@ -163,7 +175,7 @@ extern "C" {
     void               (*GGML_CALL ggml_backend_tensor_set)(struct ggml_tensor *, const void *, size_t, size_t);
     bool               (*GGML_CALL ggml_is_quantized)      (enum ggml_type);
     size_t             (*GGML_CALL ggml_type_size)         (enum ggml_type);
-    int                (*GGML_CALL ggml_blck_size)         (enum ggml_type);
+    int64_t            (*GGML_CALL ggml_blck_size)         (enum ggml_type);
     bool               (*GGML_CALL ggml_is_transposed)     (const struct ggml_tensor *);
     size_t             (*GGML_CALL ggml_nbytes)            (const struct ggml_tensor *);
     enum ggml_unary_op (*GGML_CALL ggml_get_unary_op)      (const struct ggml_tensor *);
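The `ggml_blck_size` widening from `int` to `int64_t` matters because the block size feeds directly into 64-bit size arithmetic. A sketch of the kind of row-size computation it participates in (upstream `ggml_row_size` does the equivalent); the helper name here is illustrative.

```c
#include <stdint.h>
#include "ggml.h"

// Illustrative: bytes per row for a (possibly quantized) type. With the
// int64_t return type the division stays in 64-bit arithmetic with no
// implicit narrowing. ne0 must be a multiple of the block size.
static size_t example_row_size(enum ggml_type type, int64_t ne0) {
    return ggml_type_size(type) * (size_t)(ne0 / ggml_blck_size(type));
}
```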
@@ -180,7 +192,11 @@ extern "C" {
     bool (*GGML_CALL ggml_backend_buffer_is_host)(ggml_backend_buffer_t);
     bool (*GGML_CALL ggml_guid_matches)(ggml_guid_t, ggml_guid_t);
     bool (*GGML_CALL ggml_is_empty)(const struct ggml_tensor *);
+    enum ggml_backend_buffer_usage (*GGML_CALL ggml_backend_buffer_get_usage)(ggml_backend_buffer_t);
     bool (*GGML_CALL ggml_are_same_shape)(const struct ggml_tensor *, const struct ggml_tensor *);
+    void (*GGML_CALL ggml_abort)(const char *, int, const char *, ...);
+    bool (*GGML_CALL ggml_is_contiguous_1)(const struct ggml_tensor *);
+    bool (*GGML_CALL ggml_is_contiguous_2)(const struct ggml_tensor *);
 };
 
 #ifdef __cplusplus
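`ggml_abort` has a `(file, line, fmt, ...)` shape, so code reached through this table can raise a located, printf-style fatal error, and the two new contiguity predicates let it reject layouts it does not handle. A hedged sketch using the plain `ggml.h` declarations of the same functions; the helper and its message are illustrative.

```c
#include "ggml.h"

// Illustrative: reject a tensor layout an op does not support, using the
// relaxed contiguity predicate and the printf-style abort added above.
static void require_contiguous_rows(const struct ggml_tensor * src) {
    if (!ggml_is_contiguous_1(src)) {
        // fatal error carrying the source location and a formatted message
        ggml_abort(__FILE__, __LINE__, "%s: unsupported layout for '%s'", __func__, src->name);
    }
}
```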