@@ -916,6 +916,12 @@ module Redirector = struct
916916 Parallel atoms, creating a deadlock. *)
917917 let parallel_queues = {queues= Queues. create () ; mutex= Mutex. create () }
918918
919+ (* We create another queue only for VM_receive_memory operations for the same reason as Parallel atoms.
920+ Migration spawns 2 operations, send and receive, so if there is limited available worker space
921+ a deadlock can happen when VMs are migrating between hosts or on localhost migration
922+ as the receiver has no free workers to receive memory. *)
923+ let receive_memory_queues = {queues= Queues. create () ; mutex= Mutex. create () }
924+
919925 (* When a thread is actively processing a queue, items are redirected to a
920926 thread-private queue *)
921927 let overrides = ref StringMap. empty
@@ -1035,6 +1041,7 @@ module Redirector = struct
10351041 List. concat_map one
10361042 (default.queues
10371043 :: parallel_queues.queues
1044+ :: receive_memory_queues.queues
10381045 :: List. map snd (StringMap. bindings ! overrides)
10391046 )
10401047 )
@@ -1268,7 +1275,8 @@ module WorkerPool = struct
12681275 let start size =
12691276 for _i = 1 to size do
12701277 incr Redirector. default ;
1271- incr Redirector. parallel_queues
1278+ incr Redirector. parallel_queues ;
1279+ incr Redirector. receive_memory_queues
12721280 done
12731281
12741282 let set_size size =
@@ -1283,7 +1291,8 @@ module WorkerPool = struct
12831291 done
12841292 in
12851293 inner Redirector. default ;
1286- inner Redirector. parallel_queues
1294+ inner Redirector. parallel_queues ;
1295+ inner Redirector. receive_memory_queues
12871296end
12881297
12891298(* Keep track of which VMs we're rebooting so we avoid transient glitches where
@@ -3276,19 +3285,20 @@ let uses_mxgpu id =
32763285 )
32773286 (VGPU_DB. ids id)
32783287
3279- let queue_operation_int ?traceparent dbg id op =
3288+ let queue_operation_int ?traceparent ?(redirector = Redirector. default) dbg id
3289+ op =
32803290 let task =
32813291 Xenops_task. add ?traceparent tasks dbg
32823292 (let r = ref None in
32833293 fun t -> perform ~result: r op t ; ! r
32843294 )
32853295 in
32863296 let tag = if uses_mxgpu id then " mxgpu" else id in
3287- Redirector. push Redirector. default tag (op, task) ;
3297+ Redirector. push redirector tag (op, task) ;
32883298 task
32893299
3290- let queue_operation ?traceparent dbg id op =
3291- let task = queue_operation_int ?traceparent dbg id op in
3300+ let queue_operation ?traceparent ? redirector dbg id op =
3301+ let task = queue_operation_int ?traceparent ?redirector dbg id op in
32923302 Xenops_task. id_of_handle task
32933303
32943304let queue_operation_and_wait dbg id op =
@@ -3737,7 +3747,12 @@ module VM = struct
37373747 ; vmr_compressed= compressed_memory
37383748 }
37393749 in
3740- let task = Some (queue_operation ?traceparent dbg id op) in
3750+ let task =
3751+ Some
3752+ (queue_operation ?traceparent
3753+ ~redirector: Redirector. receive_memory_queues dbg id op
3754+ )
3755+ in
37413756 Option. iter
37423757 (fun t -> t |> Xenops_client. wait_for_task dbg |> ignore)
37433758 task
0 commit comments