liulinC · liulinC · Apr 15, 2019 · Apr 23, 2019 · Apr 28, 2019
diff --git a/ocaml/xapi/vgpuops.ml b/ocaml/xapi/vgpuops.ml
@@ -18,7 +18,7 @@ open Stdext
 open Listext
 open Xstringext
 
-type vgpu = {
+type vgpu_t = {
   vgpu_ref: API.ref_VGPU;
   gpu_group_ref: API.ref_GPU_group;
   devid: int;
@@ -27,7 +27,7 @@ type vgpu = {
   requires_passthrough: [ `PF | `VF ] option;
 }
 
-let vgpu_of_vgpu ~__context vm_r vgpu =
+let vgpu_of_ref ~__context vgpu =
   let vgpu_r = Db.VGPU.get_record ~__context ~self:vgpu in
   {
     vgpu_ref = vgpu;
@@ -39,7 +39,7 @@ let vgpu_of_vgpu ~__context vm_r vgpu =
   }
 
 let vgpus_of_vm ~__context vm_r =
-  List.map (vgpu_of_vgpu ~__context vm_r) vm_r.API.vM_VGPUs
+  List.map (vgpu_of_ref ~__context ) vm_r.API.vM_VGPUs
 
 let fail_creation vm vgpu =
   match vgpu.requires_passthrough with
@@ -55,10 +55,40 @@ let fail_creation vm vgpu =
         Ref.string_of vgpu.gpu_group_ref
       ]))
 
-let allocate_vgpu_to_gpu ~__context vm host vgpu =
+let allocate_vgpu_to_gpu ?dry_run ?pre_allocate_list ~__context vm host vgpu =
+  (* Get all pGPU from the host *)
   let available_pgpus = Db.Host.get_PGPUs ~__context ~self:host in
+  (* Get all pGPU from the required groups *)
   let compatible_pgpus = Db.GPU_group.get_PGPUs ~__context ~self:vgpu.gpu_group_ref in
   let pgpus = List.intersect compatible_pgpus available_pgpus in
+
+  (* [pgpu_can_hold_vgpu pgpu vgpu] is true iff Xapi_pgpu.assert_can_run_VGPU accepts the pair; any exception means "no", and is logged rather than silently dropped *)
+  let pgpu_can_hold_vgpu pgpu vgpu =
+    try Xapi_pgpu.assert_can_run_VGPU ~__context ~self:pgpu ~vgpu; true
+    with e -> debug "pGPU %s cannot hold vGPU %s: %s" (Ref.string_of pgpu) (Ref.string_of vgpu) (Printexc.to_string e); false in
+  (* Get all pGPU that can hold the vGPU *)
+  let active_pgpus = List.filter ( fun pgpu ->  pgpu_can_hold_vgpu pgpu vgpu.vgpu_ref ) pgpus in
+
+  let remaining_capacity_for_vgpu_from_pgpu vgpu pgpu =
+    let db_remaining = Helpers.call_api_functions ~__context
+        (fun rpc session_id ->
+           Client.Client.PGPU.get_remaining_capacity ~rpc ~session_id
+             ~self:pgpu ~vgpu_type:vgpu.type_ref) in
+    (* A pre-allocation is a list of (vGPU, pGPU) placements, e.g.
+     * [(v1,p1);(v2,p2);(v3,p1)], that has been decided (usually in dry-run
+     * mode) but is not reflected in the database. *)
+    match pre_allocate_list with
+    | None -> db_remaining
+    | Some placements ->
+      (* Every placement already made on this pGPU is counted as one unit
+       * taken from the capacity reported by the database. *)
+      let already_placed =
+        List.filter (fun (_, placed_on) -> placed_on = pgpu) placements
+        |> List.length |> Int64.of_int in
+      (* NOTE: the result is not checked for being negative. *)
+      Int64.sub db_remaining already_placed
+  in
+
   (* Sort the pgpus in lists of equal optimality for vGPU placement based on
    * the GPU groups allocation algorithm *)
   let sort_desc =
@@ -67,11 +97,8 @@ let allocate_vgpu_to_gpu ~__context vm host vgpu =
     | `breadth_first -> true
   in
   let sorted_pgpus = Helpers.sort_by_schwarzian ~descending:sort_desc
-      (fun pgpu ->
-         Helpers.call_api_functions ~__context (fun rpc session_id ->
-             Client.Client.PGPU.get_remaining_capacity ~rpc ~session_id
-               ~self:pgpu ~vgpu_type:vgpu.type_ref))
-      pgpus
+      (fun pgpu -> remaining_capacity_for_vgpu_from_pgpu vgpu pgpu )
+      active_pgpus
   in
   let rec choose_pgpu = function
     | [] -> None
@@ -85,9 +112,14 @@ let allocate_vgpu_to_gpu ~__context vm host vgpu =
   match choose_pgpu sorted_pgpus with
   | None -> fail_creation vm vgpu
   | Some pgpu ->
-    Db.VGPU.set_scheduled_to_be_resident_on ~__context
-      ~self:vgpu.vgpu_ref ~value:pgpu;
-    pgpu
+    begin match dry_run with
+      |Some true -> ()
+      |_ -> Db.VGPU.set_scheduled_to_be_resident_on ~__context ~self:vgpu.vgpu_ref ~value:pgpu
+    end;
+    let pre_list = match pre_allocate_list with
+      | Some pre_allocate_list -> pre_allocate_list
+      | _ -> [] in
+    (vgpu.vgpu_ref,pgpu)::pre_list
 
 (* Take a PCI device and assign it, and any dependent devices, to the VM *)
 let add_pcis_to_vm ~__context host vm pci =
@@ -117,8 +149,8 @@ let reserve_free_virtual_function ~__context vm pf =
         (* We may still need to load the driver... do that and try again *)
         let pf_host = Db.PCI.get_host ~__context ~self:pf in
         Helpers.call_api_functions ~__context (fun rpc session_id ->
-          Client.Client.Host.mxgpu_vf_setup rpc session_id pf_host
-        );
+            Client.Client.Host.mxgpu_vf_setup rpc session_id pf_host
+          );
         get false
       end else
         (* This probably means that our capacity checking went wrong! *)
@@ -135,14 +167,14 @@ let add_vgpus_to_vm ~__context host vm vgpus vgpu_manual_setup =
     match vgpu.requires_passthrough with
     | Some `PF ->
       debug "Creating passthrough VGPUs";
-      let pgpu = allocate_vgpu_to_gpu ~__context vm host vgpu in
+      let pgpu = List.assoc vgpu.vgpu_ref (allocate_vgpu_to_gpu ~__context vm host vgpu) in
       let pci = Db.PGPU.get_PCI ~__context ~self:pgpu in
       add_pcis_to_vm ~__context host vm pci
     | Some `VF ->
       Pool_features.assert_enabled ~__context ~f:Features.VGPU;
       debug "Creating SR-IOV VGPUs";
       if not vgpu_manual_setup then
-        let pgpu = allocate_vgpu_to_gpu ~__context vm host vgpu in
+        let pgpu = List.assoc vgpu.vgpu_ref (allocate_vgpu_to_gpu ~__context vm host vgpu) in
         Db.PGPU.get_PCI ~__context ~self:pgpu
         |> reserve_free_virtual_function ~__context vm
         |> add_pcis_to_vm ~__context host vm
@@ -166,7 +198,7 @@ let vgpu_manual_setup_of_vm vm_r =
 let create_vgpus ~__context host (vm, vm_r) hvm =
   let vgpus = vgpus_of_vm ~__context vm_r in
   if vgpus <> [] && not hvm then
-      raise (Api_errors.Server_error (Api_errors.feature_requires_hvm, ["vGPU- and GPU-passthrough needs HVM"]));
+    raise (Api_errors.Server_error (Api_errors.feature_requires_hvm, ["vGPU- and GPU-passthrough needs HVM"]));
   add_vgpus_to_vm ~__context host vm vgpus (vgpu_manual_setup_of_vm vm_r)
 
 (* This function is called from Xapi_xenops, after forwarding, so possibly on a slave. *)

diff --git a/ocaml/xapi/vgpuops.mli b/ocaml/xapi/vgpuops.mli
@@ -30,3 +30,20 @@ val vgpu_manual_setup_of_vm : API.vM_t -> bool
 (** Return a list of the GPU PCI devices which have been assigned to this VM *)
 val list_pcis_for_passthrough :
   __context:Context.t -> vm:API.ref_VM -> (int * (int * int * int * int)) list
+
+(** Abstract description of a vGPU, as built by {!vgpu_of_ref}. *)
+type vgpu_t
+
+(** Allocate a vGPU to a pGPU of a host for the VM.
+ *  Returns the list of allocations in the format [(v1,p1);(v2,p2);(v3,p1)],
+ *  i.e. the new (vGPU, pGPU) pair prepended to [pre_allocate_list].
+ *  [dry_run] (default: false) and [pre_allocate_list] (default: empty list)
+ *  are optional, to keep the interface backward compatible.
+ *  In dry-run mode the allocation is computed without writing it to the
+ *  database; [pre_allocate_list] records the allocations made so far. *)
+val allocate_vgpu_to_gpu :
+  ?dry_run:bool -> ?pre_allocate_list:(API.ref_VGPU * API.ref_PGPU) list ->
+  __context:Context.t -> API.ref_VM ->  API.ref_host -> vgpu_t -> (API.ref_VGPU * API.ref_PGPU) list
+
+(** Build the [vgpu_t] description of a vGPU from its database reference *)
+val vgpu_of_ref : __context:Context.t -> API.ref_VGPU -> vgpu_t
diff --git a/ocaml/xapi/xapi_globs.ml b/ocaml/xapi/xapi_globs.ml
@@ -440,6 +440,7 @@ let vgpu_manual_setup_key = "vgpu_manual_setup"
 let vgpu_pci_key = "vgpu_pci_id"
 let vgpu_config_key = "vgpu_config"
 let vgpu_extra_args_key = "vgpu_extra_args"
+let vgpu_pci_prefix = "0000:0:"
 
 let igd_passthru_key = "igd_passthrough"
 

diff --git a/ocaml/xapi/xapi_pgpu.ml b/ocaml/xapi/xapi_pgpu.ml
@@ -312,7 +312,14 @@ let get_remaining_capacity ~__context ~self ~vgpu_type =
 
 let assert_can_run_VGPU ~__context ~self ~vgpu =
   let vgpu_type = Db.VGPU.get_type ~__context ~self:vgpu in
-  Xapi_pgpu_helpers.assert_capacity_exists_for_VGPU_type ~__context ~self ~vgpu_type
+  Xapi_pgpu_helpers.assert_capacity_exists_for_VGPU_type ~__context ~self ~vgpu_type;
+
+  (* Check via gpumon whether Nvidia NVML allows this vGPU on this pGPU *)
+  let nvidia_compatible = Xapi_gpumon.Nvidia.vgpu_pgpu_are_compatible ~__context ~vgpu ~pgpu:self in
+  if not nvidia_compatible then raise (Api_errors.Server_error
+                                         (* TODO: this should be a new, dedicated error; for now incompatibility is reported as insufficient capacity *)
+                                         (Api_errors.pgpu_insufficient_capacity_for_vgpu, [Ref.string_of self;Ref.string_of vgpu_type]))
+
 
 let update_dom0_access ~__context ~self ~action =
   let db_current = Db.PGPU.get_dom0_access ~__context ~self in

diff --git a/ocaml/xapi/xapi_pgpu_helpers.ml b/ocaml/xapi/xapi_pgpu_helpers.ml
@@ -50,20 +50,30 @@ let get_allocated_VGPUs ~__context ~self =
 
 let assert_VGPU_type_allowed ~__context ~self ~vgpu_type =
   assert_VGPU_type_enabled ~__context ~self ~vgpu_type;
-  (match get_allocated_VGPUs ~__context ~self with
-   | [] -> ()
-   | resident_VGPU :: _ ->
-     let running_type =
-       Db.VGPU.get_type ~__context ~self:resident_VGPU
-     in
-     if running_type <> vgpu_type
-     then raise (Api_errors.Server_error (
-         Api_errors.vgpu_type_not_compatible_with_running_type,
-         [
-           Ref.string_of self;
-           Ref.string_of vgpu_type;
-           Ref.string_of running_type;
-         ])))
+  let allocated_vgpu_list = get_allocated_VGPUs ~__context ~self in
+  (* The requested type is permitted only if it is in the compatible_types_on_pgpu list of every vGPU type already allocated on this pGPU *)
+  match allocated_vgpu_list with
+  | [] -> () (* Nothing is allocated on this pGPU, so there is no compatibility to check *)
+  | hd::tail ->
+    let grant_vgpu_type_list = List.fold_left
+        (fun grant_list current_list -> Listext.List.intersect grant_list current_list)
+        (Db.VGPU_type.get_compatible_types_on_pgpu ~__context ~self:(Db.VGPU.get_type ~__context ~self:hd))
+        (
+          List.map (fun self -> Db.VGPU.get_type ~__context ~self) tail
+          |> List.sort_uniq Pervasives.compare (* Remove duplicate types *)
+          |> List.map (fun self -> Db.VGPU_type.get_compatible_types_on_pgpu ~__context ~self)
+        ) in
+    if not (List.mem vgpu_type (List.map Ref.of_string (* TODO: remove this conversion once the list holds refs rather than strings *) grant_vgpu_type_list)) then
+      let sep = ";" in
+      raise (Api_errors.Server_error (
+          Api_errors.vgpu_type_not_compatible_with_running_type, [
+            Ref.string_of self;
+            Ref.string_of vgpu_type;
+            List.map (fun self-> Db.VGPU.get_type ~__context ~self) allocated_vgpu_list
+            |> List.sort_uniq Pervasives.compare
+            |> List.map (fun vgpu_ref -> Ref.string_of vgpu_ref)
+            |> String.concat sep
+          ]))
 
 let assert_no_resident_VGPUs_of_type ~__context ~self ~vgpu_type =
   let open Db_filter_types in
@@ -212,9 +222,9 @@ let assert_destination_has_pgpu_compatible_with_vm ~__context ~vm ~vgpu_map ~hos
     | `nvidia ->
       Db.VGPU.get_GPU_group ~__context ~self:vgpu
       |> fun self -> Db.GPU_group.get_GPU_types ~__context ~self
-      |> fun pgpu_types -> get_first_suitable_pgpu pgpu_types vgpu pgpus
-      |> fun pgpu ->
-        assert_destination_pgpu_is_compatible_with_vm ~__context ~vm ~vgpu ~pgpu ~host ?remote ()
+                     |> fun pgpu_types -> get_first_suitable_pgpu pgpu_types vgpu pgpus
+                                          |> fun pgpu ->
+                                          assert_destination_pgpu_is_compatible_with_vm ~__context ~vm ~vgpu ~pgpu ~host ?remote ()
   in
   let vgpus = Db.VM.get_VGPUs ~__context ~self:vm in
   let _mapped, unmapped = List.partition (fun vgpu -> List.mem_assoc vgpu vgpu_map) vgpus in

diff --git a/ocaml/xapi/xapi_vgpu_type.ml b/ocaml/xapi/xapi_vgpu_type.ml
@@ -36,6 +36,7 @@ module Identifier = struct
     psubdev_id : int option;
     vdev_id : int;
     vsubdev_id : int;
+    type_id : string;
   }
 
   type gvt_g_id = {
@@ -64,13 +65,14 @@ module Identifier = struct
       match id with
       | Passthrough -> "passthrough"
       | Nvidia nvidia_id ->
-        Printf.sprintf "nvidia,%04x,%s,%04x,%04x"
+        Printf.sprintf "nvidia,%04x,%s,%04x,%04x,%s"
           nvidia_id.pdev_id
           (match nvidia_id.psubdev_id with
            | Some id -> Printf.sprintf "%04x" id
            | None -> "")
           nvidia_id.vdev_id
           nvidia_id.vsubdev_id
+          nvidia_id.type_id
       | GVT_g gvt_g_id ->
         Printf.sprintf "gvt-g,%04x,%Lx,%Lx,%Lx"
           gvt_g_id.pdev_id
@@ -306,11 +308,14 @@ module Nvidia_old = struct
                   (List.assoc "plugin0.num_heads" args) in
               let max_instance = Int64.of_string
                   (List.assoc "plugin0.max_instance" args) in
+              (* Use "0" as the type_id here, because the nvidia conf file is now read in a new way *)
+              let type_id = "0" in
               let identifier = Identifier.({
                   pdev_id;
                   psubdev_id;
                   vdev_id;
                   vsubdev_id;
+                  type_id;
                 }) in
               let compatible_types_in_vm = [] in
               let compatible_types_on_pgpu = [] in
@@ -625,6 +630,7 @@ module Vendor_nvidia = struct
             psubdev_id;
             vdev_id = int_of_string (get_attr "deviceId" devid);
             vsubdev_id = int_of_string (get_attr "subsystemId" devid);
+            type_id = id;
           } in
         let file_path = whitelist in
         (* Multiple vgpu support:
@@ -668,8 +674,8 @@ module Vendor_nvidia = struct
 
   let vgpu_type_of_conf pci_access vendor_name _ conf =
     let open Identifier in
-    debug "Pci.lookup_subsystem_device_name: vendor=%04x device=%04x subdev=%04x"
-      vendor_id conf.identifier.vdev_id conf.identifier.vsubdev_id;
+    debug "Pci.lookup_subsystem_device_name: vendor=%04x device=%04x subdev=%04x type_id=%s"
+      vendor_id conf.identifier.vdev_id conf.identifier.vsubdev_id conf.identifier.type_id;
     let default v =
       match v with
       | Some v -> v