Skip to content

Commit ae2d859

Browse files
authored
Add experimental feature to use hard affinities when using best-effort (CP-54234) (#6491)
This needed: - Changes in the C bindings to be able to set the hard affinity for each vcpu. This needed change in the basic call to be able to set hard and soft affinities, at the same time. - Stop feeding xenguest the hard affinities when user did not set any in particular. Prior to the all->all priorities were set instead, with the misfortune of setting them after the NUMA code had decided the affinities and set them, overriding the desired values. Fortunately xenguest is able to cope with the missing values. (edwin found this strange behaviour) I took time to fix the exhaustivity issue when defining features, and now xenopsd sets the hard affinities when the user has hardcoded them for a VM, instead of sending them for xenguest to read them. Tests: - manually tested that soft-pinning works by default - manually tested that enabling the feature and restart xapi enables hard-pinning - manually tested that setting the vcpu mask works (`xe vm-param-set uuid=A_UUID VCPUs-params:mask=20`) - manually checked that the all_features contains the same features as before using the REPL, in case some got forgotten (there are also unit-tests for the module) The vcpu affinities were checked using `xl vcpu-list` and restarting a VM repeatedly while changing the feature and the vm arguments.
2 parents bb63de6 + 18c952f commit ae2d859

File tree

11 files changed

+264
-174
lines changed

11 files changed

+264
-174
lines changed

ocaml/xapi-idl/xen/xenops_interface.ml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -496,9 +496,12 @@ module Host = struct
496496
[@@deriving rpcty]
497497

498498
type numa_affinity_policy =
499-
| Any (** VMs may run on any NUMA nodes. This is the default in 8.2CU1 *)
499+
| Any (** VMs may run on any NUMA nodes. *)
500500
| Best_effort
501-
(** best effort placement on the smallest number of NUMA nodes where possible *)
501+
(** Best-effort placement. Assigns the memory of the VM to a single
502+
node, and soft-pins its VCPUs to the node, if possible. Otherwise
503+
behaves like Any. *)
504+
| Best_effort_hard (** Like Best_effort, but hard-pins the VCPUs *)
502505
[@@deriving rpcty]
503506

504507
type numa_affinity_policy_opt = numa_affinity_policy option [@@deriving rpcty]

ocaml/xapi-types/dune

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
xapi-stdext-unix
2424
)
2525
(wrapped false)
26-
(preprocess (per_module ((pps ppx_deriving_rpc) API Event_types Features SecretString)))
26+
(preprocess
27+
(per_module
28+
((pps ppx_deriving_rpc) API Event_types SecretString)
29+
((pps ppx_deriving_rpc ppx_deriving.enum) Features)))
2730
)
28-

ocaml/xapi-types/features.ml

Lines changed: 126 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -68,79 +68,119 @@ type feature =
6868
| VM_groups
6969
| VM_start
7070
| VM_appliance_start
71-
[@@deriving rpc]
71+
[@@deriving rpc, enum]
7272

7373
type orientation = Positive | Negative
7474

75-
let keys_of_features =
76-
[
77-
(VLAN, ("restrict_vlan", Negative, "VLAN"))
78-
; (QoS, ("restrict_qos", Negative, "QoS"))
79-
; (Shared_storage, ("restrict_pool_attached_storage", Negative, "SStorage"))
80-
; (Netapp, ("restrict_netapp", Negative, "NTAP"))
81-
; (Equalogic, ("restrict_equalogic", Negative, "EQL"))
82-
; (Pooling, ("restrict_pooling", Negative, "Pool"))
83-
; (HA, ("enable_xha", Positive, "XHA"))
84-
; (Marathon, ("restrict_marathon", Negative, "MTC"))
85-
; (Email, ("restrict_email_alerting", Negative, "email"))
86-
; (Performance, ("restrict_historical_performance", Negative, "perf"))
87-
; (WLB, ("restrict_wlb", Negative, "WLB"))
88-
; (RBAC, ("restrict_rbac", Negative, "RBAC"))
89-
; (DMC, ("restrict_dmc", Negative, "DMC"))
90-
; (Checkpoint, ("restrict_checkpoint", Negative, "chpt"))
91-
; (CPU_masking, ("restrict_cpu_masking", Negative, "Mask"))
92-
; (Connection, ("restrict_connection", Negative, "Cnx"))
93-
; (No_platform_filter, ("platform_filter", Negative, "Plat"))
94-
; (No_nag_dialog, ("regular_nag_dialog", Negative, "nonag"))
95-
; (VMPR, ("restrict_vmpr", Negative, "VMPR"))
96-
; (VMSS, ("restrict_vmss", Negative, "VMSS"))
97-
; (IntelliCache, ("restrict_intellicache", Negative, "IntelliCache"))
98-
; (GPU, ("restrict_gpu", Negative, "GPU"))
99-
; (DR, ("restrict_dr", Negative, "DR"))
100-
; (VIF_locking, ("restrict_vif_locking", Negative, "VIFLock"))
101-
; (Storage_motion, ("restrict_storage_xen_motion", Negative, "SXM"))
102-
; (VGPU, ("restrict_vgpu", Negative, "vGPU"))
103-
; (Integrated_GPU, ("restrict_integrated_gpu_passthrough", Negative, "iGPU"))
104-
; (VSS, ("restrict_vss", Negative, "VSS"))
105-
; ( Guest_agent_auto_update
106-
, ("restrict_guest_agent_auto_update", Negative, "GAAU")
107-
)
108-
; ( PCI_device_for_auto_update
109-
, ("restrict_pci_device_for_auto_update", Negative, "PciAU")
110-
)
111-
; (Xen_motion, ("restrict_xen_motion", Negative, "Live_migration"))
112-
; (Guest_ip_setting, ("restrict_guest_ip_setting", Negative, "GuestIP"))
113-
; (AD, ("restrict_ad", Negative, "AD"))
114-
; (Nested_virt, ("restrict_nested_virt", Negative, "Nested_virt"))
115-
; (Live_patching, ("restrict_live_patching", Negative, "Live_patching"))
116-
; ( Live_set_vcpus
117-
, ("restrict_set_vcpus_number_live", Negative, "Live_set_vcpus")
118-
)
119-
; (PVS_proxy, ("restrict_pvs_proxy", Negative, "PVS_proxy"))
120-
; (IGMP_snooping, ("restrict_igmp_snooping", Negative, "IGMP_snooping"))
121-
; (RPU, ("restrict_rpu", Negative, "RPU"))
122-
; (Pool_size, ("restrict_pool_size", Negative, "Pool_size"))
123-
; (CBT, ("restrict_cbt", Negative, "CBT"))
124-
; (USB_passthrough, ("restrict_usb_passthrough", Negative, "USB_passthrough"))
125-
; (Network_sriov, ("restrict_network_sriov", Negative, "Network_sriov"))
126-
; (Corosync, ("restrict_corosync", Negative, "Corosync"))
127-
; (Cluster_address, ("restrict_cluster_address", Negative, "Cluster_address"))
128-
; (Zstd_export, ("restrict_zstd_export", Negative, "Zstd_export"))
129-
; ( Pool_secret_rotation
130-
, ("restrict_pool_secret_rotation", Negative, "Pool_secret_rotation")
131-
)
132-
; ( Certificate_verification
133-
, ("restrict_certificate_verification", Negative, "Certificate_verification")
134-
)
135-
; (Updates, ("restrict_updates", Negative, "Upd"))
136-
; ( Internal_repo_access
137-
, ("restrict_internal_repo_access", Negative, "Internal_repo_access")
138-
)
139-
; (VTPM, ("restrict_vtpm", Negative, "VTPM"))
140-
; (VM_groups, ("restrict_vm_groups", Negative, "VM_groups"))
141-
; (VM_start, ("restrict_vm_start", Negative, "Start"))
142-
; (VM_appliance_start, ("restrict_vm_appliance_start", Negative, "Start"))
143-
]
75+
let props_of_feature = function
76+
| VLAN ->
77+
("restrict_vlan", Negative, "VLAN")
78+
| QoS ->
79+
("restrict_qos", Negative, "QoS")
80+
| Shared_storage ->
81+
("restrict_pool_attached_storage", Negative, "SStorage")
82+
| Netapp ->
83+
("restrict_netapp", Negative, "NTAP")
84+
| Equalogic ->
85+
("restrict_equalogic", Negative, "EQL")
86+
| Pooling ->
87+
("restrict_pooling", Negative, "Pool")
88+
| HA ->
89+
("enable_xha", Positive, "XHA")
90+
| Marathon ->
91+
("restrict_marathon", Negative, "MTC")
92+
| Email ->
93+
("restrict_email_alerting", Negative, "email")
94+
| Performance ->
95+
("restrict_historical_performance", Negative, "perf")
96+
| WLB ->
97+
("restrict_wlb", Negative, "WLB")
98+
| RBAC ->
99+
("restrict_rbac", Negative, "RBAC")
100+
| DMC ->
101+
("restrict_dmc", Negative, "DMC")
102+
| Checkpoint ->
103+
("restrict_checkpoint", Negative, "chpt")
104+
| CPU_masking ->
105+
("restrict_cpu_masking", Negative, "Mask")
106+
| Connection ->
107+
("restrict_connection", Negative, "Cnx")
108+
| No_platform_filter ->
109+
("platform_filter", Negative, "Plat")
110+
| No_nag_dialog ->
111+
("regular_nag_dialog", Negative, "nonag")
112+
| VMPR ->
113+
("restrict_vmpr", Negative, "VMPR")
114+
| VMSS ->
115+
("restrict_vmss", Negative, "VMSS")
116+
| IntelliCache ->
117+
("restrict_intellicache", Negative, "IntelliCache")
118+
| GPU ->
119+
("restrict_gpu", Negative, "GPU")
120+
| DR ->
121+
("restrict_dr", Negative, "DR")
122+
| VIF_locking ->
123+
("restrict_vif_locking", Negative, "VIFLock")
124+
| Storage_motion ->
125+
("restrict_storage_xen_motion", Negative, "SXM")
126+
| VGPU ->
127+
("restrict_vgpu", Negative, "vGPU")
128+
| Integrated_GPU ->
129+
("restrict_integrated_gpu_passthrough", Negative, "iGPU")
130+
| VSS ->
131+
("restrict_vss", Negative, "VSS")
132+
| Guest_agent_auto_update ->
133+
("restrict_guest_agent_auto_update", Negative, "GAAU")
134+
| PCI_device_for_auto_update ->
135+
("restrict_pci_device_for_auto_update", Negative, "PciAU")
136+
| Xen_motion ->
137+
("restrict_xen_motion", Negative, "Live_migration")
138+
| Guest_ip_setting ->
139+
("restrict_guest_ip_setting", Negative, "GuestIP")
140+
| AD ->
141+
("restrict_ad", Negative, "AD")
142+
| Nested_virt ->
143+
("restrict_nested_virt", Negative, "Nested_virt")
144+
| Live_patching ->
145+
("restrict_live_patching", Negative, "Live_patching")
146+
| Live_set_vcpus ->
147+
("restrict_set_vcpus_number_live", Negative, "Live_set_vcpus")
148+
| PVS_proxy ->
149+
("restrict_pvs_proxy", Negative, "PVS_proxy")
150+
| IGMP_snooping ->
151+
("restrict_igmp_snooping", Negative, "IGMP_snooping")
152+
| RPU ->
153+
("restrict_rpu", Negative, "RPU")
154+
| Pool_size ->
155+
("restrict_pool_size", Negative, "Pool_size")
156+
| CBT ->
157+
("restrict_cbt", Negative, "CBT")
158+
| USB_passthrough ->
159+
("restrict_usb_passthrough", Negative, "USB_passthrough")
160+
| Network_sriov ->
161+
("restrict_network_sriov", Negative, "Network_sriov")
162+
| Corosync ->
163+
("restrict_corosync", Negative, "Corosync")
164+
| Cluster_address ->
165+
("restrict_cluster_address", Negative, "Cluster_address")
166+
| Zstd_export ->
167+
("restrict_zstd_export", Negative, "Zstd_export")
168+
| Pool_secret_rotation ->
169+
("restrict_pool_secret_rotation", Negative, "Pool_secret_rotation")
170+
| Certificate_verification ->
171+
("restrict_certificate_verification", Negative, "Certificate_verification")
172+
| Updates ->
173+
("restrict_updates", Negative, "Upd")
174+
| Internal_repo_access ->
175+
("restrict_internal_repo_access", Negative, "Internal_repo_access")
176+
| VTPM ->
177+
("restrict_vtpm", Negative, "VTPM")
178+
| VM_groups ->
179+
("restrict_vm_groups", Negative, "VM_groups")
180+
| VM_start ->
181+
("restrict_vm_start", Negative, "Start")
182+
| VM_appliance_start ->
183+
("restrict_vm_appliance_start", Negative, "Start")
144184

145185
(* A list of features that must be considered "enabled" by `of_assoc_list`
146186
if the feature string is missing from the list. These are existing features
@@ -149,52 +189,40 @@ let keys_of_features =
149189
let enabled_when_unknown =
150190
[Xen_motion; AD; Updates; VM_start; VM_appliance_start]
151191

152-
let name_of_feature f = rpc_of_feature f |> Rpc.string_of_rpc
153-
154-
let string_of_feature f =
155-
let str, o, _ = List.assoc f keys_of_features in
156-
(str, o)
192+
let all_features =
193+
let length = max_feature - min_feature + 1 in
194+
let start = min_feature in
195+
List.init length (fun i -> feature_of_enum (i + start) |> Option.get)
157196

158-
let tag_of_feature f =
159-
let _, _, tag = List.assoc f keys_of_features in
160-
tag
197+
let name_of_feature f = rpc_of_feature f |> Rpc.string_of_rpc
161198

162-
let all_features = List.map (fun (f, _) -> f) keys_of_features
199+
let is_enabled v = function Positive -> v | Negative -> not v
163200

164201
let to_compact_string (s : feature list) =
165202
let get_tag f =
166-
let tag = tag_of_feature f in
203+
let _, _, tag = props_of_feature f in
167204
if List.mem f s then
168205
tag
169206
else
170207
String.make (String.length tag) ' '
171208
in
172-
let tags = List.map get_tag all_features in
173-
String.concat " " tags
209+
List.map get_tag all_features |> String.concat " "
174210

175211
let to_assoc_list (s : feature list) =
176212
let get_map f =
177-
let str, o = string_of_feature f in
213+
let str, o, _ = props_of_feature f in
178214
let switch = List.mem f s in
179-
let switch = string_of_bool (if o = Positive then switch else not switch) in
215+
let switch = string_of_bool (is_enabled switch o) in
180216
(str, switch)
181217
in
182218
List.map get_map all_features
183219

184220
let of_assoc_list l =
185-
let get_feature f =
221+
let enabled f =
186222
try
187-
let str, o = string_of_feature f in
188-
let v = bool_of_string (List.assoc str l) in
189-
let v = if o = Positive then v else not v in
190-
if v then Some f else None
191-
with _ -> if List.mem f enabled_when_unknown then Some f else None
223+
let str, o, _ = props_of_feature f in
224+
let v = List.assoc str l in
225+
is_enabled (bool_of_string v) o
226+
with _ -> List.mem f enabled_when_unknown
192227
in
193-
(* Filter_map to avoid having to carry the whole xapi-stdext-std
194-
* Note that the following is not tail recursive, in this case I
195-
* have chosen such implementation because the feature list is small
196-
* and the implementation looks readable and fairly self-contained.
197-
* Do not use this pattern for lists that can be long. *)
198-
List.fold_right
199-
(fun f acc -> match get_feature f with Some v -> v :: acc | None -> acc)
200-
all_features []
228+
List.filter enabled all_features

ocaml/xapi/xapi_xenops.ml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3110,6 +3110,12 @@ let resync_all_vms ~__context =
31103110
in
31113111
List.iter (fun vm -> refresh_vm ~__context ~self:vm) resident_vms_in_db
31123112

3113+
(* experimental feature for hard-pinning vcpus *)
3114+
let hard_numa_enabled ~__context =
3115+
let pool = Helpers.get_pool ~__context in
3116+
let restrictions = Db.Pool.get_restrictions ~__context ~self:pool in
3117+
List.assoc_opt "restrict_hard_numa" restrictions = Some "false"
3118+
31133119
let set_numa_affinity_policy ~__context ~value =
31143120
let dbg = Context.string_of_task __context in
31153121
let open Xapi_xenops_queue in
@@ -3119,6 +3125,8 @@ let set_numa_affinity_policy ~__context ~value =
31193125
match value with
31203126
| `any ->
31213127
Some Any
3128+
| `best_effort when hard_numa_enabled ~__context ->
3129+
Some Best_effort_hard
31223130
| `best_effort ->
31233131
Some Best_effort
31243132
| `default_policy ->

0 commit comments

Comments
 (0)