Skip to content

Commit 0010c74

Browse files
authored
Replace ha_operation_would_break_failover_plan with constraint errors (#6666)
Now the HA shared SR and network constraint violations are used when plugging and unplugging. When unplugging a pbd or pif, enabling a host, or adding a vbd or vif, the shared SR or network constraint violations could be violated, but the error used in these cases was that the operation blocked the failover planning. This was confusing because the main reason was not mentioned in the error. Instead use the shared constraint violation error, and log a more descriptive message in the logs as info, because these can happen during normal operation and there's nothing dodgy going on. I previously wanted to know Tina's opinion on how we change the reason in a way that can be better treated by clients and internationalized, but saw that the error used was simply not the right one.
2 parents e3d4f34 + b81aa1d commit 0010c74

File tree

5 files changed

+47
-48
lines changed

5 files changed

+47
-48
lines changed

ocaml/xapi/xapi_ha_vm_failover.ml

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1223,9 +1223,6 @@ let assert_configuration_change_preserves_ha_plan ~__context c =
12231223
"assert_configuration_change_preserves_ha_plan: plan exists after \
12241224
change"
12251225
| Plan_exists_excluding_non_agile_VMs | No_plan_exists ->
1226-
debug
1227-
"assert_configuration_change_preserves_ha_plan: proposed change \
1228-
breaks plan" ;
12291226
raise
12301227
(Api_errors.Server_error
12311228
(Api_errors.ha_operation_would_break_failover_plan, [])
@@ -1413,8 +1410,9 @@ let restart_auto_run_vms ~__context ~last_live_set ~live_set n =
14131410
let open TaskChains.Infix in
14141411
(* execute the plan *)
14151412
Helpers.call_api_functions ~__context (fun rpc session_id ->
1416-
(* Helper function to start a VM somewhere. If the HA overcommit protection stops us then disable it and try once more.
1417-
Returns true if the VM was restarted and false otherwise. *)
1413+
(* Helper function to start a VM somewhere. If the HA overcommit
1414+
protection stops us then disable it and try once more. Returns true if
1415+
the VM was restarted and false otherwise. *)
14181416
let restart_vm vm ?host () =
14191417
let go () =
14201418
( if Xapi_fist.simulate_restart_failure () then
@@ -1579,10 +1577,11 @@ let restart_auto_run_vms ~__context ~last_live_set ~live_set n =
15791577
in
15801578
gc_table last_start_attempt ;
15811579
gc_table restart_failed ;
1582-
(* Consider restarting the best-effort VMs we *think* have failed (but we might get this wrong --
1583-
ok since this is 'best-effort'). NOTE we do not use the restart_vm function above as this will mark the
1584-
pool as overcommitted if an HA_OPERATION_WOULD_BREAK_FAILOVER_PLAN is received (although this should never
1585-
happen it's better safe than sorry) *)
1580+
(* Consider restarting the best-effort VMs we *think* have failed (but we
1581+
might get this wrong -- ok since this is 'best-effort'). NOTE we do
1582+
not use the restart_vm function above as this will mark the pool as
1583+
overcommitted if an HA_OPERATION_WOULD_BREAK_FAILOVER_PLAN is received
1584+
(although this should never happen it's better safe than sorry) *)
15861585
let is_best_effort r =
15871586
r.API.vM_ha_restart_priority = Constants.ha_restart_best_effort
15881587
&& r.API.vM_power_state = `Halted

ocaml/xapi/xapi_pbd.ml

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,18 @@ let abort_if_storage_attached_to_protected_vms ~__context ~self =
114114
(fun vbd ->
115115
let vdi = Db.VBD.get_VDI ~__context ~self:vbd in
116116
if List.mem vdi vdis then (
117-
warn
118-
"PBD.unplug will make protected VM %s not agile since it has a \
119-
VBD attached to VDI %s"
120-
(Ref.string_of vm_ref) (Ref.string_of vdi) ;
117+
let vm = Ref.string_of vm_ref in
118+
let pbd = Ref.string_of self in
119+
let sr = Ref.string_of sr in
120+
info
121+
"The protected VM %s must remain agile and blocked the \
122+
operation. The PBD %s of must be plugged to ensure this. This \
123+
happened because the SR %s is used by both the VM and the \
124+
PBD."
125+
vm pbd sr ;
121126
raise
122-
(Api_errors.Server_error
123-
(Api_errors.ha_operation_would_break_failover_plan, [])
127+
Api_errors.(
128+
Server_error (ha_constraint_violation_sr_not_shared, [sr])
124129
)
125130
)
126131
)

ocaml/xapi/xapi_pif.ml

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -271,13 +271,17 @@ let abort_if_network_attached_to_protected_vms ~__context ~self =
271271
List.iter
272272
(fun vm ->
273273
if Helpers.is_xha_protected ~__context ~self:vm then (
274-
warn
275-
"PIF.unplug will make protected VM %s not agile since it has a VIF \
276-
attached to network %s"
277-
(Ref.string_of vm) (Ref.string_of net) ;
274+
let vm = Ref.string_of vm in
275+
let pif = Ref.string_of self in
276+
let net = Ref.string_of net in
277+
info
278+
"The protected VM %s must remain agile and blocked the operation. \
279+
PIF %s must be plugged this. This happened because network %s is \
280+
used by both the VM and the PIF"
281+
vm pif net ;
278282
raise
279-
(Api_errors.Server_error
280-
(Api_errors.ha_operation_would_break_failover_plan, [])
283+
Api_errors.(
284+
Server_error (ha_constraint_violation_network_not_shared, [net])
281285
)
282286
)
283287
)

ocaml/xapi/xapi_vbd_helpers.ml

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -358,24 +358,17 @@ let assert_attachable ~__context ~self =
358358

359359
let assert_doesnt_make_vm_non_agile ~__context ~vm ~vdi =
360360
let pool = Helpers.get_pool ~__context in
361-
let properly_shared =
362-
Agility.is_sr_properly_shared ~__context
363-
~self:(Db.VDI.get_SR ~__context ~self:vdi)
364-
in
361+
let sr = Db.VDI.get_SR ~__context ~self:vdi in
362+
let properly_shared = Agility.is_sr_properly_shared ~__context ~self:sr in
365363
if
366364
true
367365
&& Db.Pool.get_ha_enabled ~__context ~self:pool
368366
&& (not (Db.Pool.get_ha_allow_overcommit ~__context ~self:pool))
369367
&& Helpers.is_xha_protected ~__context ~self:vm
370368
&& not properly_shared
371-
then (
372-
warn "Attaching VDI %s makes VM %s not agile" (Ref.string_of vdi)
373-
(Ref.string_of vm) ;
374-
raise
375-
(Api_errors.Server_error
376-
(Api_errors.ha_operation_would_break_failover_plan, [])
377-
)
378-
)
369+
then
370+
let sr = Ref.string_of sr in
371+
raise Api_errors.(Server_error (ha_constraint_violation_sr_not_shared, [sr]))
379372

380373
let update_allowed_operations ~__context ~self : unit =
381374
let all = Db.VBD.get_record_internal ~__context ~self in

ocaml/xapi/xapi_vif_helpers.ml

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -267,19 +267,18 @@ let create ~__context ~device ~network ~vM ~mAC ~mTU ~other_config
267267
raise (Api_errors.Server_error (Api_errors.mac_invalid, [mAC])) ;
268268
(* Make people aware that non-shared networks being added to VMs makes them not agile *)
269269
let pool = Helpers.get_pool ~__context in
270-
if
271-
true
272-
&& Db.Pool.get_ha_enabled ~__context ~self:pool
273-
&& (not (Db.Pool.get_ha_allow_overcommit ~__context ~self:pool))
274-
&& Helpers.is_xha_protected ~__context ~self:vM
275-
&& not (Agility.is_network_properly_shared ~__context ~self:network)
276-
then (
277-
warn "Creating VIF %s makes VM %s not agile" (Ref.string_of ref)
278-
(Ref.string_of vM) ;
279-
raise
280-
(Api_errors.Server_error
281-
(Api_errors.ha_operation_would_break_failover_plan, [])
282-
)
270+
( if
271+
true
272+
&& Db.Pool.get_ha_enabled ~__context ~self:pool
273+
&& (not (Db.Pool.get_ha_allow_overcommit ~__context ~self:pool))
274+
&& Helpers.is_xha_protected ~__context ~self:vM
275+
&& not (Agility.is_network_properly_shared ~__context ~self:network)
276+
then
277+
let net = Ref.string_of network in
278+
raise
279+
Api_errors.(
280+
Server_error (ha_constraint_violation_network_not_shared, [net])
281+
)
283282
) ;
284283
(* Check to make sure the device is unique *)
285284
Xapi_stdext_threads.Threadext.Mutex.execute m (fun () ->
@@ -291,8 +290,7 @@ let create ~__context ~device ~network ~vM ~mAC ~mTU ~other_config
291290
in
292291
let new_device = int_of_string device in
293292
if List.exists (fun (_, d) -> d = new_device) all_vifs_with_devices then
294-
raise
295-
(Api_errors.Server_error (Api_errors.device_already_exists, [device])) ;
293+
raise Api_errors.(Server_error (device_already_exists, [device])) ;
296294

297295
(* If the VM uses a PVS_proxy, then the proxy _must_ be associated with
298296
the VIF that has the lowest device number. Check that the new VIF

0 commit comments

Comments
 (0)