Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 6 additions & 15 deletions ocaml/xcp-rrdd/bin/rrdd/rrdd_monitor.ml
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,7 @@ let convert_to_owner_map dss =
Also resets the value of datasources that are enabled in the RRD, but
weren't updated on this refresh cycle.
*)
let update_rrds uuid_domids paused_vms plugins_dss =
let uuid_domids = List.to_seq uuid_domids |> StringMap.of_seq in
let paused_vms = List.to_seq paused_vms |> StringSet.of_seq in
let update_rrds uuid_domids plugins_dss =
let per_owner_flattened_map, per_plugin_map =
convert_to_owner_map plugins_dss
in
Expand Down Expand Up @@ -237,18 +235,11 @@ let update_rrds uuid_domids paused_vms plugins_dss =
match vm_rrd with
| Some rrdi ->
let updated_dss, rrd = merge_new_dss rrdi dss in
(* CA-34383: Memory updates from paused domains serve no useful
purpose. During a migrate such updates can also cause undesirable
discontinuities in the observed value of memory_actual. Hence, we
ignore changes from paused domains: *)
( if not (StringSet.mem vm_uuid paused_vms) then
let named_updates =
StringMap.map to_named_updates dss
in
Rrd.ds_update_named rrd
~new_rrd:(domid <> rrdi.domid) timestamp
named_updates
) ;
let named_updates =
StringMap.map to_named_updates dss
in
Rrd.ds_update_named rrd ~new_rrd:(domid <> rrdi.domid)
timestamp named_updates ;
Some {rrd; dss= updated_dss; domid}
| None ->
debug "%s: Creating fresh RRD for VM uuid=%s"
Expand Down
153 changes: 80 additions & 73 deletions ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,51 @@ let mem_available () =
let* size, kb = scan "/proc/meminfo" in
match kb with "kB" -> ok size | _ -> res_error "unexpected unit: %s" kb

let dss_mem_vms doms =
List.fold_left
(fun acc (dom, uuid, domid) ->
(* Uuid prefixes of domains that must not appear in a domain snapshot: a
   domain is dropped when the first 18 characters of its uuid string are
   equal to one of these entries. *)
let uuid_blacklist = ["00000000-0000-0000"; "deadbeef-dead-beef"]

(* Sets of integers; used below to hold the domids of the domains found in a
   snapshot. *)
module IntSet = Set.Make (Int)

let domain_snapshot xc =
let metadata_of_domain dom =
let ( let* ) = Option.bind in
let* uuid_raw = Uuidx.of_int_array dom.Xenctrl.handle in
let uuid = Uuidx.to_string uuid_raw in
let domid = dom.Xenctrl.domid in
let start = String.sub uuid 0 18 in
(* Actively hide migrating VM uuids, these are temporary and xenops writes
the original and the final uuid to xenstore *)
let uuid_from_key key =
let path = Printf.sprintf "/vm/%s/%s" uuid key in
try Ezxenstore_core.Xenstore.(with_xs (fun xs -> xs.read path))
with Xs_protocol.Enoent _hint ->
info "Couldn't read path %s; falling back to actual uuid" path ;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Report __FUNCTION__?

uuid
in
let stable_uuid = Option.fold ~none:uuid ~some:uuid_from_key in
if List.mem start uuid_blacklist then
None
else
let key =
if Astring.String.is_suffix ~affix:"000000000000" uuid then
Some "origin-uuid"
else if Astring.String.is_suffix ~affix:"000000000001" uuid then
Some "final-uuid"
else
None
in
Some (dom, stable_uuid key, domid)
in
let domains =
Xenctrl.domain_getinfolist xc 0 |> List.filter_map metadata_of_domain
in
let domids = List.map (fun (_, _, i) -> i) domains |> IntSet.of_list in
let domains_only k v = Option.map (Fun.const v) (IntSet.find_opt k domids) in
Hashtbl.filter_map_inplace domains_only Rrdd_shared.memory_targets ;
domains |> List.to_seq

let dss_mem_vms xc =
let mem_metrics_of (dom, uuid, domid) =
let vm_metrics () =
let kib =
Xenctrl.pages_to_kib (Int64.of_nativeint dom.Xenctrl.total_memory_pages)
in
Expand Down Expand Up @@ -317,14 +359,20 @@ let dss_mem_vms doms =
)
with Not_found -> None
in
List.concat
[
main_mem_ds :: Option.to_list other_ds
; Option.to_list mem_target_ds
; acc
]
)
[] doms
let metrics =
List.concat
[main_mem_ds :: Option.to_list other_ds; Option.to_list mem_target_ds]
in
Some (List.to_seq metrics)
in
(* CA-34383: Memory updates from paused domains serve no useful purpose.
During a migrate such updates can also cause undesirable
discontinuities in the observed value of memory_actual. Hence, we
ignore changes from paused domains: *)
if dom.Xenctrl.paused then None else vm_metrics ()
in
let domains = domain_snapshot xc in
Seq.filter_map mem_metrics_of domains |> Seq.concat |> List.of_seq

(**** Local cache SR stuff *)

Expand Down Expand Up @@ -429,66 +477,18 @@ let handle_exn log f default =
(Printexc.to_string e) ;
default

(* Uuid prefixes of domains that must not appear in a domain snapshot: a
   domain is dropped when the first 18 characters of its uuid string are
   equal to one of these entries. *)
let uuid_blacklist = ["00000000-0000-0000"; "deadbeef-dead-beef"]

(* Sets of integers; used below to hold the domids of the domains found in a
   snapshot. *)
module IntSet = Set.Make (Int)

(** [domain_snapshot xc] queries [Xenctrl.domain_getinfolist xc 0] and returns
    [(domains, paused_uuids)].

    [domains] holds one [(dom, uuid, domid)] triple per domain whose handle
    converts to a uuid and whose uuid does not start with an entry of
    [uuid_blacklist]. [paused_uuids] holds the uuids of those triples whose
    domain info has [paused] set.

    The reported uuid is normally the domain's own uuid. When that uuid ends
    in ["000000000000"] or ["000000000001"], the value stored in xenstore
    under [/vm/<uuid>/origin-uuid] or [/vm/<uuid>/final-uuid] respectively is
    reported instead; if that path does not exist, the domain's own uuid is
    used and a message is logged.

    Side effect: every binding of [Rrdd_shared.memory_targets] whose key is
    not the domid of a domain in [domains] is removed. *)
let domain_snapshot xc =
  let metadata_of_domain dom =
    match Uuidx.of_int_array dom.Xenctrl.handle with
    | None ->
        None
    | Some raw_uuid ->
        let uuid = Uuidx.to_string raw_uuid in
        let prefix = String.sub uuid 0 18 in
        if List.mem prefix uuid_blacklist then
          None
        else
          (* Actively hide migrating VM uuids: these are temporary, and
             xenops writes the original and the final uuid to xenstore *)
          let read_stable_uuid key =
            let path = Printf.sprintf "/vm/%s/%s" uuid key in
            try Ezxenstore_core.Xenstore.(with_xs (fun xs -> xs.read path))
            with Xs_protocol.Enoent _hint ->
              info "Couldn't read path %s; falling back to actual uuid" path ;
              uuid
          in
          let has_suffix affix = Astring.String.is_suffix ~affix uuid in
          let stable_uuid =
            if has_suffix "000000000000" then
              read_stable_uuid "origin-uuid"
            else if has_suffix "000000000001" then
              read_stable_uuid "final-uuid"
            else
              uuid
          in
          Some (dom, stable_uuid, dom.Xenctrl.domid)
  in
  let domains =
    List.filter_map metadata_of_domain (Xenctrl.domain_getinfolist xc 0)
  in
  let paused_uuids =
    List.filter_map
      (fun (d, uuid, _) -> if d.Xenctrl.paused then Some uuid else None)
      domains
  in
  (* Domids present in this snapshot; memory targets recorded for any other
     domid are stale and get dropped. *)
  let live_domids =
    List.fold_left
      (fun acc (_, _, domid) -> IntSet.add domid acc)
      IntSet.empty domains
  in
  let keep_if_live domid target =
    if IntSet.mem domid live_domids then Some target else None
  in
  Hashtbl.filter_map_inplace keep_if_live Rrdd_shared.memory_targets ;
  (domains, paused_uuids)

let dom0_stat_generators =
[
("ha", fun _ _ _ -> Rrdd_ha_stats.all ())
; ("mem_host", fun xc _ _ -> dss_mem_host xc)
; ("mem_vms", fun _ _ domains -> dss_mem_vms domains)
; ("cache", fun _ timestamp _ -> dss_cache timestamp)
("ha", fun _ _ -> Rrdd_ha_stats.all ())
; ("mem_host", fun xc _ -> dss_mem_host xc)
; ("mem_vms", fun xc _ -> dss_mem_vms xc)
; ("cache", fun _ timestamp -> dss_cache timestamp)
]

let generate_all_dom0_stats xc domains =
let generate_all_dom0_stats xc =
let handle_generator (name, generator) =
let timestamp = Unix.gettimeofday () in
( name
, (timestamp, handle_exn name (fun _ -> generator xc timestamp domains) [])
)
(name, (timestamp, handle_exn name (fun _ -> generator xc timestamp) []))
in
List.map handle_generator dom0_stat_generators

Expand All @@ -505,10 +505,9 @@ let write_dom0_stats writers tagged_dss =
in
List.iter write_dss writers

let do_monitor_write xc writers =
let do_monitor_write domains_before xc writers =
Rrdd_libs.Stats.time_this "monitor" (fun _ ->
let domains, my_paused_vms = domain_snapshot xc in
let tagged_dom0_stats = generate_all_dom0_stats xc domains in
let tagged_dom0_stats = generate_all_dom0_stats xc in
write_dom0_stats writers tagged_dom0_stats ;
let dom0_stats =
tagged_dom0_stats
Expand All @@ -518,26 +517,34 @@ let do_monitor_write xc writers =
)
in
let plugins_stats = Rrdd_server.Plugin.read_stats () in
let domains_after = domain_snapshot xc in
let stats = Seq.append plugins_stats dom0_stats in
Rrdd_stats.print_snapshot () ;
let uuid_domids = List.map (fun (_, u, i) -> (u, i)) domains in

(* merge the domain ids from the previous iteration and the current one
to avoid missing updates *)
let uuid_domids =
Seq.append domains_before domains_after
|> Seq.map (fun (_, u, i) -> (u, i))
|> Rrd.StringMap.of_seq
in
(* stats are grouped per plugin, which provides its timestamp *)
Rrdd_monitor.update_rrds uuid_domids my_paused_vms stats ;
Rrdd_monitor.update_rrds uuid_domids stats ;

Rrdd_libs.Constants.datasource_dump_file
|> Rrdd_server.dump_host_dss_to_file ;
Rrdd_libs.Constants.datasource_vm_dump_file
|> Rrdd_server.dump_vm_dss_to_file
|> Rrdd_server.dump_vm_dss_to_file ;
domains_after
)

let monitor_write_loop writers =
Debug.with_thread_named "monitor_write"
(fun () ->
Xenctrl.with_intf (fun xc ->
let domains = ref Seq.empty in
while true do
try
do_monitor_write xc writers ;
domains := do_monitor_write !domains xc writers ;
with_lock Rrdd_shared.next_iteration_start_m (fun _ ->
Rrdd_shared.next_iteration_start :=
Clock.Timer.extend_by !Rrdd_shared.timeslice
Expand Down
26 changes: 12 additions & 14 deletions ocaml/xcp-rrdd/test/rrdd/test_rrdd_monitor.ml
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,11 @@ let host_rrds rrd_info =
Hashtbl.add h "host" rrd_info ;
Some h

let update_rrds_test ~timestamp ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds
let update_rrds_test ~timestamp ~dss ~uuid_domids ~expected_vm_rrds
~expected_sr_rrds ~expected_host_dss =
let test () =
reset_rrdd_shared_state () ;
Rrdd_monitor.update_rrds uuid_domids paused_vms
Rrdd_monitor.update_rrds uuid_domids
(List.to_seq [("update_rrds_test", timestamp, List.to_seq dss)]) ;
check_datasources "VM" (Some Rrdd_shared.vm_rrds) expected_vm_rrds ;
check_datasources "SR" (Some Rrdd_shared.sr_rrds) expected_sr_rrds ;
Expand All @@ -74,63 +74,61 @@ let update_rrds_test ~timestamp ~dss ~uuid_domids ~paused_vms ~expected_vm_rrds

let update_rrds =
let open Rrd in
let map_of_list ls = StringMap.of_seq (List.to_seq ls) in
[
( "Null update"
, update_rrds_test ~timestamp:0. ~dss:[] ~uuid_domids:[] ~paused_vms:[]
, update_rrds_test ~timestamp:0. ~dss:[] ~uuid_domids:StringMap.empty
~expected_vm_rrds:[] ~expected_sr_rrds:[] ~expected_host_dss:[]
)
; ( "Single host update"
, update_rrds_test ~timestamp:0.
~dss:[(Host, ds_a)]
~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[]
~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[]
~expected_host_dss:[("host", [ds_a])]
)
; ( "Multiple host updates"
, update_rrds_test ~timestamp:0.
~dss:[(Host, ds_a); (Host, ds_b)]
~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[]
~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[]
~expected_host_dss:[("host", [ds_a; ds_b])]
)
; ( "Single non-resident VM update"
, update_rrds_test ~timestamp:0.
~dss:[(VM "a", ds_a)]
~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[]
~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[]
~expected_host_dss:[]
)
; ( "Multiple non-resident VM updates"
, update_rrds_test ~timestamp:0.
~dss:[(VM "a", ds_a); (VM "b", ds_a)]
~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[] ~expected_sr_rrds:[]
~uuid_domids:StringMap.empty ~expected_vm_rrds:[] ~expected_sr_rrds:[]
~expected_host_dss:[]
)
; ( "Single resident VM update"
, update_rrds_test ~timestamp:0.
~dss:[(VM "a", ds_a)]
~uuid_domids:[("a", 1)]
~paused_vms:[]
~uuid_domids:(map_of_list [("a", 1)])
~expected_vm_rrds:[("a", [ds_a])]
~expected_sr_rrds:[] ~expected_host_dss:[]
)
; ( "Multiple resident VM updates"
, update_rrds_test ~timestamp:0.
~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "b", ds_b)]
~uuid_domids:[("a", 1); ("b", 1)]
~paused_vms:[]
~uuid_domids:(map_of_list [("a", 1); ("b", 1)])
~expected_vm_rrds:[("a", [ds_a]); ("b", [ds_a; ds_b])]
~expected_sr_rrds:[] ~expected_host_dss:[]
)
; ( "Multiple resident and non-resident VM updates"
, update_rrds_test ~timestamp:0.
~dss:[(VM "a", ds_a); (VM "b", ds_a); (VM "c", ds_a)]
~uuid_domids:[("a", 1); ("b", 1)]
~paused_vms:[]
~uuid_domids:(map_of_list [("a", 1); ("b", 1)])
~expected_vm_rrds:[("a", [ds_a]); ("b", [ds_a])]
~expected_sr_rrds:[] ~expected_host_dss:[]
)
; ( "Multiple SR updates"
, update_rrds_test ~timestamp:0.
~dss:[(SR "a", ds_a); (SR "b", ds_a); (SR "b", ds_b)]
~uuid_domids:[] ~paused_vms:[] ~expected_vm_rrds:[]
~uuid_domids:StringMap.empty ~expected_vm_rrds:[]
~expected_sr_rrds:[("a", [ds_a]); ("b", [ds_a; ds_b])]
~expected_host_dss:[]
)
Expand Down
Loading