@@ -255,85 +255,124 @@ let mem_available () =
255255 let * size, kb = scan " /proc/meminfo" in
256256 match kb with "kB" -> ok size | _ -> res_error " unexpected unit: %s" kb
257257
258- let dss_mem_vms doms =
259- List. fold_left
260- (fun acc (dom , uuid , domid ) ->
261- let add_vm_metrics () =
262- let kib =
263- Xenctrl. pages_to_kib
264- (Int64. of_nativeint dom.Xenctrl. total_memory_pages)
265- in
266- let memory = Int64. mul kib 1024L in
267- let main_mem_ds =
268- ( Rrd. VM uuid
269- , Ds. ds_make ~name: " memory"
270- ~description: " Memory currently allocated to VM" ~units: " B"
271- ~value: (Rrd. VT_Int64 memory) ~ty: Rrd. Gauge ~min: 0.0 ~default: true
272- ()
273- )
274- in
275- let memory_target_opt =
276- with_lock Rrdd_shared. memory_targets_m (fun _ ->
277- Hashtbl. find_opt Rrdd_shared. memory_targets domid
278- )
279- in
280- let mem_target_ds =
281- Option. map
282- (fun memory_target ->
283- ( Rrd. VM uuid
284- , Ds. ds_make ~name: " memory_target"
285- ~description: " Target of VM balloon driver" ~units: " B"
286- ~value: (Rrd. VT_Int64 memory_target) ~ty: Rrd. Gauge ~min: 0.0
287- ~default: true ()
288- )
258+ let uuid_blacklist = [" 00000000-0000-0000" ; " deadbeef-dead-beef" ]
259+
260+ module IntSet = Set. Make (Int )
261+
262+ let domain_snapshot xc =
263+ let metadata_of_domain dom =
264+ let ( let * ) = Option. bind in
265+ let * uuid_raw = Uuidx. of_int_array dom.Xenctrl. handle in
266+ let uuid = Uuidx. to_string uuid_raw in
267+ let domid = dom.Xenctrl. domid in
268+ let start = String. sub uuid 0 18 in
269+ (* Actively hide migrating VM uuids, these are temporary and xenops writes
270+ the original and the final uuid to xenstore *)
271+ let uuid_from_key key =
272+ let path = Printf. sprintf " /vm/%s/%s" uuid key in
273+ try Ezxenstore_core.Xenstore. (with_xs (fun xs -> xs.read path))
274+ with Xs_protocol. Enoent _hint ->
275+ info " Couldn't read path %s; falling back to actual uuid" path ;
276+ uuid
277+ in
278+ let stable_uuid = Option. fold ~none: uuid ~some: uuid_from_key in
279+ if List. mem start uuid_blacklist then
280+ None
281+ else
282+ let key =
283+ if Astring.String. is_suffix ~affix: " 000000000000" uuid then
284+ Some " origin-uuid"
285+ else if Astring.String. is_suffix ~affix: " 000000000001" uuid then
286+ Some " final-uuid"
287+ else
288+ None
289+ in
290+ Some (dom, stable_uuid key, domid)
291+ in
292+ let domains =
293+ Xenctrl. domain_getinfolist xc 0 |> List. filter_map metadata_of_domain
294+ in
295+ let domids = List. map (fun (_ , _ , i ) -> i) domains |> IntSet. of_list in
296+ let domains_only k v = Option. map (Fun. const v) (IntSet. find_opt k domids) in
297+ Hashtbl. filter_map_inplace domains_only Rrdd_shared. memory_targets ;
298+ domains |> List. to_seq
299+
300+ let dss_mem_vms xc =
301+ let mem_metrics_of (dom , uuid , domid ) =
302+ let vm_metrics () =
303+ let kib =
304+ Xenctrl. pages_to_kib (Int64. of_nativeint dom.Xenctrl. total_memory_pages)
305+ in
306+ let memory = Int64. mul kib 1024L in
307+ let main_mem_ds =
308+ ( Rrd. VM uuid
309+ , Ds. ds_make ~name: " memory"
310+ ~description: " Memory currently allocated to VM" ~units: " B"
311+ ~value: (Rrd. VT_Int64 memory) ~ty: Rrd. Gauge ~min: 0.0 ~default: true ()
312+ )
313+ in
314+ let memory_target_opt =
315+ with_lock Rrdd_shared. memory_targets_m (fun _ ->
316+ Hashtbl. find_opt Rrdd_shared. memory_targets domid
317+ )
318+ in
319+ let mem_target_ds =
320+ Option. map
321+ (fun memory_target ->
322+ ( Rrd. VM uuid
323+ , Ds. ds_make ~name: " memory_target"
324+ ~description: " Target of VM balloon driver" ~units: " B"
325+ ~value: (Rrd. VT_Int64 memory_target) ~ty: Rrd. Gauge ~min: 0.0
326+ ~default: true ()
289327 )
290- memory_target_opt
291- in
292- let other_ds =
293- if domid = 0 then
294- match mem_available () with
295- | Ok mem ->
296- Some
297- ( Rrd. VM uuid
298- , Ds. ds_make ~name: " memory_internal_free" ~units: " KiB"
299- ~description: " Dom0 current free memory"
300- ~value: (Rrd. VT_Int64 mem) ~ty: Rrd. Gauge ~min: 0.0
301- ~default: true ()
302- )
303- | Error msg ->
304- let _ =
305- error " %s: retrieving Dom0 free memory failed: %s"
306- __FUNCTION__ msg
307- in
308- None
309- else
310- try
311- let mem_free =
312- Watch.IntMap. find domid ! current_meminfofree_values
313- in
328+ )
329+ memory_target_opt
330+ in
331+ let other_ds =
332+ if domid = 0 then
333+ match mem_available () with
334+ | Ok mem ->
314335 Some
315336 ( Rrd. VM uuid
316337 , Ds. ds_make ~name: " memory_internal_free" ~units: " KiB"
317- ~description: " Memory used as reported by the guest agent "
318- ~value: (Rrd. VT_Int64 mem_free ) ~ty: Rrd. Gauge ~min: 0.0
338+ ~description: " Dom0 current free memory "
339+ ~value: (Rrd. VT_Int64 mem ) ~ty: Rrd. Gauge ~min: 0.0
319340 ~default: true ()
320341 )
321- with Not_found -> None
322- in
342+ | Error msg ->
343+ let _ =
344+ error " %s: retrieving Dom0 free memory failed: %s" __FUNCTION__
345+ msg
346+ in
347+ None
348+ else
349+ try
350+ let mem_free =
351+ Watch.IntMap. find domid ! current_meminfofree_values
352+ in
353+ Some
354+ ( Rrd. VM uuid
355+ , Ds. ds_make ~name: " memory_internal_free" ~units: " KiB"
356+ ~description: " Memory used as reported by the guest agent"
357+ ~value: (Rrd. VT_Int64 mem_free) ~ty: Rrd. Gauge ~min: 0.0
358+ ~default: true ()
359+ )
360+ with Not_found -> None
361+ in
362+ let metrics =
323363 List. concat
324- [
325- main_mem_ds :: Option. to_list other_ds
326- ; Option. to_list mem_target_ds
327- ; acc
328- ]
364+ [main_mem_ds :: Option. to_list other_ds; Option. to_list mem_target_ds]
329365 in
330- (* CA-34383: Memory updates from paused domains serve no useful purpose.
331- During a migrate such updates can also cause undesirable
332- discontinuities in the observed value of memory_actual. Hence, we
333- ignore changes from paused domains: *)
334- if dom.Xenctrl. paused then acc else add_vm_metrics ()
335- )
336- [] doms
366+ Some (List. to_seq metrics)
367+ in
368+ (* CA-34383: Memory updates from paused domains serve no useful purpose.
369+ During a migrate such updates can also cause undesirable
370+ discontinuities in the observed value of memory_actual. Hence, we
371+ ignore changes from paused domains: *)
372+ if dom.Xenctrl. paused then None else vm_metrics ()
373+ in
374+ let domains = domain_snapshot xc in
375+ Seq. filter_map mem_metrics_of domains |> Seq. concat |> List. of_seq
337376
338377(* *** Local cache SR stuff *)
339378
@@ -438,62 +477,18 @@ let handle_exn log f default =
438477 (Printexc. to_string e) ;
439478 default
440479
441- let uuid_blacklist = [" 00000000-0000-0000" ; " deadbeef-dead-beef" ]
442-
443- module IntSet = Set. Make (Int )
444-
445- let domain_snapshot xc =
446- let metadata_of_domain dom =
447- let ( let * ) = Option. bind in
448- let * uuid_raw = Uuidx. of_int_array dom.Xenctrl. handle in
449- let uuid = Uuidx. to_string uuid_raw in
450- let domid = dom.Xenctrl. domid in
451- let start = String. sub uuid 0 18 in
452- (* Actively hide migrating VM uuids, these are temporary and xenops writes
453- the original and the final uuid to xenstore *)
454- let uuid_from_key key =
455- let path = Printf. sprintf " /vm/%s/%s" uuid key in
456- try Ezxenstore_core.Xenstore. (with_xs (fun xs -> xs.read path))
457- with Xs_protocol. Enoent _hint ->
458- info " Couldn't read path %s; falling back to actual uuid" path ;
459- uuid
460- in
461- let stable_uuid = Option. fold ~none: uuid ~some: uuid_from_key in
462- if List. mem start uuid_blacklist then
463- None
464- else
465- let key =
466- if Astring.String. is_suffix ~affix: " 000000000000" uuid then
467- Some " origin-uuid"
468- else if Astring.String. is_suffix ~affix: " 000000000001" uuid then
469- Some " final-uuid"
470- else
471- None
472- in
473- Some (dom, stable_uuid key, domid)
474- in
475- let domains =
476- Xenctrl. domain_getinfolist xc 0 |> List. filter_map metadata_of_domain
477- in
478- let domids = List. map (fun (_ , _ , i ) -> i) domains |> IntSet. of_list in
479- let domains_only k v = Option. map (Fun. const v) (IntSet. find_opt k domids) in
480- Hashtbl. filter_map_inplace domains_only Rrdd_shared. memory_targets ;
481- domains
482-
483480let dom0_stat_generators =
484481 [
485- (" ha" , fun _ _ _ -> Rrdd_ha_stats. all () )
486- ; (" mem_host" , fun xc _ _ -> dss_mem_host xc)
487- ; (" mem_vms" , fun _ _ domains -> dss_mem_vms domains )
488- ; (" cache" , fun _ timestamp _ -> dss_cache timestamp)
482+ (" ha" , fun _ _ -> Rrdd_ha_stats. all () )
483+ ; (" mem_host" , fun xc _ -> dss_mem_host xc)
484+ ; (" mem_vms" , fun xc _ -> dss_mem_vms xc )
485+ ; (" cache" , fun _ timestamp -> dss_cache timestamp)
489486 ]
490487
491- let generate_all_dom0_stats xc domains =
488+ let generate_all_dom0_stats xc =
492489 let handle_generator (name , generator ) =
493490 let timestamp = Unix. gettimeofday () in
494- ( name
495- , (timestamp, handle_exn name (fun _ -> generator xc timestamp domains) [] )
496- )
491+ (name, (timestamp, handle_exn name (fun _ -> generator xc timestamp) [] ))
497492 in
498493 List. map handle_generator dom0_stat_generators
499494
@@ -510,10 +505,9 @@ let write_dom0_stats writers tagged_dss =
510505 in
511506 List. iter write_dss writers
512507
513- let do_monitor_write xc writers =
508+ let do_monitor_write domains_before xc writers =
514509 Rrdd_libs.Stats. time_this " monitor" (fun _ ->
515- let domains = domain_snapshot xc in
516- let tagged_dom0_stats = generate_all_dom0_stats xc domains in
510+ let tagged_dom0_stats = generate_all_dom0_stats xc in
517511 write_dom0_stats writers tagged_dom0_stats ;
518512 let dom0_stats =
519513 tagged_dom0_stats
@@ -523,26 +517,34 @@ let do_monitor_write xc writers =
523517 )
524518 in
525519 let plugins_stats = Rrdd_server.Plugin. read_stats () in
520+ let domains_after = domain_snapshot xc in
526521 let stats = Seq. append plugins_stats dom0_stats in
527522 Rrdd_stats. print_snapshot () ;
528- let uuid_domids = List. map (fun (_ , u , i ) -> (u, i)) domains in
529-
523+ (* merge the domain ids from the previous iteration and the current one
524+ to avoid missing updates *)
525+ let uuid_domids =
526+ Seq. append domains_before domains_after
527+ |> Seq. map (fun (_ , u , i ) -> (u, i))
528+ |> Rrd.StringMap. of_seq
529+ in
530530 (* stats are grouped per plugin, which provides its timestamp *)
531531 Rrdd_monitor. update_rrds uuid_domids stats ;
532532
533533 Rrdd_libs.Constants. datasource_dump_file
534534 |> Rrdd_server. dump_host_dss_to_file ;
535535 Rrdd_libs.Constants. datasource_vm_dump_file
536- |> Rrdd_server. dump_vm_dss_to_file
536+ |> Rrdd_server. dump_vm_dss_to_file ;
537+ domains_after
537538 )
538539
539540let monitor_write_loop writers =
540541 Debug. with_thread_named " monitor_write"
541542 (fun () ->
542543 Xenctrl. with_intf (fun xc ->
544+ let domains = ref Seq. empty in
543545 while true do
544546 try
545- do_monitor_write xc writers ;
547+ domains := do_monitor_write ! domains xc writers ;
546548 with_lock Rrdd_shared. next_iteration_start_m (fun _ ->
547549 Rrdd_shared. next_iteration_start :=
548550 Clock.Timer. extend_by ! Rrdd_shared. timeslice
0 commit comments