Skip to content

Commit c6f4781

Browse files
committed
CA-409482: Using computed delay for RRD loop
The RRD loop is executed every 5 seconds. It delays for a fixed 5 seconds between iterations, but the loop itself also consumes time (how much depends on the number of CPUs; with many CPUs, an iteration may take hundreds of milliseconds). As a result, the RRD loop drifts by an accumulating offset after several iterations. Eventually one RRD data point is lost and a gap can be observed on the XenCenter performance graph. The solution is to use a computed delay (timeslice - time consumed by the iteration) instead of a fixed delay. Signed-off-by: Bengang Yuan <[email protected]>
1 parent 18e8584 commit c6f4781

File tree

4 files changed

+51
-14
lines changed

4 files changed

+51
-14
lines changed

ocaml/xcp-rrdd/bin/rrdd/dune

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
http_lib
4747
httpsvr
4848
inotify
49+
mtime
50+
mtime.clock
4951
rpclib.core
5052
rpclib.json
5153
rpclib.xml

ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -715,9 +715,20 @@ module Plugin = struct
715715
specified unique ID. If the plugin is not registered, -1 is returned. *)
716716
let next_reading (uid : P.uid) : float =
717717
let open Rrdd_shared in
718+
let ( --- ) a b = Mtime.Span.abs_diff a b in
718719
if with_lock registered_m (fun _ -> Hashtbl.mem registered uid) then
719-
with_lock last_loop_end_time_m (fun _ ->
720-
!last_loop_end_time +. !timeslice -. Unix.gettimeofday ()
720+
with_lock last_iteration_end_m (fun _ ->
721+
let time_in_iteration =
722+
Mtime_clock.count from_loop_start --- !last_iteration_end
723+
in
724+
if
725+
Mtime.Span.is_longer ~than:!Rrdd_shared.timeslice
726+
time_in_iteration
727+
then
728+
-1.
729+
else
730+
Mtime.Span.to_float_ns (!timeslice --- time_in_iteration)
731+
/. 1_000_000_000.
721732
)
722733
else
723734
-1.

ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,17 @@ module StringSet = Set.Make (String)
2020
(* Whether to enable all non-default datasources *)
2121
let enable_all_dss = ref false
2222

23-
(* The time between each monitoring loop. *)
24-
let timeslice : float ref = ref 5.
23+
(* The expected time span between each monitoring loop. *)
24+
let timeslice : Mtime.span ref = ref Mtime.Span.(5 * s)
2525

26-
(* Timestamp of the last monitoring loop end. *)
27-
let last_loop_end_time : float ref = ref neg_infinity
26+
(* The counter since the start of all monitoring loops. *)
27+
let from_loop_start : Mtime_clock.counter = Mtime_clock.counter ()
2828

29-
(* The mutex that protects the last_loop_end_time against data corruption. *)
30-
let last_loop_end_time_m : Mutex.t = Mutex.create ()
29+
(* The time span, measured on [from_loop_start], at which the last monitoring loop iteration ended. *)
30+
let last_iteration_end : Mtime.Span.t ref = ref Mtime.Span.zero
31+
32+
(* The mutex that protects the last_iteration_end against data corruption. *)
33+
let last_iteration_end_m : Mutex.t = Mutex.create ()
3134

3235
(** Cache memory/target values *)
3336
let memory_targets : (int, int64) Hashtbl.t = Hashtbl.create 20

ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -535,19 +535,40 @@ let monitor_write_loop writers =
535535
Debug.with_thread_named "monitor_write"
536536
(fun () ->
537537
Xenctrl.with_intf (fun xc ->
538+
let ( --- ) = Mtime.Span.abs_diff in
538539
while true do
539540
try
541+
let last_iteration_start =
542+
Mtime_clock.count Rrdd_shared.from_loop_start
543+
in
540544
do_monitor_write xc writers ;
541-
with_lock Rrdd_shared.last_loop_end_time_m (fun _ ->
542-
Rrdd_shared.last_loop_end_time := Unix.gettimeofday ()
545+
with_lock Rrdd_shared.last_iteration_end_m (fun _ ->
546+
Rrdd_shared.last_iteration_end :=
547+
Mtime_clock.count Rrdd_shared.from_loop_start
543548
) ;
544-
Thread.delay !Rrdd_shared.timeslice
549+
let time_in_iteration =
550+
!Rrdd_shared.last_iteration_end --- last_iteration_start
551+
in
552+
if
553+
Mtime.Span.is_longer ~than:!Rrdd_shared.timeslice
554+
time_in_iteration
555+
then
556+
warn
557+
"%s: Monitor write iteration took (%a), this is longer than \
558+
a full cycle, skipping the delay"
559+
__FUNCTION__ Debug.Pp.mtime_span time_in_iteration
560+
else
561+
Thread.delay
562+
(Mtime.Span.to_float_ns
563+
(!Rrdd_shared.timeslice --- time_in_iteration)
564+
/. 1_000_000_000.
565+
)
545566
with e ->
546567
Backtrace.is_important e ;
547568
warn
548-
"Monitor/write thread caught an exception. Pausing for 10s, \
549-
then restarting: %s"
550-
(Printexc.to_string e) ;
569+
"%s: Monitor/write thread caught an exception. Pausing for \
570+
10s, then restarting: %s"
571+
__FUNCTION__ (Printexc.to_string e) ;
551572
log_backtrace e ;
552573
Thread.delay 10.
553574
done

0 commit comments

Comments
 (0)