Skip to content

Commit b0631f5

Browse files
committed
CA-409482: Using computed delay for RRD loop
The RRD loop is executed every 5 seconds, with a fixed 5-second delay between iterations. However, the loop itself also takes time to run (how long depends on the number of CPUs; with many CPUs a single iteration may take hundreds of milliseconds). As a result, the RRD loop drifts further from its schedule with every iteration, until eventually one RRD data point is lost and a gap can be observed in the XenCenter performance graph. The solution is to use a fixed deadline as the start time of each iteration and to use a computed delay (timeslice minus the time the iteration took) instead of a fixed delay. Signed-off-by: Bengang Yuan <[email protected]>
1 parent 18e8584 commit b0631f5

File tree

4 files changed

+38
-17
lines changed

4 files changed

+38
-17
lines changed

ocaml/xcp-rrdd/bin/rrdd/dune

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
http_lib
1111
httpsvr
1212
inotify
13-
mtime
14-
mtime.clock.os
13+
clock
14+
mtime.clock
1515
rpclib.core
1616
rrd-transport
1717
rrd-transport.lib
@@ -46,6 +46,7 @@
4646
http_lib
4747
httpsvr
4848
inotify
49+
clock
4950
rpclib.core
5051
rpclib.json
5152
rpclib.xml

ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -716,8 +716,12 @@ module Plugin = struct
716716
let next_reading (uid : P.uid) : float =
717717
let open Rrdd_shared in
718718
if with_lock registered_m (fun _ -> Hashtbl.mem registered uid) then
719-
with_lock last_loop_end_time_m (fun _ ->
720-
!last_loop_end_time +. !timeslice -. Unix.gettimeofday ()
719+
with_lock next_iteration_start_m (fun _ ->
720+
match Clock.Timer.remaining !next_iteration_start with
721+
| Remaining diff ->
722+
Clock.Timer.span_to_s diff
723+
| Expired diff ->
724+
Clock.Timer.span_to_s diff *. -1.
721725
)
722726
else
723727
-1.

ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,15 @@ module StringSet = Set.Make (String)
2020
(* Whether to enable all non-default datasources *)
2121
let enable_all_dss = ref false
2222

23-
(* The time between each monitoring loop. *)
24-
let timeslice : float ref = ref 5.
23+
(* The expected time span between each monitoring loop. *)
24+
let timeslice : Mtime.span ref = ref Mtime.Span.(5 * s)
2525

26-
(* Timestamp of the last monitoring loop end. *)
27-
let last_loop_end_time : float ref = ref neg_infinity
26+
(* A timer that expires at the start of the next iteration *)
27+
let next_iteration_start : Clock.Timer.t ref =
28+
ref (Clock.Timer.start ~duration:!timeslice)
2829

29-
(* The mutex that protects the last_loop_end_time against data corruption. *)
30-
let last_loop_end_time_m : Mutex.t = Mutex.create ()
30+
(* The mutex that protects the next_iteration_start against data corruption. *)
31+
let next_iteration_start_m : Mutex.t = Mutex.create ()
3132

3233
(** Cache memory/target values *)
3334
let memory_targets : (int, int64) Hashtbl.t = Hashtbl.create 20

ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -538,18 +538,33 @@ let monitor_write_loop writers =
538538
while true do
539539
try
540540
do_monitor_write xc writers ;
541-
with_lock Rrdd_shared.last_loop_end_time_m (fun _ ->
542-
Rrdd_shared.last_loop_end_time := Unix.gettimeofday ()
541+
with_lock Rrdd_shared.next_iteration_start_m (fun _ ->
542+
Rrdd_shared.next_iteration_start :=
543+
Clock.Timer.extend_by !Rrdd_shared.timeslice
544+
!Rrdd_shared.next_iteration_start
543545
) ;
544-
Thread.delay !Rrdd_shared.timeslice
546+
match Clock.Timer.remaining !Rrdd_shared.next_iteration_start with
547+
| Remaining remaining ->
548+
Thread.delay (Clock.Timer.span_to_s remaining)
549+
| Expired missed_by ->
550+
warn
551+
"%s: Monitor write iteration missed cycle by %a, skipping \
552+
the delay"
553+
__FUNCTION__ Debug.Pp.mtime_span missed_by
545554
with e ->
546555
Backtrace.is_important e ;
547556
warn
548-
"Monitor/write thread caught an exception. Pausing for 10s, \
549-
then restarting: %s"
550-
(Printexc.to_string e) ;
557+
"%s: Monitor/write thread caught an exception. Pausing for \
558+
10s, then restarting: %s"
559+
__FUNCTION__ (Printexc.to_string e) ;
551560
log_backtrace e ;
552-
Thread.delay 10.
561+
Thread.delay 10. ;
562+
with_lock Rrdd_shared.next_iteration_start_m (fun _ ->
563+
Rrdd_shared.next_iteration_start :=
564+
Clock.Timer.extend_by
565+
Mtime.Span.(10 * s)
566+
!Rrdd_shared.next_iteration_start
567+
)
553568
done
554569
)
555570
)

0 commit comments

Comments
 (0)