Skip to content

Commit 3bc9fae

Browse files
committed
CA-409482: Using computed delay for RRD loop
The RRD loop runs every 5 seconds, and it currently sleeps for a fixed 5 seconds between iterations. However, each iteration itself also takes time (how long depends on the number of CPUs; with many CPUs it can take hundreds of milliseconds). As a result, the loop's schedule drifts a little further with every iteration, until eventually one RRD data point is lost and a gap can be observed in the XenCenter performance graph. The solution is to use a computed delay (timeslice minus the time the loop iteration took) instead of a fixed delay. Signed-off-by: Bengang Yuan <[email protected]>
1 parent 18e8584 commit 3bc9fae

File tree

4 files changed

+26
-7
lines changed

4 files changed

+26
-7
lines changed

ocaml/xcp-rrdd/bin/rrdd/dune

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@
4646
http_lib
4747
httpsvr
4848
inotify
49+
mtime
50+
mtime.clock
4951
rpclib.core
5052
rpclib.json
5153
rpclib.xml

ocaml/xcp-rrdd/bin/rrdd/rrdd_server.ml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,8 @@ module Plugin = struct
717717
let open Rrdd_shared in
718718
if with_lock registered_m (fun _ -> Hashtbl.mem registered uid) then
719719
with_lock last_loop_end_time_m (fun _ ->
720-
!last_loop_end_time +. !timeslice -. Unix.gettimeofday ()
720+
let span = Mtime.span !last_loop_end_time (Mtime_clock.now ()) in
721+
!timeslice -. (Mtime.Span.to_float_ns span /. 1_000_000_000.)
721722
)
722723
else
723724
-1.

ocaml/xcp-rrdd/bin/rrdd/rrdd_shared.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ let enable_all_dss = ref false
2424
let timeslice : float ref = ref 5.
2525

2626
(* Timestamp of the last monitoring loop end. *)
27-
let last_loop_end_time : float ref = ref neg_infinity
27+
let last_loop_end_time : Mtime.t ref = ref Mtime.min_stamp
2828

2929
(* The mutex that protects the last_loop_end_time against data corruption. *)
3030
let last_loop_end_time_m : Mutex.t = Mutex.create ()

ocaml/xcp-rrdd/bin/rrdd/xcp_rrdd.ml

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -537,17 +537,33 @@ let monitor_write_loop writers =
537537
Xenctrl.with_intf (fun xc ->
538538
while true do
539539
try
540+
let last_loop_start_time = Mtime_clock.now () in
540541
do_monitor_write xc writers ;
541542
with_lock Rrdd_shared.last_loop_end_time_m (fun _ ->
542-
Rrdd_shared.last_loop_end_time := Unix.gettimeofday ()
543+
Rrdd_shared.last_loop_end_time := Mtime_clock.now ()
543544
) ;
544-
Thread.delay !Rrdd_shared.timeslice
545+
let span =
546+
Mtime.span last_loop_start_time !Rrdd_shared.last_loop_end_time
547+
in
548+
let duration_in_second =
549+
Mtime.Span.to_float_ns span /. 1_000_000_000.
550+
in
551+
let delay = !Rrdd_shared.timeslice -. duration_in_second in
552+
(* Use a computed delay (timeslice minus the time this loop iteration
553+
took) instead of a fixed delay. *)
554+
if delay > 0.0 then
555+
Thread.delay delay
556+
else
557+
warn
558+
"%s: Monitor write loop took so long time that the delay \
559+
(%f) is less than 0, so skip the delay"
560+
__FUNCTION__ delay
545561
with e ->
546562
Backtrace.is_important e ;
547563
warn
548-
"Monitor/write thread caught an exception. Pausing for 10s, \
549-
then restarting: %s"
550-
(Printexc.to_string e) ;
564+
"%s: Monitor/write thread caught an exception. Pausing for \
565+
10s, then restarting: %s"
566+
__FUNCTION__ (Printexc.to_string e) ;
551567
log_backtrace e ;
552568
Thread.delay 10.
553569
done

0 commit comments

Comments
 (0)