Skip to content

Commit 379e2c5

Browse files
committed
CA-383867: Add startup procedure to xapi-guard
Because timestamps come from a monotonic clock whose reference changes on every boot, existing files need to be renamed so that future writes get higher timestamps, are considered newer and are uploaded to xapi. On top of this, the procedure reports remnant temporary files, deletes invalid files and removes empty directories. Signed-off-by: Pau Ruiz Safont <[email protected]>
1 parent c2ce365 commit 379e2c5

File tree

5 files changed

+101
-5
lines changed

5 files changed

+101
-5
lines changed

doc/content/xapi-guard/_index.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ This situation usually happens when xapi is being restarted as part of an update
3535
SWTPM, the vTPM daemon, reads the contents of the TPM from xapi-guard on startup, suspend, and resume.
3636
During normal operation SWTPM does not send read requests to xapi-guard.
3737

38+
Structure
39+
---------
40+
3841
The cache module consists of two Lwt threads, one that writes to disk, and another one that reads from disk.
3942
The writer is triggered when a VM writes to the vTPM.
4043
It never blocks if xapi is unreachable, but responds as soon as the data has been stored either by xapi or on the local disk, such that the VM receives a timely response to the write request.
@@ -82,3 +85,14 @@ stateDiagram-v2
8285
Engaged --> Engaged : Writer receives TPM, queue is not full
8386
Engaged --> Disengaged : Writer receives TPM, queue is full
8487
```
88+
89+
Startup
90+
-------
91+
92+
At startup, there's a dedicated routine to transform the existing contents of the cache.
93+
This is currently done because the timestamp reference changes on each boot.
94+
This means that the existing contents might have timestamps considered more recent than timestamps of writes coming from running events, leading to missing content updates.
95+
This must be avoided, so the updates with offending timestamps are instead renamed to use the current timestamp, ensuring a consistent
96+
ordering.
97+
The routine is also used to keep a minimal file tree: unrecognised and outdated files are deleted, temporary files created to ensure atomic writes are left untouched (a warning is logged for each of them), and empty directories are deleted.
98+
This mechanism can be changed in the future to migrate to other formats.

ocaml/xapi-guard/lib/disk_cache.ml

Lines changed: 82 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,6 @@ type channel = {
179179
IDEA: carryover: read contents of cache and "convert it" to the current run
180180
181181
TODO:
182-
- Add startup step to convert existing content to new time
183182
- Exponential backoff on xapi push error
184183
- Limit error logging on xapi push error: once per downtime is enough
185184
*)
@@ -449,8 +448,89 @@ end = struct
449448
loop
450449
end
451450

451+
(** Module used to change the cache contents before the reader and writer start
452+
running *)
453+
module Setup : sig
  val retime_cache_contents : Types.Service.t -> unit Lwt.t
  (** [retime_cache_contents service] goes over the cache contents of
      [service]: entries whose timestamp is later than the current monotonic
      time are renamed to use the current time, invalid and outdated entries
      are deleted, temporary files are only reported, and empty
      subdirectories are removed afterwards. *)
end = struct
  (** What has to be done on disk for a single cache entry *)
  type file_action =
    | Keep of file
    | Delete of string
    | Move of {from: string; into: string}

  (** [get_fs_action root now entry] decides what to do with [entry]. Only a
      [Latest] entry stamped later than [now] is moved; its destination is
      the path built by [path_of_key] under [root] for the same uuid and key,
      stamped with [now]. *)
  let get_fs_action root now entry =
    match entry with
    | Temporary _ ->
        Keep entry
    | Invalid path | Outdated (_, path) ->
        Delete path
    | Latest ((uuid, timestamp, key), from) ->
        if Mtime.is_later timestamp ~than:now then
          Move {from; into= path_of_key root (uuid, now, key)}
        else
          Keep entry

  (** [commit caller action] carries out [action] on the filesystem. [caller]
      is only used as a prefix for the log messages. Errors raised by the
      unlink and rename calls are not handled here. *)
  let commit caller action =
    match action with
    | Keep (Temporary path) ->
        (* Temporary files are reported, but never touched *)
        Logs_lwt.warn (fun m ->
            m "%s: Found temporary file, ignoring '%s'" caller path
        )
    | Keep _ ->
        Lwt.return_unit
    | Delete path ->
        let* () =
          Logs_lwt.info (fun m -> m "%s: Deleting '%s'" caller path)
        in
        Lwt_unix.unlink path
    | Move {from; into} ->
        let* () =
          Logs_lwt.info (fun m ->
              m "%s: Moving '%s' to '%s'" caller from into
          )
        in
        Lwt_unix.rename from into

  (** [delete_empty_dirs ~delete_root root] first recurses into every entry
      of [root] that is a directory, always allowing those to be removed, and
      then removes [root] itself when [delete_root] is set and no entries are
      left in it. *)
  let rec delete_empty_dirs ~delete_root root =
    (* NOTE(review): [otherwise] looks like the fallback used when the
       listing fails, which would make it yield no entries - confirm against
       [files_in] *)
    let list_entries () = files_in root ~otherwise:(fun _ -> Lwt.return []) in
    let prune_if_directory path =
      let* {st_kind; _} = Lwt_unix.stat path in
      match st_kind with
      | S_DIR ->
          delete_empty_dirs ~delete_root:true path
      | _ ->
          Lwt.return_unit
    in
    (* Subdirectories go first, they may leave [root] empty *)
    let* entries = list_entries () in
    let* () = Lwt_list.iter_p prune_if_directory entries in
    if delete_root then
      (* List again to take into account the subdirectories removed above *)
      let* remaining = list_entries () in
      Lwt.catch
        (fun () ->
          match remaining with
          | [] ->
              Lwt_unix.rmdir root
          | _ ->
              Lwt.return_unit
        )
        (fun _ -> (* failing to remove a directory is ignored *)
                  Lwt.return_unit)
    else
      Lwt.return_unit

  (* This code assumes nothing else accesses the disk cache while it runs *)
  let retime_cache_contents typ =
    let now = Mtime_clock.now () in
    let root = cache_of typ in
    let* contents = get_all_contents root in
    let* () =
      let actions = List.map (get_fs_action root now) contents in
      Lwt_list.iter_p (commit __FUNCTION__) actions
    in
    (* The root of the cache is kept even when it ends up empty *)
    delete_empty_dirs ~delete_root:false root
end
530+
452531
(* Entry point of the disk cache: waits for [Setup.retime_cache_contents] to
   process the existing cache contents of [typ], then creates the bounded
   queue, its lock and the initial [Disengaged] state, and returns the pair
   built by [Writer.with_cache] and [Watcher.watch] wrapped in a promise. *)
let setup typ direct =
  let* () = Setup.retime_cache_contents typ in
  let stream, enqueue = Lwt_bounded_stream.create 4098 in
  let channel =
    {queue= stream; push= enqueue; lock= Lwt_mutex.create (); state= Disengaged}
  in
  Lwt.return
    (Writer.with_cache ~direct typ channel, Watcher.watch ~direct typ channel)

ocaml/xapi-guard/lib/disk_cache.mli

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@ type t = Uuidm.t * Mtime.t * Types.Tpm.key
1818
val setup :
1919
Types.Service.t
2020
-> (t -> string -> (unit, exn) Lwt_result.t)
21-
-> (((t -> string -> unit Lwt.t) -> 'a Lwt.t) -> 'a Lwt.t)
21+
-> ( (((t -> string -> unit Lwt.t) -> 'a Lwt.t) -> 'a Lwt.t)
2222
* (unit -> unit Lwt.t)
23+
)
24+
Lwt.t
2325
(** [setup service push_callback] Returns a local disk buffer for [service]
2426
which will use [push_callback] to push the elements to their final
2527
destination *)

ocaml/xapi-guard/src/main.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ let retry_forever fname f =
269269
(* Passes [with_watcher] to [retry_forever], using "cache watcher" as its
   [fname] argument. *)
let cache_reader with_watcher = retry_forever "cache watcher" with_watcher
270270

271271
let make_message_switch_server () =
272-
let with_swtpm_push, with_watch =
272+
let* with_swtpm_push, with_watch =
273273
Xapi_guard.Disk_cache.(setup Swtpm (Server_interface.push_vtpm ~cache))
274274
in
275275
let open Message_switch_lwt.Protocol_lwt in

ocaml/xapi-guard/test/cache_test.ml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ let to_cache with_writer =
128128
(* Passes [with_watcher] to [retry_forever], using "watcher" as the name
   given in its first argument. *)
let from_cache with_watcher = retry_forever "watcher" with_watcher
129129

130130
let main () =
131-
let with_writer, with_watcher = Xapi_guard.Disk_cache.(setup Swtpm log) in
131+
let* with_writer, with_watcher = Xapi_guard.Disk_cache.(setup Swtpm log) in
132132
let reader = from_cache with_watcher in
133133
let writers = to_cache with_writer in
134134
let* _ = Lwt.all (reader :: writers) in

0 commit comments

Comments
 (0)