Skip to content

Commit e3d4f34

Browse files
authored
CP-308811: Add an option to limit the span depth in tracing (#6607)
Adds a new span.depth key to the trace context baggage, and a configurable max_span_depth. This defaults to 100 and so will not limit traces (the deepest traces I've seen have a depth of ~40, e.g. https://jaeger.kfd.eng.citrite.net/trace/ea5ddca5509b3ae1102bc7279092652d), but it is useful when analysing large traces, which can often become slow if all the spans are recorded in a trace. This isn't perfect: the span.depth seems to get lost sometimes between xapi and xenops, resulting in a greater depth than that listed. I have created ticket CP-308999 for this; in the meantime this works well enough to greatly reduce the number of spans in a trace when needed, which is the intention. As an example, a host evacuate trace with max_span_depth 10 goes down to ~1000 spans rather than the 34k+ without a depth limit.
2 parents 2bef63a + 78df744 commit e3d4f34

File tree

15 files changed

+261
-18
lines changed

15 files changed

+261
-18
lines changed

ocaml/libs/tracing/tracing.ml

Lines changed: 93 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,8 @@ module TraceContext = struct
222222

223223
let empty = {traceparent= None; baggage= None}
224224

225+
(** Baggage key under which a span's depth within its trace is recorded. *)
let depth_key = "span.depth"
226+
225227
let with_traceparent traceparent ctx = {ctx with traceparent}
226228

227229
let with_baggage baggage ctx = {ctx with baggage}
@@ -230,6 +232,20 @@ module TraceContext = struct
230232

231233
let baggage_of ctx = ctx.baggage
232234

235+
(** [baggage_depth_of ctx] returns the span depth stored in the baggage of
    [ctx] under [depth_key].

    Returns [1] when [ctx] has no baggage, when the key is absent, or when the
    stored value is not a valid integer. Falling back instead of raising keeps
    a malformed baggage entry from aborting span creation. *)
let baggage_depth_of ctx =
  match Option.bind (baggage_of ctx) (List.assoc_opt depth_key) with
  | None ->
      1
  | Some raw ->
      (* [int_of_string] would raise [Failure] on a malformed value. *)
      Option.value ~default:1 (int_of_string_opt raw)
239+
240+
(** [update_with_baggage k v ctx] returns [ctx] with the binding [(k, v)]
    placed at the front of its baggage, after dropping the first existing
    binding for [k]. A context without baggage is treated as having an empty
    baggage list. *)
let update_with_baggage k v ctx =
  let existing =
    match baggage_of ctx with None -> [] | Some pairs -> pairs
  in
  let without_k = List.remove_assoc k existing in
  with_baggage (Some ((k, v) :: without_k)) ctx
248+
233249
let parse input =
234250
let open Astring.String in
235251
let trim_pair (key, value) = (trim key, trim value) in
@@ -322,22 +338,36 @@ module Span = struct
322338

323339
let start ?(attributes = Attributes.empty)
324340
?(trace_context : TraceContext.t option) ~name ~parent ~span_kind () =
325-
let trace_id, extra_context =
341+
let trace_id, extra_context, depth =
326342
match parent with
327343
| None ->
328-
(Trace_id.make (), TraceContext.empty)
344+
(Trace_id.make (), TraceContext.empty, 1)
329345
| Some span_parent ->
330-
(span_parent.context.trace_id, span_parent.context.trace_context)
346+
( span_parent.context.trace_id
347+
, span_parent.context.trace_context
348+
, TraceContext.baggage_depth_of span_parent.context.trace_context + 1
349+
)
331350
in
332351
let span_id = Span_id.make () in
352+
let extra_context_with_depth =
353+
TraceContext.(
354+
update_with_baggage depth_key (string_of_int depth) extra_context
355+
)
356+
in
333357
let context : SpanContext.t =
334-
{trace_id; span_id; trace_context= extra_context}
358+
{trace_id; span_id; trace_context= extra_context_with_depth}
335359
in
336360
let context =
337-
(* If trace_context is provided to the call, override any inherited trace context. *)
338-
trace_context
339-
|> Option.fold ~none:context
340-
~some:(Fun.flip SpanContext.with_trace_context context)
361+
(* If trace_context is provided to the call, override any inherited trace
362+
context except span.depth which should still be maintained. *)
363+
match trace_context with
364+
| Some tc ->
365+
let tc_with_depth =
366+
TraceContext.(update_with_baggage depth_key (string_of_int depth) tc)
367+
in
368+
SpanContext.with_trace_context tc_with_depth context
369+
| None ->
370+
context
341371
in
342372
(* Using gettimeofday over Mtime as it is better for sharing timestamps between the systems *)
343373
let begin_time = Unix.gettimeofday () in
@@ -473,6 +503,11 @@ module Spans = struct
473503

474504
let set_max_traces x = Atomic.set max_traces x
475505

506+
(* Upper bound on span depth. The default of 100 is far deeper than the
   largest traces currently produced, so the limit is effectively off unless
   it is lowered. *)
let max_depth = Atomic.make 100

(* Atomically replace the configured maximum span depth. *)
let set_max_depth depth = Atomic.set max_depth depth
510+
476511
let finished_spans = Atomic.make ([], 0)
477512

478513
let span_hashtbl_is_empty () = TraceMap.is_empty (Atomic.get spans)
@@ -713,12 +748,18 @@ module Tracer = struct
713748
let get_tracer ~name:_ = TracerProvider.get_current ()
714749

715750
let span_of_span_context context name : Span.t =
751+
let tc = SpanContext.context_of_span_context context in
752+
let new_depth = TraceContext.baggage_depth_of tc in
753+
let new_tc =
754+
TraceContext.(update_with_baggage depth_key (string_of_int new_depth) tc)
755+
in
756+
let context = SpanContext.with_trace_context new_tc context in
716757
{
717758
context
718759
; status= {status_code= Status.Unset; _description= None}
719760
; name
720761
; parent= None
721-
; span_kind= SpanKind.Client (* This will be the span of the client call*)
762+
; span_kind= SpanKind.Client (* This will be the span of the client call *)
722763
; begin_time= Unix.gettimeofday ()
723764
; end_time= None
724765
; links= []
@@ -730,10 +771,32 @@ module Tracer = struct
730771
?(span_kind = SpanKind.Internal) ~name ~parent () :
731772
(Span.t option, exn) result =
732773
let open TracerProvider in
733-
(* Do not start span if the TracerProvider is disabled*)
774+
let parent_depth =
775+
Option.fold ~none:1
776+
~some:(fun parent ->
777+
parent.Span.context
778+
|> SpanContext.context_of_span_context
779+
|> TraceContext.baggage_depth_of
780+
)
781+
parent
782+
in
783+
(* Do not start span if the TracerProvider is disabled *)
734784
if not t.enabled then
785+
ok_none (* Do not start span if the max depth has been reached *)
786+
else if parent_depth >= Atomic.get Spans.max_depth then (
787+
let parent_trace_id =
788+
Option.fold ~none:"None"
789+
~some:(fun p ->
790+
p.Span.context
791+
|> SpanContext.span_id_of_span_context
792+
|> Span_id.to_string
793+
)
794+
parent
795+
in
796+
debug "Max_span_depth limit reached, not creating span %s (parent %s)"
797+
name parent_trace_id ;
735798
ok_none
736-
else
799+
) else
737800
let attributes = Attributes.merge_into t.attributes attributes in
738801
let span =
739802
Span.start ~attributes ?trace_context ~name ~parent ~span_kind ()
@@ -750,16 +813,24 @@ module Tracer = struct
750813
|> Spans.remove_from_spans
751814
|> Option.map (fun existing_span ->
752815
let old_context = Span.get_context existing_span in
816+
let parent_trace_context = Span.get_trace_context parent in
817+
let new_depth =
818+
TraceContext.baggage_depth_of parent_trace_context + 1
819+
in
753820
let new_context : SpanContext.t =
754-
let trace_context = span.Span.context.trace_context in
821+
let trace_context =
822+
TraceContext.(
823+
update_with_baggage depth_key (string_of_int new_depth)
824+
span.Span.context.trace_context
825+
)
826+
in
755827
SpanContext.context
756828
(SpanContext.trace_id_of_span_context parent.context)
757829
old_context.span_id
758830
|> SpanContext.with_trace_context trace_context
759831
in
760832
let updated_span = {existing_span with parent= Some parent} in
761833
let updated_span = {updated_span with context= new_context} in
762-
763834
let () = Spans.add_to_spans ~span:updated_span in
764835
updated_span
765836
)
@@ -926,7 +997,15 @@ module Propagator = struct
926997
let trace_context' =
927998
TraceContext.with_traceparent (Some traceparent) trace_context
928999
in
929-
let carrier' = P.inject_into trace_context' carrier in
1000+
let new_depth =
1001+
TraceContext.baggage_depth_of trace_context' + 1 |> string_of_int
1002+
in
1003+
let trace_context'' =
1004+
TraceContext.(
1005+
update_with_baggage depth_key new_depth trace_context'
1006+
)
1007+
in
1008+
let carrier' = P.inject_into trace_context'' carrier in
9301009
f carrier'
9311010
| _ ->
9321011
f carrier

ocaml/libs/tracing/tracing.mli

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,8 @@ module Spans : sig
165165

166166
val set_max_traces : int -> unit
167167

168+
val set_max_depth : int -> unit
(** [set_max_depth depth] sets the maximum span depth to [depth]. A new span
    is not started when its parent's recorded depth has already reached this
    limit.

    Default is [100].
  *)
169+
168170
val span_count : unit -> int
169171

170172
val since : unit -> Span.t list * int

ocaml/libs/tracing/tracing_export.ml

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ let export_interval = ref 30.
2424

2525
let set_export_interval t = export_interval := t
2626

27+
(* Maximum number of finished spans exported in one chunk; defaults to
   10000. *)
let export_chunk_size = Atomic.make 10000

(* Atomically replace the configured export chunk size. *)
let set_export_chunk_size size = Atomic.set export_chunk_size size
30+
2731
let host_id = ref "localhost"
2832

2933
let set_host_id id = host_id := id
@@ -289,17 +293,40 @@ module Destination = struct
289293
with exn ->
290294
debug "Tracing: unable to export span : %s" (Printexc.to_string exn)
291295

296+
(** [span_info_chunks span_info batch_size] splits the span list in
    [fst span_info] into chunks of at most [batch_size] elements and returns
    each chunk paired with its length.

    Chunks are accumulated by consing, so both the order of the chunks and
    the order of the elements inside each chunk are reversed relative to the
    input. An empty input yields the single chunk [([], 0)].

    A non-positive [batch_size] disables chunking: the whole list is returned
    as one (reversed) chunk. Previously a [batch_size] of [0] with a non-empty
    list never terminated, because the "chunk is full" branch restarted with a
    zero budget without consuming any input. *)
let span_info_chunks span_info batch_size =
  let rec list_to_chunks_inner l n curr chunks =
    match l with
    | [] ->
        (* Flush the last, possibly partial (or empty), chunk. *)
        (curr, List.length curr) :: chunks
    | h :: t ->
        if n = 0 then
          (* Current chunk is full: store it and start a fresh one. *)
          list_to_chunks_inner l batch_size [] ((curr, batch_size) :: chunks)
        else
          list_to_chunks_inner t (n - 1) (h :: curr) chunks
  in
  let spans = fst span_info in
  if batch_size <= 0 then
    (* Same result negative sizes already produced: one reversed chunk. *)
    [(List.rev spans, List.length spans)]
  else
    list_to_chunks_inner spans batch_size [] []
311+
292312
let flush_spans () =
293313
let ((_span_list, span_count) as span_info) = Spans.since () in
294314
let attributes = [("export.traces.count", string_of_int span_count)] in
295315
let@ parent =
296316
with_tracing ~span_kind:Server ~trace_context:TraceContext.empty
297317
~parent:None ~attributes ~name:"Tracing.flush_spans"
298318
in
299-
TracerProvider.get_tracer_providers ()
300-
|> List.filter TracerProvider.get_enabled
301-
|> List.concat_map TracerProvider.get_endpoints
302-
|> List.iter (export_to_endpoint parent span_info)
319+
let endpoints =
320+
TracerProvider.get_tracer_providers ()
321+
|> List.filter TracerProvider.get_enabled
322+
|> List.concat_map TracerProvider.get_endpoints
323+
in
324+
let span_info_chunks =
325+
span_info_chunks span_info (Atomic.get export_chunk_size)
326+
in
327+
List.iter
328+
(fun s_i -> List.iter (export_to_endpoint parent s_i) endpoints)
329+
span_info_chunks
303330

304331
let delay = Delay.make ()
305332

ocaml/libs/tracing/tracing_export.mli

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ val set_export_interval : float -> unit
2323
Default is every [30.] seconds.
2424
*)
2525

26+
val set_export_chunk_size : int -> unit
27+
(** [set_export_chunk_size size] sets the maximum number of finished spans that
28+
can be exported in one chunk to [size].
29+
30+
Default is 10000 spans.
31+
*)
32+
2633
val set_host_id : string -> unit
2734
(** [set_host_id id] sets the id of the host to [id].
2835

ocaml/tests/test_cluster.ml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,11 @@ let test_clusterd_rpc ~__context call =
3434
| "Observer.init"
3535
| "Observer.set_trace_log_dir"
3636
| "Observer.set_export_interval"
37+
| "Observer.set_export_chunk_size"
3738
| "Observer.set_host_id"
3839
| "Observer.set_max_traces"
3940
| "Observer.set_max_spans"
41+
| "Observer.set_max_depth"
4042
| "Observer.set_max_file_size"
4143
| "Observer.set_compress_tracing_files" )
4244
, _ ) ->

ocaml/tests/test_observer.ml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ let verify_json_fields_and_values ~json =
305305
; ("xs.host.uuid", `String _)
306306
; ("xs.host.name", `String _)
307307
; ("service.name", `String _)
308+
; ("span.depth", `String _)
308309
]
309310
)
310311
; ("annotations", `List _)

ocaml/xapi-idl/cluster/cli-help.t

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,14 @@
2121

2222
Observer.set_endpoints [OPTION]… dbg uuid endpoints
2323

24+
Observer.set_export_chunk_size [OPTION]… dbg int
25+
2426
Observer.set_export_interval [OPTION]… dbg float
2527

2628
Observer.set_host_id [OPTION]… dbg string
2729

30+
Observer.set_max_depth [OPTION]… dbg int
31+
2832
Observer.set_max_file_size [OPTION]… dbg int
2933

3034
Observer.set_max_spans [OPTION]… dbg int

ocaml/xapi-idl/lib/observer_helpers.ml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,10 @@ module ObserverAPI (R : RPC) = struct
138138
declare "Observer.set_export_interval" []
139139
(dbg_p @-> float_p @-> returning unit_p err)
140140

141+
let set_export_chunk_size =
142+
declare "Observer.set_export_chunk_size" []
143+
(dbg_p @-> int_p @-> returning unit_p err)
144+
141145
let set_max_spans =
142146
declare "Observer.set_max_spans" []
143147
(dbg_p @-> int_p @-> returning unit_p err)
@@ -146,6 +150,10 @@ module ObserverAPI (R : RPC) = struct
146150
declare "Observer.set_max_traces" []
147151
(dbg_p @-> int_p @-> returning unit_p err)
148152

153+
let set_max_depth =
154+
declare "Observer.set_max_depth" []
155+
(dbg_p @-> int_p @-> returning unit_p err)
156+
149157
let set_max_file_size =
150158
declare "Observer.set_max_file_size" []
151159
(dbg_p @-> int_p @-> returning unit_p err)
@@ -193,10 +201,14 @@ module type Server_impl = sig
193201

194202
val set_export_interval : context -> dbg:debug_info -> interval:float -> unit
195203

204+
val set_export_chunk_size : context -> dbg:debug_info -> size:int -> unit
205+
196206
val set_max_spans : context -> dbg:debug_info -> spans:int -> unit
197207

198208
val set_max_traces : context -> dbg:debug_info -> traces:int -> unit
199209

210+
val set_max_depth : context -> dbg:debug_info -> depth:int -> unit
211+
200212
val set_max_file_size : context -> dbg:debug_info -> file_size:int -> unit
201213

202214
val set_host_id : context -> dbg:debug_info -> host_id:string -> unit
@@ -227,8 +239,12 @@ module Server (Impl : Server_impl) () = struct
227239
S.set_export_interval (fun dbg interval ->
228240
Impl.set_export_interval () ~dbg ~interval
229241
) ;
242+
S.set_export_chunk_size (fun dbg size ->
243+
Impl.set_export_chunk_size () ~dbg ~size
244+
) ;
230245
S.set_max_spans (fun dbg spans -> Impl.set_max_spans () ~dbg ~spans) ;
231246
S.set_max_traces (fun dbg traces -> Impl.set_max_traces () ~dbg ~traces) ;
247+
S.set_max_depth (fun dbg depth -> Impl.set_max_depth () ~dbg ~depth) ;
232248
S.set_max_file_size (fun dbg file_size ->
233249
Impl.set_max_file_size () ~dbg ~file_size
234250
) ;

0 commit comments

Comments
 (0)