diff --git a/dune b/dune index 9cf03f02dfc..e2b4842adb5 100644 --- a/dune +++ b/dune @@ -3,7 +3,7 @@ (ocamlopt_flags (:standard -g -p -w -39)) (flags (:standard -w -39)) ) - (dev (flags (:standard -g -w -39 -warn-error -69))) + (dev (flags (:standard -g -w -39))) (release (flags (:standard -w -39-6@5)) (env-vars (ALCOTEST_COMPACT 1)) diff --git a/dune-project b/dune-project index 0efbe491956..88080ce624c 100644 --- a/dune-project +++ b/dune-project @@ -205,6 +205,7 @@ (xapi-rrd (= :version)) (xapi-stdext-threads (= :version)) (xapi-stdext-unix (= :version)) + xapi-tracing ) ) diff --git a/ocaml/libs/http-lib/dune b/ocaml/libs/http-lib/dune index f6de65dbe48..ead0f1d19f6 100644 --- a/ocaml/libs/http-lib/dune +++ b/ocaml/libs/http-lib/dune @@ -45,6 +45,7 @@ ipaddr polly threads.posix + tracing uri xapi-log xapi-stdext-pervasives diff --git a/ocaml/libs/http-lib/http.ml b/ocaml/libs/http-lib/http.ml index c2f7e2aeda8..1f1e790de24 100644 --- a/ocaml/libs/http-lib/http.ml +++ b/ocaml/libs/http-lib/http.ml @@ -694,6 +694,29 @@ module Request = struct let headers, body = to_headers_and_body x in let frame_header = if x.frame then make_frame_header headers else "" in frame_header ^ headers ^ body + + let traceparent_of req = + let open Tracing in + let ( let* ) = Option.bind in + let* traceparent = req.traceparent in + let* span_context = SpanContext.of_traceparent traceparent in + let span = Tracer.span_of_span_context span_context req.uri in + Some span + + let with_tracing ?attributes ~name req f = + let open Tracing in + let parent = traceparent_of req in + with_child_trace ?attributes parent ~name (fun (span : Span.t option) -> + match span with + | Some span -> + let traceparent = + Some (span |> Span.get_context |> SpanContext.to_traceparent) + in + let req = {req with traceparent} in + f req + | None -> + f req + ) end module Response = struct diff --git a/ocaml/libs/http-lib/http.mli b/ocaml/libs/http-lib/http.mli index 91590bcdcdd..3fbae8e4c6f 100644 --- a/ocaml/libs/http-lib/http.mli +++ b/ocaml/libs/http-lib/http.mli @@ -128,6 +128,11 @@ module Request : sig val to_wire_string : t -> string (** [to_wire_string t] returns a string which could be sent to a server *) + + val traceparent_of : t -> Tracing.Span.t option + + val with_tracing : + ?attributes:(string * string) list -> name:string -> t -> (t -> 'a) -> 'a end (** Parsed form of the HTTP response *) diff --git a/ocaml/libs/http-lib/http_svr.ml b/ocaml/libs/http-lib/http_svr.ml index 7c270874a96..e04520d8567 100644 --- a/ocaml/libs/http-lib/http_svr.ml +++ b/ocaml/libs/http-lib/http_svr.ml @@ -43,6 +43,8 @@ module E = Debug.Make (struct let name = "http_internal_errors" end) let ( let* ) = Option.bind +let ( let@ ) f x = f x + type uri_path = string module Stats = struct @@ -101,6 +103,7 @@ let response_of_request req hdrs = let response_fct req ?(hdrs = []) s (response_length : int64) (write_response_to_fd_fn : Unix.file_descr -> unit) = + let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in let res = { (response_of_request req hdrs) with @@ -441,9 +444,28 @@ let request_of_bio_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length bio already sent back a suitable error code and response to the client. *) let request_of_bio ?proxy_seen ~read_timeout ~total_timeout ~max_length ic = try + let tracer = Tracing.Tracer.get_tracer ~name:"http_tracer" in + let loop_span = + match Tracing.Tracer.start ~tracer ~name:__FUNCTION__ ~parent:None () with + | Ok span -> + span + | Error _ -> + None + in let r, proxy = request_of_bio_exn ~proxy_seen ~read_timeout ~total_timeout ~max_length ic in + let parent_span = Http.Request.traceparent_of r in + let loop_span = + Option.fold ~none:None + ~some:(fun span -> + Tracing.Tracer.update_span_with_parent span parent_span + ) + loop_span + in + let _ : (Tracing.Span.t option, exn) result = + Tracing.Tracer.finish loop_span + in (Some r, proxy) with e -> D.warn "%s (%s)" (Printexc.to_string e) __LOC__ ; @@ -486,6 +508,8 @@ let request_of_bio ?proxy_seen ~read_timeout ~total_timeout ~max_length ic = (None, None) let handle_one (x : 'a Server.t) ss context req = + let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in + let span = Http.Request.traceparent_of req in let ic = Buf_io.of_fd ss in let finished = ref false in try @@ -499,6 +523,7 @@ let handle_one (x : 'a Server.t) ss context req = Option.value ~default:empty (Radix_tree.longest_prefix req.Request.uri method_map) in + let@ _ = Tracing.with_child_trace span ~name:"handler" in ( match te.TE.handler with | BufIO handlerfn -> handlerfn req ic context @@ -561,6 +586,7 @@ let handle_connection ~header_read_timeout ~header_total_timeout request_of_bio ?proxy_seen ~read_timeout ~total_timeout ~max_length:max_header_length ic in + (* 2. now we attempt to process the request *) let finished = Option.fold ~none:true diff --git a/ocaml/libs/tracing/tracing.ml b/ocaml/libs/tracing/tracing.ml index 8b22a8680a1..22d1e942288 100644 --- a/ocaml/libs/tracing/tracing.ml +++ b/ocaml/libs/tracing/tracing.ml @@ -127,7 +127,7 @@ let ok_none = Ok None module Status = struct type status_code = Unset | Ok | Error [@@deriving rpcty] - type t = {status_code: status_code; description: string option} + type t = {status_code: status_code; _description: string option} end module Attributes = struct @@ -151,6 +151,8 @@ end module SpanContext = struct type t = {trace_id: string; span_id: string} [@@deriving rpcty] + let context trace_id span_id = {trace_id; span_id} + let to_traceparent t = Printf.sprintf "00-%s-%s-01" t.trace_id t.span_id let of_traceparent traceparent = @@ -167,7 +169,7 @@ module SpanContext = struct end module SpanLink = struct - type t = {context: SpanContext.t; attributes: (string * string) list} + type t = {_context: SpanContext.t; _attributes: (string * string) list} end module Span = struct @@ -208,7 +210,7 @@ module Span = struct (* Using gettimeofday over Mtime as it is better for sharing timestamps between the systems *) let begin_time = Unix.gettimeofday () in let end_time = None in - let status : Status.t = {status_code= Status.Unset; description= None} in + let status : Status.t = {status_code= Status.Unset; _description= None} in let links = [] in let events = [] in { @@ -250,7 +252,7 @@ module Span = struct let set_span_kind span span_kind = {span with span_kind} let add_link span context attributes = - let link : SpanLink.t = {context; attributes} in + let link : SpanLink.t = {_context= context; _attributes= attributes} in {span with links= link :: span.links} let add_event span name attributes = @@ -263,7 +265,7 @@ module Span = struct | exn, stacktrace -> ( let msg = Printexc.to_string exn in let exn_type = Printexc.exn_slot_name exn in - let description = + let _description = Some (Printf.sprintf "Error: %s Type: %s Backtrace: %s" msg exn_type stacktrace @@ -286,17 +288,17 @@ module Span = struct span.attributes (Attributes.of_list exn_attributes) in - {span with status= {status_code; description}; attributes} + {span with status= {status_code; _description}; attributes} | _ -> span ) let set_ok span = - let description = None in + let _description = None in let status_code = Status.Ok in match span.status.status_code with | Unset -> - {span with status= {status_code; description}} + {span with status= {status_code; _description}} | _ -> span end @@ -311,7 +313,7 @@ module Spans = struct Hashtbl.length spans ) - let max_spans = Atomic.make 1000 + let max_spans = Atomic.make 2500 let set_max_spans x = Atomic.set max_spans x @@ -519,8 +521,8 @@ module TracerProvider = struct get_tracer_providers_unlocked let set ?enabled ?attributes ?endpoints ~uuid () = - let update_provider (provider : t) ?(enabled = provider.enabled) attributes - endpoints = + let update_provider (provider : t) enabled attributes endpoints = + let enabled = Option.value ~default:provider.enabled enabled in let attributes : string Attributes.t = Option.fold ~none:provider.attributes ~some:Attributes.of_list attributes @@ -537,7 +539,7 @@ module TracerProvider = struct let provider = match Hashtbl.find_opt tracer_providers uuid with | Some (provider : t) -> - update_provider provider ?enabled attributes endpoints + update_provider provider enabled attributes endpoints | None -> fail "The TracerProvider : %s does not exist" uuid in @@ -564,9 +566,9 @@ module TracerProvider = struct end module Tracer = struct - type t = {name: string; provider: TracerProvider.t} + type t = {_name: string; provider: TracerProvider.t} - let create ~name ~provider = {name; provider} + let create ~name ~provider = {_name= name; provider} let no_op = let provider : TracerProvider.t = @@ -577,7 +579,7 @@ module Tracer = struct ; enabled= false } in - {name= ""; provider} + {_name= ""; provider} let get_tracer ~name = if Atomic.get observe then ( @@ -598,7 +600,7 @@ module Tracer = struct let span_of_span_context context name : Span.t = { context - ; status= {status_code= Status.Unset; description= None} + ; status= {status_code= Status.Unset; _description= None} ; name ; parent= None ; span_kind= SpanKind.Client (* This will be the span of the client call*) @@ -624,6 +626,30 @@ module Tracer = struct let span = Span.start ~attributes ~name ~parent ~span_kind () in Spans.add_to_spans ~span ; Ok (Some span) + let update_span_with_parent span (parent : Span.t option) = + if Atomic.get observe then + match parent with + | None -> + Some span + | Some parent -> + span + |> Spans.remove_from_spans + |> Option.map (fun existing_span -> + let old_context = Span.get_context existing_span in + let new_context : SpanContext.t = + SpanContext.context + (SpanContext.trace_id_of_span_context parent.context) + old_context.span_id + in + let updated_span = {existing_span with parent= Some parent} in + let updated_span = {updated_span with context= new_context} in + + let () = Spans.add_to_spans ~span:updated_span in + updated_span + ) + else + Some span + let finish ?error span = Ok (Option.map @@ -673,6 +699,13 @@ let with_tracing ?(attributes = []) ?(parent = None) ~name f = ) else f None +let with_child_trace ?attributes parent ~name f = + match parent with + | None -> + f None + | Some _ as parent -> + with_tracing ?attributes ~parent ~name f + module EnvHelpers = struct let traceparent_key = "TRACEPARENT" diff --git a/ocaml/libs/tracing/tracing.mli b/ocaml/libs/tracing/tracing.mli index bfb37ddf292..42b700ebb51 100644 --- a/ocaml/libs/tracing/tracing.mli +++ b/ocaml/libs/tracing/tracing.mli @@ -57,6 +57,8 @@ end module SpanContext : sig type t + val context : string -> string -> t + val to_traceparent : t -> string val of_traceparent : string -> t option @@ -125,6 +127,16 @@ module Tracer : sig -> unit -> (Span.t option, exn) result + val update_span_with_parent : Span.t -> Span.t option -> Span.t option + (**[update_span_with_parent s p] returns [Some span] where [span] is an + updated verison of the span [s]. + If [p] is [Some parent], [span] is a child of [parent], otherwise it is the + original [s]. + + If the span [s] is finished or is no longer considered an on-going span, + returns [None]. + *) + val finish : ?error:exn * string -> Span.t option -> (Span.t option, exn) result @@ -199,6 +211,15 @@ val with_tracing : -> (Span.t option -> 'a) -> 'a +val with_child_trace : + ?attributes:(string * string) list + -> Span.t option + -> name:string + -> (Span.t option -> 'a) + -> 'a +(** [with_child_trace ?attributes ?parent ~name f] is like {!val:with_tracing}, but + only creates a span if the [parent] span exists. *) + val get_observe : unit -> bool val validate_attribute : string * string -> bool diff --git a/ocaml/xapi/api_server.ml b/ocaml/xapi/api_server.ml index ba95fbe03d9..38f39e9b50f 100644 --- a/ocaml/xapi/api_server.ml +++ b/ocaml/xapi/api_server.ml @@ -1,8 +1,11 @@ open Api_server_common module Server = Server.Make (Actions) (Forwarder) +let ( let@ ) f x = f x + (* This bit is called directly by the fake_rpc callback *) let callback1 ?(json_rpc_version = Jsonrpc.V1) is_json req fd call = + let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in (* We now have the body string, the xml and the call name, and can also tell *) (* if we're a master or slave and whether the call came in on the unix domain socket or the tcp socket *) (* If we're a slave, and the call is from the unix domain socket or from the HIMN, and the call *isn't* *) @@ -20,7 +23,10 @@ let callback1 ?(json_rpc_version = Jsonrpc.V1) is_json req fd call = then forward req call is_json else - let response = Server.dispatch_call req fd call in + let response = + let@ req = Http.Request.with_tracing ~name:"Server.dispatch_call" req in + Server.dispatch_call req fd call + in let translated = if is_json @@ -85,15 +91,26 @@ let create_thumbprint_header req response = (** HTML callback that dispatches an RPC and returns the response. *) let callback is_json req bio _ = + let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in + let span = Http.Request.traceparent_of req in let fd = Buf_io.fd_of bio in (* fd only used for writing *) let body = Http_svr.read_body ~limit:Constants.http_limit_max_rpc_size req bio in try - let rpc = Xmlrpc.call_of_string body in + let rpc = + let attributes = [("size", string_of_int (String.length body))] in + let@ _ = + Tracing.with_child_trace ~attributes ~name:"Xmlrpc.call_of_string" span + in + Xmlrpc.call_of_string body + in let response = callback1 is_json req fd rpc in let response_str = + let@ _ = + Tracing.with_child_trace ~name:"Xmlrpc.string_of_response" span + in if rpc.Rpc.name = "system.listMethods" then let inner = Xmlrpc.to_string response.Rpc.contents in Printf.sprintf @@ -129,6 +146,7 @@ let callback is_json req bio _ = (** HTML callback that dispatches an RPC and returns the response. *) let jsoncallback req bio _ = + let@ req = Http.Request.with_tracing ~name:__FUNCTION__ req in let fd = Buf_io.fd_of bio in (* fd only used for writing *) let body = diff --git a/ocaml/xapi/dune b/ocaml/xapi/dune index aebdf144225..e3708bed112 100644 --- a/ocaml/xapi/dune +++ b/ocaml/xapi/dune @@ -149,6 +149,7 @@ tar tar-unix threads.posix + tracing unixpwd uri uuid @@ -234,6 +235,7 @@ rpclib.xml stunnel threads.posix + tracing xapi-backtrace xapi-client xapi-consts diff --git a/ocaml/xapi/server_helpers.ml b/ocaml/xapi/server_helpers.ml index ad76e38e531..e4952769c2f 100644 --- a/ocaml/xapi/server_helpers.ml +++ b/ocaml/xapi/server_helpers.ml @@ -18,6 +18,8 @@ open D exception Dispatcher_FieldNotFound of string +let ( let@ ) f x = f x + let my_assoc fld assoc_list = try List.assoc fld assoc_list with Not_found -> raise (Dispatcher_FieldNotFound fld) @@ -120,6 +122,7 @@ let dispatch_exn_wrapper f = let do_dispatch ?session_id ?forward_op ?self:_ supports_async called_fn_name op_fn marshaller fd http_req label sync_ty generate_task_for = (* if the call has been forwarded to us, then they are responsible for completing the task, so we don't need to complete it *) + let@ http_req = Http.Request.with_tracing ~name:__FUNCTION__ http_req in let called_async = sync_ty <> `Sync in if called_async && not supports_async then API.response_of_fault diff --git a/xapi-rrdd.opam b/xapi-rrdd.opam index 745af249f4b..89b2d827a69 100644 --- a/xapi-rrdd.opam +++ b/xapi-rrdd.opam @@ -27,6 +27,7 @@ depends: [ "xapi-rrd" {= version} "xapi-stdext-threads" {= version} "xapi-stdext-unix" {= version} + "xapi-tracing" "odoc" {with-doc} ] build: [