From 1deb701257ccbb70125f9830160bfc27341b805d Mon Sep 17 00:00:00 2001
From: cmaddox5
Date: Thu, 24 Oct 2024 11:26:39 -0400
Subject: [PATCH] Add a Health module to track memory usage using Recon.

---
 lib/screens/application.ex |   3 +-
 lib/screens/health.ex      | 106 ++++++++++++++++++++++++++++++++++++++
 mix.exs                    |   3 +-
 mix.lock                   |   1 +
 4 files changed, 111 insertions(+), 2 deletions(-)
 create mode 100644 lib/screens/health.ex

diff --git a/lib/screens/application.ex b/lib/screens/application.ex
index 5b77aab5e..b25c681b0 100644
--- a/lib/screens/application.ex
+++ b/lib/screens/application.ex
@@ -31,7 +31,8 @@ defmodule Screens.Application do
       {Screens.ScreenApiResponseCache, []},
       Screens.LastTrip,
       {Phoenix.PubSub, name: ScreensWeb.PubSub},
-      ScreensWeb.Endpoint
+      ScreensWeb.Endpoint,
+      Screens.Health
     ]
 
     # See https://hexdocs.pm/elixir/Supervisor.html
diff --git a/lib/screens/health.ex b/lib/screens/health.ex
new file mode 100644
index 000000000..98168657e
--- /dev/null
+++ b/lib/screens/health.ex
@@ -0,0 +1,106 @@
+defmodule Screens.Health do
+  @moduledoc """
+  Periodically logs per-process memory and scheduling metrics gathered with `:recon`.
+
+  On every tick, one `screens_process_health` line is logged for each process found under
+  `Screens.Supervisor`, plus the top 20 processes by `memory` and by `binary_memory`.
+  """
+
+  use GenServer
+  require Logger
+
+  @process_health_interval_ms 300_000
+  @process_metrics ~w(memory binary_memory heap_size total_heap_size message_queue_len reductions)a
+
+  @spec start_link(keyword()) :: GenServer.on_start()
+  def start_link(opts \\ []) do
+    GenServer.start_link(__MODULE__, opts)
+  end
+
+  @impl true
+  def init(_) do
+    {:ok, _timer_ref} = :timer.send_interval(@process_health_interval_ms, self(), :process_health)
+
+    {:ok, nil}
+  end
+
+  @impl true
+  def handle_info(:process_health, state) do
+    diagnostic_processes()
+    |> Stream.map(&process_metrics/1)
+    |> Enum.each(fn {name, supervisor, metrics} ->
+      Logger.info([
+        "screens_process_health name=\"#{inspect(name)}\" supervisor=\"#{inspect(supervisor)}\" ",
+        metrics
+      ])
+    end)
+
+    {:noreply, state}
+  end
+
+  def handle_info(msg, state) do
+    Logger.info("Screens.Health unknown_message msg=#{inspect(msg)}")
+    {:noreply, state}
+  end
+
+  @type process_info() :: {pid(), name :: term(), supervisor :: term()}
+
+  # Every process under `Screens.Supervisor` plus the heaviest processes VM-wide, de-duplicated
+  # by pid. Supervised entries come first so they keep their name/supervisor labels.
+  @spec diagnostic_processes() :: Enumerable.t()
+  defp diagnostic_processes do
+    [
+      Stream.flat_map(
+        Supervisor.which_children(Screens.Supervisor),
+        &descendants(&1, Screens.Supervisor)
+      ),
+      top_processes_by(:memory, limit: 20),
+      top_processes_by(:binary_memory, limit: 20)
+    ]
+    |> Stream.concat()
+    |> Stream.uniq_by(&elem(&1, 0))
+  end
+
+  @spec top_processes_by(atom(), limit: non_neg_integer()) :: Enumerable.t()
+  defp top_processes_by(attribute, limit: limit) do
+    Stream.map(:recon.proc_count(attribute, limit), &recon_entry/1)
+  end
+
+  # Expands one `Supervisor.which_children/1` entry into the worker processes beneath it.
+  @spec descendants(
+          {name :: term(), child :: Supervisor.child() | :restarting,
+           type :: :worker | :supervisor, modules :: [module()] | :dynamic},
+          supervisor :: term()
+        ) :: [process_info()]
+  defp descendants({_name, status, _type, _modules}, _supervisor) when is_atom(status), do: []
+
+  defp descendants({name, pid, :supervisor, _modules}, _supervisor) do
+    # Always return a list: `Stream.flat_map/2` raises if its mapper returns `nil`. The child
+    # supervisor may also exit before it answers, which surfaces here as an exit.
+    pid |> Supervisor.which_children() |> Enum.flat_map(&descendants(&1, name))
+  catch
+    :exit, _reason -> []
+  end
+
+  defp descendants({name, pid, _, _}, supervisor), do: [{pid, name, supervisor}]
+
+  @spec recon_entry(:recon.proc_attrs()) :: process_info()
+  defp recon_entry({pid, _count, [name | _]}) when is_atom(name), do: {pid, name, nil}
+  defp recon_entry({pid, _count, _info}), do: {pid, nil, nil}
+
+  @spec process_metrics(process_info()) :: {term(), term(), iodata()}
+  defp process_metrics({pid, name, supervisor}) do
+    metrics =
+      case :recon.info(pid, @process_metrics) do
+        info when is_list(info) ->
+          Enum.map_intersperse(info, " ", fn {metric, value} -> "#{metric}=#{value}" end)
+
+        # NOTE(review): presumably `:undefined` when the process exited after being listed;
+        # confirm against recon. Log the entry without metrics rather than crash the check.
+        _ ->
+          []
+      end
+
+    {name, supervisor, metrics}
+  end
+end
diff --git a/mix.exs b/mix.exs
index a41602cc5..52a54f70f 100644
--- a/mix.exs
+++ b/mix.exs
@@ -91,7 +91,8 @@ defmodule Screens.MixProject do
       {:nebulex, "~> 2.6"},
       {:remote_ip, "~> 1.2"},
       {:hackney_telemetry, "~> 0.2.0"},
-      {:ex_cldr_messages, "~> 1.0"}
+      {:ex_cldr_messages, "~> 1.0"},
+      {:recon, "~> 2.5.6"}
     ]
   end
 end
diff --git a/mix.lock b/mix.lock
index a096640b0..0a39c9912 100644
--- a/mix.lock
+++ b/mix.lock
@@ -59,6 +59,7 @@
   "plug_cowboy": {:hex, :plug_cowboy, "2.7.2", "fdadb973799ae691bf9ecad99125b16625b1c6039999da5fe544d99218e662e4", [:mix], [{:cowboy, "~> 2.7", [hex: :cowboy, repo: "hexpm", optional: false]}, {:cowboy_telemetry, "~> 0.3", [hex: :cowboy_telemetry, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "245d8a11ee2306094840c000e8816f0cbed69a23fc0ac2bcf8d7835ae019bb2f"},
   "plug_crypto": {:hex, :plug_crypto, "2.1.0", "f44309c2b06d249c27c8d3f65cfe08158ade08418cf540fd4f72d4d6863abb7b", [:mix], [], "hexpm", "131216a4b030b8f8ce0f26038bc4421ae60e4bb95c5cf5395e1421437824c4fa"},
   "ranch": {:hex, :ranch, "1.8.0", "8c7a100a139fd57f17327b6413e4167ac559fbc04ca7448e9be9057311597a1d", [:make, :rebar3], [], "hexpm", "49fbcfd3682fab1f5d109351b61257676da1a2fdbe295904176d5e521a2ddfe5"},
+  "recon": {:hex, :recon, "2.5.6", "9052588e83bfedfd9b72e1034532aee2a5369d9d9343b61aeb7fbce761010741", [:mix, :rebar3], [], "hexpm", "96c6799792d735cc0f0fd0f86267e9d351e63339cbe03df9d162010cefc26bb0"},
   "remote_ip": {:hex, :remote_ip, "1.2.0", "fb078e12a44414f4cef5a75963c33008fe169b806572ccd17257c208a7bc760f", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "2ff91de19c48149ce19ed230a81d377186e4412552a597d6a5137373e5877cb7"},
   "retry": {:hex, :retry, "0.18.0", "dc58ebe22c95aa00bc2459f9e0c5400e6005541cf8539925af0aa027dc860543", [:mix], [], "hexpm", "9483959cc7bf69c9e576d9dfb2b678b71c045d3e6f39ab7c9aa1489df4492d73"},
   "screens_config": {:git, "https://github.com/mbta/screens-config-lib.git", "8ec6e1684a129b089edc5e867a32dfc90028b2e0", [ref: "8ec6e1684a129b089edc5e867a32dfc90028b2e0"]},