Skip to content

Commit

Permalink
Add pod monitor configuration to standalone console chart (#771)
Browse files Browse the repository at this point in the history
  • Loading branch information
michaeljguarino authored Mar 13, 2024
1 parent 521753e commit d620515
Show file tree
Hide file tree
Showing 21 changed files with 241 additions and 30 deletions.
32 changes: 32 additions & 0 deletions assets/src/generated/graphql.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1659,11 +1659,22 @@ export type GlobalService = {
provider?: Maybe<ClusterProvider>;
/** the service to replicate across clusters */
service?: Maybe<ServiceDeployment>;
services?: Maybe<ServiceDeploymentConnection>;
/** a set of tags to select clusters for this global service */
tags?: Maybe<Array<Maybe<Tag>>>;
updatedAt?: Maybe<Scalars['DateTime']['output']>;
};


/**
 * Arguments accepted by the `services` connection field of `GlobalService`
 * (a rules based mechanism to redeploy a service across a fleet of clusters).
 * All arguments are optional.
 */
export type GlobalServiceServicesArgs = {
// after / before / first / last: pagination arguments of the connection field
after?: InputMaybe<Scalars['String']['input']>;
before?: InputMaybe<Scalars['String']['input']>;
first?: InputMaybe<Scalars['Int']['input']>;
last?: InputMaybe<Scalars['Int']['input']>;
// q: string argument declared as `arg :q, :string` on the schema field;
// NOTE(review): presumably a search query over the services - confirm against the resolver
q?: InputMaybe<Scalars['String']['input']>;
};

/** A reference for a globalized service, which targets clusters based on the configured criteria */
export type GlobalServiceAttributes = {
/** kubernetes distribution to target */
Expand All @@ -1676,6 +1687,18 @@ export type GlobalServiceAttributes = {
tags?: InputMaybe<Array<InputMaybe<TagAttributes>>>;
};

/**
 * Connection wrapper for a page of `GlobalService` results: a list of edges
 * plus the (always present) `pageInfo` for that page.
 */
export type GlobalServiceConnection = {
__typename?: 'GlobalServiceConnection';
edges?: Maybe<Array<Maybe<GlobalServiceEdge>>>;
pageInfo: PageInfo;
};

/** A single entry of a `GlobalServiceConnection`: an optional cursor and its `GlobalService` node. */
export type GlobalServiceEdge = {
__typename?: 'GlobalServiceEdge';
cursor?: Maybe<Scalars['String']['output']>;
node?: Maybe<GlobalService>;
};

export type Group = {
__typename?: 'Group';
description?: Maybe<Scalars['String']['output']>;
Expand Down Expand Up @@ -4336,6 +4359,7 @@ export type RootQueryType = {
gitRepositories?: Maybe<GitRepositoryConnection>;
gitRepository?: Maybe<GitRepository>;
globalService?: Maybe<GlobalService>;
globalServices?: Maybe<GlobalServiceConnection>;
group?: Maybe<Group>;
groupMembers?: Maybe<GroupMemberConnection>;
groups?: Maybe<GroupConnection>;
Expand Down Expand Up @@ -4656,6 +4680,14 @@ export type RootQueryTypeGlobalServiceArgs = {
};


/**
 * Arguments accepted by the root `globalServices` query field.
 * All arguments are optional pagination arguments of the connection.
 */
export type RootQueryTypeGlobalServicesArgs = {
after?: InputMaybe<Scalars['String']['input']>;
before?: InputMaybe<Scalars['String']['input']>;
first?: InputMaybe<Scalars['Int']['input']>;
last?: InputMaybe<Scalars['Int']['input']>;
};


export type RootQueryTypeGroupArgs = {
name: Scalars['String']['input'];
};
Expand Down
22 changes: 22 additions & 0 deletions charts/console/templates/monitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{{ if .Values.monitoring.enabled }}
# PodMonitor (monitoring.coreos.com/v1) for the console pods.
# Only rendered when .Values.monitoring.enabled is set.
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: {{ include "console.fullname" . }}
labels:
{{ include "console.labels" . | indent 4 }}
spec:
# Scrape the /metrics path on the pod port named "http".
podMetricsEndpoints:
- port: http
path: '/metrics'
# Restrict pod discovery to the namespace this release is installed in.
namespaceSelector:
matchNames:
- {{ .Release.Namespace }}
# Select pods by the standard name/instance labels of this release.
selector:
matchLabels:
app.kubernetes.io/name: console
app.kubernetes.io/instance: {{ .Release.Name }}
# Pod labels listed here are the ones named in the selector above.
# NOTE(review): presumably these are copied onto the scraped series - confirm against the PodMonitor spec.
podTargetLabels:
- app.kubernetes.io/name
- app.kubernetes.io/instance
{{ end }}
26 changes: 0 additions & 26 deletions charts/console/templates/prometheusrule.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,30 +37,4 @@ spec:
annotations:
summary: Console deployment's memory has gotten too high
description: the memory utilization of your console deployment is higher than recommended
- alert: ConsoleDbCPU
expr: |
(
sum(rate(container_cpu_usage_seconds_total{namespace="{{ .Release.Namespace }}",pod=~"plural-console-[0-9]+"}[5m]))
/ sum(kube_pod_container_resource_requests{endpoint="http",namespace="{{ .Release.Namespace }}", pod=~"plural-console-[0-9]+", resource="cpu"})
) > 0.6
for: 5m
labels:
severity: warning
namespace: {{ .Release.Namespace }}
annotations:
summary: Console's postgres cpu has gotten too high
description: the cpu utilization of your console deployment's postgres db is higher than recommended
- alert: ConsoleDbMEM
expr: |
(
sum(container_memory_working_set_bytes{namespace="{{ .Release.Namespace }}",pod=~"plural-console-[0-9]+"})
/ sum(kube_pod_container_resource_requests{endpoint="http",resource="memory",namespace="{{ .Release.Namespace }}", pod=~"plural-console-[0-9]+"})
) > 0.6
for: 5m
labels:
severity: warning
namespace: {{ .Release.Namespace }}
annotations:
summary: Console postgres memory has gotten too high
description: the memory utilization of your console deployment's postgres db is higher than recommended
{{ end }}
2 changes: 2 additions & 0 deletions config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ config :console, Console.Repo,

config :libcluster, :topologies, []

# Sets tzdata's :autoupdate option to :disabled.
# NOTE(review): presumably this stops the library from fetching timezone data at runtime - confirm against the tzdata docs.
config :tzdata, :autoupdate, :disabled

config :kazan, :server, :in_cluster

config :ra,
Expand Down
17 changes: 17 additions & 0 deletions lib/console.ex
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,23 @@ defmodule Console do
end
end

@doc """
Recursively counts regular files under `path` and sums their sizes.

Returns `{file_count, total_size}` added on top of the `acc` tuple
(defaults to `{0, 0}`), where sizes come from `File.Stat` (bytes).

Anything that is neither a regular file nor a directory is ignored, and so
are entries that cannot be stat'ed or listed (for example because they were
removed while the walk was in progress) — the walk is best-effort and never
raises on a vanished or unreadable entry.
"""
def df(path \\ ".", acc \\ {0, 0})

def df(path, {count, size} = acc) do
  # A single non-raising stat replaces the `File.regular?` + `File.stat!`
  # pair, which could raise if the file disappeared between the two calls.
  case File.stat(path) do
    {:ok, %File.Stat{type: :regular, size: bytes}} ->
      {count + 1, size + bytes}

    {:ok, %File.Stat{type: :directory}} ->
      case File.ls(path) do
        {:ok, entries} ->
          # Thread the accumulator through the recursion instead of summing
          # per-child results afterwards.
          Enum.reduce(entries, acc, fn entry, totals ->
            df(Path.join(path, entry), totals)
          end)

        {:error, _reason} ->
          acc
      end

    _other_or_error ->
      acc
  end
end

def dump_folder(path, contents) do
Enum.reduce_while(contents, :ok, fn {p, data}, _ ->
fullpath = Path.join(path, p)
Expand Down
5 changes: 4 additions & 1 deletion lib/console/application.ex
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
defmodule Console.Application do
use Application
alias Console.Prom.Setup

def start(_type, _args) do
topologies = Application.get_env(:libcluster, :topologies)
ConsoleWeb.Plugs.MetricsExporter.setup()
Setup.setup()
Setup.attach()

children = [
%{
Expand Down Expand Up @@ -36,6 +38,7 @@ defmodule Console.Application do
{Absinthe.Subscription, ConsoleWeb.Endpoint},
Console.Cached.Supervisor,
Console.Watchers.Supervisor,
Console.Prom.Scraper,
{OpenIDConnect.Worker, Application.get_env(:console, :oidc_providers)},
] ++ consumers() ++ [
Piazza.GracefulShutdown
Expand Down
7 changes: 7 additions & 0 deletions lib/console/deployments/git/agent.ex
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ defmodule Console.Deployments.Git.Agent do
"""
use GenServer
import Console.Deployments.Git.Cmd
alias Console.Prom.Metrics
alias Console.Deployments.{Git.Cache, Git, Services}
alias Console.Schema.{GitRepository, Service}

Expand Down Expand Up @@ -50,6 +51,7 @@ defmodule Console.Deployments.Git.Agent do
schedule_pull()
:timer.send_interval(@poll, :move)
send self(), :clone
Metrics.inc(:git_agent, repo.url)
{:ok, %State{git: repo, cache: cache}}
end

Expand Down Expand Up @@ -135,6 +137,11 @@ defmodule Console.Deployments.Git.Agent do

def handle_info(_, state), do: {:noreply, state}

# On shutdown, decrement the per-url git agent gauge that init/1 incremented.
# Any state that is not a %State{} is ignored.
def terminate(_reason, %State{git: repo}), do: Metrics.dec(:git_agent, repo.url)
def terminate(_reason, _state), do: :ok

defp refresh(%GitRepository{} = repo) do
with %GitRepository{} = git <- Console.Repo.get(GitRepository, repo.id),
do: Map.merge(git, Map.take(repo, [:private_key_file, :dir]))
Expand Down
15 changes: 15 additions & 0 deletions lib/console/deployments/git/statistics.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
defmodule Console.Deployments.Git.Statistics do
  @moduledoc """
  Disk statistics for the directories registered in the `Briefly.Entry.Dir`
  ETS table, reported through `Console.Prom.Metrics`.
  """
  alias Console.Prom.Metrics

  @doc """
  Walks every directory found in the `Briefly.Entry.Dir` table with
  `Console.df/2`, totals the file count and size, and publishes both via
  `Metrics.filecache/2`.
  """
  def disk() do
    {files, bytes} =
      Briefly.Entry.Dir
      |> :ets.tab2list()
      |> Enum.reduce({0, 0}, fn {_pid, dir}, totals -> Console.df(dir, totals) end)

    Metrics.filecache(files, bytes)
  end
end
13 changes: 13 additions & 0 deletions lib/console/graphql/deployments/service.ex
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,12 @@ defmodule Console.GraphQl.Deployments.Service do
field :service, :service_deployment, resolve: dataloader(Deployments), description: "the service to replicate across clusters"
field :provider, :cluster_provider, resolve: dataloader(Deployments), description: "whether to only apply to clusters with this provider"

connection field :services, node_type: :service_deployment do
arg :q, :string

resolve &Deployments.services_for_owner/3
end

timestamps()
end

Expand Down Expand Up @@ -358,6 +364,7 @@ defmodule Console.GraphQl.Deployments.Service do

connection node_type: :service_deployment
connection node_type: :revision
connection node_type: :global_service

delta :service_deployment

Expand Down Expand Up @@ -425,6 +432,12 @@ defmodule Console.GraphQl.Deployments.Service do
safe_resolve &Deployments.resolve_global/2
end

connection field :global_services, node_type: :global_service do
middleware Authenticated

safe_resolve &Deployments.list_global_services/2
end

field :service_context, :service_context do
middleware Authenticated
arg :name, non_null(:string)
Expand Down
5 changes: 5 additions & 0 deletions lib/console/graphql/resolvers/deployments.ex
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,11 @@ defmodule Console.GraphQl.Resolvers.Deployments do
|> allow(user, :read)
end

# Resolver for the paginated list of global services, in GlobalService.ordered/0 order.
def list_global_services(args, _resolution), do: paginate(GlobalService.ordered(), args)

def settings(_, _), do: {:ok, Settings.fetch_consistent()}

def enable(_, %{context: %{current_user: user}}), do: Settings.enable(user)
Expand Down
9 changes: 9 additions & 0 deletions lib/console/graphql/resolvers/deployments/service.ex
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@ defmodule Console.GraphQl.Resolvers.Deployments.Service do
|> paginate(args)
end

# Resolver for the paginated services belonging to the parent record (matched by its id),
# restricted to what Service.for_user/1 returns for the current user, then filtered,
# searched and ordered using the field arguments.
def services_for_owner(%{id: owner_id}, args, %{context: %{current_user: user}}) do
  owned = Service.for_owner(Service.for_user(user), owner_id)

  owned
  |> service_filters(args)
  |> maybe_search(Service, args)
  |> Service.ordered()
  |> paginate(args)
end

def list_revisions(%{id: id}, args, _) do
Revision.for_service(id)
|> Revision.ordered()
Expand Down
3 changes: 3 additions & 0 deletions lib/console/prom/ecto.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Ecto instrumenter module; all behaviour comes from `use Prometheus.EctoInstrumenter`.
# Console.Prom.Setup calls setup/0 on it and attaches its handle_event/4 to the
# [:console, :repo, :query] telemetry event.
defmodule Console.Prom.Ecto do
use Prometheus.EctoInstrumenter
end
30 changes: 30 additions & 0 deletions lib/console/prom/metrics.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
defmodule Console.Prom.Metrics do
  @moduledoc """
  Custom Prometheus gauges reported by the Console.

  Every metric name is prefixed with `plural_console_`. `setup/0` declares the
  gauges; the remaining functions update them.
  """
  use Prometheus.Metric

  # Expands at compile time to the prefixed metric atom, e.g.
  # `metric_name(:git_agent_count)` becomes `:plural_console_git_agent_count`.
  defmacrop metric_name(name), do: :"plural_console_#{name}"

  @doc "Declares all gauges managed by this module."
  def setup() do
    Gauge.declare(
      name: metric_name(:git_agent_count),
      labels: [:url],
      help: "Count of active git agents in this Console node"
    )

    Gauge.declare(
      name: metric_name(:local_cache_file_count),
      help: "Count of the number of files w/in local caches at the moment"
    )

    # The help text previously duplicated the file-count description; this
    # gauge carries the summed file size, not a count.
    Gauge.declare(
      name: metric_name(:local_cache_filesize),
      help: "Total size in bytes of the files w/in local caches at the moment"
    )
  end

  @doc "Increments the git agent gauge for the given `label` (the repository url)."
  def inc(:git_agent, label) do
    Gauge.inc(name: metric_name(:git_agent_count), labels: [label])
  end

  @doc "Decrements the git agent gauge for the given `label` (the repository url)."
  def dec(:git_agent, label) do
    Gauge.dec(name: metric_name(:git_agent_count), labels: [label])
  end

  @doc "Sets the local cache gauges to the given file `count` and total `size`."
  def filecache(count, size) do
    Gauge.set([name: metric_name(:local_cache_file_count)], count)
    Gauge.set([name: metric_name(:local_cache_filesize)], size)
  end
end
19 changes: 19 additions & 0 deletions lib/console/prom/scraper.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
defmodule Console.Prom.Scraper do
  @moduledoc """
  Singleton GenServer (registered under its module name) that periodically
  refreshes polled metrics.

  Every `@scrape_interval` it receives a `:scrape` message and calls
  `Console.Deployments.Git.Statistics.disk/0`. It keeps no meaningful state.
  """
  use GenServer

  @scrape_interval :timer.minutes(10)

  @doc "Starts the scraper, registered locally as `#{inspect(__MODULE__)}`."
  def start_link(opts \\ :ok) do
    GenServer.start_link(__MODULE__, opts, name: __MODULE__)
  end

  @impl true
  def init(_) do
    :timer.send_interval(@scrape_interval, :scrape)
    {:ok, %{}}
  end

  @impl true
  def handle_info(:scrape, state) do
    Console.Deployments.Git.Statistics.disk()
    {:noreply, state}
  end

  # Defining handle_info/2 replaces GenServer's default implementation, so
  # without this clause any stray message would crash the scraper with a
  # FunctionClauseError. Ignore everything that is not a :scrape tick.
  def handle_info(_msg, state), do: {:noreply, state}
end
19 changes: 19 additions & 0 deletions lib/console/prom/setup.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
defmodule Console.Prom.Setup do
  @moduledoc """
  Boot-time wiring for the Console's Prometheus instrumentation: declares the
  metrics and hooks the Ecto instrumenter into repo query telemetry.
  """
  alias Console.Prom.{Ecto, Metrics}
  alias ConsoleWeb.Plugs.MetricsExporter

  @handler_id "prometheus-ecto"
  @repo_query_event [:console, :repo, :query]

  @doc "Runs `setup/0` for the Ecto instrumenter, the custom metrics and the metrics exporter, in that order."
  def setup() do
    Ecto.setup()
    Metrics.setup()
    MetricsExporter.setup()
  end

  @doc """
  Attaches `Console.Prom.Ecto.handle_event/4` to the repo query telemetry
  event. Raises a match error if `:telemetry.attach/4` returns anything but `:ok`.
  """
  def attach() do
    :ok = :telemetry.attach(@handler_id, @repo_query_event, &Ecto.handle_event/4, %{})
  end
end
1 change: 1 addition & 0 deletions mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ defmodule Console.MixProject do
{:argon2_elixir, "~> 2.0"},
{:prometheus_ex, "~> 3.0"},
{:prometheus_plugs, "~> 1.1.1"},
{:prometheus_ecto, "~> 1.4.3"},
{:guardian, "~> 1.2.1"},
{:httpoison, "~> 1.7", override: true},
{:nebulex, "== 2.0.0"},
Expand Down
1 change: 1 addition & 0 deletions mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@
"porcelain": {:hex, :porcelain, "2.0.3", "2d77b17d1f21fed875b8c5ecba72a01533db2013bd2e5e62c6d286c029150fdc", [:mix], [], "hexpm", "dc996ab8fadbc09912c787c7ab8673065e50ea1a6245177b0c24569013d23620"},
"postgrex": {:hex, :postgrex, "0.17.3", "c92cda8de2033a7585dae8c61b1d420a1a1322421df84da9a82a6764580c503d", [:mix], [{:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:decimal, "~> 1.5 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "946cf46935a4fdca7a81448be76ba3503cff082df42c6ec1ff16a4bdfbfb098d"},
"prometheus": {:hex, :prometheus, "4.6.0", "20510f381db1ccab818b4cf2fac5fa6ab5cc91bc364a154399901c001465f46f", [:mix, :rebar3], [], "hexpm", "4905fd2992f8038eccd7aa0cd22f40637ed618c0bed1f75c05aacec15b7545de"},
"prometheus_ecto": {:hex, :prometheus_ecto, "1.4.3", "3dd4da1812b8e0dbee81ea58bb3b62ed7588f2eae0c9e97e434c46807ff82311", [:mix], [{:ecto, "~> 2.0 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:prometheus_ex, "~> 1.1 or ~> 2.0 or ~> 3.0", [hex: :prometheus_ex, repo: "hexpm", optional: false]}], "hexpm", "8d66289f77f913b37eda81fd287340c17e61a447549deb28efc254532b2bed82"},
"prometheus_ex": {:hex, :prometheus_ex, "3.0.5", "fa58cfd983487fc5ead331e9a3e0aa622c67232b3ec71710ced122c4c453a02f", [:mix], [{:prometheus, "~> 4.0", [hex: :prometheus, repo: "hexpm", optional: false]}], "hexpm", "9fd13404a48437e044b288b41f76e64acd9735fb8b0e3809f494811dfa66d0fb"},
"prometheus_plugs": {:hex, :prometheus_plugs, "1.1.5", "25933d48f8af3a5941dd7b621c889749894d8a1082a6ff7c67cc99dec26377c5", [:mix], [{:accept, "~> 0.1", [hex: :accept, repo: "hexpm", optional: false]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: false]}, {:prometheus_ex, "~> 1.1 or ~> 2.0 or ~> 3.0", [hex: :prometheus_ex, repo: "hexpm", optional: false]}, {:prometheus_process_collector, "~> 1.1", [hex: :prometheus_process_collector, repo: "hexpm", optional: true]}], "hexpm", "0273a6483ccb936d79ca19b0ab629aef0dba958697c94782bb728b920dfc6a79"},
"protobuf": {:hex, :protobuf, "0.5.4", "2e1b8eec211aff034ad8a14e3674220b0158bfb9a3c7128ac9d2a1ed1b3724d3", [:mix], [], "hexpm", "994348a4592408bc99c132603b0fdb686a2b5df0321a8eb1a582ec2bd3495886"},
Expand Down
2 changes: 1 addition & 1 deletion rel/config/console.exs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ config :console, Console.Guardian,

config :console, ConsoleWeb.Endpoint,
url: [host: get_env("HOST"), port: 80],
check_origin: ["//#{get_env("HOST")}", "//#{get_env("EXT_HOST") || get_env("HOST")}", "//console"]
check_origin: ["//#{get_env("HOST")}", "//#{get_env("EXT_HOST") || get_env("HOST")}", "//#{get_env("WEBHOOK_HOST") || get_env("HOST")}", "//console"]

provider = case get_env("PROVIDER") do
"google" -> :gcp
Expand Down
Loading

0 comments on commit d620515

Please sign in to comment.