Skip to content

Commit

Permalink
Untested, unverified, incomplete
Browse files Browse the repository at this point in the history
  • Loading branch information
johnml1135 committed Oct 18, 2023
1 parent b0643ec commit ae23940
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
25 changes: 25 additions & 0 deletions deploy/serval/templates/alert-manager-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# AlertmanagerConfig that routes every alert in this namespace to a single
# e-mail receiver ("alert-nlp").
# NOTE(review): several SMTP settings below are still placeholders and must be
# filled in before this template can be deployed.
apiVersion: monitoring.coreos.com/v1alpha1
kind: AlertmanagerConfig
metadata:
  name: limit-alerts
  # Quoted so an empty or boolean-looking value still renders as a string.
  namespace: {{ .Values.namespace | quote }}
spec:
  receivers:
    - name: alert-nlp
      emailConfigs:
        # authPassword selects a key inside a Kubernetes Secret.
        # TODO: replace the placeholders with the Secret name and key.
        - authPassword:
            key: '?????'
            name: '?????'
          # TODO: replace with the SMTP account user name.
          authUsername: '??????'
          # NOTE(review): the address appears redacted in this copy of the
          # file; restore the real sender. It is quoted because a value that
          # starts with "[" would otherwise be parsed as a YAML list.
          from: '[email protected]'
          requireTLS: true
          sendResolved: true
          # TODO: replace with the SMTP server, presumably as host:port.
          smarthost: '?????'
          tlsConfig: {}
          # NOTE(review): recipient addresses also appear redacted; restore.
          to: '[email protected], [email protected]'
  route:
    receiver: alert-nlp
    # The special value '...' groups by all labels, i.e. disables aggregation.
    groupBy: ['...']
    groupWait: 10s
    groupInterval: 5m
    repeatInterval: 4h
21 changes: 21 additions & 0 deletions deploy/serval/templates/prometheus-rules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# PrometheusRule that raises a warning when a container's CPU usage reaches
# 80% of its configured CPU limit.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  # metadata.name is required by the Kubernetes API.
  # NOTE(review): name chosen during review; rename to fit project convention.
  name: cpu-limit-rules
  # Quoted so an empty or boolean-looking value still renders as a string.
  namespace: {{ .Values.namespace | quote }}
spec:
  groups:
    - name: cpu
      rules:
        - alert: cpu-80perc-{{ .Values.namespace }}
          annotations:
            # The Prometheus label placeholder is wrapped in a Go raw string
            # so Helm emits it verbatim instead of evaluating it at render
            # time (where the variable is undefined and rendering fails).
            description: >-
              {{ `{{ $labels.container }}` }} has high CPU.
          # Ratio of the highest 3-minute CPU usage rate to the lowest CPU
          # limit per (container, namespace), expressed as a percentage.
          # NOTE(review): the namespace matcher is hard-coded to "serval|nlp"
          # rather than derived from the chart values - confirm this is
          # intended.
          expr: >-
            max(rate (container_cpu_usage_seconds_total {image!="", namespace=~"serval|nlp", container!="POD" } [3m]))
            by (container, namespace)
            / on (container, namespace)
            min(kube_pod_container_resource_limits{resource="cpu", namespace=~"serval|nlp", container!="POD"})
            by (container, namespace) * 100 >= 80
          # Fire immediately, with no pending period.
          for: 0s
          labels:
            severity: warning

0 comments on commit ae23940

Please sign in to comment.