Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
fajpunk committed Sep 29, 2024
1 parent 69f063b commit 0f5e9aa
Show file tree
Hide file tree
Showing 18 changed files with 418 additions and 56 deletions.
27 changes: 27 additions & 0 deletions applications/mobu/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@ spec:
containers:
- name: {{ .Chart.Name }}
env:
- name: "KAFKA_SECURITY_PROTOCOL"
value: "SSL"
# From KafkaAccess
- name: "KAFKA_BOOTSTRAP_SERVERS"
valueFrom:
secretKeyRef:
name: mobu-kafka
key: "bootstrapServers"
- name: "KAFKA_CLUSTER_CA_PATH"
value: "/etc/kafkacluster/ca.crt"
- name: "KAFKA_CLIENT_CERT_PATH"
value: "/etc/kafkauser/user.crt"
- name: "KAFKA_CLIENT_KEY_PATH"
value: "/etc/kafkauser/user.key"
{{- if .Values.config.slackAlerts }}
- name: "MOBU_ALERT_HOOK"
valueFrom:
Expand Down Expand Up @@ -103,6 +117,15 @@ spec:
- ALL
readOnlyRootFilesystem: true
volumeMounts:
- name: "kafka"
mountPath: "/etc/kafkacluster/ca.crt"
subPath: "ssl.truststore.crt" # CA cert from the Kafka cluster
- name: "kafka"
mountPath: "/etc/kafkauser/user.crt"
subPath: "ssl.keystore.crt" # User cert from the Kafka cluster signed by the clients' CA
- name: "kafka"
mountPath: "/etc/kafkauser/user.key"
subPath: "ssl.keystore.key" # private key for the consuming client
- name: "config"
mountPath: "/etc/mobu"
readOnly: true
Expand All @@ -113,6 +136,10 @@ spec:
runAsUser: 1000
runAsGroup: 1000
volumes:
# This secret comes from the KafkaAccess operator
- name: "kafka"
secret:
secretName: mobu-kafka
- name: "config"
projected:
sources:
Expand Down
14 changes: 14 additions & 0 deletions applications/mobu/templates/kafkaaccess.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Asks the Strimzi access operator for connection details and client
# credentials for the sasquatch Kafka cluster. The mobu Deployment reads them
# from the "mobu-kafka" Secret (bootstrapServers, ssl.truststore.crt,
# ssl.keystore.crt, ssl.keystore.key).
apiVersion: access.strimzi.io/v1alpha1
kind: KafkaAccess
metadata:
  name: mobu-kafka
spec:
  # The Kafka cluster and listener to connect to.
  kafka:
    name: sasquatch
    namespace: sasquatch
    listener: tls
  # The KafkaUser whose TLS credentials are copied into the Secret.
  user:
    kind: KafkaUser
    apiGroup: kafka.strimzi.io
    # Must match the KafkaUser rendered by the sasquatch app-metrics subchart,
    # which names its users "square-app-metrics-<app>".
    name: square-app-metrics-mobu
    namespace: sasquatch
3 changes: 3 additions & 0 deletions applications/sasquatch/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ dependencies:
- name: square-events
condition: squareEvents.enabled
version: 1.0.0
- name: app-metrics
condition: app-metrics.enabled
version: 1.0.0

annotations:
phalanx.lsst.io/docs: |
Expand Down
24 changes: 23 additions & 1 deletion applications/sasquatch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,13 @@ Rubin Observatory's telemetry service

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| global.app-metrics-config.mobu.influxTags[0] | string | `"foo"` | |
| global.app-metrics-config.mobu.influxTags[1] | string | `"bar"` | |
| global.baseUrl | string | Set by Argo CD | Base URL for the environment |
| global.host | string | Set by Argo CD | Host name for ingress |
| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets |
| app-metrics.apps | list | `[]` | The apps to create configuration for. |
| app-metrics.enabled | bool | `false` | Enable the app-metrics subchart with topic, user, and telegraf configurations |
| chronograf.enabled | bool | `true` | Whether Chronograf is enabled |
| chronograf.env | object | See `values.yaml` | Additional environment variables for Chronograf |
| chronograf.envFromSecret | string | `"sasquatch"` | Name of secret to use. The keys `generic_client_id`, `generic_client_secret`, and `token_secret` should be set. |
Expand Down Expand Up @@ -81,6 +85,25 @@ Rubin Observatory's telemetry service
| strimzi-registry-operator.clusterNamespace | string | `"sasquatch"` | Namespace where the Strimzi Kafka cluster is deployed |
| strimzi-registry-operator.operatorNamespace | string | `"sasquatch"` | Namespace where the strimzi-registry-operator is deployed |
| telegraf-kafka-consumer | object | `{}` | Overrides for telegraf-kafka-consumer configuration |
| app-metrics.affinity | object | `{}` | Affinity for pod assignment |
| app-metrics.apps | list | `[]` | A list of applications that will publish metrics events, and the keys that should be ingested into InfluxDB as tags. The names should be the same as the app names in Phalanx. |
| app-metrics.args | list | `[]` | Arguments passed to the Telegraf agent containers |
| app-metrics.cluster.name | string | `"sasquatch"` | |
| app-metrics.debug | bool | false | Run Telegraf in debug mode. |
| app-metrics.env | list | See `values.yaml` | Telegraf agent environment variables |
| app-metrics.envFromSecret | string | `""` | Name of the secret with values to be added to the environment. |
| app-metrics.globalInfluxTags | list | `["service"]` | Keys in every event sent by any app that should be recorded in InfluxDB as "tags" (vs. "fields"). |
| app-metrics.image.pullPolicy | string | `"Always"` | Image pull policy |
| app-metrics.image.repo | string | `"docker.io/library/telegraf"` | Telegraf image repository |
| app-metrics.image.tag | string | `"1.30.2-alpine"` | Telegraf image tag |
| app-metrics.imagePullSecrets | list | `[]` | Secret names to use for Docker pulls |
| app-metrics.influxdb.url | string | `"http://sasquatch-influxdb.sasquatch:8086"` | URL of the InfluxDB v1 instance to write to |
| app-metrics.nodeSelector | object | `{}` | Node labels for pod assignment |
| app-metrics.podAnnotations | object | `{}` | Annotations for telegraf-kafka-consumer pods |
| app-metrics.podLabels | object | `{}` | Labels for telegraf-kafka-consumer pods |
| app-metrics.replicaCount | int | `1` | Number of Telegraf replicas. Increase this value to increase the consumer throughput. |
| app-metrics.resources | object | See `values.yaml` | Kubernetes resources requests and limits |
| app-metrics.tolerations | list | `[]` | Tolerations for pod assignment |
| influxdb-enterprise.bootstrap.auth.secretName | string | `"sasquatch"` | Enable authentication of the data nodes using this secret, by creating a username and password for an admin account. The secret must contain keys `username` and `password`. |
| influxdb-enterprise.bootstrap.ddldml.configMap | string | Do not run DDL or DML | A config map containing DDL and DML that define databases, retention policies, and inject some data. The keys `ddl` and `dml` must exist, even if one of them is empty. DDL is executed before DML to ensure databases and retention policies exist. |
| influxdb-enterprise.bootstrap.ddldml.resources | object | `{}` | Kubernetes resources and limits for the bootstrap job |
Expand Down Expand Up @@ -389,7 +412,6 @@ Rubin Observatory's telemetry service
| strimzi-kafka.registry.resources | object | See `values.yaml` | Kubernetes requests and limits for the Schema Registry |
| strimzi-kafka.registry.schemaTopic | string | `"registry-schemas"` | Name of the topic used by the Schema Registry |
| strimzi-kafka.superusers | list | `["kafka-admin"]` | A list of usernames for users who should have global admin permissions. These users will be created, along with their credentials. |
| strimzi-kafka.users.appmetrics.enabled | bool | `false` | Enable user appmetrics |
| strimzi-kafka.users.camera.enabled | bool | `false` | Enable user camera, used at the camera environments |
| strimzi-kafka.users.consdb.enabled | bool | `false` | Enable user consdb |
| strimzi-kafka.users.kafdrop.enabled | bool | `false` | Enable user Kafdrop (deployed by parent Sasquatch chart). |
Expand Down
6 changes: 6 additions & 0 deletions applications/sasquatch/charts/app-metrics/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Helm chart metadata for the app-metrics subchart of sasquatch.
apiVersion: v2
type: application
name: app-metrics
description: Kafka topics, users, and a telegraf connector for metrics events.
# Chart version, followed by the version of the packaged application.
version: 1.0.0
appVersion: "1.0.0"
27 changes: 27 additions & 0 deletions applications/sasquatch/charts/app-metrics/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# app-metrics

Kafka topics, users, and a telegraf connector for metrics events.

## Values

| Key | Type | Default | Description |
|-----|------|---------|-------------|
| affinity | object | `{}` | Affinity for pod assignment |
| apps | list | `[]` | A list of applications that will publish metrics events, and the keys that should be ingested into InfluxDB as tags. The names should be the same as the app names in Phalanx. |
| args | list | `[]` | Arguments passed to the Telegraf agent containers |
| cluster.name | string | `"sasquatch"` | |
| debug | bool | false | Run Telegraf in debug mode. |
| env | list | See `values.yaml` | Telegraf agent environment variables |
| envFromSecret | string | `""` | Name of the secret with values to be added to the environment. |
| globalInfluxTags | list | `["service"]` | Keys in every event sent by any app that should be recorded in InfluxDB as "tags" (vs. "fields"). |
| image.pullPolicy | string | `"Always"` | Image pull policy |
| image.repo | string | `"docker.io/library/telegraf"` | Telegraf image repository |
| image.tag | string | `"1.30.2-alpine"` | Telegraf image tag |
| imagePullSecrets | list | `[]` | Secret names to use for Docker pulls |
| influxdb.url | string | `"http://sasquatch-influxdb.sasquatch:8086"` | URL of the InfluxDB v1 instance to write to |
| nodeSelector | object | `{}` | Node labels for pod assignment |
| podAnnotations | object | `{}` | Annotations for telegraf-kafka-consumer pods |
| podLabels | object | `{}` | Labels for telegraf-kafka-consumer pods |
| replicaCount | int | `1` | Number of Telegraf replicas. Increase this value to increase the consumer throughput. |
| resources | object | See `values.yaml` | Kubernetes resources requests and limits |
| tolerations | list | `[]` | Tolerations for pod assignment |
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{{- /* Renders one KafkaTopic per entry in .Values.apps, named after the app. */ -}}
{{- range $app := .Values.apps }}
---
apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaTopic
metadata:
  name: "lsst.square.app-metrics.events.{{ $app }}"
  labels:
    # Ties the topic to the Strimzi-managed Kafka cluster.
    strimzi.io/cluster: {{ $.Values.cluster.name }}
spec:
  partitions: 10
  replicas: 3
  config:
    # Topic-level settings; see
    # http://kafka.apache.org/documentation/#topicconfigs
    retention.ms: 86400000  # 1 day
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{{- /* Renders one TLS-authenticated KafkaUser per entry in .Values.apps. */ -}}
{{- range $app := .Values.apps }}
---
apiVersion: kafka.strimzi.io/v1beta2
kind: KafkaUser
metadata:
  name: square-app-metrics-{{ $app }}
  labels:
    # Ties the user to the Strimzi-managed Kafka cluster.
    strimzi.io/cluster: {{ $.Values.cluster.name }}
spec:
  authentication:
    type: tls
  authorization:
    type: simple
    acls:
      # Read on any consumer group whose name starts with
      # "app-metrics-events".
      - resource:
          type: group
          name: app-metrics-events
          patternType: prefix
        operations: ["Read"]
        host: "*"
      # Describe/Read/Write on this app's own events topic only.
      - resource:
          type: topic
          name: "lsst.square.app-metrics.events.{{ $app }}"
          patternType: literal
        operations: ["Describe", "Read", "Write"]
        host: "*"
{{- end }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
---
# Telegraf configuration for the app-metrics connector. It renders one
# kafka_consumer input per entry in .Values.apps and writes the decoded Avro
# events to InfluxDB; Telegraf's own internal metrics go to a separate
# "telegraf" database.
apiVersion: v1
kind: ConfigMap
metadata:
  name: sasquatch-telegraf-app-metrics
  labels:
    app.kubernetes.io/name: sasquatch-telegraf
    app.kubernetes.io/instance: sasquatch-telegraf-app-metrics
    app.kubernetes.io/part-of: sasquatch
data:
  telegraf.conf: |+
    [agent]
      metric_batch_size = 5000
      metric_buffer_limit = 100000
      collection_jitter = "0s"
      flush_interval = "10s"
      flush_jitter = "0s"
      debug = {{ default false .Values.debug }}
      omit_hostname = true

    [[outputs.influxdb]]
      urls = [
        {{ .Values.influxdb.url | quote }}
      ]
      database = "telegraf-kafka-app-metrics-consumer"
      username = "${INFLUXDB_USER}"
      password = "${INFLUXDB_PASSWORD}"

    [[outputs.influxdb]]
      namepass = ["telegraf_*"]
      urls = [
        {{ .Values.influxdb.url | quote }}
      ]
      database = "telegraf"
      username = "${INFLUXDB_USER}"
      password = "${INFLUXDB_PASSWORD}"

    {{- /*
      Tags for each app are the chart-wide globalInfluxTags followed by that
      app's own influxTags from global.app-metrics-config. Every lookup falls
      back to an empty value, so an app with no per-app configuration still
      renders and is tagged with the global tags only.
    */}}
    {{- $allAppConfig := index $.Values.global "app-metrics-config" | default dict }}
    {{- range $index, $app := .Values.apps }}
    {{- $appConfig := index $allAppConfig $app | default dict }}
    {{- $appTags := index $appConfig "influxTags" | default list }}
    {{- $influxTags := concat ($.Values.globalInfluxTags | default list) $appTags | uniq }}

    [[inputs.kafka_consumer]]
      brokers = [
        "sasquatch-kafka-brokers.sasquatch:9092"
      ]
      consumer_group = "telegraf-kafka-consumer-app-metrics"
      sasl_mechanism = "SCRAM-SHA-512"
      sasl_password = "$TELEGRAF_PASSWORD"
      sasl_username = "telegraf"
      data_format = "avro"
      avro_schema_registry = "http://sasquatch-schema-registry.sasquatch:8081"
      avro_timestamp = "timestamp_ns"
      avro_timestamp_format = "unix_ns"
      avro_union_mode = "nullable"
      avro_tags = {{ include "helpers.toTomlArray" $influxTags }}
      topics = [
        "lsst.square.app-metrics.events.{{ $app }}",
      ]
      max_processing_time = "5s"
      consumer_fetch_default = "5MB"
      max_undelivered_messages = 10000
      compression_codec = 3
    {{- end }}

    [[inputs.internal]]
      name_prefix = "telegraf_"
      collect_memstats = true
      tags = { instance = "app-metrics" }
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
---
# Runs the Telegraf agent that consumes app-metrics events. Its configuration
# is mounted at /etc/telegraf from the sasquatch-telegraf-app-metrics
# ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: sasquatch-telegraf-app-metrics
  labels:
    app.kubernetes.io/name: sasquatch-telegraf
    app.kubernetes.io/instance: sasquatch-telegraf-app-metrics
    app.kubernetes.io/part-of: sasquatch
spec:
  replicas: {{ default 1 .Values.replicaCount }}
  selector:
    matchLabels:
      app.kubernetes.io/instance: sasquatch-telegraf-app-metrics
  template:
    metadata:
      labels:
        app.kubernetes.io/instance: sasquatch-telegraf-app-metrics
        {{- /* Extra pod labels; documented in the chart README as podLabels. */}}
        {{- if .Values.podLabels }}
        {{- toYaml .Values.podLabels | nindent 8 }}
        {{- end }}
      annotations:
        # Hash of the rendered Telegraf ConfigMap, so that a configuration
        # change alters the pod template and triggers a rollout.
        checksum/config: {{ include (print $.Template.BasePath "/telegraf-configmap.yaml") $ | sha256sum }}
        {{- if .Values.podAnnotations }}
        {{- toYaml .Values.podAnnotations | nindent 8 }}
        {{- end }}
    spec:
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000
        runAsGroup: 1000
      containers:
      - name: telegraf
        securityContext:
          capabilities:
            drop:
              # Upper-case "ALL" is the spelling used by the Pod Security
              # Standards and by the mobu Deployment.
              - ALL
          readOnlyRootFilesystem: true
          allowPrivilegeEscalation: false
        image: "{{ .Values.image.repo }}:{{ .Values.image.tag }}"
        imagePullPolicy: {{ default "IfNotPresent" .Values.image.pullPolicy | quote }}
        {{- if .Values.resources }}
        resources:
          {{- toYaml .Values.resources | nindent 10 }}
        {{- end }}
        {{- if .Values.args }}
        args:
          {{- toYaml .Values.args | nindent 8 }}
        {{- end }}
        {{- if .Values.env }}
        env:
          {{- toYaml .Values.env | nindent 8 }}
        {{- end }}
        {{- if .Values.envFromSecret }}
        envFrom:
        - secretRef:
            name: {{ .Values.envFromSecret }}
        {{- end }}
        volumeMounts:
        - name: config
          mountPath: /etc/telegraf
      {{- if .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml .Values.imagePullSecrets | nindent 8 }}
      {{- end }}
      {{- if .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml .Values.nodeSelector | nindent 8 }}
      {{- end }}
      {{- if .Values.affinity }}
      affinity:
        {{- toYaml .Values.affinity | nindent 8 }}
      {{- end }}
      {{- if .Values.tolerations }}
      tolerations:
        {{- toYaml .Values.tolerations | nindent 8 }}
      {{- end }}
      volumes:
      - name: config
        configMap:
          name: sasquatch-telegraf-app-metrics
Loading

0 comments on commit 0f5e9aa

Please sign in to comment.