diff --git a/sensu_go/README.md b/sensu_go/README.md new file mode 100644 index 00000000..d68785a4 --- /dev/null +++ b/sensu_go/README.md @@ -0,0 +1,60 @@ +## Sensu Go Monitoring Template + +This InfluxDB Template can be used to monitor the performance of your Sensu Go observability tool with Telegraf and Prometheus. + +![Sensu Go Dashboard Screenshot](sensu_go_dashboard.png) + +### Quick Install + +#### InfluxDB UI + +In the InfluxDB UI, go to Settings->Templates and enter this URL: https://raw.githubusercontent.com/influxdata/community-templates/master/sensu_go/sensu_go.yml + +#### Influx CLI + +If you have your InfluxDB credentials [configured in the CLI](https://v2.docs.influxdata.com/v2.0/reference/cli/influx/config/), you can install this template with: + +``` +influx apply -u https://raw.githubusercontent.com/influxdata/community-templates/master/sensu_go/sensu_go.yml +``` + +### Included Resources + +- 1 Bucket: `telegraf`, 7d retention +- Labels: `Sensu Go Template` + Telegraf Plugin Labels +- 1 Telegraf Configuration +- 1 Dashboard: `Sensu Go` +- 1 Variable: `bucket` + +## Setup Instructions + + General instructions on using InfluxDB Templates can be found in the [use a template](../docs/use_a_template.md) document. + + The data for the dashboard is populated by the included Telegraf configuration. The Telegraf Configuration requires the following environment variables + + - `INFLUX_TOKEN` - The token with the permissions to read Telegraf configs and write data to the `telegraf` bucket. You can just use your operator token to get started. 
+ - `INFLUX_ORG` - The name of your Organization (this will be your email address on the InfluxDB Cloud free tier) + - `INFLUX_HOST` - The URL of your InfluxDB host (this can be your localhost, a remote instance, or InfluxDB Cloud) + + You **MUST** set these environment variables before running Telegraf using something similar to the following commands (set `INFLUX_HOST` to the URL of your InfluxDB host in the same way): + + - Your token can be found on the `Load Data` > `Tokens` page in your browser: `export INFLUX_TOKEN=TOKEN` + - Your Organization name can be found on the Settings page in your browser: `export INFLUX_ORG=my_org` + +## Running Telegraf + + To collect metrics from your Sensu Go backends, [download and install Telegraf](https://portal.influxdata.com/downloads/) on those hosts. InfluxData provides native packages for a number of distributions as well as binaries that can be executed directly. + + Start Telegraf using the instructions from the `Load Data` > `Telegraf` > `Setup Instructions` link in the UI. + +## Customizations + +You can customize the included Telegraf configuration (for example, the `urls` scraped by the Prometheus input) based on your Sensu installation. More information can be found in the Sensu Go [backend configuration](https://docs.sensu.io/sensu-go/latest/observability-pipeline/observe-schedule/backend/#configuration-summary) and [/metrics endpoint](https://docs.sensu.io/sensu-go/latest/api/metrics) documentation. 
+ +## Contact + +- Author: Nikki Attea +- Email: contact@nikki.dev +- Github: [@nikkictl](https://github.com/nikkictl) +- Influx Slack: [@nikki](https://influxdata.com/slack) +- Website: https://nikki.dev \ No newline at end of file diff --git a/sensu_go/sensu_go.yml b/sensu_go/sensu_go.yml new file mode 100644 index 00000000..262b92c5 --- /dev/null +++ b/sensu_go/sensu_go.yml @@ -0,0 +1,367 @@ +--- +apiVersion: influxdata.com/v2alpha1 +kind: Label +metadata: + name: sensu-go-template +spec: + name: Sensu Go Template + color: '#7A65F2' +--- +apiVersion: influxdata.com/v2alpha1 +kind: Label +metadata: + name: outputs-influxdb-v2 +spec: + name: outputs.influxdb_v2 + color: '#108174' +--- +apiVersion: influxdata.com/v2alpha1 +kind: Label +metadata: + name: inputs-prometheus +spec: + name: inputs.prometheus + color: '#326BBA' +--- +apiVersion: influxdata.com/v2alpha1 +kind: Bucket +metadata: + name: telegraf +spec: + name: telegraf + retentionRules: + - everySeconds: 604800 + type: expire +--- +apiVersion: influxdata.com/v2alpha1 +kind: Variable +metadata: + name: bucket +spec: + associations: + - kind: Label + name: sensu-go-template + language: flux + name: bucket + query: |- + buckets() + |> filter(fn: (r) => r.name !~ /^_/) + |> rename(columns: {name: "_value"}) + |> keep(columns: ["_value"]) + type: query +--- +apiVersion: influxdata.com/v2alpha1 +kind: Telegraf +metadata: + name: sensu-go-monitoring +spec: + name: Sensu Go Monitoring + associations: + - kind: Label + name: outputs-influxdb-v2 + - kind: Label + name: inputs-prometheus + - kind: Label + name: sensu-go-template + config: | + # Configuration for telegraf agent + [agent] + interval = "10s" + round_interval = true + metric_batch_size = 1000 + metric_buffer_limit = 10000 + collection_jitter = "0s" + flush_interval = "10s" + flush_jitter = "0s" + precision = "" + hostname = "" + omit_hostname = false + # Configuration for sending metrics to InfluxDB + [[outputs.influxdb_v2]] + urls = ["$INFLUX_HOST"] + 
token = "$INFLUX_TOKEN" + organization = "$INFLUX_ORG" + bucket = "telegraf" + # Read metrics from one or many prometheus clients + [[inputs.prometheus]] + ## An array of urls to scrape metrics from. + urls = ["http://localhost:8080/metrics"] +--- +apiVersion: influxdata.com/v2alpha1 +kind: Dashboard +metadata: + name: sensu-go +spec: + charts: + - axes: + - base: "10" + name: x + scale: linear + - base: "10" + label: agent sessions + name: y + scale: linear + colors: + - hex: '#FDC44F' + name: Cthulhu + type: scale + - hex: '#007C76' + name: Cthulhu + type: scale + - hex: '#8983FF' + name: Cthulhu + type: scale + geom: monotoneX + height: 3 + hoverDimension: auto + kind: Xy + name: Agent sessions per namespace + position: overlaid + queries: + - query: |- + from(bucket: v.bucket) + |> range(start: v.timeRangeStart, stop: v.timeRangeStop) + |> filter(fn: (r) => r["_measurement"] == "sensu_go_agent_sessions") + |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: true) + |> yield(name: "last") + shade: true + width: 5 + - axes: + - base: "10" + name: x + scale: linear + - base: "10" + name: y + scale: linear + colors: + - hex: '#DA6FF1' + name: Ectoplasm + type: scale + - hex: '#00717A' + name: Ectoplasm + type: scale + - hex: '#ACFF76' + name: Ectoplasm + type: scale + geom: line + height: 3 + hoverDimension: auto + kind: Xy + name: Message publish latency distributions + position: overlaid + queries: + - query: |- + from(bucket: v.bucket) + |> range(start: v.timeRangeStart, stop: v.timeRangeStop) + |> filter(fn: (r) => r["_measurement"] == "sensu_go_bus_message_duration") + |> filter(fn: (r) => r["_field"] == "0.5" or r["_field"] == "0.9" or r["_field"] == "0.99") + |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false) + |> yield(name: "last") + shade: true + width: 6 + xCol: _time + yCol: _value + yPos: 3 + - axes: + - base: "10" + name: x + scale: linear + - base: "10" + name: y + scale: linear + colors: + - hex: '#FD7A5D' + name: 
Delorean + type: scale + - hex: '#5F1CF2' + name: Delorean + type: scale + - hex: '#4CE09A' + name: Delorean + type: scale + geom: line + height: 3 + hoverDimension: auto + kind: Xy + name: Event handler latency distribution + position: overlaid + queries: + - query: |- + from(bucket: v.bucket) + |> range(start: v.timeRangeStart, stop: v.timeRangeStop) + |> filter(fn: (r) => r["_measurement"] == "sensu_go_event_handler_duration") + |> filter(fn: (r) => r["_field"] == "0.5" or r["_field"] == "0.9" or r["_field"] == "0.99") + |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: false) + |> yield(name: "last") + shade: true + width: 6 + xCol: _time + yCol: _value + yPos: 6 + - colors: + - hex: '#7CE490' + name: honeydew + type: text + decimalPlaces: 0 + height: 1 + kind: Single_Stat + name: Total etcd servers + queries: + - query: |- + from(bucket: v.bucket) + |> range(start: v.timeRangeStart, stop: v.timeRangeStop) + |> filter(fn: (r) => r["_measurement"] == "etcd_server_id") + |> aggregateWindow(every: v.windowPeriod, fn: sum, createEmpty: false) + |> yield(name: "sum") + width: 2 + xPos: 5 + - colors: + - hex: '#00C9FF' + name: laser + type: text + decimalPlaces: 0 + height: 1 + kind: Single_Stat + name: Total agent sessions + queries: + - query: |- + from(bucket: v.bucket) + |> range(start: v.timeRangeStart, stop: v.timeRangeStop) + |> filter(fn: (r) => r["_measurement"] == "sensu_go_agent_sessions") + |> group() + |> aggregateWindow(every: v.windowPeriod, fn: sum, createEmpty: true) + |> yield(name: "sum") + width: 2 + xPos: 5 + yPos: 1 + - colors: + - hex: '#F48D38' + name: tiger + type: text + decimalPlaces: 0 + height: 1 + kind: Single_Stat + name: Total check schedulers + queries: + - query: |- + from(bucket: v.bucket) + |> range(start: v.timeRangeStart, stop: v.timeRangeStop) + |> filter(fn: (r) => r["_measurement"] == "sensu_go_cron_schedulers" or r["_measurement"] == "sensu_go_interval_schedulers" or r["_measurement"] == 
"sensu_go_round_robin_cron_schedulers" or r["_measurement"] == "sensu_go_round_robin_interval_schedulers") + |> group() + |> aggregateWindow(every: v.windowPeriod, fn: sum, createEmpty: true) + |> yield(name: "sum") + width: 2 + xPos: 5 + yPos: 2 + - axes: + - base: "10" + name: x + scale: linear + - base: "10" + label: messages published + name: y + scale: linear + colors: + - hex: '#74D495' + name: Atlantis + type: scale + - hex: '#3F3FBA' + name: Atlantis + type: scale + - hex: '#FF4D9E' + name: Atlantis + type: scale + geom: line + height: 3 + hoverDimension: auto + kind: Xy + name: Messages published per second + position: overlaid + queries: + - query: |- + from(bucket: v.bucket) + |> range(start: v.timeRangeStart, stop: v.timeRangeStop) + |> filter(fn: (r) => r["_measurement"] == "sensu_go_bus_messages_published") + |> derivative(unit: 1s, nonNegative: true) + |> yield(name: "nonnegative derivative") + shade: true + width: 6 + xCol: _time + xPos: 6 + yCol: _value + yPos: 3 + - axes: + - base: "10" + name: x + scale: linear + - base: "10" + label: processed events + name: y + scale: linear + colors: + - hex: '#FD7A5D' + name: Delorean + type: scale + - hex: '#5F1CF2' + name: Delorean + type: scale + - hex: '#4CE09A' + name: Delorean + type: scale + geom: line + height: 3 + hoverDimension: auto + kind: Xy + name: Processed events per second + position: overlaid + queries: + - query: |- + from(bucket: v.bucket) + |> range(start: v.timeRangeStart, stop: v.timeRangeStop) + |> filter(fn: (r) => r["_measurement"] == "sensu_go_events_processed") + |> derivative(unit: 1s, nonNegative: true) + |> yield(name: "nonnegative derivative") + shade: true + width: 6 + xCol: _time + xPos: 6 + yCol: _value + yPos: 6 + - axes: + - base: "10" + name: x + scale: linear + - base: "10" + label: check schedulers + name: y + scale: linear + colors: + - hex: '#FD7A5D' + name: Delorean + type: scale + - hex: '#5F1CF2' + name: Delorean + type: scale + - hex: '#4CE09A' + name: Delorean + 
type: scale + geom: monotoneX + height: 3 + hoverDimension: auto + kind: Xy + name: Check schedulers per type + position: overlaid + queries: + - query: |- + from(bucket: v.bucket) + |> range(start: v.timeRangeStart, stop: v.timeRangeStop) + |> filter(fn: (r) => r["_measurement"] == "sensu_go_round_robin_interval_schedulers" or r["_measurement"] == "sensu_go_round_robin_cron_schedulers" or r["_measurement"] == "sensu_go_interval_schedulers" or r["_measurement"] == "sensu_go_cron_schedulers") + |> aggregateWindow(every: v.windowPeriod, fn: last, createEmpty: true) + |> yield(name: "last") + shade: true + width: 5 + xCol: _time + xPos: 7 + yCol: _value + description: Monitoring your Sensu Go observability tool with Telegraf and Prometheus + name: Sensu Go diff --git a/sensu_go/sensu_go_dashboard.png b/sensu_go/sensu_go_dashboard.png new file mode 100644 index 00000000..8fd280c1 Binary files /dev/null and b/sensu_go/sensu_go_dashboard.png differ