Skip to content

Commit

Permalink
Collect syslog lines next to metrics
Browse files Browse the repository at this point in the history
This lets us show the most recent log entries next to a metric, which
helps diagnose issues such as why a CPU clocks down or why a process
stops.
  • Loading branch information
Your Name authored and vmcj committed Sep 3, 2024
1 parent 050e49c commit 4b2186a
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 96 deletions.
1 change: 1 addition & 0 deletions provision-contest/ansible/roles/grafana/files/.gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/ssl.*
/grafana.deb
/loki-linux-amd64.zip
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,26 @@ auth_enabled: false

server:
http_listen_port: 3100
grpc_listen_port: 9096
grpc_listen_port: 13100

ingester:
lifecycler:
address: 127.0.0.1
ring:
kvstore:
store: inmemory
replication_factor: 1
final_sleep: 0s
chunk_idle_period: 5m
chunk_retain_period: 30s
max_transfer_retries: 0
common:
ring:
instance_addr: 127.0.0.1
kvstore:
store: inmemory
replication_factor: 1
path_prefix: /tmp/loki

schema_config:
configs:
- from: 2018-04-15
store: boltdb
- from: 2020-05-15
store: tsdb
object_store: filesystem
schema: v11
schema: v13
index:
prefix: index_
period: 168h
period: 24h

storage_config:
boltdb:
directory: /data/loki/index

filesystem:
directory: /data/loki/chunks

limits_config:
enforce_metric_name: false
reject_old_samples: true
reject_old_samples_max_age: 168h

chunk_store_config:
max_look_back_period: 0s

table_manager:
retention_deletes_enabled: false
retention_period: 0s
directory: /tmp/loki/chunks
94 changes: 51 additions & 43 deletions provision-contest/ansible/roles/grafana/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,48 +19,56 @@
notify: Restart prometheus

# Setup loki which gathers our logs
- name: Install loki
unarchive:
src: https://github.com/grafana/loki/releases/download/v2.5.0/loki-linux-amd64.zip
dest: /usr/bin/
remote_src: true
owner: domjudge
group: domjudge
when: loki

- name: Dir for loki settings
file:
state: directory
path: /etc/grafana/loki/
owner: root
group: root
mode: 0755
when: loki

- name: Set loki settings
copy:
src: loki-local-config.yaml
dest: /etc/grafana/loki/
owner: root
group: root
mode: 0644
when: loki
notify: Restart loki

- name: Setup loki systemd
copy:
src: loki.service
dest: /etc/systemd/system/
mode: 0655
when: loki
notify: Restart loki

- name: Start loki service
service:
name: loki
state: started
enabled: true
# Install and configure Loki, which gathers the logs. Every task in this
# block is skipped unless `loki` is truthy.
- name: Setup loki
  when: loki
  block:
    # ICPC images: unpack an archive supplied from the controller
    # (remote_src: false) instead of downloading it.
    # NOTE(review): the bundled archive should match the release pinned in
    # the download task below -- confirm.
    - name: Install loki (from bundled archive)
      unarchive:
        src: loki-linux-amd64.zip
        dest: /usr/bin/
        remote_src: false
        owner: domjudge
        group: domjudge
      when: ICPC_IMAGE

    # loki-local-config.yaml selects the tsdb index store with schema v13,
    # which a 2.5.0 binary does not support; use the same 3.x release as the
    # promtail download.
    - name: Install loki (download release)
      unarchive:
        src: https://github.com/grafana/loki/releases/download/v3.0.1/loki-linux-amd64.zip
        dest: /usr/bin/
        remote_src: true
        owner: domjudge
        group: domjudge
      when: not ICPC_IMAGE

    - name: Dir for loki settings
      file:
        state: directory
        path: /etc/grafana/loki/
        owner: root
        group: root
        mode: "0755"

    - name: Set loki settings
      copy:
        src: loki-local-config.yaml
        dest: /etc/grafana/loki/
        owner: root
        group: root
        mode: "0644"
      notify: Restart loki

    # Unit files must not be executable (systemd warns about it), so install
    # with 0644 rather than 0655.
    - name: Setup loki systemd
      copy:
        src: loki.service
        dest: /etc/systemd/system/
        mode: "0644"
      notify: Restart loki

    - name: Start loki service
      service:
        name: loki
        state: started
        enabled: true

## Setup grafana
- name: Install grafana
Expand Down Expand Up @@ -91,8 +99,8 @@
notify: Restart grafana

- name: Set up grafana datasources
synchronize:
src: files/grafana/datasources.yml
template:
src: datasources.yml.j2
dest: /etc/grafana/provisioning/datasources/default.yml
notify: Restart grafana

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,14 @@ datasources:
version: 1
# <bool> allow users to edit datasources from the UI.
editable: true
{% if loki is defined and loki %}
- name: Loki
type: loki
access: proxy
url: http://localhost:3100
jsonData:
timeout: 60
maxLines: 1000
httpHeaderName1: Connection
httpHeaderName2: Upgrade
{% endif %}
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
# Gates the promtail setup block (its tasks run `when: prom`).
# NOTE(review): may gate other monitoring tasks not visible here -- confirm.
prom: true
# Inventory group-name prefixes. The promtail config template prepends each
# entry to 'judgehost' and 'domserver' when checking which groups a host is
# in; the empty string matches the unprefixed group names.
GROUP_PREFIXES:
- 'online-'
- ''
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
*.key
*.crt
promtail-linux-amd64.zip
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ After=network.target

[Service]
Type=simple
ExecStart=/usr/bin/promtail-linux-amd64 --config.file /etc/promtail/promtail-local-config.yaml
ExecStart=/usr/bin/promtail-linux-amd64 --config.file /etc/promtail/promtail-local-config.yml

[Install]
WantedBy=multi-user.target
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,57 @@
regexp: '^ARGS=""'
line: 'ARGS="--web.config /etc/prometheus/prometheus-authentication.yml"'
notify: Restart node-exporter

# Setup promtail which sends our logs
# Install and configure promtail, which ships this host's logs to loki.
# Every task in this block is skipped unless `prom` is truthy.
- name: Setup promtail to ship logs to loki (and grafana)
  when: prom
  block:
    # ICPC images: unpack an archive supplied from the controller
    # (remote_src: false) instead of downloading it.
    - name: Install promtail (from bundled archive)
      unarchive:
        src: promtail-linux-amd64.zip
        dest: /usr/bin/
        remote_src: false
        owner: domjudge
        group: domjudge
      when: ICPC_IMAGE

    - name: Install promtail (download release)
      unarchive:
        src: https://github.com/grafana/loki/releases/download/v3.0.1/promtail-linux-amd64.zip
        dest: /usr/bin/
        remote_src: true
        owner: domjudge
        group: domjudge
      when: not ICPC_IMAGE

    - name: Dir for promtail settings
      file:
        state: directory
        path: /etc/promtail
        owner: root
        group: root
        mode: "0755"

    # Rendered per host: the template consults the inventory groups to decide
    # which log paths to scrape.
    - name: Set promtail settings
      template:
        src: promtail-local-config.yml.j2
        dest: /etc/promtail/promtail-local-config.yml
        owner: root
        group: root
        mode: "0644"
      notify: Restart promtail

    # Unit files must not be executable (systemd warns about it), so install
    # with 0644 rather than 0655.
    - name: Setup promtail systemd
      copy:
        src: promtail.service
        dest: /etc/systemd/system/
        mode: "0644"
        owner: root
        group: root
      notify: Restart promtail

    - name: Start promtail service
      service:
        name: promtail
        state: started
        enabled: true

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Promtail configuration, rendered per host by Ansible.
server:
  http_listen_port: 9080
  grpc_listen_port: 19080

positions:
  filename: /var/tmp/promtail-syslog-positions.yml

# Push to port 3100 on every host in the "grafana" inventory group.
clients:
{% for host in groups["grafana"] %}
  - url: http://{{ hostvars[host].ansible_host }}:3100/loki/api/v1/push
{% endfor %}

scrape_configs:
  - job_name: system
    static_configs:
      - labels:
          __path__: /var/log/**/*log
  # Role-specific logs: a host gets the extra job when it is a member of the
  # (optionally prefixed) judgehost/domserver group. A prefixed group that is
  # absent from the inventory counts as empty instead of aborting rendering.
{% for group_prefix in GROUP_PREFIXES %}
{% if ansible_fqdn in (groups[group_prefix + 'judgehost'] | default([])) %}
  - job_name: judgehostlogs
    static_configs:
      - labels:
          __path__: '/opt/domjudge/output/log/*'
{% endif %}
{% if ansible_fqdn in (groups[group_prefix + 'domserver'] | default([])) %}
  - job_name: webapplogs
    static_configs:
      - labels:
          __path__: '/opt/domjudge/webapp/var/log/*'
{% endif %}
{% endfor %}
  # See: https://alexandre.deverteuil.net/post/syslog-relay-for-loki/
  - job_name: syslog
    syslog:
      listen_address: 0.0.0.0:1514
      labels:
        job: syslog

0 comments on commit 4b2186a

Please sign in to comment.