From 8758f5a0c7367b4ad4418502a73e6c0ed8311151 Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <erdnaxe@crans.org>
Date: Sat, 8 Jun 2019 19:08:52 +0200
Subject: [PATCH] Alert ninjabot

---
Notes (free-text area, ignored by `git am`):

* This copy was rebuilt from a whitespace-collapsed version of the patch.
  Line breaks and blank context lines were restored so that every hunk
  matches its `@@` line counts and the diffstat totals. Leading indentation
  of the YAML lines is inferred, not recovered -- TODO confirm against the
  target files, or apply with `git apply --ignore-whitespace`.
* NOTE(review): the default route receiver changes from 'team-roots-mails'
  to 'webhook-ninjabot'; the e-mail receiver stays defined but is only kept
  as a commented-out line in the route.
* NOTE(review): the webhook URL uses plain http.
* NOTE(review): group_interval is lowered from 5m to 1m.

 .../templates/prometheus/alertmanager.yml.j2           | 10 +++++++---
 .../prometheus/templates/prometheus/alert.rules.yml.j2 |  4 +++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2 b/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
index 2442108b..666c5ffc 100644
--- a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
+++ b/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
@@ -28,14 +28,15 @@ route:
 
   # When the first notification was sent, wait 'group_interval' to send a batch
   # of new alerts that started firing for that group.
-  group_interval: 5m
+  group_interval: 1m
 
   # If an alert has successfully been sent, wait 'repeat_interval' to
   # resend them.
   repeat_interval: 3h
 
   # A default receiver
-  receiver: team-roots-mails
+  #receiver: team-roots-mails
+  receiver: webhook-ninjabot
 
 
 # Inhibition rules allow to mute a set of alerts given that another alert is
@@ -55,4 +56,7 @@ receivers:
 - name: 'team-roots-mails'
   email_configs:
   - to: 'roots@crans.org'
-
+- name: 'webhook-ninjabot'
+  webhook_configs:
+  - url: 'http://zamok.adm.crans.org:5000/'
+    send_resolved: true
diff --git a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
index cf24fd7a..18923c4f 100644
--- a/roles/prometheus/templates/prometheus/alert.rules.yml.j2
+++ b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
@@ -59,6 +59,8 @@ groups:
   - alert: APIHighRequestLatency
     expr: api_http_request_latencies_second{quantile="0.5"} > 1
     for: 10m
+    labels:
+      severity: warning
     annotations:
       summary: "Latence élevée sur {{ $labels.instance }}"
       description: "{{ $labels.instance }} a un temps de réponse médian au dessus d'une seconde (actuellement : {{ $value }}s)."
@@ -77,7 +79,7 @@ groups:
     expr: node_ntp_drift_seconds > 0.01
     for: 1m
     labels:
-      severity: moderate
+      severity: warning
     annotations:
       description: "Décalage NTP élevé sur {{ $labels.instance }}"
       summary: "Le décalage NTP est élevé ({{ $value }} > 0.01)"
-- 
GitLab