From 8758f5a0c7367b4ad4418502a73e6c0ed8311151 Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <erdnaxe@crans.org>
Date: Sat, 8 Jun 2019 19:08:52 +0200
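Subject: [PATCH] Route Alertmanager alerts to the ninjabot webhook

Add a 'webhook-ninjabot' receiver and make it the default route instead
of 'team-roots-mails'. Shorten group_interval from 5m to 1m. Set the
severity label to 'warning' on the APIHighRequestLatency and NTP drift
alert rules.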
---
 .../templates/prometheus/alertmanager.yml.j2           | 10 +++++++---
 .../prometheus/templates/prometheus/alert.rules.yml.j2 |  4 +++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2 b/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
index 2442108b..666c5ffc 100644
--- a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
+++ b/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
@@ -28,14 +28,15 @@ route:
 
   # When the first notification was sent, wait 'group_interval' to send a batch
   # of new alerts that started firing for that group.
-  group_interval: 5m
+  group_interval: 1m
 
   # If an alert has successfully been sent, wait 'repeat_interval' to
   # resend them.
   repeat_interval: 3h 
 
   # A default receiver
-  receiver: team-roots-mails
+  # Previous default receiver: team-roots-mails
+  receiver: webhook-ninjabot
 
 
 # Inhibition rules allow to mute a set of alerts given that another alert is
@@ -55,4 +56,7 @@ receivers:
 - name: 'team-roots-mails'
   email_configs:
   - to: 'roots@crans.org'
-
+- name: 'webhook-ninjabot'
+  webhook_configs:
+  - url: 'http://zamok.adm.crans.org:5000/'
+    send_resolved: true
diff --git a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
index cf24fd7a..18923c4f 100644
--- a/roles/prometheus/templates/prometheus/alert.rules.yml.j2
+++ b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
@@ -59,6 +59,8 @@ groups:
   - alert: APIHighRequestLatency
     expr: api_http_request_latencies_second{quantile="0.5"} > 1
     for: 10m
+    labels:
+      severity: warning
     annotations:
       summary: "Latence élevée sur {{ $labels.instance }}"
       description: "{{ $labels.instance }} a un temps de réponse médian au dessus d'une seconde (actuellement : {{ $value }}s)."
@@ -77,7 +79,7 @@ groups:
     expr: node_ntp_drift_seconds > 0.01
     for: 1m
     labels:
-      severity: moderate
+      severity: warning
     annotations:
       description: "Décalage NTP élevé sur {{ $labels.instance }}"
       summary: "Le décalage NTP est élevé ({{ $value }} > 0.01)"
-- 
GitLab