From f7347e41d2ce799c760936535d9c6efccb3580c1 Mon Sep 17 00:00:00 2001
From: shirenn <shirenn@crans.org>
Date: Sun, 23 May 2021 18:18:35 +0200
Subject: [PATCH] [monitoring] PEPCRANS + monitoring of cachan infra

---
 group_vars/all/prometheus_node_exporter.yaml  |   3 +
 group_vars/grafana.yml                        |   7 +
 group_vars/nginx.yml                          |   3 +
 group_vars/prometheus.yml                     |  13 ++
 host_vars/airbus.cachan-adm.crans.org.yml     |   3 +
 host_vars/fyre.cachan-adm.crans.org.yml       | 116 ++++++++++++
 host_vars/gulp.cachan-adm.crans.org.yml       |   3 +
 host_vars/monitoring.adm.crans.org.yml        | 111 +++++++++++-
 host_vars/omnomnom.cachan-adm.crans.org.yml   |   3 +
 host_vars/re2o-ldap.cachan-adm.crans.org.yml  |   3 +
 host_vars/re2o.cachan-adm.crans.org.yml       |   3 +
 host_vars/rodauh.cachan-adm.crans.org.yml     |   6 +
 .../cachan.yml                                |   3 +
 host_vars/terenez.cachan-adm.crans.org.yml    |   6 +
 host_vars/unifi.cachan-adm.crans.org.yml      |   3 +
 host_vars/zephir.cachan-adm.crans.org.yml     |   3 +
 hosts                                         |  14 +-
 plays/monitoring.yml                          |  92 +++-------
 roles/ninjabot/tasks/main.yml                 |   5 +
 .../templates/ninjabot/ninjabot.json.j2       |   1 +
 .../systemd/system/ninjabot.service.j2        |   2 +-
 .../prometheus-nginx-exporter/tasks/main.yml  |   2 +-
 roles/prometheus-node-exporter/tasks/main.yml |  30 +---
 .../templates/prometheus/snmp.yml.j2          |  10 +-
 roles/prometheus/tasks/main.yml               |  27 +--
 .../templates/prometheus/alert.rules.yml.j2   |  38 ++--
 .../templates/prometheus/django.rules.yml.j2  | 106 -----------
 .../templates/prometheus/prometheus.yml.j2    | 167 ++----------------
 28 files changed, 381 insertions(+), 402 deletions(-)
 create mode 100644 group_vars/all/prometheus_node_exporter.yaml
 create mode 100644 group_vars/grafana.yml
 create mode 100644 group_vars/prometheus.yml
 create mode 100644 host_vars/fyre.cachan-adm.crans.org.yml
 create mode 100644 roles/ninjabot/templates/ninjabot/ninjabot.json.j2
 delete mode 100644 roles/prometheus/templates/prometheus/django.rules.yml.j2

diff --git a/group_vars/all/prometheus_node_exporter.yaml b/group_vars/all/prometheus_node_exporter.yaml
new file mode 100644
index 00000000..39212fa9
--- /dev/null
+++ b/group_vars/all/prometheus_node_exporter.yaml
@@ -0,0 +1,3 @@
+---
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
diff --git a/group_vars/grafana.yml b/group_vars/grafana.yml
new file mode 100644
index 00000000..1cb40d06
--- /dev/null
+++ b/group_vars/grafana.yml
@@ -0,0 +1,7 @@
+---
+glob_grafana:
+  root_url: https://grafana.crans.org
+  icon: crans_icon_white.svg
+  ldap_base: "{{ glob_ldap.base }}"
+  ldap_master_ipv4: "{{ glob_ldap.servers[0] }}"
+  ldap_user_tree: "ou=passwd,{{ glob_ldap.base }}"
diff --git a/group_vars/nginx.yml b/group_vars/nginx.yml
index e2868541..8d61b273 100644
--- a/group_vars/nginx.yml
+++ b/group_vars/nginx.yml
@@ -30,3 +30,6 @@ glob_nginx:
     - "172.16.0.0/16"
     - "fd00:0:0:10::/64"
   deploy_robots_file: false
+
+glob_prometheus_nginx_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
diff --git a/group_vars/prometheus.yml b/group_vars/prometheus.yml
new file mode 100644
index 00000000..64642c8a
--- /dev/null
+++ b/group_vars/prometheus.yml
@@ -0,0 +1,13 @@
+---
+glob_prometheus: {}
+
+glob_snmp_exporter:
+  procurve_password: "{{ vault.snmp_procurve_password }}"
+  unifi_password: "{{ vault.snmp_unifi_password }}"
+
+glob_ninjabot:
+  config:
+    nick: Prometheus
+    server: irc.adm.crans.org
+    port: 6667
+    channel: "#monitoring"
diff --git a/host_vars/airbus.cachan-adm.crans.org.yml b/host_vars/airbus.cachan-adm.crans.org.yml
index b8d9e214..c8d0ef2e 100644
--- a/host_vars/airbus.cachan-adm.crans.org.yml
+++ b/host_vars/airbus.cachan-adm.crans.org.yml
@@ -18,3 +18,6 @@ loc_borg:
   remote:
     - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
   ssh_options: ""
+
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
diff --git a/host_vars/fyre.cachan-adm.crans.org.yml b/host_vars/fyre.cachan-adm.crans.org.yml
new file mode 100644
index 00000000..2f6cfaee
--- /dev/null
+++ b/host_vars/fyre.cachan-adm.crans.org.yml
@@ -0,0 +1,116 @@
+---
+interfaces:
+  adm: ens18
+
+loc_home_nounou:
+  ip: 172.17.10.9
+  mountpoint: /rpool/home
+
+loc_ldap:
+  servers:
+    - 172.17.10.9
+  base: 'dc=crans,dc=org'
+
+loc_ntp_client:
+  servers:
+    - terenez.cachan-adm.crans.org
+
+debian_mirror: http://172.17.10.202/debian
+
+loc_mirror:
+  name: mirror.cachan-adm.crans.org
+  ip: "{{ query('ldap','ip','terenez','cachan-adm') | ipv4 | first }}"
+
+loc_borg:
+  remote:
+    - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
+  ssh_options: ""
+
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
+
+glob_snmp_exporter:
+  procurve_password: "{{ vault.snmp_procurve_password }}"
+  unifi_password: "{{ vault.snmp_unifi_password }}"
+
+loc_ninjabot:
+  config:
+    nick: fyre
+    server: irc.adm.crans.org
+    port: 6667
+    channel: "#monitoring"
+
+loc_prometheus:
+  node:  # node-exporter job for every 'server' host under cachan-adm
+    file: targets_node.json  # the prometheus role writes it to /etc/prometheus/
+    targets: "{{ groups['server'] | select('match', '^.*\\.cachan-adm\\.crans\\.org$')  | list | sort }}"
+    config:
+      - job_name: servers
+        file_sd_configs:
+          - files:
+            - '/etc/prometheus/targets_node.json'  # must match 'file' above
+        relabel_configs:
+          - source_labels: [__address__]  # stash the discovered host name
+            target_label: __param_target
+          - source_labels: [__param_target]
+            target_label: instance  # instance label = host name without port
+          - source_labels: [__param_target]
+            target_label: __address__
+            replacement: '$1:9100'  # scrape <host>:9100, the node exporter port
+
+  ups_snmp:  # UPS devices, scraped through an exporter rather than directly
+    file: targets_ups_snmp.json  # the prometheus role writes it to /etc/prometheus/
+    targets:
+      - pulsar.cachan-adm.crans.org  # 0B
+      - quasar.cachan-adm.crans.org  # 4J
+    config:
+      - job_name: ups_snmp
+        file_sd_configs:
+          - files:
+            - '/etc/prometheus/targets_ups_snmp.json'  # must match 'file' above
+        metrics_path: /snmp
+        params:
+          module: [eatonups]  # NOTE(review): module not visible in this patch - confirm it exists in snmp.yml
+        relabel_configs:
+          - source_labels: [__address__]  # device name becomes the ?target= parameter
+            target_label: __param_target
+          - source_labels: [__param_target]
+            target_label: instance  # instance label = the device, not the exporter
+          - target_label: __address__
+            replacement: 127.0.0.1:9116  # the scrape itself goes to the exporter on localhost
+
+  unifi_snmp:  # hosts of the 'crans_unifi' inventory group, scraped through an exporter
+    file: targets_unifi_snmp.json  # the prometheus role writes it to /etc/prometheus/
+    targets: "{{ groups['crans_unifi'] | list | sort }}"
+    config:
+      - job_name: unifi_snmp
+        file_sd_configs:
+          - files:
+            - '/etc/prometheus/targets_unifi_snmp.json'  # must match 'file' above
+        metrics_path: /snmp
+        params:
+          module: [ubiquiti_unifi]  # module defined in the snmp exporter's snmp.yml.j2
+        relabel_configs:
+          - source_labels: [__address__]  # device name becomes the ?target= parameter
+            target_label: __param_target
+          - source_labels: [__param_target]
+            target_label: instance  # instance label = the device, not the exporter
+          - target_label: __address__
+            replacement: 127.0.0.1:9116  # the scrape itself goes to the exporter on localhost
+
+  nginx:  # nginx-exporter job for the cachan-adm web hosts
+    file: targets_nginx.json  # the prometheus role writes it to /etc/prometheus/
+    targets:
+      - rodauh.cachan-adm.crans.org
+      - terenez.cachan-adm.crans.org
+    config:
+      - job_name: nginx
+        file_sd_configs:
+          - files:
+            - '/etc/prometheus/targets_nginx.json'  # must match 'file' above
+        relabel_configs:
+          - source_labels: [__address__]
+            target_label: instance  # instance label = host name without port
+          - source_labels: [instance]
+            target_label: __address__
+            replacement: '$1:9117'  # scrape <host>:9117, the nginx exporter port
diff --git a/host_vars/gulp.cachan-adm.crans.org.yml b/host_vars/gulp.cachan-adm.crans.org.yml
index 251c4bdd..a3b8517c 100644
--- a/host_vars/gulp.cachan-adm.crans.org.yml
+++ b/host_vars/gulp.cachan-adm.crans.org.yml
@@ -25,3 +25,6 @@ loc_borg:
   to_exclude:
     - /var/lib/lxcfs
   ssh_options: ""
+
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
diff --git a/host_vars/monitoring.adm.crans.org.yml b/host_vars/monitoring.adm.crans.org.yml
index ab6e12d6..ecdb2719 100644
--- a/host_vars/monitoring.adm.crans.org.yml
+++ b/host_vars/monitoring.adm.crans.org.yml
@@ -1,4 +1,113 @@
 interfaces:
   adm: eth0
   srv_nat: eth1
-  infra: eth2
+
+loc_prometheus:
+  node:  # node-exporter job for every 'server' host under adm
+    file: targets_node.json  # the prometheus role writes it to /etc/prometheus/
+    targets: "{{ groups['server'] | select('match', '^.*\\.adm\\.crans\\.org$')  | list | sort }}"
+    config:
+      - job_name: servers
+        file_sd_configs:
+          - files:
+            - '/etc/prometheus/targets_node.json'  # must match 'file' above
+        relabel_configs:
+          - source_labels: [__address__]  # stash the discovered host name
+            target_label: __param_target
+          - source_labels: [__param_target]
+            target_label: instance  # instance label = host name without port
+          - source_labels: [__param_target]
+            target_label: __address__
+            replacement: '$1:9100'  # scrape <host>:9100, the node exporter port
+
+  nginx:  # nginx-exporter job for the adm web hosts
+    file: targets_nginx.json  # the prometheus role writes it to /etc/prometheus/
+    targets:
+      - hodaur.adm.crans.org
+      - charybde.adm.crans.org
+    config:
+      - job_name: nginx
+        file_sd_configs:
+          - files:
+            - '/etc/prometheus/targets_nginx.json'  # must match 'file' above
+        relabel_configs:
+          - source_labels: [__address__]
+            target_label: instance  # instance label = host name without port
+          - source_labels: [instance]
+            target_label: __address__
+            replacement: '$1:9117'  # scrape <host>:9117, the nginx exporter port
+
+  blackbox:  # HTTP(S) probes of public services, run through an exporter
+    file: targets_blackbox.json  # the prometheus role writes it to /etc/prometheus/
+    targets:
+      - https://crans.org/
+      - https://www.crans.org/
+      - https://webirc.crans.org/
+      - https://jitsi.crans.org/
+      - https://ftps.crans.org/
+      - http://ftp.crans.org/
+      - https://grafana.crans.org/
+      - https://roundcube.crans.org/
+      - https://zero.crans.org/
+      - https://wiki.crans.org/PageAccueil
+      - https://framadate.crans.org/
+      - https://pad.crans.org/
+      - https://lists.crans.org/
+      - https://cas.crans.org/
+      - https://ethercalc.crans.org/
+      - https://phabricator.crans.org/
+      - https://webmail.crans.org/horde/login.php
+      - https://gitlab.crans.org/
+      - https://perso.crans.org/crans/
+      - https://install-party.crans.org/
+      - https://intranet.crans.org/
+      - https://owncloud.crans.org/
+    config:
+      - job_name: blackbox
+        file_sd_configs:
+          - files:
+            - '/etc/prometheus/targets_blackbox.json'  # must match 'file' above
+        metrics_path: /probe
+        params:
+          module: [http_2xx]  # HTTP probe; a 2xx status is expected (confirm in blackbox.yml)
+        relabel_configs:
+          - source_labels: [__address__]  # probed URL becomes the ?target= parameter
+            target_label: __param_target
+          - source_labels: [__param_target]
+            target_label: instance  # instance label = the probed URL, not the exporter
+          - target_label: __address__
+            replacement: 127.0.0.1:9115  # the scrape itself goes to the exporter on localhost
+
+  mtail:  # mtail metrics job
+    file: targets_mtail.json  # the prometheus role writes it to /etc/prometheus/
+    targets:
+      - tealc.adm.crans.org
+    config:
+      - job_name: mtail
+        file_sd_configs:  # read the generated file so 'targets' above is the only host list
+          - files: ['/etc/prometheus/targets_mtail.json']
+        relabel_configs:
+          - source_labels: [__address__]
+            target_label: instance  # instance label = host name without port
+          - source_labels: [instance]
+            target_label: __address__
+            replacement: '$1:3903'  # scrape <host>:3903
+
+
+#  apache:
+#    targets:
+#    config:
+#      - job_name: apache
+#        file_sd_configs:
+#          - files:
+#            - '/etc/prometheus/targets_apache.json'
+#        relabel_configs:
+#          - source_labels: [__address__]
+#            target_label: instance
+#          - source_labels: [instance]
+#            target_label: __address__
+#            replacement: '$1:9117'
+
+#  bird_targets:
+#    - routeur-sam.adm.crans.org
+
diff --git a/host_vars/omnomnom.cachan-adm.crans.org.yml b/host_vars/omnomnom.cachan-adm.crans.org.yml
index d99be4be..3685560f 100644
--- a/host_vars/omnomnom.cachan-adm.crans.org.yml
+++ b/host_vars/omnomnom.cachan-adm.crans.org.yml
@@ -25,3 +25,6 @@ loc_borg:
   remote:
     - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
   ssh_options: ""
+
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
diff --git a/host_vars/re2o-ldap.cachan-adm.crans.org.yml b/host_vars/re2o-ldap.cachan-adm.crans.org.yml
index b8d9e214..c8d0ef2e 100644
--- a/host_vars/re2o-ldap.cachan-adm.crans.org.yml
+++ b/host_vars/re2o-ldap.cachan-adm.crans.org.yml
@@ -18,3 +18,6 @@ loc_borg:
   remote:
     - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
   ssh_options: ""
+
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
diff --git a/host_vars/re2o.cachan-adm.crans.org.yml b/host_vars/re2o.cachan-adm.crans.org.yml
index 279c4f4d..c872b865 100644
--- a/host_vars/re2o.cachan-adm.crans.org.yml
+++ b/host_vars/re2o.cachan-adm.crans.org.yml
@@ -14,6 +14,9 @@ glob_ntp_client:
 
 debian_mirror: http://172.17.10.202/debian
 
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
+
 loc_borg:
   remote:
     - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
diff --git a/host_vars/rodauh.cachan-adm.crans.org.yml b/host_vars/rodauh.cachan-adm.crans.org.yml
index 37b60a76..75e6a304 100644
--- a/host_vars/rodauh.cachan-adm.crans.org.yml
+++ b/host_vars/rodauh.cachan-adm.crans.org.yml
@@ -37,3 +37,9 @@ loc_borg:
   remote:
     - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
   ssh_options: ""
+
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
+
+glob_prometheus_nginx_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
diff --git a/host_vars/routeur-gulp.cachan-adm.crans.org/cachan.yml b/host_vars/routeur-gulp.cachan-adm.crans.org/cachan.yml
index 4144ad09..20e5538c 100644
--- a/host_vars/routeur-gulp.cachan-adm.crans.org/cachan.yml
+++ b/host_vars/routeur-gulp.cachan-adm.crans.org/cachan.yml
@@ -22,3 +22,6 @@ loc_borg:
   remote:
     - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
   ssh_options: ""
+
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
diff --git a/host_vars/terenez.cachan-adm.crans.org.yml b/host_vars/terenez.cachan-adm.crans.org.yml
index b8d9e214..46732a20 100644
--- a/host_vars/terenez.cachan-adm.crans.org.yml
+++ b/host_vars/terenez.cachan-adm.crans.org.yml
@@ -18,3 +18,9 @@ loc_borg:
   remote:
     - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
   ssh_options: ""
+
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
+
+glob_prometheus_nginx_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
diff --git a/host_vars/unifi.cachan-adm.crans.org.yml b/host_vars/unifi.cachan-adm.crans.org.yml
index 55bd2d2f..75dc7ea5 100644
--- a/host_vars/unifi.cachan-adm.crans.org.yml
+++ b/host_vars/unifi.cachan-adm.crans.org.yml
@@ -23,3 +23,6 @@ loc_borg:
   remote:
     - borg@zephir.cachan-adm.crans.org:/backup/borg/{{ ansible_hostname }}
   ssh_options: ""
+
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
diff --git a/host_vars/zephir.cachan-adm.crans.org.yml b/host_vars/zephir.cachan-adm.crans.org.yml
index 0bf66658..f30a1b16 100644
--- a/host_vars/zephir.cachan-adm.crans.org.yml
+++ b/host_vars/zephir.cachan-adm.crans.org.yml
@@ -27,3 +27,6 @@ loc_borg:
   ssh_options: ""
   to_exclude:
     - /var/lib/backuppc
+
+glob_prometheus_node_exporter:
+  listen_addr: "{{ query('ldap', 'ip', ansible_hostname, 'cachan-adm') | ipv4 | first }}"
diff --git a/hosts b/hosts
index 30746559..df51e245 100644
--- a/hosts
+++ b/hosts
@@ -18,6 +18,9 @@ tealc.adm.crans.org
 tealc.adm.crans.org
 gulp.cachan-adm.crans.org
 
+[blackbox]
+monitoring.adm.crans.org
+
 [bdd:children]
 virtu
 
@@ -81,6 +84,9 @@ neree.adm.crans.org
 [gitlab]
 gitzly.adm.crans.org
 
+[grafana]
+monitoring.adm.crans.org
+
 [horde]
 horde.adm.crans.org
 
@@ -99,17 +105,16 @@ linx.adm.crans.org
 [mailman]
 mailman.adm.crans.org
 
-[monitoring]
+[prometheus]
 monitoring.adm.crans.org
-
-[nginx]
-charybde.adm.crans.org
+fyre.cachan-adm.crans.org
 
 [nginx:children]
 django_cas
 galene
 jitsi
 mailman
+ntp_server
 re2o_front
 reverseproxy
 roundcube
@@ -212,6 +217,7 @@ cas.adm.crans.org
 codichotomie.adm.crans.org
 ethercalc.adm.crans.org
 fluxx.adm.crans.org
+fyre.cachan-adm.crans.org
 gitlab-ci.adm.crans.org
 gitzly.adm.crans.org
 hodaur.adm.crans.org
diff --git a/plays/monitoring.yml b/plays/monitoring.yml
index 0685ef51..70a418a3 100755
--- a/plays/monitoring.yml
+++ b/plays/monitoring.yml
@@ -1,81 +1,43 @@
 #!/usr/bin/env ansible-playbook
 ---
-# Deploy Prometheus and Grafana on monitoring server
-- hosts: monitoring
-  vars:
-    # Prometheus targets.json
-    prometheus:
-      node_targets: "{{ groups['server'] | list | sort }}"
-      ups_snmp_targets:
-        - pulsar.adm.crans.org  # 0B
-        - quasar.adm.crans.org  # 4J
-      procurve_snmp_targets:
-        - batg-9.infra.crans.org
-      unifi_snmp_targets: "{{ groups['crans_unifi'] | list | sort }}"
-      blackbox_targets:
-        - https://crans.org/
-        - https://www.crans.org/
-        - https://webirc.crans.org/
-        - https://jitsi.crans.org/
-        - https://ftps.crans.org/
-        - http://ftp.crans.org/
-        - https://grafana.crans.org/
-        - https://roundcube.crans.org/
-        - https://zero.crans.org/
-        - https://wiki.crans.org/PageAccueil
-        - https://framadate.crans.org/
-        - https://pad.crans.org/
-        - https://lists.crans.org/
-        - https://cas.crans.org/
-        - https://ethercalc.crans.org/
-        - https://phabricator.crans.org/
-        - https://webmail.crans.org/horde/login.php
-        - https://gitlab.crans.org/
-        - https://perso.crans.org/crans/
-        - https://install-party.crans.org/
-        - https://intranet.crans.org/
-        - https://owncloud.crans.org/
-      nginx_targets:
-        - hodaur.adm.crans.org
-        - charybde.adm.crans.org
-      apache_targets: []  # [zamok.adm.crans.org]
-      bird_targets:
-        - routeur-sam.adm.crans.org
-
-    snmp_procurve_password: "{{ vault.snmp_procurve_password }}"
-    snmp_unifi_password: "{{ vault.snmp_unifi_password }}"
 
-    grafana:
-      root_url: https://grafana.crans.org
-      icon: crans_icon_white.svg
-      ldap_base: "{{ glob_ldap.base }}"
-      ldap_master_ipv4: "{{ glob_ldap.servers[0] }}"
-      ldap_user_tree: "ou=passwd,{{ glob_ldap.base }}"
+# Deploy Prometheus on monitoring server
+- hosts: prometheus
+  vars:
+    prometheus: "{{ glob_prometheus | default({}) | combine(loc_prometheus | default({})) }}"
+    alertmanager: "{{ glob_alertmanager | default({}) | combine(loc_alertmanager | default({})) }}"
+    snmp_exporter: "{{ glob_snmp_exporter | default({}) | combine(loc_snmp_exporter | default({})) }}"
+    ninjabot: "{{ glob_ninjabot | default({}) | combine(loc_ninjabot | default({})) }}"
   roles:
     - prometheus
     - prometheus-alertmanager
     - prometheus-snmp-exporter
-    - prometheus-blackbox-exporter
     - ninjabot
-    - grafana
+
+# # Deploy Grafana on monitoring server
+# - hosts: grafana
+#   vars:
+#     grafana: "{{ glob_grafana | default({}) | combine(loc_grafana | default({})) }}"
+#   roles:
+#     - grafana
+
+- hosts: blackbox
+  roles:
+    - prometheus-blackbox-exporter
 
 # Monitor all hosts
 - hosts: server
   vars:
-    adm_ipv4: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
-  roles: ["prometheus-node-exporter"]
+    prometheus_node_exporter: "{{ glob_prometheus_node_exporter | default({}) | combine(loc_prometheus_node_exporter | default({})) }}"
+  roles:
+    - prometheus-node-exporter
 
 # Export nginx metrics
 - hosts: nginx
   vars:
-    adm_ipv4: "{{ query('ldap', 'ip', ansible_hostname, 'adm') | ipv4 | first }}"
-  roles: ["prometheus-nginx-exporter"]
-
-# Export apache metrics
-#- hosts: zamok.adm.crans.org
-#  vars:
-#    adm_ipv4: "{{ ansible_all_ipv4_addresses | ipaddr(adm_subnet) | first }}"
-#  roles: ["prometheus-apache-exporter"]
+    prometheus_nginx_exporter: "{{ glob_prometheus_nginx_exporter | default({}) | combine(loc_prometheus_nginx_exporter | default({})) }}"
+  roles:
+    - prometheus-nginx-exporter
 
 # Monitor mailq with a special text exporter
 #- hosts: redisdead.adm.crans.org
@@ -85,7 +47,9 @@
 - hosts: tealc.adm.crans.org
   vars:
     mirror: '{{ glob_mirror | default({}) | combine(loc_mirror | default({})) }}'
-  roles: ["mtail"]
+  roles:
+    - mtail
 
 - hosts: sputnik.adm.crans.org
-  roles: ["statping"]
+  roles:
+    - statping
diff --git a/roles/ninjabot/tasks/main.yml b/roles/ninjabot/tasks/main.yml
index 98bb4f7a..34d0184b 100644
--- a/roles/ninjabot/tasks/main.yml
+++ b/roles/ninjabot/tasks/main.yml
@@ -17,6 +17,11 @@
     dest: /var/local/ninjabot
     version: master
 
+- name: Deploy NinjaBot configuration
+  template:
+    src: ninjabot/ninjabot.json.j2
+    dest: /var/local/ninjabot/ninjabot.json
+
 - name: Deploy NinjaBot systemd unit
   template:
     src: systemd/system/ninjabot.service.j2
diff --git a/roles/ninjabot/templates/ninjabot/ninjabot.json.j2 b/roles/ninjabot/templates/ninjabot/ninjabot.json.j2
new file mode 100644
index 00000000..d0296ae2
--- /dev/null
+++ b/roles/ninjabot/templates/ninjabot/ninjabot.json.j2
@@ -0,0 +1 @@
+{{ ninjabot.config | to_nice_json(indent=2) }}
diff --git a/roles/ninjabot/templates/systemd/system/ninjabot.service.j2 b/roles/ninjabot/templates/systemd/system/ninjabot.service.j2
index 8c88045b..526a2962 100644
--- a/roles/ninjabot/templates/systemd/system/ninjabot.service.j2
+++ b/roles/ninjabot/templates/systemd/system/ninjabot.service.j2
@@ -8,7 +8,7 @@ Type=simple
 WorkingDirectory=/var/local/ninjabot
 User=nobody
 Group=nogroup
-ExecStart=/usr/bin/python3 /var/local/ninjabot/main.py
+ExecStart=/usr/bin/python3 /var/local/ninjabot/ninjabot.py
 Restart=always
 
 [Install]
diff --git a/roles/prometheus-nginx-exporter/tasks/main.yml b/roles/prometheus-nginx-exporter/tasks/main.yml
index c7e8f32c..0fb8001d 100644
--- a/roles/prometheus-nginx-exporter/tasks/main.yml
+++ b/roles/prometheus-nginx-exporter/tasks/main.yml
@@ -14,7 +14,7 @@
     path: /etc/default/prometheus-nginx-exporter
     regexp: '^ARGS='
     line: |
-      ARGS="-web.listen-address={{ adm_ipv4 }}:9117 -nginx.scrape-uri=http://[::1]:6424/stub_status"
+      ARGS="-web.listen-address={{ prometheus_nginx_exporter.listen_addr }}:9117 -nginx.scrape-uri=http://[::1]:6424/stub_status"
   notify:
     - Restart nginx
     - Restart prometheus-nginx-exporter
diff --git a/roles/prometheus-node-exporter/tasks/main.yml b/roles/prometheus-node-exporter/tasks/main.yml
index d8f2a1c2..a91d9d9e 100644
--- a/roles/prometheus-node-exporter/tasks/main.yml
+++ b/roles/prometheus-node-exporter/tasks/main.yml
@@ -7,21 +7,6 @@
   register: apt_result
   retries: 3
   until: apt_result is succeeded
-  when:
-    - ansible_lsb.codename != 'stretch'
-
-# Prometheus 2 node is in stretch-backports
-- name: Install Prometheus node-exporter (stretch-backports)
-  apt:
-    update_cache: true
-    name: prometheus-node-exporter
-    install_recommends: false
-    default_release: stretch-backports
-  register: apt_result
-  retries: 3
-  until: apt_result is succeeded
-  when:
-    - ansible_lsb.codename == 'stretch'
 
 - name: Install Prometheus node-exporter-collectors (bullseye)
   apt:
@@ -45,7 +30,7 @@
     path: /etc/default/prometheus-node-exporter
     regexp: '^ARGS='
     line: |
-      ARGS="--web.listen-address={{ adm_ipv4 }}:9100"
+      ARGS="--web.listen-address={{ prometheus_node_exporter.listen_addr }}:9100"
   tags: restart-node-exporter
 
 # Install new APT textfile collector, it might be upstreamed one day
@@ -57,15 +42,4 @@
     owner: root
     group: root
     mode: 0755
-  when: ansible_lsb.id == 'Debian' and ansible_distribution_release != "bullseye"
-
-# Install new APT textfile collector, it might be upstreamed one day
-# https://github.com/prometheus-community/node-exporter-textfile-collector-scripts/pull/35
-- name: Patch APT textfile collector
-  copy:
-    src: apt.sh
-    dest: /usr/share/prometheus-node-exporter-collectors/apt.sh
-    owner: root
-    group: root
-    mode: 0755
-  when: ansible_lsb.id == 'Ubuntu' or ansible_distribution_release == "bullseye"
+  when: ansible_distribution_release != "bullseye"
diff --git a/roles/prometheus-snmp-exporter/templates/prometheus/snmp.yml.j2 b/roles/prometheus-snmp-exporter/templates/prometheus/snmp.yml.j2
index aa30bc43..fb946b74 100644
--- a/roles/prometheus-snmp-exporter/templates/prometheus/snmp.yml.j2
+++ b/roles/prometheus-snmp-exporter/templates/prometheus/snmp.yml.j2
@@ -113,14 +113,14 @@ procurve_switch:
   version: 3
   auth:
     # To create SNMPv3 user on HP procurve, execute:
-    # snmpv3 user snmp_prometheus auth sha {{ snmp_procurve_password }} priv aes {{ snmp_procurve_password }}
+    # snmpv3 user snmp_prometheus auth sha {{ snmp_exporter.procurve_password }} priv aes {{ snmp_exporter.procurve_password }}
     # snmpv3 group managerpriv user snmp_prometheus sec-model ver3
     security_level: authPriv
     username: snmp_prometheus
-    password: {{ snmp_procurve_password }}
+    password: {{ snmp_exporter.procurve_password | to_json }}{# to_json emits a quoted scalar, so '#', ': ' etc. in the secret cannot break the YAML #}
     auth_protocol: SHA
     priv_protocol: AES
-    priv_password: {{ snmp_procurve_password }}
+    priv_password: {{ snmp_exporter.procurve_password | to_json }}{# quoted for the same reason as 'password' #}
 
 ubiquiti_unifi:
   walk:
@@ -475,7 +475,7 @@ ubiquiti_unifi:
   auth:
     security_level: authPriv
     username: snmp_prometheus
-    password: {{ snmp_unifi_password }}
+    password: {{ snmp_exporter.unifi_password | to_json }}{# to_json emits a quoted scalar, so '#', ': ' etc. in the secret cannot break the YAML #}
     auth_protocol: SHA
     priv_protocol: AES
-    priv_password: {{ snmp_unifi_password }}
+    priv_password: {{ snmp_exporter.unifi_password | to_json }}{# quoted for the same reason as 'password' #}
diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
index 72136715..2a9f54fe 100644
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -16,35 +16,18 @@
 
 - name: Configure Prometheus alert rules
   template:
-    src: "prometheus/{{ item }}.j2"
-    dest: "/etc/prometheus/{{ item }}"
+    src: prometheus/alert.rules.yml.j2
+    dest: /etc/prometheus/alert.rules.yml
     mode: 0644
   notify: Restart Prometheus
-  loop:
-    - alert.rules.yml
-    - django.rules.yml
 
 # We don't need to restart Prometheus when updating nodes
 - name: Configure Prometheus targets
   copy:
-    content: "{{ [{'targets': item.targets}] | to_nice_json }}\n"
-    dest: "/etc/prometheus/{{ item.file }}.json"
+    content: "{{ [{'targets': item.value.targets}] | to_nice_json }}\n"
+    dest: "/etc/prometheus/{{ item.value.file }}"
     mode: 0644
-  loop:
-    - file: targets
-      targets: "{{ prometheus.node_targets }}"
-    - file: targets_ups_snmp
-      targets: "{{ prometheus.ups_snmp_targets }}"
-    - file: targets_procurve_snmp
-      targets: "{{ prometheus.procurve_snmp_targets }}"
-    - file: targets_unifi_snmp
-      targets: "{{ prometheus.unifi_snmp_targets }}"
-    - file: targets_nginx
-      targets: "{{ prometheus.nginx_targets }}"
-    - file: targets_apache
-      targets: "{{ prometheus.apache_targets }}"
-    - file: targets_blackbox
-      targets: "{{ prometheus.blackbox_targets }}"
+  loop: "{{ prometheus | dict2items }}"
 
 - name: Activate prometheus service
   systemd:
diff --git a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
index d5017c6f..afc37b65 100644
--- a/roles/prometheus/templates/prometheus/alert.rules.yml.j2
+++ b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
@@ -144,7 +144,7 @@ groups:
       description: "https://grafana.crans.org/d/qtbg59mZz/alimentation"
 
   - alert: UpsWrongOutputVoltage
-    expr: (upsOutputVoltage < 225) or (upsOutputVoltage > 235)
+    expr: (upsOutputVoltage < 215) or (upsOutputVoltage > 245)
     for: 5m
     labels:
       severity: warning
@@ -161,29 +161,27 @@ groups:
       summary: "{{ $value }} paquet(s) APT sont inutile(s) sur {{ $labels.instance }}."
 
   - alert: MailqNotEmpty
-    expr: postfix_mailq_length > 5
+    expr: postfix_mailq_length > 25
     for: 1m
     labels:
       severity: warning
     annotations:
       summary: "{{ $value }} mails dans la mailq sur {{ $labels.instance }}."
 
-  # NTP (need NTP plugin in node)
-#  - alert: ntp_drifting
-#    expr: node_ntp_drift_seconds > 0.05
-#    for: 3m
-#    labels:
-#      severity: critical
-#    annotations:
-#      summary: "Décalage NTP trop élevé sur {{ $labels.instance }}"
-#      description: "Le décalage NTP est trop élevé ({{ $value }} > 0.05)"
-
-#  - alert: ntp_drifting
-#    expr: node_ntp_drift_seconds > 0.01
-#    for: 1m
-#    labels:
-#      severity: warning
-#    annotations:
-#      summary: "Décalage NTP élevé sur {{ $labels.instance }}"
-#      description: "Le décalage NTP est élevé ({{ $value }} > 0.01)"
+  - alert: NoRadiusLogin
+    expr: rate(radiusd_access_ok[3m]) == 0
+    for: 2m
+    labels:
+      severity: warning
+    annotations:
+      summary: "Personne ne vient taper le RADIUS."
+
+  - alert: TooManyReallocatedSectors
+    expr: smartmon_reallocated_sector_ct_raw_value > 1e3
+    for: 5m
+    labels:
+      severity: warning
+    annotations:
+      summary: "{{ $labels.disk }} sur {{ $labels.instance }} a {{ $value }} secteurs réalloués."
+
 {% endraw %}
diff --git a/roles/prometheus/templates/prometheus/django.rules.yml.j2 b/roles/prometheus/templates/prometheus/django.rules.yml.j2
deleted file mode 100644
index 8131a71f..00000000
--- a/roles/prometheus/templates/prometheus/django.rules.yml.j2
+++ /dev/null
@@ -1,106 +0,0 @@
-{{ ansible_header | comment }}
-{# As this is also Jinja2 it will conflict without a raw block #}
-{% raw %}
-groups:
-- name: django.rules
-  rules:
-  - record: job:django_http_requests_before_middlewares_total:sum_rate30s
-    expr: sum(rate(django_http_requests_before_middlewares_total[30s])) BY (job)
-  - record: job:django_http_requests_unknown_latency_total:sum_rate30s
-    expr: sum(rate(django_http_requests_unknown_latency_total[30s])) BY (job)
-  - record: job:django_http_ajax_requests_total:sum_rate30s
-    expr: sum(rate(django_http_ajax_requests_total[30s])) BY (job)
-  - record: job:django_http_responses_before_middlewares_total:sum_rate30s
-    expr: sum(rate(django_http_responses_before_middlewares_total[30s])) BY (job)
-  - record: job:django_http_requests_unknown_latency_including_middlewares_total:sum_rate30s
-    expr: sum(rate(django_http_requests_unknown_latency_including_middlewares_total[30s]))
-      BY (job)
-  - record: job:django_http_requests_body_total_bytes:sum_rate30s
-    expr: sum(rate(django_http_requests_body_total_bytes[30s])) BY (job)
-  - record: job:django_http_responses_streaming_total:sum_rate30s
-    expr: sum(rate(django_http_responses_streaming_total[30s])) BY (job)
-  - record: job:django_http_responses_body_total_bytes:sum_rate30s
-    expr: sum(rate(django_http_responses_body_total_bytes[30s])) BY (job)
-  - record: job:django_http_requests_total:sum_rate30s
-    expr: sum(rate(django_http_requests_total_by_method[30s])) BY (job)
-  - record: job:django_http_requests_total_by_method:sum_rate30s
-    expr: sum(rate(django_http_requests_total_by_method[30s])) BY (job, method)
-  - record: job:django_http_requests_total_by_transport:sum_rate30s
-    expr: sum(rate(django_http_requests_total_by_transport[30s])) BY (job, transport)
-  - record: job:django_http_requests_total_by_view:sum_rate30s
-    expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) BY (job,
-      view)
-  - record: job:django_http_requests_total_by_view_transport_method:sum_rate30s
-    expr: sum(rate(django_http_requests_total_by_view_transport_method[30s])) BY (job,
-      view, transport, method)
-  - record: job:django_http_responses_total_by_templatename:sum_rate30s
-    expr: sum(rate(django_http_responses_total_by_templatename[30s])) BY (job, templatename)
-  - record: job:django_http_responses_total_by_status:sum_rate30s
-    expr: sum(rate(django_http_responses_total_by_status[30s])) BY (job, status)
-  - record: job:django_http_responses_total_by_charset:sum_rate30s
-    expr: sum(rate(django_http_responses_total_by_charset[30s])) BY (job, charset)
-  - record: job:django_http_exceptions_total_by_type:sum_rate30s
-    expr: sum(rate(django_http_exceptions_total_by_type[30s])) BY (job, type)
-  - record: job:django_http_exceptions_total_by_view:sum_rate30s
-    expr: sum(rate(django_http_exceptions_total_by_view[30s])) BY (job, view)
-  - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
-    expr: histogram_quantile(0.5, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
-      BY (job, le))
-    labels:
-      quantile: "50"
-  - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
-    expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
-      BY (job, le))
-    labels:
-      quantile: "95"
-  - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
-    expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
-      BY (job, le))
-    labels:
-      quantile: "99"
-  - record: job:django_http_requests_latency_including_middlewares_seconds:quantile_rate30s
-    expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_including_middlewares_seconds_bucket[30s]))
-      BY (job, le))
-    labels:
-      quantile: "99.9"
-  - record: job:django_http_requests_latency_seconds:quantile_rate30s
-    expr: histogram_quantile(0.5, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
-      BY (job, le))
-    labels:
-      quantile: "50"
-  - record: job:django_http_requests_latency_seconds:quantile_rate30s
-    expr: histogram_quantile(0.95, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
-      BY (job, le))
-    labels:
-      quantile: "95"
-  - record: job:django_http_requests_latency_seconds:quantile_rate30s
-    expr: histogram_quantile(0.99, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
-      BY (job, le))
-    labels:
-      quantile: "99"
-  - record: job:django_http_requests_latency_seconds:quantile_rate30s
-    expr: histogram_quantile(0.999, sum(rate(django_http_requests_latency_seconds_bucket[30s]))
-      BY (job, le))
-    labels:
-      quantile: "99.9"
-  - record: job:django_model_inserts_total:sum_rate1m
-    expr: sum(rate(django_model_inserts_total[1m])) BY (job, model)
-  - record: job:django_model_updates_total:sum_rate1m
-    expr: sum(rate(django_model_updates_total[1m])) BY (job, model)
-  - record: job:django_model_deletes_total:sum_rate1m
-    expr: sum(rate(django_model_deletes_total[1m])) BY (job, model)
-  - record: job:django_db_new_connections_total:sum_rate30s
-    expr: sum(rate(django_db_new_connections_total[30s])) BY (alias, vendor)
-  - record: job:django_db_new_connection_errors_total:sum_rate30s
-    expr: sum(rate(django_db_new_connection_errors_total[30s])) BY (alias, vendor)
-  - record: job:django_db_execute_total:sum_rate30s
-    expr: sum(rate(django_db_execute_total[30s])) BY (alias, vendor)
-  - record: job:django_db_execute_many_total:sum_rate30s
-    expr: sum(rate(django_db_execute_many_total[30s])) BY (alias, vendor)
-  - record: job:django_db_errors_total:sum_rate30s
-    expr: sum(rate(django_db_errors_total[30s])) BY (alias, vendor, type)
-  - record: job:django_migrations_applied_total:max
-    expr: max(django_migrations_applied_total) BY (job, connection)
-  - record: job:django_migrations_unapplied_total:max
-    expr: max(django_migrations_unapplied_total) BY (job, connection)
-{% endraw %}
diff --git a/roles/prometheus/templates/prometheus/prometheus.yml.j2 b/roles/prometheus/templates/prometheus/prometheus.yml.j2
index 5877a182..daa136c4 100644
--- a/roles/prometheus/templates/prometheus/prometheus.yml.j2
+++ b/roles/prometheus/templates/prometheus/prometheus.yml.j2
@@ -20,156 +20,23 @@ alerting:
 # Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
 rule_files:
   - "alert.rules.yml"  # Monitoring alerts, this is the file you may be searching!
-  - "django.rules.yml"  # Custom rules specific for Django project monitoring
 
 # A scrape configuration containing exactly one endpoint to scrape:
 # Here it's Prometheus itself.
-scrape_configs:
-  # The .json in file_sd_configs is dynamically reloaded
-
-  - job_name: prometheus
-    static_configs:
-      - targets:
-        - localhost:9090
-
-  - job_name: servers
-    file_sd_configs:
-      - files:
-        - '/etc/prometheus/targets.json'
-    relabel_configs:
-      # Do not put :9100 in instance name, rather here
-      - source_labels: [__address__]
-        target_label: __param_target
-      - source_labels: [__param_target]
-        target_label: instance
-      - source_labels: [__param_target]
-        target_label: __address__
-        replacement: '$1:9100'
-
-{% if prometheus.ups_snmp_targets is defined %}
-  - job_name: ups_snmp
-    file_sd_configs:
-      - files:
-        - '/etc/prometheus/targets_ups_snmp.json'
-    metrics_path: /snmp
-    params:
-      module: [eatonups]
-    relabel_configs:
-      - source_labels: [__address__]
-        target_label: __param_target
-      - source_labels: [__param_target]
-        target_label: instance
-      - target_label: __address__
-        replacement: 127.0.0.1:9116
-{% endif %}
-
-{% if prometheus.procurve_snmp_targets is defined %}
-  - job_name: procurve_snmp
-    file_sd_configs:
-      - files:
-        - '/etc/prometheus/targets_procurve_snmp.json'
-    metrics_path: /snmp
-    params:
-      module: [procurve_switch]
-    relabel_configs:
-      - source_labels: [__address__]
-        target_label: __param_target
-      - source_labels: [__param_target]
-        target_label: instance
-      - target_label: __address__
-        replacement: 127.0.0.1:9116
-{% endif %}
-
-{% if prometheus.unifi_snmp_targets is defined %}
-  - job_name: unifi_snmp
-    file_sd_configs:
-      - files:
-        - '/etc/prometheus/targets_unifi_snmp.json'
-    metrics_path: /snmp
-    params:
-      module: [ubiquiti_unifi]
-    relabel_configs:
-      - source_labels: [__address__]
-        target_label: __param_target
-      - source_labels: [__param_target]
-        target_label: instance
-      - target_label: __address__
-        replacement: 127.0.0.1:9116
-{% endif %}
-
-{% if prometheus.nginx_targets is defined %}
-  - job_name: nginx
-    file_sd_configs:
-      - files:
-        - '/etc/prometheus/targets_nginx.json'
-    relabel_configs:
-      # Do not put :9117 in instance name, rather here
-      - source_labels: [__address__]
-        target_label: instance
-      - source_labels: [instance]
-        target_label: __address__
-        replacement: '$1:9117'
-{% endif %}
-
-{% if prometheus.apache_targets is defined %}
-  - job_name: apache
-    file_sd_configs:
-      - files:
-        - '/etc/prometheus/targets_apache.json'
-    relabel_configs:
-      # Do not put :9117 in instance name, rather here
-      - source_labels: [__address__]
-        target_label: instance
-      - source_labels: [instance]
-        target_label: __address__
-        replacement: '$1:9117'
-{% endif %}
-
-{% if prometheus.blackbox_targets is defined %}
-  - job_name: blackbox
-    file_sd_configs:
-      - files:
-        - '/etc/prometheus/targets_blackbox.json'
-    metrics_path: /probe
-    params:
-      module: [http_2xx]  # Look for a HTTP 200 response.
-    relabel_configs:
-      - source_labels: [__address__]
-        target_label: __param_target
-      - source_labels: [__param_target]
-        target_label: instance
-      - target_label: __address__
-        replacement: 127.0.0.1:9115
-{% endif %}
-
-  - job_name: mtail
-    static_configs:
-      - targets: ["tealc.adm.crans.org"]
-    relabel_configs:
-      # Do not put :3903 in instance name, rather here
-      - source_labels: [__address__]
-        target_label: instance
-      - source_labels: [instance]
-        target_label: __address__
-        replacement: '$1:3903'
-
-{% if prometheus.bird_targets is defined %}
-  - job_name: bird
-    file_sd_configs:
-      - files:
-        - '/etc/prometheus/targets_bird.json'
-    relabel_configs:
-      # Do not put :3903 in instance name, rather here
-      - source_labels: [__address__]
-        target_label: instance
-      - source_labels: [instance]
-        target_label: __address__
-        replacement: '$1:9324'
-{% endif %}
-
-  - job_name: django
-    scheme: https
-    static_configs:
-      - targets: []
-# Activate this line when the captive portal need monitoring
-#      - targets: ["portail-captif.crans.org:443"]
+{# scrape_configs = self-scrape job below + config[0] of every value in `prometheus` (JMESPath "*.config[0]"), dumped as YAML; assumes `prometheus` is a mapping whose values hold a `config` list - TODO confirm in group/host vars #}{{
+  {
+    "scrape_configs":
+    [
+      {
+        "job_name": "prometheus",
+        "static_configs" : [
+          {
+            "targets": [
+               "localhost:9090"
+            ]
+          }
+        ]
+      }
+    ] + (prometheus | json_query("*.config[0]"))
+  } | to_nice_yaml(indent=2)
+}}
-- 
GitLab