From f3c38819ef7d8753ea9f1729deff506c964de08e Mon Sep 17 00:00:00 2001
From: Alexandre Iooss <erdnaxe@crans.org>
Date: Wed, 13 Jan 2021 16:37:48 +0100
Subject: [PATCH] Prometheus on bullseye

---
 plays/monitoring.yml                          |  11 +-
 roles/mtail/templates/mtail/radiusd.mtail.j2  |   6 +-
 .../templates/prometheus/alertmanager.yml.j2  |   4 -
 roles/prometheus-node-exporter/tasks/main.yml |  14 +-
 .../default/prometheus-node-exporter.j2       | 130 ------------------
 .../templates/prometheus/snmp.yml.j2          |  14 +-
 roles/prometheus/tasks/main.yml               |  60 +++-----
 .../templates/prometheus/alert.rules.yml.j2   |   2 +-
 .../templates/prometheus/prometheus.yml.j2    |  38 ++++-
 9 files changed, 83 insertions(+), 196 deletions(-)
 delete mode 100644 roles/prometheus-node-exporter/templates/default/prometheus-node-exporter.j2

diff --git a/plays/monitoring.yml b/plays/monitoring.yml
index d813a866..a046fb6b 100755
--- a/plays/monitoring.yml
+++ b/plays/monitoring.yml
@@ -6,9 +6,11 @@
     # Prometheus targets.json
     prometheus:
       node_targets: "{{ groups['server'] | list | sort }}"
-      ups_snmp_targets: []
-      #  - pulsar.adm.crans.org  # 0B
-      #  - quasar.adm.crans.org  # 4J
+      ups_snmp_targets:
+        - pulsar.adm.crans.org  # 0B
+        - quasar.adm.crans.org  # 4J
+      procurve_snmp_targets:
+        - batg-9.infra.crans.org
       unifi_snmp_targets: "{{ groups['crans_unifi'] | list | sort }}"
       blackbox_targets:
         - https://crans.org/
@@ -37,7 +39,10 @@
         - hodaur.adm.crans.org
         - charybde.adm.crans.org
       apache_targets: []  # [zamok.adm.crans.org]
+      bird_targets:
+        - routeur-sam.adm.crans.org
 
+    snmp_procurve_password: "{{ vault_snmp_procurve_password }}"
     snmp_unifi_password: "{{ vault_snmp_unifi_password }}"
 
     grafana:
diff --git a/roles/mtail/templates/mtail/radiusd.mtail.j2 b/roles/mtail/templates/mtail/radiusd.mtail.j2
index e7b090f0..9d1dc16f 100644
--- a/roles/mtail/templates/mtail/radiusd.mtail.j2
+++ b/roles/mtail/templates/mtail/radiusd.mtail.j2
@@ -35,7 +35,11 @@ def syslog {
     }
 
     # Bouh!
-    /Adherent non cotisant/ {
+    /Invalid connexion \(non-contributing user\)/ {
         radiusd_access_refused["Did not pay"]++
     }
+
+    /Invalid user/ {
+        radiusd_access_refused["Invalid user"]++
+    }
 }
diff --git a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2 b/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
index 1b61324d..620ddee9 100644
--- a/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
+++ b/roles/prometheus-alertmanager/templates/prometheus/alertmanager.yml.j2
@@ -7,10 +7,6 @@ global:
   smtp_from: 'alertmanager@example.org'
   #smtp_auth_username: 'alertmanager'
   #smtp_auth_password: 'password'
-  # The auth token for Hipchat.
-  hipchat_auth_token: '1234556789'
-  # Alternative host for Hipchat.
-  hipchat_api_url: 'https://hipchat.foobar.org/'
 
 # The directory from which notification templates are read.
 templates: 
diff --git a/roles/prometheus-node-exporter/tasks/main.yml b/roles/prometheus-node-exporter/tasks/main.yml
index b324f2fb..0e07c74d 100644
--- a/roles/prometheus-node-exporter/tasks/main.yml
+++ b/roles/prometheus-node-exporter/tasks/main.yml
@@ -29,12 +29,12 @@
     enabled: true
     state: started
 
-# Doesn't work on Debian Stretch with the old prometheus package
 - name: Make Prometheus node-exporter listen on adm only
-  template:
-    src: default/prometheus-node-exporter.j2
-    dest: /etc/default/prometheus-node-exporter
-  notify: Restart prometheus-node-exporter
+  lineinfile:
+    path: /etc/default/prometheus-node-exporter
+    regexp: '^ARGS='
+    line: 'ARGS="--web.listen-address={{ adm_ipv4 }}:9100"'
+  notify: Restart prometheus-node-exporter  # new ARGS only apply after a restart
   tags: restart-node-exporter
 
 # Install new APT textfile collector, it might be upstreamed one day
@@ -46,7 +46,7 @@
     owner: root
     group: root
     mode: 0755
-  when: ansible_lsb.id == 'Debian'
+  when: ansible_lsb.id == 'Debian' and ansible_distribution_release != "bullseye"
 
 # Install new APT textfile collector, it might be upstreamed one day
 # https://github.com/prometheus-community/node-exporter-textfile-collector-scripts/pull/35
@@ -57,4 +57,4 @@
     owner: root
     group: root
     mode: 0755
-  when: ansible_lsb.id == 'Ubuntu'
+  when: ansible_lsb.id == 'Ubuntu' or ansible_distribution_release == "bullseye"
diff --git a/roles/prometheus-node-exporter/templates/default/prometheus-node-exporter.j2 b/roles/prometheus-node-exporter/templates/default/prometheus-node-exporter.j2
deleted file mode 100644
index 819d243a..00000000
--- a/roles/prometheus-node-exporter/templates/default/prometheus-node-exporter.j2
+++ /dev/null
@@ -1,130 +0,0 @@
-{{ ansible_header | comment }}
-
-# Set the command-line arguments to pass to the server.
-# Due to shell scaping, to pass backslashes for regexes, you need to double
-# them (\\d for \d). If running under systemd, you need to double them again
-# (\\\\d to mean \d), and escape newlines too.
-ARGS="--web.listen-address={{ adm_ipv4 }}:9100"
-
-# Prometheus-node-exporter supports the following options:
-#
-#  --collector.diskstats.ignored-devices="^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$"
-#                            Regexp of devices to ignore for diskstats.
-#  --collector.filesystem.ignored-mount-points="^/(dev|proc|run|sys|mnt|media|var/lib/docker)($|/)"
-#                            Regexp of mount points to ignore for filesystem
-#                            collector.
-#  --collector.filesystem.ignored-fs-types="^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$"
-#                            Regexp of filesystem types to ignore for
-#                            filesystem collector.
-#  --collector.netdev.ignored-devices="^lo$"
-#                            Regexp of net devices to ignore for netdev
-#                            collector.
-#  --collector.netstat.fields="^(.*_(InErrors|InErrs)|Ip_Forwarding|Ip(6|Ext)_(InOctets|OutOctets)|Icmp6?_(InMsgs|OutMsgs)|TcpExt_(Listen.*|Syncookies.*)|Tcp_(ActiveOpens|PassiveOpens|RetransSegs|CurrEstab)|Udp6?_(InDatagrams|OutDatagrams|NoPorts))$"
-#                            Regexp of fields to return for netstat
-#                            collector.
-#  --collector.ntp.server="127.0.0.1"
-#                            NTP server to use for ntp collector
-#  --collector.ntp.protocol-version=4
-#                            NTP protocol version
-#  --collector.ntp.server-is-local
-#                            Certify that collector.ntp.server address is the
-#                            same local host as this collector.
-#  --collector.ntp.ip-ttl=1  IP TTL to use while sending NTP query
-#  --collector.ntp.max-distance=3.46608s
-#                            Max accumulated distance to the root
-#  --collector.ntp.local-offset-tolerance=1ms
-#                            Offset between local clock and local ntpd time
-#                            to tolerate
-#  --path.procfs="/proc"     procfs mountpoint.
-#  --path.sysfs="/sys"       sysfs mountpoint.
-#  --collector.qdisc.fixtures=""
-#                            test fixtures to use for qdisc collector
-#                            end-to-end testing
-#  --collector.runit.servicedir="/etc/service"
-#                            Path to runit service directory.
-#  --collector.supervisord.url="http://localhost:9001/RPC2"
-#                            XML RPC endpoint.
-#  --collector.systemd.unit-whitelist=".+"
-#                            Regexp of systemd units to whitelist. Units must
-#                            both match whitelist and not match blacklist to
-#                            be included.
-#  --collector.systemd.unit-blacklist=".+(\\.device|\\.scope|\\.slice|\\.target)"
-#                            Regexp of systemd units to blacklist. Units must
-#                            both match whitelist and not match blacklist to
-#                            be included.
-#  --collector.systemd.private
-#                            Establish a private, direct connection to
-#                            systemd without dbus.
-#  --collector.textfile.directory="/var/lib/prometheus/node-exporter"
-#                            Directory to read text files with metrics from.
-#  --collector.vmstat.fields="^(oom_kill|pgpg|pswp|pg.*fault).*"
-#                            Regexp of fields to return for vmstat collector.
-#  --collector.wifi.fixtures=""
-#                            test fixtures to use for wifi collector metrics
-#  --collector.arp           Enable the arp collector (default: enabled).
-#  --collector.bcache        Enable the bcache collector (default: enabled).
-#  --collector.bonding       Enable the bonding collector (default: enabled).
-#  --collector.buddyinfo     Enable the buddyinfo collector (default:
-#                            disabled).
-#  --collector.conntrack     Enable the conntrack collector (default:
-#                            enabled).
-#  --collector.cpu           Enable the cpu collector (default: enabled).
-#  --collector.diskstats     Enable the diskstats collector (default:
-#                            enabled).
-#  --collector.drbd          Enable the drbd collector (default: disabled).
-#  --collector.edac          Enable the edac collector (default: enabled).
-#  --collector.entropy       Enable the entropy collector (default: enabled).
-#  --collector.filefd        Enable the filefd collector (default: enabled).
-#  --collector.filesystem    Enable the filesystem collector (default:
-#                            enabled).
-#  --collector.hwmon         Enable the hwmon collector (default: enabled).
-#  --collector.infiniband    Enable the infiniband collector (default:
-#                            enabled).
-#  --collector.interrupts    Enable the interrupts collector (default:
-#                            disabled).
-#  --collector.ipvs          Enable the ipvs collector (default: enabled).
-#  --collector.ksmd          Enable the ksmd collector (default: disabled).
-#  --collector.loadavg       Enable the loadavg collector (default: enabled).
-#  --collector.logind        Enable the logind collector (default: disabled).
-#  --collector.mdadm         Enable the mdadm collector (default: enabled).
-#  --collector.meminfo       Enable the meminfo collector (default: enabled).
-#  --collector.meminfo_numa  Enable the meminfo_numa collector (default:
-#                            disabled).
-#  --collector.mountstats    Enable the mountstats collector (default:
-#                            disabled).
-#  --collector.netdev        Enable the netdev collector (default: enabled).
-#  --collector.netstat       Enable the netstat collector (default: enabled).
-#  --collector.nfs           Enable the nfs collector (default: enabled).
-#  --collector.nfsd          Enable the nfsd collector (default: enabled).
-#  --collector.ntp           Enable the ntp collector (default: disabled).
-#  --collector.qdisc         Enable the qdisc collector (default: disabled).
-#  --collector.runit         Enable the runit collector (default: disabled).
-#  --collector.sockstat      Enable the sockstat collector (default:
-#                            enabled).
-#  --collector.stat          Enable the stat collector (default: enabled).
-#  --collector.supervisord   Enable the supervisord collector (default:
-#                            disabled).
-#  --collector.systemd       Enable the systemd collector (default: enabled).
-#  --collector.tcpstat       Enable the tcpstat collector (default:
-#                            disabled).
-#  --collector.textfile      Enable the textfile collector (default:
-#                            enabled).
-#  --collector.time          Enable the time collector (default: enabled).
-#  --collector.uname         Enable the uname collector (default: enabled).
-#  --collector.vmstat        Enable the vmstat collector (default: enabled).
-#  --collector.wifi          Enable the wifi collector (default: enabled).
-#  --collector.xfs           Enable the xfs collector (default: enabled).
-#  --collector.zfs           Enable the zfs collector (default: enabled).
-#  --collector.timex         Enable the timex collector (default: enabled).
-#  --web.listen-address=":9100"
-#                            Address on which to expose metrics and web
-#                            interface.
-#  --web.telemetry-path="/metrics"
-#                            Path under which to expose metrics.
-#  --log.level="info"        Only log messages with the given severity or
-#                            above. Valid levels: [debug, info, warn, error,
-#                            fatal]
-#  --log.format="logger:stderr"
-#                            Set the log target and format. Example:
-#                            "logger:syslog?appname=bob&local=7" or
-#                            "logger:stdout?json=true"
diff --git a/roles/prometheus-snmp-exporter/templates/prometheus/snmp.yml.j2 b/roles/prometheus-snmp-exporter/templates/prometheus/snmp.yml.j2
index 8748744f..69770a1a 100644
--- a/roles/prometheus-snmp-exporter/templates/prometheus/snmp.yml.j2
+++ b/roles/prometheus-snmp-exporter/templates/prometheus/snmp.yml.j2
@@ -1,8 +1,4 @@
 {{ ansible_header | comment }}
-# TODOlist :
-# - Faire fonctionner le monitoring des switchs défini ici
-#   * Configurer tous les switchs avec un compte SNMPv3
-#   * Mettre l'inventaire des switchs dans Ansible
 
 eatonups:
   walk:
@@ -116,7 +112,15 @@ procurve_switch:
       type: gauge
   version: 3
   auth:
-    username: prometheus
+    # To create the SNMPv3 user on an HP ProCurve switch, run:
+    # snmpv3 user snmp_prometheus auth sha {{ snmp_procurve_password }} priv aes {{ snmp_procurve_password }}
+    # snmpv3 group managerpriv user snmp_prometheus sec-model ver3
+    security_level: authPriv
+    username: snmp_prometheus
+    password: {{ snmp_procurve_password | to_json }}  # to_json renders a quoted, escaped scalar
+    auth_protocol: SHA
+    priv_protocol: AES
+    priv_password: {{ snmp_procurve_password | to_json }}  # same secret is used for privacy
 
 ubiquiti_unifi:
   walk:
diff --git a/roles/prometheus/tasks/main.yml b/roles/prometheus/tasks/main.yml
index 65a5fe05..72136715 100644
--- a/roles/prometheus/tasks/main.yml
+++ b/roles/prometheus/tasks/main.yml
@@ -25,50 +25,30 @@
     - django.rules.yml
 
 # We don't need to restart Prometheus when updating nodes
-- name: Configure Prometheus nodes
+- name: Configure Prometheus targets
   copy:
-    content: "{{ [{'targets': prometheus.node_targets}] | to_nice_json }}"
-    dest: /etc/prometheus/targets.json
+    content: "{{ [{'targets': item.targets}] | to_nice_json }}\n"
+    dest: "/etc/prometheus/{{ item.file }}.json"
     mode: 0644
-
-# We don't need to restart Prometheus when updating nodes
-- name: Configure Prometheus UPS SNMP devices
-  copy:
-    content: "{{ [{'targets': prometheus.ups_snmp_targets}] | to_nice_json }}"
-    dest: /etc/prometheus/targets_ups_snmp.json
-    mode: 0644
-
-# We don't need to restart Prometheus when updating nodes
-- name: Configure Prometheus Ubiquity Unifi SNMP devices
-  copy:
-    content: "{{ [{'targets': prometheus.unifi_snmp_targets}] | to_nice_json }}"
-    dest: /etc/prometheus/targets_unifi_snmp.json
-    mode: 0644
-  when: prometheus.unifi_snmp_targets is defined
-
-# We don't need to restart Prometheus when updating nodes
-- name: Configure Prometheus NGINX targets
-  copy:
-    content: "{{ [{'targets': prometheus.nginx_targets}] | to_nice_json }}"
-    dest: /etc/prometheus/targets_nginx.json
-    mode: 0644
-  when: prometheus.nginx_targets is defined
-
-# We don't need to restart Prometheus when updating nodes
-- name: Configure Prometheus Apache targets
-  copy:
-    content: "{{ [{'targets': prometheus.apache_targets}] | to_nice_json }}"
-    dest: /etc/prometheus/targets_apache.json
-    mode: 0644
-  when: prometheus.apache_targets is defined
-
-# We don't need to restart Prometheus when updating nodes
-- name: Configure Prometheus Blackbox targets
-  copy:
-    content: "{{ [{'targets': prometheus.blackbox_targets}] | to_nice_json }}"
-    dest: /etc/prometheus/targets_blackbox.json
-    mode: 0644
-  when: prometheus.blackbox_targets is defined
+  # Optional target lists fall back to [] so a missing key cannot fail the loop.
+  loop:
+    - file: targets
+      targets: "{{ prometheus.node_targets }}"
+    - file: targets_ups_snmp
+      targets: "{{ prometheus.ups_snmp_targets | default([]) }}"
+    - file: targets_procurve_snmp
+      targets: "{{ prometheus.procurve_snmp_targets | default([]) }}"
+    - file: targets_unifi_snmp
+      targets: "{{ prometheus.unifi_snmp_targets | default([]) }}"
+    - file: targets_nginx
+      targets: "{{ prometheus.nginx_targets | default([]) }}"
+    - file: targets_apache
+      targets: "{{ prometheus.apache_targets | default([]) }}"
+    - file: targets_blackbox
+      targets: "{{ prometheus.blackbox_targets | default([]) }}"
+    # Read by the "bird" scrape job declared in prometheus.yml.j2
+    - file: targets_bird
+      targets: "{{ prometheus.bird_targets | default([]) }}"
 
 - name: Activate prometheus service
   systemd:
diff --git a/roles/prometheus/templates/prometheus/alert.rules.yml.j2 b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
index 477ed057..d5017c6f 100644
--- a/roles/prometheus/templates/prometheus/alert.rules.yml.j2
+++ b/roles/prometheus/templates/prometheus/alert.rules.yml.j2
@@ -45,7 +45,7 @@ groups:
 
   # Alert for high CPU usage
   - alert: CpuBusy
-    expr: node_load5{instance="zbee.adm.crans.org"} > 7 or node_load5{instance!="zbee.adm.crans.org"} > 5
+    expr: node_load5 > 9
     for: 10m
     labels:
       severity: warning
diff --git a/roles/prometheus/templates/prometheus/prometheus.yml.j2 b/roles/prometheus/templates/prometheus/prometheus.yml.j2
index 7fe60352..5877a182 100644
--- a/roles/prometheus/templates/prometheus/prometheus.yml.j2
+++ b/roles/prometheus/templates/prometheus/prometheus.yml.j2
@@ -46,6 +46,7 @@ scrape_configs:
         target_label: __address__
         replacement: '$1:9100'
 
+{% if prometheus.ups_snmp_targets is defined %}
   - job_name: ups_snmp
     file_sd_configs:
       - files:
@@ -60,6 +61,24 @@ scrape_configs:
         target_label: instance
       - target_label: __address__
         replacement: 127.0.0.1:9116
+{% endif %}
+
+{% if prometheus.procurve_snmp_targets is defined %}
+  - job_name: procurve_snmp
+    file_sd_configs:
+      - files:
+        - '/etc/prometheus/targets_procurve_snmp.json'
+    metrics_path: /snmp
+    params:
+      module: [procurve_switch]
+    relabel_configs:
+      - source_labels: [__address__]
+        target_label: __param_target
+      - source_labels: [__param_target]
+        target_label: instance
+      - target_label: __address__
+        replacement: 127.0.0.1:9116
+{% endif %}
 
 {% if prometheus.unifi_snmp_targets is defined %}
   - job_name: unifi_snmp
@@ -125,7 +144,7 @@ scrape_configs:
 
   - job_name: mtail
     static_configs:
-    - targets: ["tealc.adm.crans.org"]
+      - targets: ["tealc.adm.crans.org"]
     relabel_configs:
       # Do not put :3903 in instance name, rather here
       - source_labels: [__address__]
@@ -134,10 +153,23 @@ scrape_configs:
         target_label: __address__
         replacement: '$1:3903'
 
+{% if prometheus.bird_targets is defined %}
+  - job_name: bird
+    file_sd_configs:
+      - files:
+        - '/etc/prometheus/targets_bird.json'
+    relabel_configs:
+      # Do not put :9324 in instance name, rather here
+      - source_labels: [__address__]
+        target_label: instance
+      - source_labels: [instance]
+        target_label: __address__
+        replacement: '$1:9324'
+{% endif %}
 
   - job_name: django
     scheme: https
     static_configs:
-    - targets: []
+      - targets: []
 # Activate this line when the captive portal need monitoring
-#    - targets: ["portail-captif.crans.org:443"]
+#      - targets: ["portail-captif.crans.org:443"]
-- 
GitLab