Skip to content
Snippets Groups Projects
Verified Commit f3c38819 authored by me5na7qbjqbrp's avatar me5na7qbjqbrp
Browse files

Prometheus on bullseye

parent 5c7c0932
No related branches found
No related tags found
1 merge request: !189 Prometheus on bullseye
......@@ -6,9 +6,11 @@
# Prometheus targets.json
prometheus:
node_targets: "{{ groups['server'] | list | sort }}"
ups_snmp_targets: []
# - pulsar.adm.crans.org # 0B
# - quasar.adm.crans.org # 4J
ups_snmp_targets:
- pulsar.adm.crans.org # 0B
- quasar.adm.crans.org # 4J
procurve_snmp_targets:
- batg-9.infra.crans.org
unifi_snmp_targets: "{{ groups['crans_unifi'] | list | sort }}"
blackbox_targets:
- https://crans.org/
......@@ -37,7 +39,10 @@
- hodaur.adm.crans.org
- charybde.adm.crans.org
apache_targets: [] # [zamok.adm.crans.org]
bird_targets:
- routeur-sam.adm.crans.org
snmp_procurve_password: "{{ vault_snmp_procurve_password }}"
snmp_unifi_password: "{{ vault_snmp_unifi_password }}"
grafana:
......
......@@ -35,7 +35,11 @@ def syslog {
}
# Bouh!
/Adherent non cotisant/ {
/Invalid connexion \(non-contributing user\)/ {
radiusd_access_refused["Did not pay"]++
}
/Invalid user/ {
radiusd_access_refused["Invalid user"]++
}
}
......@@ -7,10 +7,6 @@ global:
smtp_from: 'alertmanager@example.org'
#smtp_auth_username: 'alertmanager'
#smtp_auth_password: 'password'
# The auth token for Hipchat.
hipchat_auth_token: '1234556789'
# Alternative host for Hipchat.
hipchat_api_url: 'https://hipchat.foobar.org/'
# The directory from which notification templates are read.
templates:
......
......@@ -29,12 +29,12 @@
enabled: true
state: started
# Doesn't work on Debian Stretch with the old prometheus package
- name: Make Prometheus node-exporter listen on adm only
template:
src: default/prometheus-node-exporter.j2
dest: /etc/default/prometheus-node-exporter
notify: Restart prometheus-node-exporter
lineinfile:
path: /etc/default/prometheus-node-exporter
regexp: '^ARGS='
line: |
ARGS="--web.listen-address={{ adm_ipv4 }}:9100"
tags: restart-node-exporter
# Install new APT textfile collector, it might be upstreamed one day
......@@ -46,7 +46,7 @@
owner: root
group: root
mode: 0755
when: ansible_lsb.id == 'Debian'
when: ansible_lsb.id == 'Debian' and ansible_distribution_release != "bullseye"
# Install new APT textfile collector, it might be upstreamed one day
# https://github.com/prometheus-community/node-exporter-textfile-collector-scripts/pull/35
......@@ -57,4 +57,4 @@
owner: root
group: root
mode: 0755
when: ansible_lsb.id == 'Ubuntu'
when: ansible_lsb.id == 'Ubuntu' or ansible_distribution_release == "bullseye"
{{ ansible_header | comment }}
# Set the command-line arguments to pass to the server.
# Due to shell escaping, to pass backslashes for regexes, you need to double
# them (\\d for \d). If running under systemd, you need to double them again
# (\\\\d to mean \d), and escape newlines too.
ARGS="--web.listen-address={{ adm_ipv4 }}:9100"
# Prometheus-node-exporter supports the following options:
#
# --collector.diskstats.ignored-devices="^(ram|loop|fd|(h|s|v|xv)d[a-z]|nvme\\d+n\\d+p)\\d+$"
# Regexp of devices to ignore for diskstats.
# --collector.filesystem.ignored-mount-points="^/(dev|proc|run|sys|mnt|media|var/lib/docker)($|/)"
# Regexp of mount points to ignore for filesystem
# collector.
# --collector.filesystem.ignored-fs-types="^(autofs|binfmt_misc|cgroup|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|mqueue|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|sysfs|tracefs)$"
# Regexp of filesystem types to ignore for
# filesystem collector.
# --collector.netdev.ignored-devices="^lo$"
# Regexp of net devices to ignore for netdev
# collector.
# --collector.netstat.fields="^(.*_(InErrors|InErrs)|Ip_Forwarding|Ip(6|Ext)_(InOctets|OutOctets)|Icmp6?_(InMsgs|OutMsgs)|TcpExt_(Listen.*|Syncookies.*)|Tcp_(ActiveOpens|PassiveOpens|RetransSegs|CurrEstab)|Udp6?_(InDatagrams|OutDatagrams|NoPorts))$"
# Regexp of fields to return for netstat
# collector.
# --collector.ntp.server="127.0.0.1"
# NTP server to use for ntp collector
# --collector.ntp.protocol-version=4
# NTP protocol version
# --collector.ntp.server-is-local
# Certify that collector.ntp.server address is the
# same local host as this collector.
# --collector.ntp.ip-ttl=1 IP TTL to use while sending NTP query
# --collector.ntp.max-distance=3.46608s
# Max accumulated distance to the root
# --collector.ntp.local-offset-tolerance=1ms
# Offset between local clock and local ntpd time
# to tolerate
# --path.procfs="/proc" procfs mountpoint.
# --path.sysfs="/sys" sysfs mountpoint.
# --collector.qdisc.fixtures=""
# test fixtures to use for qdisc collector
# end-to-end testing
# --collector.runit.servicedir="/etc/service"
# Path to runit service directory.
# --collector.supervisord.url="http://localhost:9001/RPC2"
# XML RPC endpoint.
# --collector.systemd.unit-whitelist=".+"
# Regexp of systemd units to whitelist. Units must
# both match whitelist and not match blacklist to
# be included.
# --collector.systemd.unit-blacklist=".+(\\.device|\\.scope|\\.slice|\\.target)"
# Regexp of systemd units to blacklist. Units must
# both match whitelist and not match blacklist to
# be included.
# --collector.systemd.private
# Establish a private, direct connection to
# systemd without dbus.
# --collector.textfile.directory="/var/lib/prometheus/node-exporter"
# Directory to read text files with metrics from.
# --collector.vmstat.fields="^(oom_kill|pgpg|pswp|pg.*fault).*"
# Regexp of fields to return for vmstat collector.
# --collector.wifi.fixtures=""
# test fixtures to use for wifi collector metrics
# --collector.arp Enable the arp collector (default: enabled).
# --collector.bcache Enable the bcache collector (default: enabled).
# --collector.bonding Enable the bonding collector (default: enabled).
# --collector.buddyinfo Enable the buddyinfo collector (default:
# disabled).
# --collector.conntrack Enable the conntrack collector (default:
# enabled).
# --collector.cpu Enable the cpu collector (default: enabled).
# --collector.diskstats Enable the diskstats collector (default:
# enabled).
# --collector.drbd Enable the drbd collector (default: disabled).
# --collector.edac Enable the edac collector (default: enabled).
# --collector.entropy Enable the entropy collector (default: enabled).
# --collector.filefd Enable the filefd collector (default: enabled).
# --collector.filesystem Enable the filesystem collector (default:
# enabled).
# --collector.hwmon Enable the hwmon collector (default: enabled).
# --collector.infiniband Enable the infiniband collector (default:
# enabled).
# --collector.interrupts Enable the interrupts collector (default:
# disabled).
# --collector.ipvs Enable the ipvs collector (default: enabled).
# --collector.ksmd Enable the ksmd collector (default: disabled).
# --collector.loadavg Enable the loadavg collector (default: enabled).
# --collector.logind Enable the logind collector (default: disabled).
# --collector.mdadm Enable the mdadm collector (default: enabled).
# --collector.meminfo Enable the meminfo collector (default: enabled).
# --collector.meminfo_numa Enable the meminfo_numa collector (default:
# disabled).
# --collector.mountstats Enable the mountstats collector (default:
# disabled).
# --collector.netdev Enable the netdev collector (default: enabled).
# --collector.netstat Enable the netstat collector (default: enabled).
# --collector.nfs Enable the nfs collector (default: enabled).
# --collector.nfsd Enable the nfsd collector (default: enabled).
# --collector.ntp Enable the ntp collector (default: disabled).
# --collector.qdisc Enable the qdisc collector (default: disabled).
# --collector.runit Enable the runit collector (default: disabled).
# --collector.sockstat Enable the sockstat collector (default:
# enabled).
# --collector.stat Enable the stat collector (default: enabled).
# --collector.supervisord Enable the supervisord collector (default:
# disabled).
# --collector.systemd Enable the systemd collector (default: enabled).
# --collector.tcpstat Enable the tcpstat collector (default:
# disabled).
# --collector.textfile Enable the textfile collector (default:
# enabled).
# --collector.time Enable the time collector (default: enabled).
# --collector.uname Enable the uname collector (default: enabled).
# --collector.vmstat Enable the vmstat collector (default: enabled).
# --collector.wifi Enable the wifi collector (default: enabled).
# --collector.xfs Enable the xfs collector (default: enabled).
# --collector.zfs Enable the zfs collector (default: enabled).
# --collector.timex Enable the timex collector (default: enabled).
# --web.listen-address=":9100"
# Address on which to expose metrics and web
# interface.
# --web.telemetry-path="/metrics"
# Path under which to expose metrics.
# --log.level="info" Only log messages with the given severity or
# above. Valid levels: [debug, info, warn, error,
# fatal]
# --log.format="logger:stderr"
# Set the log target and format. Example:
# "logger:syslog?appname=bob&local=7" or
# "logger:stdout?json=true"
{{ ansible_header | comment }}
# TODOlist :
# - Faire fonctionner le monitoring des switchs défini ici
# * Configurer tous les switchs avec un compte SNMPv3
# * Mettre l'inventaire des switchs dans Ansible
eatonups:
walk:
......@@ -116,7 +112,15 @@ procurve_switch:
type: gauge
version: 3
auth:
username: prometheus
# To create SNMPv3 user on HP procurve, execute:
# snmpv3 user snmp_prometheus auth sha {{ snmp_procurve_password }} priv aes {{ snmp_procurve_password }}
# snmpv3 group managerpriv user snmp_prometheus sec-model ver3
security_level: authPriv
username: snmp_prometheus
password: {{ snmp_procurve_password }}
auth_protocol: SHA
priv_protocol: AES
priv_password: {{ snmp_procurve_password }}
ubiquiti_unifi:
walk:
......
......@@ -25,50 +25,26 @@
- django.rules.yml
# We don't need to restart Prometheus when updating nodes
- name: Configure Prometheus nodes
- name: Configure Prometheus targets
copy:
content: "{{ [{'targets': prometheus.node_targets}] | to_nice_json }}"
dest: /etc/prometheus/targets.json
content: "{{ [{'targets': item.targets}] | to_nice_json }}\n"
dest: "/etc/prometheus/{{ item.file }}.json"
mode: 0644
# We don't need to restart Prometheus when updating nodes
- name: Configure Prometheus UPS SNMP devices
copy:
content: "{{ [{'targets': prometheus.ups_snmp_targets}] | to_nice_json }}"
dest: /etc/prometheus/targets_ups_snmp.json
mode: 0644
# We don't need to restart Prometheus when updating nodes
- name: Configure Prometheus Ubiquity Unifi SNMP devices
copy:
content: "{{ [{'targets': prometheus.unifi_snmp_targets}] | to_nice_json }}"
dest: /etc/prometheus/targets_unifi_snmp.json
mode: 0644
when: prometheus.unifi_snmp_targets is defined
# We don't need to restart Prometheus when updating nodes
- name: Configure Prometheus NGINX targets
copy:
content: "{{ [{'targets': prometheus.nginx_targets}] | to_nice_json }}"
dest: /etc/prometheus/targets_nginx.json
mode: 0644
when: prometheus.nginx_targets is defined
# We don't need to restart Prometheus when updating nodes
- name: Configure Prometheus Apache targets
copy:
content: "{{ [{'targets': prometheus.apache_targets}] | to_nice_json }}"
dest: /etc/prometheus/targets_apache.json
mode: 0644
when: prometheus.apache_targets is defined
# We don't need to restart Prometheus when updating nodes
- name: Configure Prometheus Blackbox targets
copy:
content: "{{ [{'targets': prometheus.blackbox_targets}] | to_nice_json }}"
dest: /etc/prometheus/targets_blackbox.json
mode: 0644
when: prometheus.blackbox_targets is defined
# One entry per file_sd target file: `file` is the basename written under
# /etc/prometheus/ (see `dest` above) and `targets` is the host list that gets
# serialized into it. Optional lists fall back to [] so that a host group
# which does not define them still gets a valid (empty) target file instead
# of failing the task on an undefined variable.
loop:
  - file: targets
    targets: "{{ prometheus.node_targets }}"
  - file: targets_ups_snmp
    targets: "{{ prometheus.ups_snmp_targets | default([]) }}"
  - file: targets_procurve_snmp
    targets: "{{ prometheus.procurve_snmp_targets | default([]) }}"
  - file: targets_unifi_snmp
    targets: "{{ prometheus.unifi_snmp_targets | default([]) }}"
  - file: targets_nginx
    targets: "{{ prometheus.nginx_targets | default([]) }}"
  - file: targets_apache
    targets: "{{ prometheus.apache_targets | default([]) }}"
  - file: targets_blackbox
    targets: "{{ prometheus.blackbox_targets | default([]) }}"
  # The `bird` scrape job reads /etc/prometheus/targets_bird.json, so the
  # file has to be generated here as well.
  - file: targets_bird
    targets: "{{ prometheus.bird_targets | default([]) }}"
- name: Activate prometheus service
systemd:
......
......@@ -45,7 +45,7 @@ groups:
# Alert for high CPU usage
- alert: CpuBusy
expr: node_load5{instance="zbee.adm.crans.org"} > 7 or node_load5{instance!="zbee.adm.crans.org"} > 5
expr: node_load5 > 9
for: 10m
labels:
severity: warning
......
......@@ -46,6 +46,7 @@ scrape_configs:
target_label: __address__
replacement: '$1:9100'
{% if prometheus.ups_snmp_targets is defined %}
- job_name: ups_snmp
file_sd_configs:
- files:
......@@ -60,6 +61,24 @@ scrape_configs:
target_label: instance
- target_label: __address__
replacement: 127.0.0.1:9116
{% endif %}
{% if prometheus.procurve_snmp_targets is defined %}
- job_name: procurve_snmp
file_sd_configs:
- files:
- '/etc/prometheus/targets_procurve_snmp.json'
metrics_path: /snmp
params:
module: [procurve_switch]
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: 127.0.0.1:9116
{% endif %}
{% if prometheus.unifi_snmp_targets is defined %}
- job_name: unifi_snmp
......@@ -125,7 +144,7 @@ scrape_configs:
- job_name: mtail
static_configs:
- targets: ["tealc.adm.crans.org"]
- targets: ["tealc.adm.crans.org"]
relabel_configs:
# Do not put :3903 in instance name, rather here
- source_labels: [__address__]
......@@ -134,10 +153,23 @@ scrape_configs:
target_label: __address__
replacement: '$1:3903'
{% if prometheus.bird_targets is defined %}
- job_name: bird
file_sd_configs:
- files:
- '/etc/prometheus/targets_bird.json'
relabel_configs:
# Do not put :9324 in instance name, rather here
- source_labels: [__address__]
target_label: instance
- source_labels: [instance]
target_label: __address__
replacement: '$1:9324'
{% endif %}
- job_name: django
scheme: https
static_configs:
- targets: []
- targets: []
# Activate this line when the captive portal needs monitoring
# - targets: ["portail-captif.crans.org:443"]
# - targets: ["portail-captif.crans.org:443"]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment