Commit 2136ced8 authored by Jelle van der Waa 🚧
Browse files

Merge branch 'add-dashboards' into 'master'

Add dashboards.archlinux.org for public Grafana dashboards

Closes #172

See merge request !368
parents 2cbfa2c0 9ef30adb
Pipeline #7296 passed with stage
in 33 seconds
......@@ -112,7 +112,7 @@ groups:
summary: "Prometheus too many restarts (instance {{ $labels.instance }})"
description: "Prometheus has restarted more than twice in the last 15 minutes. It might be crashlooping.\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: PrometheusNotConnectedToAlertmanager
expr: prometheus_notifications_alertmanagers_discovered < 1
expr: prometheus_notifications_alertmanagers_discovered{instance!~"dashboards.archlinux.org"} < 1
for: 5m
labels:
severity: critical
......
PROMETHEUS_ARGS="--storage.tsdb.retention.time=365d"
......@@ -2,6 +2,3 @@
- name: reload prometheus
service: name=prometheus state=reloaded
- name: reload alertmanager
service: name=alertmanager state=reloaded
---
- name: install prometheus,alertmanager server
pacman: name=prometheus,alertmanager state=present
- name: install prometheus server
pacman: name=prometheus state=present
- name: install cert renewal hook
template: src=letsencrypt.hook.d.j2 dest=/etc/letsencrypt/renewal-hooks/deploy/prometheus owner=root group=root mode=0755
when: prometheus_receive_only
- name: create ssl cert
include_role:
name: certificate
vars:
domains: ["{{ prometheus_domain }}"]
when: prometheus_receive_only
- name: install prometheus configuration
template: src=prometheus.yml.j2 dest=/etc/prometheus/prometheus.yml owner=root group=root mode=644
template: src=prometheus.yml.j2 dest=/etc/prometheus/prometheus.yml owner=root group=prometheus mode=640
notify: reload prometheus
- name: install prometheus cli configuration
copy: src=prometheus.conf dest=/etc/conf.d/prometheus owner=root group=root mode=600
template: src=prometheus.conf.j2 dest=/etc/conf.d/prometheus owner=root group=root mode=600
notify: reload prometheus
- name: install prometheus web-config configuration
template: src=web-config.yml.j2 dest=/etc/prometheus/web-config.yml owner=root group=prometheus mode=640
notify: reload prometheus
when: prometheus_receive_only
- name: install prometheus alert configuration
copy: src=node.rules.yml dest=/etc/prometheus/node.rules.yml owner=root group=root mode=644
notify: reload prometheus
- name: install alertmanager configuration
template: src=alertmanager.yml.j2 dest=/etc/alertmanager/alertmanager.yml owner=root group=alertmanager mode=640
notify: reload alertmanager
when: not prometheus_receive_only
- name: enable prometheus server service
systemd: name=prometheus enabled=yes daemon_reload=yes state=started
# Alertmanager is only configured when this host is not a receive-only
# Prometheus (see the "install alertmanager configuration" task), so its
# service is managed under the same condition. Without the guard this task
# would try to start a unit that was never configured on receive-only hosts.
- name: enable alertmanager server service
  systemd: name=alertmanager enabled=yes daemon_reload=yes state=started
  when: not prometheus_receive_only
- name: open firewall holes for prometheus
ansible.posix.firewalld: service=prometheus permanent=true state=enabled immediate=yes
when: configure_firewall and prometheus_receive_only
tags:
- firewall
#!/bin/bash
# Deploy hook (installed under /etc/letsencrypt/renewal-hooks/deploy/): when
# the renewed certificate covers the Prometheus domain, copy the new chain and
# key into /etc/prometheus so that the prometheus group can read them.
# RENEWED_DOMAINS and RENEWED_LINEAGE are expected in the environment
# (presumably exported by certbot for deploy hooks -- confirm).
set -o errexit -o nounset

# Unquoted on purpose: RENEWED_DOMAINS is split on whitespace into domains.
for renewed in ${RENEWED_DOMAINS}; do
    # Ignore every renewed name except the Prometheus one.
    [[ "{{ prometheus_domain }}" = "${renewed}" ]] || continue

    # Stage the copies without any group/other access until ownership and
    # mode have been set explicitly below.
    umask 077
    cp --dereference "${RENEWED_LINEAGE}/fullchain.pem" /etc/prometheus/server.crt.new
    cp --dereference "${RENEWED_LINEAGE}/privkey.pem" /etc/prometheus/server.key.new
    chown root:prometheus /etc/prometheus/server.crt.new /etc/prometheus/server.key.new
    chmod 640 /etc/prometheus/server.crt.new /etc/prometheus/server.key.new

    # Move the staged files into place by stripping the ".new" suffix.
    rename ".new" "" /etc/prometheus/server.crt.new /etc/prometheus/server.key.new

    # The certificate is handled; no need to look at further domains.
    break
done
{# Command-line flags for the Prometheus service. Every host keeps 365d of data; receive-only hosts additionally get the remote-write-receiver and web-config flags. #}
{% set receive_only_flags = " --enable-feature=remote-write-receiver --web.config.file=/etc/prometheus/web-config.yml" %}
PROMETHEUS_ARGS="--storage.tsdb.retention.time=365d{{ receive_only_flags if prometheus_receive_only else '' }}"
{% if not prometheus_receive_only %}
global:
scrape_interval: 60s
......@@ -12,7 +13,34 @@ alerting:
- targets:
- localhost:9093
# Forward a fixed allow-list of metrics to the Prometheus instance at
# prometheus_domain over HTTPS, authenticating with basic auth.
remote_write:
  - url: https://{{ prometheus_domain }}:9090/api/v1/write
    write_relabel_configs:
      # Keep only series whose metric name matches the list; all other
      # series are not forwarded.
      - source_labels: [__name__]
        regex: "archive_directory_size_bytes|archive_total_packages|rebuilderd_results|rebuilderd_workers|rebuilderd_queue_length|repository_directory_size_bytes"
        action: keep
    basic_auth:
      # Quoted so that credentials containing YAML-significant characters
      # (" #", ": ", a leading "*", "!", "&", ...) still render as strings.
      username: "{{ vault_prometheus_user }}"
      password: "{{ vault_prometheus_passwd }}"
scrape_configs:
- job_name: prometheus
static_configs:
- targets: ['127.0.0.1:9090']
labels:
instance: "{{ ansible_fqdn }}"
# Scrape the Prometheus instance at prometheus_domain over HTTPS with basic
# auth. The job label is overridden to "prometheus" so these series carry the
# same job name as the local instance's self-scrape.
- job_name: prometheus-domain
  scheme: https
  basic_auth:
    # Quoted so that credentials containing YAML-significant characters
    # (" #", ": ", a leading "*", "!", "&", ...) still render as strings.
    username: "{{ vault_prometheus_user }}"
    password: "{{ vault_prometheus_passwd }}"
  static_configs:
    - targets: ['{{ prometheus_domain }}:9090']
      labels:
        job: prometheus
        instance: "{{ prometheus_domain }}"
- job_name: loki
static_configs:
- targets: ['127.0.0.1:3100']
......@@ -111,3 +139,4 @@ scrape_configs:
- target_label: __address__
replacement: 127.0.0.1:9115
{% endfor %}
{% endif %}
# TLS certificate and key for the Prometheus web server.
# NOTE(review): the relative paths are presumably resolved against this
# file's directory (/etc/prometheus), which is where the certificate renewal
# hook places server.crt and server.key -- confirm.
tls_server_config:
  cert_file: server.crt
  key_file: server.key
# Usernames and passwords required to connect to Prometheus.
# Passwords are hashed with bcrypt: https://github.com/prometheus/exporter-toolkit/blob/46630604b0f1c5d64fbd3eb3010d91af38dc798b/docs/web-configuration.md#about-bcrypt
basic_auth_users:
  # Both sides are quoted so the rendered user name and hash are always read
  # as plain strings, whatever characters the vault values contain.
  "{{ vault_prometheus_user }}": "{{ vault_prometheus_passwd_hashed }}"
......@@ -102,6 +102,10 @@ locals {
server_type = "cx31"
domain = "monitoring"
}
"dashboards.archlinux.org" = {
server_type = "cx11"
domain = "dashboards"
}
"patchwork.archlinux.org" = {
server_type = "cx11"
domain = "patchwork"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment