diff --git a/group_vars/all/common.yml b/group_vars/all/common.yml index 56a6bf8708642e1f99ca0f558309c47996f9ae93..664f5ddce40ea5d4f07c98ed76f44e2188e5517f 100644 --- a/group_vars/all/common.yml +++ b/group_vars/all/common.yml @@ -10,3 +10,6 @@ zabbix_agent_templates: # this is used by the maintenance role to get the ip address # of the machine running the playbook maintenance_remote_machine: "{{ hostvars[inventory_hostname]['ansible_env'].SSH_CLIENT.split(' ')[0] }}" + +# prometheus-node-exporter port +prometheus_exporter_port: '9100' diff --git a/group_vars/all/vault_gitlab.yml b/group_vars/all/vault_gitlab.yml index 8f4e0e010d353530c52a22796577cc02d2f98d18..64145b7e620d1f24c60355cc0a8ee03181e64a6b 100644 --- a/group_vars/all/vault_gitlab.yml +++ b/group_vars/all/vault_gitlab.yml @@ -1,86 +1,93 @@ $ANSIBLE_VAULT;1.1;AES256 -35393938326563366437646365633563303031393034626433333163373838613535333136356132 -3139393330383337376633313739643431636337343263310a616263613665356437383862663134 -34643230613832643332323634613561313634386636373937373533653338313030633339653235 -6330646665656530350aa326136373934633930656538363633 +39636137336436363233643038663935386633383433353533383134636532353139303239326332 +3465626434646334620adiff --git a/group_vars/gitlab_runners.yml b/group_vars/gitlab_runners.yml index 1c98fb00e6ee21fa0f9948475bea59c92a9eb800..80777b00c8970bc60cf98c6121d5cbe5d768fa10 100644 --- a/group_vars/gitlab_runners.yml +++ b/group_vars/gitlab_runners.yml @@ -1,3 +1,5 @@ +gitlab_runner_exporter_port: 9252 + fail2ban_jails: sshd: true postfix: false diff --git a/hosts b/hosts index 43a89c29524d41c5a2d6c7eecacda6cf8e1299ae..bc91b96b014eb2dd451e59d03ee7734f82af732a 100644 --- a/hosts +++ b/hosts @@ -100,3 +100,13 @@ aur-dev.archlinux.org [prometheus] monitoring.archlinux.org + +[node_exporters] +aur.archlinux.org +monitoring.archlinux.org +gitlab.archlinux.org +reproducible.archlinux.org +runner1.archlinux.org +runner2.archlinux.org +secure-runner1.archlinux.org 
+secure-runner2.archlinux.org diff --git a/playbooks/aur.archlinux.org.yml b/playbooks/aur.archlinux.org.yml index e2fcabf57c54961a8c6056f66821b4a6226bbc44..6a13585a289c71ce27277ded3cf9ada8d6bcd7eb 100644 --- a/playbooks/aur.archlinux.org.yml +++ b/playbooks/aur.archlinux.org.yml @@ -8,6 +8,7 @@ - { role: tools } - { role: sshd, sshd_enable_includes: true } - { role: root_ssh } + - { role: prometheus_exporters } - { role: certbot } - { role: nginx } - { role: mariadb, mariadb_innodb_buffer_pool_size: '64M', mariadb_table_open_cache: '256', mariadb_query_cache_type: '0', diff --git a/playbooks/gitlab-runners.yml b/playbooks/gitlab-runners.yml index b2d09a3713d6377f02a4348ec161b92f42cdfc4f..3980aed0be513d5b2f8e0cb5293f7ad87f4c8e05 100644 --- a/playbooks/gitlab-runners.yml +++ b/playbooks/gitlab-runners.yml @@ -9,4 +9,5 @@ - { role: sshd } - { role: root_ssh } - { role: fail2ban } + - { role: prometheus_exporters } - { role: gitlab_runner } diff --git a/playbooks/gitlab.archlinux.org.yml b/playbooks/gitlab.archlinux.org.yml index 3f47c753c1622c7737a8a865805a8ddf0ce6e879..d5e7eeeeed83b4388870f660fe45644b2f294b15 100644 --- a/playbooks/gitlab.archlinux.org.yml +++ b/playbooks/gitlab.archlinux.org.yml @@ -11,3 +11,4 @@ - { role: root_ssh } - { role: gitlab, gitlab_domain: "gitlab.archlinux.org" } - { role: borg_client, tags: ["borg"] } + - { role: prometheus_exporters } diff --git a/playbooks/monitoring.archlinux.org.yml b/playbooks/monitoring.archlinux.org.yml index ec5aa7945277c62840f1ccf51783f3a874ac00b4..2fc94d7b286c6ec343bd6cd826b01d194713294f 100644 --- a/playbooks/monitoring.archlinux.org.yml +++ b/playbooks/monitoring.archlinux.org.yml @@ -10,5 +10,6 @@ - { role: hardening } - { role: borg_client, tags: ["borg"], when: "'borg_clients' in group_names" } - { role: prometheus } + - { role: prometheus_exporters } - { role: certbot } - { role: nginx } diff --git a/playbooks/reproducible.archlinux.org.yml b/playbooks/reproducible.archlinux.org.yml index 
d194e102df544760cefe6eaf7510748bca2c10e8..ce800050a9201ea66f561a550262725f0a9f8f70 100644 --- a/playbooks/reproducible.archlinux.org.yml +++ b/playbooks/reproducible.archlinux.org.yml @@ -14,3 +14,4 @@ - { role: certbot } - { role: nginx } - { role: rebuilderd } + - { role: prometheus_exporters } diff --git a/roles/gitlab_runner/tasks/main.yml b/roles/gitlab_runner/tasks/main.yml index c5e18fab1998a96f2988a348b9d63dffc19aee20..f1a70b46cb7035311966dd3974cf9de978016026 100644 --- a/roles/gitlab_runner/tasks/main.yml +++ b/roles/gitlab_runner/tasks/main.yml @@ -40,5 +40,12 @@ line: concurrent = 100 notify: restart gitlab-runner +- name: enable prometheus exporter + lineinfile: + path: /etc/gitlab-runner/config.toml + insertbefore: '^concurrent' + line: listen_address = ":{{ gitlab_runner_exporter_port }}" + notify: restart gitlab-runner + - name: enable and start gitlab runner service systemd: name=gitlab-runner state=started enabled=yes daemon_reload=yes diff --git a/roles/prometheus/defaults/main.yml b/roles/prometheus/defaults/main.yml index 3b75be8a27d4a8258a72c033aa01383f4ae32fa7..47660d86680e1f8ed3b1adc6c5074c317e281418 100644 --- a/roles/prometheus/defaults/main.yml +++ b/roles/prometheus/defaults/main.yml @@ -1 +1,2 @@ monitoring_domain: monitoring.archlinux.org +gitlab_runner_exporter_port: '9252' diff --git a/roles/prometheus/templates/prometheus.yml.j2 b/roles/prometheus/templates/prometheus.yml.j2 index c868c7545ac9888ec7d53983aa17b6f6c6d200c0..1da101edbc1922fb856e830fac10aa9579e1aa05 100644 --- a/roles/prometheus/templates/prometheus.yml.j2 +++ b/roles/prometheus/templates/prometheus.yml.j2 @@ -13,3 +13,69 @@ alerting: - localhost:9093 scrape_configs: + - job_name: 'node_exporter' + static_configs: + {% for host in groups['node_exporters'] %} + + - targets: ['{{ host }}:{{ prometheus_exporter_port }}'] + labels: + instance: "{{ host }}" + + {% endfor %} + + - job_name: 'gitlab_runner_exporter' + static_configs: + {% for host in groups['gitlab_runners'] %} 
+ + - targets: ['{{ host }}:{{ gitlab_runner_exporter_port }}'] + labels: + instance: "{{ host }}" + + {% endfor %} + + - job_name: 'keycloak' + scheme: https + metrics_path: "/auth/realms/master/metrics" + basic_auth: + username: "{{ vault_keycloak_nginx_user }}" + password: "{{ vault_keycloak_nginx_passwd }}" + static_configs: + - targets: ['accounts.archlinux.org:443'] + labels: + instance: "accounts.archlinux.org" + + - job_name: 'gitlab_exporter' + scheme: https + metrics_path: "/-/metrics" # absolute path, like the keycloak job + params: + token: ["{{ vault_gitlab_prometheus_token }}"] + static_configs: + - targets: ['gitlab.archlinux.org:443'] + labels: + instance: "gitlab.archlinux.org" + + - job_name: 'mysqld_exporter' + static_configs: + + - targets: ['aur.archlinux.org:9104'] + labels: + instance: "aur.archlinux.org" + + - job_name: 'blackbox' + metrics_path: /probe + scrape_interval: 15s + params: + module: [http_prometheus] + static_configs: + - targets: + {% for target in blackbox_targets %} + - {{ target }} + {% endfor %} + + relabel_configs: + - source_labels: [__address__] + target_label: __param_target + - source_labels: [__param_target] + target_label: instance + - target_label: __address__ + replacement: 127.0.0.1:9115 diff --git a/roles/prometheus_exporters/defaults/main.yml b/roles/prometheus_exporters/defaults/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..b66b5c4602da8ff860fd2d17f6aa9d73816a54f8 --- /dev/null +++ b/roles/prometheus_exporters/defaults/main.yml @@ -0,0 +1,10 @@ +--- + +prometheus_domain: monitoring.archlinux.org + +prometheus_textfile_dir: /var/lib/node_exporter + +gitlab_runner_exporter_port: '9252' + +prometheus_mysqld_user: mysqld_exporter +prometheus_mysqld_exporter_port: '9104' diff --git a/roles/prometheus_exporters/files/arch-textcollector.sh b/roles/prometheus_exporters/files/arch-textcollector.sh new file mode 100755 index 0000000000000000000000000000000000000000..963a851a9e2538acfae9aab6eb28770d70a6747e --- /dev/null +++ 
b/roles/prometheus_exporters/files/arch-textcollector.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +set -o errexit +set -o nounset + +if (( $# != 1 )); then + echo "Missing textcollector directory argument" + exit 1 +fi + +HOSTNAME=$(hostname) +TEXTFILE_COLLECTOR_DIR=${1} +PROM_FILE=$TEXTFILE_COLLECTOR_DIR/pacman.prom + +TMP_FILE=$PROM_FILE.$$ +[ -e $TMP_FILE ] && rm -f $TMP_FILE + +trap "rm -f $TMP_FILE" EXIT + +updates=$(/usr/bin/checkupdates | wc -l) +secupdates=$(/usr/bin/arch-audit -u | wc -l) + +echo "# HELP pacman_updates_pending number of pending updates from pacman" >> $TMP_FILE +echo "# TYPE pacman_updates_pending gauge" >> $TMP_FILE +echo "pacman_updates_pending{host=\"${HOSTNAME}\"} $updates" >> $TMP_FILE + +echo "# HELP pacman_security_updates_pending number of pending security updates reported by arch-audit" >> $TMP_FILE +echo "# TYPE pacman_security_updates_pending gauge" >> $TMP_FILE +echo "pacman_security_updates_pending{host=\"${HOSTNAME}\"} $secupdates" >> $TMP_FILE + +mv -f $TMP_FILE $PROM_FILE diff --git a/roles/prometheus_exporters/files/borg-textcollector.sh b/roles/prometheus_exporters/files/borg-textcollector.sh new file mode 100755 index 0000000000000000000000000000000000000000..14de62849c3ab737020a159288bb9b865fa16b34 --- /dev/null +++ b/roles/prometheus_exporters/files/borg-textcollector.sh @@ -0,0 +1,46 @@ +#!/usr/bin/bash + +set -o errexit +set -o nounset +set -o pipefail + +if (( $# != 1 )); then + echo "Missing textcollector directory argument" + exit 1 +fi + +HOSTNAME=$(hostname) +TEXTFILE_COLLECTOR_DIR=${1} +PROM_FILE=$TEXTFILE_COLLECTOR_DIR/borg.prom + + +TMP_FILE=$PROM_FILE.$$ +: > "$TMP_FILE" # create/empty the temp file so the final mv has a source even if no borg binary is found + +trap "rm -f $TMP_FILE" EXIT + +# Hetzner borg: export the newest archive's timestamp, only if /usr/local/bin/borg exists +if [[ -f /usr/local/bin/borg ]]; then + LAST_ARCHIVE=$(/usr/local/bin/borg list --last 1) + LAST_ARCHIVE_NAME=$(echo $LAST_ARCHIVE | awk '{print $1}') + LAST_ARCHIVE_DATE=$(echo $LAST_ARCHIVE | awk '{print $3" "$4}') + LAST_ARCHIVE_TIMESTAMP=$(date -d "$LAST_ARCHIVE_DATE" +"%s") + + echo "# HELP 
borg_hetzner_last_archive_timestamp timestamp of last backup in UTC" >> $TMP_FILE + echo "# TYPE borg_hetzner_last_archive_timestamp gauge" >> $TMP_FILE + echo "borg_hetzner_last_archive_timestamp{host=\"${HOSTNAME}\"} $LAST_ARCHIVE_TIMESTAMP" >> $TMP_FILE; +fi + +# rsync.net borg: same metric for the offsite repository, only if /usr/local/bin/borg-offsite exists +if [[ -f /usr/local/bin/borg-offsite ]]; then + LAST_ARCHIVE=$(/usr/local/bin/borg-offsite list --last 1) + LAST_ARCHIVE_NAME=$(echo $LAST_ARCHIVE | awk '{print $1}') + LAST_ARCHIVE_DATE=$(echo $LAST_ARCHIVE | awk '{print $3" "$4}') + LAST_ARCHIVE_TIMESTAMP=$(date -d "$LAST_ARCHIVE_DATE" +"%s") + + echo "# HELP borg_offsite_last_archive_timestamp timestamp of last backup in UTC" >> $TMP_FILE + echo "# TYPE borg_offsite_last_archive_timestamp gauge" >> $TMP_FILE + echo "borg_offsite_last_archive_timestamp{host=\"${HOSTNAME}\"} $LAST_ARCHIVE_TIMESTAMP" >> $TMP_FILE; +fi + +mv -f $TMP_FILE $PROM_FILE diff --git a/roles/prometheus_exporters/tasks/main.yml b/roles/prometheus_exporters/tasks/main.yml new file mode 100644 index 0000000000000000000000000000000000000000..60ea1ee36387a3e35b1d352ef642e4d30e2d53be --- /dev/null +++ b/roles/prometheus_exporters/tasks/main.yml @@ -0,0 +1,88 @@ +--- + +- name: install prometheus-node-exporter + pacman: name=prometheus-node-exporter,arch-audit,pacman-contrib state=present + +- name: install prometheus-memcached-exporter + pacman: name=prometheus-memcached-exporter state=present + when: "'memcached' in group_names" + +- name: install prometheus-mysqld-exporter + pacman: name=prometheus-mysqld-exporter state=present + when: "'mysql_servers' in group_names" + +- name: create prometheus mysqld database user + mysql_user: + name: '{{ prometheus_mysqld_user }}' + password: '{{ vault_prometheus_mysql_password }}' + priv: "*.*:PROCESS,REPLICATION CLIENT" + state: present + when: "'mysql_servers' in group_names" + +# TODO: with ansible 2.10 this can be set by mysql_user https://github.com/ansible/ansible/issues/26581 +- name: set max_user_connections for 
prometheus mysqld user + command: mysql -u root -e "ALTER USER '{{ prometheus_mysqld_user }}'@'localhost' WITH MAX_USER_CONNECTIONS 3;" + when: "'mysql_servers' in group_names" + +- name: copy prometheus mysqld exporter configuration + template: src=prometheus-mysqld-exporter.j2 dest=/etc/conf.d/prometheus-mysqld-exporter owner=root group=root mode=600 + when: "'mysql_servers' in group_names" + +- name: enable prometheus-mysqld-exporter service + systemd: name=prometheus-mysqld-exporter enabled=yes daemon_reload=yes state=started + when: "'mysql_servers' in group_names" + +- name: install node exporter configuration + template: src=prometheus-node-exporter.env.j2 dest=/etc/conf.d/prometheus-node-exporter owner=root group=root mode=600 + +- name: create textcollector directory + file: path="{{ prometheus_textfile_dir }}" state=directory owner=node_exporter group=node_exporter mode=700 + +- name: install node exporter textcollector scripts + copy: src={{ item }} dest=/usr/local/bin/{{ item }} owner=root group=root mode=0755 + with_items: + - arch-textcollector.sh + - borg-textcollector.sh + +- name: install arch textcollector service + template: src=prometheus-arch-textcollector.service.j2 dest=/etc/systemd/system/prometheus-arch-textcollector.service owner=root group=root mode=600 + +- name: install arch textcollector timer + template: src=prometheus-arch-textcollector.timer.j2 dest=/etc/systemd/system/prometheus-arch-textcollector.timer owner=root group=root mode=600 + +- name: enable and start prometheus arch textcollector timer + systemd: name=prometheus-arch-textcollector.timer enabled=yes daemon_reload=yes state=started + +- name: install borg textcollector service + template: src=prometheus-borg-textcollector.service.j2 dest=/etc/systemd/system/prometheus-borg-textcollector.service owner=root group=root mode=600 + when: "'borg_clients' in group_names" + +- name: install borg textcollector timer + template: src=prometheus-borg-textcollector.timer.j2 
dest=/etc/systemd/system/prometheus-borg-textcollector.timer owner=root group=root mode=600 + when: "'borg_clients' in group_names" + +- name: enable and start prometheus borg textcollector timer + systemd: name=prometheus-borg-textcollector.timer enabled=yes daemon_reload=yes state=started + when: "'borg_clients' in group_names" + +- name: enable prometheus-node-exporter service + systemd: name=prometheus-node-exporter enabled=yes daemon_reload=yes state=started + +- name: enable prometheus-memcached-exporter service + systemd: name=prometheus-memcached-exporter enabled=yes daemon_reload=yes state=started + when: "'memcached' in group_names" + +- name: open prometheus-node-exporter ipv4 port for monitoring.archlinux.org + firewalld: state=enabled permanent=true immediate=yes + rich_rule="rule family=ipv4 source address={{ hostvars['monitoring.archlinux.org']['ipv4_address'] }} port protocol=tcp port={{ prometheus_exporter_port }} accept" + when: "'prometheus' not in group_names" + +- name: open gitlab exporter ipv4 port for monitoring.archlinux.org + firewalld: state=enabled permanent=true immediate=yes + rich_rule="rule family=ipv4 source address={{ hostvars['monitoring.archlinux.org']['ipv4_address'] }} port protocol=tcp port={{ gitlab_runner_exporter_port }} accept" + when: "'gitlab_runners' in group_names" + +- name: open prometheus mysqld exporter ipv4 port for monitoring.archlinux.org + firewalld: state=enabled permanent=true immediate=yes + rich_rule="rule family=ipv4 source address={{ hostvars['monitoring.archlinux.org']['ipv4_address'] }} port protocol=tcp port={{ prometheus_mysqld_exporter_port }} accept" + when: "'mysql_servers' in group_names" diff --git a/roles/prometheus_exporters/templates/prometheus-arch-textcollector.service.j2 b/roles/prometheus_exporters/templates/prometheus-arch-textcollector.service.j2 new file mode 100644 index 0000000000000000000000000000000000000000..5edb6dc89f1b22f9479cdf3b8e5bc8c62a687b0b --- /dev/null +++ 
b/roles/prometheus_exporters/templates/prometheus-arch-textcollector.service.j2 @@ -0,0 +1,37 @@ +[Unit] +Description=Prometheus Arch Exporter +After=network.target + +[Service] +Type=oneshot +User=node_exporter +ExecStart=/usr/local/bin/arch-textcollector.sh {{ prometheus_textfile_dir }} + +NoNewPrivileges=true +LockPersonality=true +CapabilityBoundingSet= +UMask=077 + +PrivateDevices=true +PrivateTmp=true +ProtectSystem=strict +ProtectHome=true +ReadWritePaths={{ prometheus_textfile_dir }} + +MemoryDenyWriteExecute=true +RemoveIPC=true +RestrictRealtime=true +RestrictNamespaces=true +RestrictSUIDSGID=true + +RestrictAddressFamilies=~AF_NETLINK +RestrictAddressFamilies=~AF_PACKET + +ProtectHostname=true +ProtectControlGroups=true +ProtectKernelLogs=true +ProtectKernelTunables=true +ProtectKernelModules=true +ProtectClock=true + +SystemCallArchitectures=native diff --git a/roles/prometheus_exporters/templates/prometheus-arch-textcollector.timer.j2 b/roles/prometheus_exporters/templates/prometheus-arch-textcollector.timer.j2 new file mode 100644 index 0000000000000000000000000000000000000000..6afeacf4dc4428cdd3b2ca2a16d7f1a2e052ec50 --- /dev/null +++ b/roles/prometheus_exporters/templates/prometheus-arch-textcollector.timer.j2 @@ -0,0 +1,10 @@ +[Unit] +Description=Prometheus Arch Exporter TextCollector Timer + +[Timer] +OnUnitActiveSec=60m +OnBootSec=15min +RandomizedDelaySec=1min + +[Install] +WantedBy=timers.target diff --git a/roles/prometheus_exporters/templates/prometheus-borg-textcollector.service.j2 b/roles/prometheus_exporters/templates/prometheus-borg-textcollector.service.j2 new file mode 100644 index 0000000000000000000000000000000000000000..593a774eef4860048b1bc2049752d69cdb443531 --- /dev/null +++ b/roles/prometheus_exporters/templates/prometheus-borg-textcollector.service.j2 @@ -0,0 +1,35 @@ +[Unit] +Description=Prometheus Borg Exporter TextCollector +After=network.target +ConditionPathExistsGlob=!/root/.cache/borg/*/lock.roster + +[Service] 
+Type=oneshot +ExecStart=/usr/local/bin/borg-textcollector.sh {{ prometheus_textfile_dir }} + +NoNewPrivileges=true +LockPersonality=true + +PrivateDevices=true +PrivateTmp=true +ProtectSystem=strict +ProtectHome=read-only +ReadWritePaths={{ prometheus_textfile_dir }} /root/.cache/borg + +MemoryDenyWriteExecute=true +RemoveIPC=true +RestrictRealtime=true +RestrictNamespaces=true +RestrictSUIDSGID=true + +RestrictAddressFamilies=~AF_PACKET +RestrictAddressFamilies=~AF_NETLINK + +ProtectHostname=true +ProtectControlGroups=true +ProtectKernelLogs=true +ProtectKernelTunables=true +ProtectKernelModules=true +ProtectClock=true + +SystemCallArchitectures=native diff --git a/roles/prometheus_exporters/templates/prometheus-borg-textcollector.timer.j2 b/roles/prometheus_exporters/templates/prometheus-borg-textcollector.timer.j2 new file mode 100644 index 0000000000000000000000000000000000000000..ca8a197e20efe0879cb411fe4fc720324a24785f --- /dev/null +++ b/roles/prometheus_exporters/templates/prometheus-borg-textcollector.timer.j2 @@ -0,0 +1,10 @@ +[Unit] +Description=Prometheus Borg Exporter TextCollector Timer + +[Timer] +OnUnitActiveSec=1h +OnBootSec=15min +RandomizedDelaySec=1min + +[Install] +WantedBy=timers.target diff --git a/roles/prometheus_exporters/templates/prometheus-mysqld-exporter.j2 b/roles/prometheus_exporters/templates/prometheus-mysqld-exporter.j2 new file mode 100644 index 0000000000000000000000000000000000000000..c74feee70fcb6a05aac0775046c1463796b258ab --- /dev/null +++ b/roles/prometheus_exporters/templates/prometheus-mysqld-exporter.j2 @@ -0,0 +1,3 @@ +DATA_SOURCE_NAME="{{ prometheus_mysqld_user }}:{{ vault_prometheus_mysql_password }}@(localhost:3306)/" +# TODO: review these settings +MYSQLD_EXPORTER_ARGS="--collect.binlog_size --collect.info_schema.processlist --collect.info_schema.userstats" diff --git a/roles/prometheus_exporters/templates/prometheus-node-exporter.env.j2 b/roles/prometheus_exporters/templates/prometheus-node-exporter.env.j2 new 
file mode 100644 index 0000000000000000000000000000000000000000..88dd42d6d790b9f87e13c1ea4715bcabb8cc79fa --- /dev/null +++ b/roles/prometheus_exporters/templates/prometheus-node-exporter.env.j2 @@ -0,0 +1 @@ +NODE_EXPORTER_ARGS="--collector.systemd --collector.textfile.directory={{ prometheus_textfile_dir }}"