Verified Commit 9e099305 authored by Jelle van der Waa's avatar Jelle van der Waa 🚧
Browse files

Fix prometheus yaml formatting

parent 06e42574
Pipeline #5437 passed with stage
in 45 seconds
......@@ -2,96 +2,95 @@ groups:
- name: node_common
interval: 60s
rules:
- alert: HostHighCpuLoad
expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle",instance!~"build.archlinux.org",instance!~"repro1.pkgbuild.com",instance!~"repro2.pkgbuild.com"}[5m])) * 100) > 80
for: 5m
labels:
severity: warning
annotations:
summary: "Host high CPU load (instance {{ $labels.instance }})"
description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostHighCpuLoad
expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle",instance!~"build.archlinux.org",instance!~"repro1.pkgbuild.com",instance!~"repro2.pkgbuild.com"}[5m])) * 100) > 80
for: 5m
labels:
severity: warning
annotations:
summary: "Host high CPU load (instance {{ $labels.instance }})"
description: "CPU load is > 80%\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostSwapIsFillingUp
expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80
for: 5m
labels:
severity: warning
annotations:
summary: "Host swap is filling up (instance {{ $labels.instance }})"
description: "Swap is filling up (>80%)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostSwapIsFillingUp
expr: (1 - (node_memory_SwapFree_bytes / node_memory_SwapTotal_bytes)) * 100 > 80
for: 5m
labels:
severity: warning
annotations:
summary: "Host swap is filling up (instance {{ $labels.instance }})"
description: "Swap is filling up (>80%)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostOutOfMemory
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
for: 5m
labels:
severity: warning
annotations:
summary: "Host out of memory (instance {{ $labels.instance }})"
description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostOutOfMemory
expr: node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100 < 10
for: 5m
labels:
severity: warning
annotations:
summary: "Host out of memory (instance {{ $labels.instance }})"
description: "Node memory is filling up (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostMemoryUnderMemoryPressure
expr: rate(node_vmstat_pgmajfault[1m]) > 1000
for: 5m
labels:
severity: warning
annotations:
summary: "Host memory under memory pressure (instance {{ $labels.instance }})"
description: "The node is under heavy memory pressure. High rate of major page faults\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostMemoryUnderMemoryPressure
expr: rate(node_vmstat_pgmajfault[1m]) > 1000
for: 5m
labels:
severity: warning
annotations:
summary: "Host memory under memory pressure (instance {{ $labels.instance }})"
description: "The node is under heavy memory pressure. High rate of major page faults\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostUnusualNetworkThroughputIn
expr: sum by (instance) (irate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100
for: 5m
labels:
severity: warning
annotations:
summary: "Host unusual network throughput in (instance {{ $labels.instance }})"
description: "Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostUnusualNetworkThroughputIn
expr: sum by (instance) (irate(node_network_receive_bytes_total[2m])) / 1024 / 1024 > 100
for: 5m
labels:
severity: warning
annotations:
summary: "Host unusual network throughput in (instance {{ $labels.instance }})"
description: "Host network interfaces are probably receiving too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostUnusualNetworkThroughputOut
expr: sum by (instance) (irate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100
for: 5m
labels:
severity: warning
annotations:
summary: "Host unusual network throughput out (instance {{ $labels.instance }})"
description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostUnusualNetworkThroughputOut
expr: sum by (instance) (irate(node_network_transmit_bytes_total[2m])) / 1024 / 1024 > 100
for: 5m
labels:
severity: warning
annotations:
summary: "Host unusual network throughput out (instance {{ $labels.instance }})"
description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostOutOfDiskSpace
expr: (node_filesystem_avail_bytes{mountpoint="/rootfs"} * 100) / node_filesystem_size_bytes{mountpoint="/rootfs"} < 10
for: 5m
labels:
severity: warning
annotations:
summary: "Host out of disk space (instance {{ $labels.instance }})"
description: "Disk is almost full (< 20% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostOutOfDiskSpace
expr: (node_filesystem_avail_bytes{mountpoint="/rootfs"} * 100) / node_filesystem_size_bytes{mountpoint="/rootfs"} < 10
for: 5m
labels:
severity: warning
annotations:
summary: "Host out of disk space (instance {{ $labels.instance }})"
description: "Disk is almost full (< 20% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostDiskWillFillIn4Hours
expr: predict_linear(node_filesystem_free_bytes{fstype!~"tmpfs",mountpoint!~"/backup"}[1h], 4 * 3600) < 0
for: 5m
labels:
severity: warning
annotations:
summary: "Host disk will fill in 4 hours (instance {{ $labels.instance }})"
description: "Disk will fill in 4 hours at current write rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostDiskWillFillIn4Hours
expr: predict_linear(node_filesystem_free_bytes{fstype!~"tmpfs",mountpoint!~"/backup"}[1h], 4 * 3600) < 0
for: 5m
labels:
severity: warning
annotations:
summary: "Host disk will fill in 4 hours (instance {{ $labels.instance }})"
description: "Disk will fill in 4 hours at current write rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostOutOfInodes
expr: node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint ="/rootfs"} * 100 < 10
for: 5m
labels:
severity: warning
annotations:
summary: "Host out of inodes (instance {{ $labels.instance }})"
description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostOutOfInodes
expr: node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint ="/rootfs"} * 100 < 10
for: 5m
labels:
severity: warning
annotations:
summary: "Host out of inodes (instance {{ $labels.instance }})"
description: "Disk is almost running out of available inodes (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostOomKillDetected
expr: increase(node_vmstat_oom_kill[5m]) > 0
for: 5m
labels:
severity: warning
annotations:
summary: "Host OOM kill detected (instance {{ $labels.instance }})"
description: "OOM kill detected\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- alert: HostOomKillDetected
expr: increase(node_vmstat_oom_kill[5m]) > 0
for: 5m
labels:
severity: warning
annotations:
summary: "Host OOM kill detected (instance {{ $labels.instance }})"
description: "OOM kill detected\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
- name: prometheus
interval: 60s
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment