diff --git a/roles/prometheus/files/node.rules.yml b/roles/prometheus/files/node.rules.yml
index a60229bfe06decf0b7714649f9f46129dd6012ae..86a9fce10a10f0557f83014da9f7c0d370acb104 100644
--- a/roles/prometheus/files/node.rules.yml
+++ b/roles/prometheus/files/node.rules.yml
@@ -57,16 +57,20 @@ groups:
       description: "Host network interfaces are probably sending too much data (> 100 MB/s)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
 
   - alert: HostOutOfDiskSpace
-    expr: (node_filesystem_avail_bytes{mountpoint="/rootfs"} * 100) / node_filesystem_size_bytes{mountpoint="/rootfs"} < 10
+    # Fires when any non-tmpfs filesystem outside /backup* has < 10% space left.
+    # Read-only mounts are skipped: a full read-only mount is not actionable.
+    expr: (node_filesystem_avail_bytes{fstype!="tmpfs",mountpoint!~"/backup.*"} * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
     for: 5m
     labels:
       severity: warning
     annotations:
       summary: "Host out of disk space (instance {{ $labels.instance }})"
-      description: "Disk is almost full (< 20% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
+      description: "Disk is almost full (< 10% left)\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
 
   - alert: HostDiskWillFillIn24Hours
-    expr: (node_filesystem_avail_bytes{mountpoint!~"/backup"} * 100) / node_filesystem_size_bytes < 10 and ON (instance, device, mountpoint) predict_linear(node_filesystem_avail_bytes{fstype!~"tmpfs",mountpoint!~"/backup"}[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
+    # Fires when a filesystem is already below 10% free and the last hour's trend
+    # predicts it reaching zero within 24 hours (24 * 3600 s); read-only mounts skipped.
+    expr: (node_filesystem_avail_bytes{fstype!="tmpfs",mountpoint!~"/backup.*"} * 100) / node_filesystem_size_bytes < 10 and predict_linear(node_filesystem_avail_bytes[1h], 24 * 3600) < 0 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
     for: 2m
     labels:
       severity: warning
@@ -75,7 +79,9 @@ groups:
       description: "Filesystem is predicted to run out of space within the next 24 hours at current write rate\n VALUE = {{ $value }}\n LABELS: {{ $labels }}"
 
   - alert: HostOutOfInodes
-    expr: node_filesystem_files_free{mountpoint ="/rootfs"} / node_filesystem_files{mountpoint ="/rootfs"} * 100 < 10
+    # Fires when any non-tmpfs filesystem outside /backup* has < 10% inodes left.
+    # Read-only mounts are skipped, matching the disk-space alerts in this file.
+    expr: (node_filesystem_files_free{fstype!="tmpfs",mountpoint!~"/backup.*"} * 100) / node_filesystem_files < 10 and ON (instance, device, mountpoint) node_filesystem_readonly == 0
     for: 5m
     labels:
       severity: warning