Skip to content

Commit 3fa2fa9

Browse files
committed
postgres_mixin/alerts: change severity to "warning"
Changing severity to align with mixin recommendations from https://github.com/monitoring-mixins/docs#guidelines-for-alert-names-labels-and-annotations Signed-off-by: Paweł Krupa (paulfantom) <[email protected]>
1 parent e7debe2 commit 3fa2fa9

File tree

1 file changed

+56
-12
lines changed

1 file changed

+56
-12
lines changed

postgres_mixin/alerts/postgres.libsonnet

Lines changed: 56 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,16 @@
1010
description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy).',
1111
summary: '{{ $labels.instance }} has maxed out Postgres connections.',
1212
},
13-
expr: 'sum(pg_stat_activity_count) by (instance) >= sum(pg_settings_max_connections) by (instance) - sum(pg_settings_superuser_reserved_connections) by (instance)',
13+
expr: |||
14+
sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s})
15+
>=
16+
sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s})
17+
-
18+
sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
19+
||| % $._config,
1420
'for': '1m',
1521
labels: {
16-
severity: 'email',
22+
severity: 'warning',
1723
},
1824
},
1925
{
@@ -22,10 +28,18 @@
2228
description: '{{ $labels.instance }} is exceeding 80% of the currently configured maximum Postgres connection limit (current value: {{ $value }}). Please check utilization graphs and confirm if this is normal service growth, abuse or an otherwise temporary condition or if new resources need to be provisioned (or the limits increased, which is most likely).',
2329
summary: '{{ $labels.instance }} is over 80% of max Postgres connections.',
2430
},
25-
expr: 'sum(pg_stat_activity_count) by (instance) > (sum(pg_settings_max_connections) by (instance) - sum(pg_settings_superuser_reserved_connections) by (instance)) * 0.8',
31+
expr: |||
32+
sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s})
33+
>
34+
(
35+
sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s})
36+
-
37+
sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
38+
) * 0.8
39+
||| % $._config,
2640
'for': '10m',
2741
labels: {
28-
severity: 'email',
42+
severity: 'warning',
2943
},
3044
},
3145
{
@@ -34,10 +48,10 @@
3448
description: '{{ $labels.instance }} is rejecting query requests from the exporter, and thus probably not allowing DNS requests to work either. User services should not be affected provided at least 1 node is still alive.',
3549
summary: 'PostgreSQL is not processing queries: {{ $labels.instance }}',
3650
},
37-
expr: 'pg_up != 1',
51+
expr: 'pg_up{%(postgresExporterSelector)s} != 1' % $._config,
3852
'for': '1m',
3953
labels: {
40-
severity: 'email',
54+
severity: 'warning',
4155
},
4256
},
4357
{
@@ -46,10 +60,16 @@
4660
description: 'PostgreSQL high number of slow queries {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }} ',
4761
summary: 'PostgreSQL high number of slow queries on {{ $labels.cluster }} for database {{ $labels.datname }} ',
4862
},
49-
expr: 'avg(rate(pg_stat_activity_max_tx_duration{datname!~"template.*"}[2m])) by (datname) > 2 * 60',
63+
expr: |||
64+
avg by (datname) (
65+
rate(
66+
pg_stat_activity_max_tx_duration{datname!~"template.*",%(postgresExporterSelector)s}[2m]
67+
)
68+
) > 2 * 60
69+
||| % $._config,
5070
'for': '2m',
5171
labels: {
52-
severity: 'email',
72+
severity: 'warning',
5373
},
5474
},
5575
{
@@ -58,10 +78,20 @@
5878
description: 'PostgreSQL high number of queries per second on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}',
5979
summary: 'PostgreSQL high number of queries per second {{ $labels.cluster }} for database {{ $labels.datname }}',
6080
},
61-
expr: 'avg(irate(pg_stat_database_xact_commit{datname!~"template.*"}[5m]) + irate(pg_stat_database_xact_rollback{datname!~"template.*"}[5m])) by (datname) > 10000',
81+
expr: |||
82+
avg by (datname) (
83+
irate(
84+
pg_stat_database_xact_commit{datname!~"template.*",%(postgresExporterSelector)s}[5m]
85+
)
86+
+
87+
irate(
88+
pg_stat_database_xact_rollback{datname!~"template.*",%(postgresExporterSelector)s}[5m]
89+
)
90+
) > 10000
91+
||| % $._config,
6292
'for': '5m',
6393
labels: {
64-
severity: 'email',
94+
severity: 'warning',
6595
},
6696
},
6797
{
@@ -70,10 +100,24 @@
70100
description: 'PostgreSQL low on cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}',
71101
summary: 'PostgreSQL low cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }}',
72102
},
73-
expr: 'avg(rate(pg_stat_database_blks_hit{datname!~"template.*"}[5m]) / (rate(pg_stat_database_blks_hit{datname!~"template.*"}[5m]) + rate(pg_stat_database_blks_read{datname!~"template.*"}[5m]))) by (datname) < 0.98',
103+
expr: |||
104+
avg by (datname) (
105+
rate(pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m])
106+
/
107+
(
108+
rate(
109+
pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m]
110+
)
111+
+
112+
rate(
113+
pg_stat_database_blks_read{datname!~"template.*",%(postgresExporterSelector)s}[5m]
114+
)
115+
)
116+
) < 0.98
117+
||| % $._config,
74118
'for': '5m',
75119
labels: {
76-
severity: 'email',
120+
severity: 'warning',
77121
},
78122
},
79123
],

0 commit comments

Comments
 (0)