Skip to content

Commit 3aa7cc5

Browse files
authored
Merge pull request prometheus-community#631 from paulfantom/jsonnetify
postgres_mixin: jsonnetify mixin
2 parents 860860b + ecbbcb3 commit 3aa7cc5

File tree

6 files changed

+141
-70
lines changed

6 files changed

+141
-70
lines changed
+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
(import 'postgres.libsonnet')

postgres_mixin/alerts/alerts.yaml

-57
This file was deleted.
+127
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
{
2+
prometheusAlerts+:: {
3+
groups+: [
4+
{
5+
name: 'PostgreSQL',
6+
rules: [
7+
{
  // Fires when the number of active connections reaches the limit available
  // to regular clients: max_connections minus superuser_reserved_connections.
  alert: 'PostgreSQLMaxConnectionsReached',
  annotations: {
    // $value is the left-hand side of the comparison below, i.e. a connection
    // count, so it is printed without a unit suffix.
    description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy).',
    summary: '{{ $labels.instance }} has maxed out Postgres connections.',
  },
  // The selector from $._config is spliced into every metric's matcher list.
  expr: |||
    sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s})
    >=
    sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s})
    -
    sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
  ||| % $._config,
  'for': '1m',
  labels: {
    severity: 'warning',
  },
},
25+
{
  // Early warning: active connections have stayed above 80% of the limit
  // available to regular clients (max_connections minus
  // superuser_reserved_connections) for 10 minutes.
  alert: 'PostgreSQLHighConnections',
  annotations: {
    // $value is the left-hand side of the comparison below, i.e. a connection
    // count, so it is printed without a unit suffix.
    description: '{{ $labels.instance }} is exceeding 80% of the currently configured maximum Postgres connection limit (current value: {{ $value }}). Please check utilization graphs and confirm if this is normal service growth, abuse or an otherwise temporary condition or if new resources need to be provisioned (or the limits increased, which is most likely).',
    summary: '{{ $labels.instance }} is over 80% of max Postgres connections.',
  },
  expr: |||
    sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s})
    >
    (
      sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s})
      -
      sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
    ) * 0.8
  ||| % $._config,
  'for': '10m',
  labels: {
    severity: 'warning',
  },
},
45+
{
  // Fires when pg_up has been anything other than 1 for a full minute.
  alert: 'PostgreSQLDown',
  annotations: {
    description: '{{ $labels.instance }} is rejecting query requests from the exporter, and thus probably not allowing DNS requests to work either. User services should not be affected provided at least 1 node is still alive.',
    summary: 'PostgreSQL is not processing queries: {{ $labels.instance }}',
  },
  expr: 'pg_up{%(postgresExporterSelector)s} != 1' % $._config,
  'for': '1m',
  labels: {
    severity: 'warning',
  },
},
57+
{
  // Fires when, per database (template databases excluded), the averaged
  // rate of pg_stat_activity_max_tx_duration over 2m stays above 2 * 60.
  alert: 'PostgreSQLSlowQueries',
  annotations: {
    // NOTE(review): `avg by (datname)` keeps only the datname label, so
    // {{ $labels.cluster }} likely renders empty here - confirm, then either
    // add cluster to the `by` clause or drop it from the text.
    description: 'PostgreSQL high number of slow queries {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }} ',
    summary: 'PostgreSQL high number of slow queries on {{ $labels.cluster }} for database {{ $labels.datname }} ',
  },
  // `by (...)` is an aggregation modifier, so it must be attached to avg();
  // rate() is a plain function and cannot take a grouping clause.
  // NOTE(review): the metric name suggests a gauge (a duration); confirm that
  // rate() is really the intended function for it.
  expr: |||
    avg by (datname) (
      rate(
        pg_stat_activity_max_tx_duration{datname!~"template.*",%(postgresExporterSelector)s}[2m]
      )
    ) > 2 * 60
  ||| % $._config,
  'for': '2m',
  labels: {
    severity: 'warning',
  },
},
75+
{
  // Per-database transaction throughput alert: commits plus rollbacks
  // (instantaneous rate over a 5m window, template databases excluded)
  // averaging above 10000 per second for 5 minutes.
  alert: 'PostgreSQLQPS',
  'for': '5m',
  labels: { severity: 'warning' },
  expr: std.format(
    |||
      avg by (datname) (
        irate(
          pg_stat_database_xact_commit{datname!~"template.*",%(postgresExporterSelector)s}[5m]
        )
        +
        irate(
          pg_stat_database_xact_rollback{datname!~"template.*",%(postgresExporterSelector)s}[5m]
        )
      ) > 10000
    |||,
    $._config
  ),
  // NOTE(review): `avg by (datname)` keeps only the datname label, so
  // {{ $labels.cluster }} likely renders empty in both texts - confirm.
  annotations: {
    summary: 'PostgreSQL high number of queries per second {{ $labels.cluster }} for database {{ $labels.datname }}',
    description: 'PostgreSQL high number of queries per second on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}',
  },
},
97+
{
  // Per-database buffer cache alert: hits / (hits + reads), computed from 5m
  // rates with template databases excluded, staying below 0.98 for 5 minutes.
  alert: 'PostgreSQLCacheHitRatio',
  'for': '5m',
  labels: { severity: 'warning' },
  expr: std.format(
    |||
      avg by (datname) (
        rate(pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m])
        /
        (
          rate(
            pg_stat_database_blks_hit{datname!~"template.*",%(postgresExporterSelector)s}[5m]
          )
          +
          rate(
            pg_stat_database_blks_read{datname!~"template.*",%(postgresExporterSelector)s}[5m]
          )
        )
      ) < 0.98
    |||,
    $._config
  ),
  // NOTE(review): `avg by (datname)` keeps only the datname label, so
  // {{ $labels.cluster }} likely renders empty in both texts - confirm.
  annotations: {
    summary: 'PostgreSQL low cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }}',
    description: 'PostgreSQL low on cache hit rate on {{ $labels.cluster }} for database {{ $labels.datname }} with a value of {{ $value }}',
  },
},
123+
],
124+
},
125+
],
126+
},
127+
}

postgres_mixin/config.libsonnet

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Default mixin configuration. Consumers override values by merging their own
// `_config+::` on top of this object.
local defaults = {
  // PromQL label matchers substituted for %(postgresExporterSelector)s in the
  // alert expressions; the empty default adds no matcher.
  postgresExporterSelector: '',
};

{
  _config+:: defaults,
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// Grafana dashboards shipped with the mixin, keyed by output file name.
// The import path is resolved relative to this file.
{
  grafanaDashboards+:: {
    'postgres-overview.json': import 'postgres-overview.json',
  },
}

postgres_mixin/mixin.libsonnet

+3-13
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,3 @@
1-
{
2-
grafanaDashboards: {
3-
'postgres-overview.json': (import 'dashboards/postgres-overview.json'),
4-
},
5-
6-
// Helper function to ensure that we don't override other rules, by forcing
7-
// the patching of the groups list, and not the overall rules object.
8-
local importRules(rules) = {
9-
groups+: std.native('parseYaml')(rules)[0].groups,
10-
},
11-
12-
prometheusAlerts+: importRules(importstr 'alerts/alerts.yaml'),
13-
}
1+
(import 'alerts/alerts.libsonnet') +
2+
(import 'dashboards/dashboards.libsonnet') +
3+
(import 'config.libsonnet')

0 commit comments

Comments
 (0)