Skip to content

Commit ca1f0ab

Browse files
Add a Collector for pg_stat_io.
Docs: https://www.postgresql.org/docs/current/monitoring-stats.html#MONITORING-PG-STAT-IO-VIEW We use this in production. One problem is that the stats for a vacuum are reported when the vacuum ends, not continuously. Signed-off-by: Peter Nuttall <[email protected]>
1 parent 94e8399 commit ca1f0ab

File tree

2 files changed

+481
-0
lines changed

2 files changed

+481
-0
lines changed

collector/pg_stat_io.go

Lines changed: 325 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,325 @@
1+
// Copyright 2023 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package collector
15+
16+
import (
17+
"context"
18+
"database/sql"
19+
"log/slog"
20+
21+
"github.com/blang/semver/v4"
22+
"github.com/prometheus/client_golang/prometheus"
23+
)
24+
25+
// statIOSubsystem is the name this collector is registered under and the
// subsystem component of every metric name built in this file.
const statIOSubsystem = "stat_io"
26+
27+
func init() {
28+
registerCollector(statIOSubsystem, defaultDisabled, NewStatIOCollector)
29+
}
30+
31+
// StatIOCollector collects metrics from the PostgreSQL pg_stat_io view.
type StatIOCollector struct {
	// log is the logger handed over by the collector configuration.
	log *slog.Logger
}
34+
35+
func NewStatIOCollector(config collectorConfig) (Collector, error) {
36+
return &StatIOCollector{
37+
log: config.logger,
38+
}, nil
39+
}
40+
41+
var (
42+
statIOReadsTotal = prometheus.NewDesc(
43+
prometheus.BuildFQName(namespace, statIOSubsystem, "reads_total"),
44+
"Number of read operations, each of the size specified in op_bytes.",
45+
[]string{"backend_type", "object", "context"},
46+
prometheus.Labels{},
47+
)
48+
statIOReadTimeTotal = prometheus.NewDesc(
49+
prometheus.BuildFQName(namespace, statIOSubsystem, "read_time_total"),
50+
"Time spent in read operations in milliseconds (if track_io_timing is enabled, otherwise zero)",
51+
[]string{"backend_type", "object", "context"},
52+
prometheus.Labels{},
53+
)
54+
55+
statIOWritesTotal = prometheus.NewDesc(
56+
prometheus.BuildFQName(namespace, statIOSubsystem, "writes_total"),
57+
"Number of write operations, each of the size specified in op_bytes.",
58+
[]string{"backend_type", "object", "context"},
59+
prometheus.Labels{},
60+
)
61+
statIOWriteTimeTotal = prometheus.NewDesc(
62+
prometheus.BuildFQName(namespace, statIOSubsystem, "writes_time_total"),
63+
"Time spent in write operations in milliseconds (if track_io_timing is enabled, otherwise zero)",
64+
[]string{"backend_type", "object", "context"},
65+
prometheus.Labels{},
66+
)
67+
68+
statIOWriteBackTotal = prometheus.NewDesc(
69+
prometheus.BuildFQName(namespace, statIOSubsystem, "write_back_total"),
70+
"Number of units of size op_bytes which the process requested the kernel write out to permanent storage.",
71+
[]string{"backend_type", "object", "context"},
72+
prometheus.Labels{},
73+
)
74+
statIOWriteBackTimeTotal = prometheus.NewDesc(
75+
prometheus.BuildFQName(namespace, statIOSubsystem, "write_back_time_total"),
76+
"Time spent in writeback operations in milliseconds (if track_io_timing is enabled, otherwise zero). This includes the time spent queueing write-out requests and, potentially, the time spent to write out the dirty data.",
77+
[]string{"backend_type", "object", "context"},
78+
prometheus.Labels{},
79+
)
80+
81+
statIOExtendsTotal = prometheus.NewDesc(
82+
prometheus.BuildFQName(namespace, statIOSubsystem, "extends_total"),
83+
"Number of relation extend operations, each of the size specified in op_bytes.",
84+
[]string{"backend_type", "object", "context"},
85+
prometheus.Labels{},
86+
)
87+
88+
statIOExtendsTimeTotal = prometheus.NewDesc(
89+
prometheus.BuildFQName(namespace, statIOSubsystem, "extends_time_total"),
90+
"Time spent in extend operations in milliseconds (if track_io_timing is enabled, otherwise zero)",
91+
[]string{"backend_type", "object", "context"},
92+
prometheus.Labels{},
93+
)
94+
95+
statIOHitsTotal = prometheus.NewDesc(
96+
prometheus.BuildFQName(namespace, statIOSubsystem, "hits_total"),
97+
"The number of times a desired block was found in a shared buffer.",
98+
[]string{"backend_type", "object", "context"},
99+
prometheus.Labels{},
100+
)
101+
statIOEvictionsTotal = prometheus.NewDesc(
102+
prometheus.BuildFQName(namespace, statIOSubsystem, "evictions_total"),
103+
"Number of times a block has been written out from a shared or local buffer in order to make it available for another use.",
104+
[]string{"backend_type", "object", "context"},
105+
prometheus.Labels{},
106+
)
107+
statIOReusesTotal = prometheus.NewDesc(
108+
prometheus.BuildFQName(namespace, statIOSubsystem, "reuses_total"),
109+
"The number of times an existing buffer in a size-limited ring buffer outside of shared buffers was reused as part of an I/O operation in the bulkread, bulkwrite, or vacuum contexts.",
110+
[]string{"backend_type", "object", "context"},
111+
prometheus.Labels{},
112+
)
113+
114+
statIOFsyncsTotal = prometheus.NewDesc(
115+
prometheus.BuildFQName(namespace, statIOSubsystem, "fsync_total"),
116+
"Number of fsync calls. These are only tracked in context normal.",
117+
[]string{"backend_type", "object", "context"},
118+
prometheus.Labels{},
119+
)
120+
statIOFsyncTimeTotal = prometheus.NewDesc(
121+
prometheus.BuildFQName(namespace, statIOSubsystem, "fsync_time_total"),
122+
"Time spent in fsync operations in milliseconds (if track_io_timing is enabled, otherwise zero)",
123+
[]string{"backend_type", "object", "context"},
124+
prometheus.Labels{},
125+
)
126+
statIOQuery = `
127+
SELECT
128+
backend_type,
129+
object,
130+
context,
131+
reads,
132+
read_time,
133+
writes,
134+
write_time,
135+
writebacks,
136+
writeback_time,
137+
extends,
138+
extend_time,
139+
hits,
140+
evictions,
141+
reuses,
142+
fsyncs,
143+
fsync_time
144+
145+
FROM
146+
pg_stat_io
147+
`
148+
)
149+
150+
// Update implements Collector and exposes database locks.
151+
// It is called by the Prometheus registry when collecting metrics.
152+
func (c StatIOCollector) Update(ctx context.Context, instance *instance, ch chan<- prometheus.Metric) error {
153+
// pg_stat_io is only in v16, and we don't need support for earlier currently.
154+
if !instance.version.GE(semver.MustParse("16.0.0")) {
155+
return nil
156+
}
157+
db := instance.getDB()
158+
// Query the list of databases
159+
rows, err := db.QueryContext(ctx, statIOQuery)
160+
if err != nil {
161+
return err
162+
}
163+
defer rows.Close()
164+
165+
var backendType, object, PGContext sql.NullString
166+
var reads, writes, writeBacks, extends, hits, evictions, reuses, fsyncs sql.NullInt64
167+
var readTime, writeTime, writeBackTime, extendsTime, fsyncTime sql.NullFloat64
168+
169+
for rows.Next() {
170+
if err := rows.Scan(
171+
&backendType, &object, &PGContext,
172+
&reads,
173+
&readTime,
174+
&writes,
175+
&writeTime,
176+
&writeBacks,
177+
&writeBackTime,
178+
&extends,
179+
&extendsTime,
180+
&hits,
181+
&evictions,
182+
&reuses,
183+
&fsyncs,
184+
&fsyncTime); err != nil {
185+
return err
186+
}
187+
188+
if !backendType.Valid || !object.Valid || !PGContext.Valid {
189+
continue
190+
}
191+
192+
readsMetric := 0.0
193+
if reads.Valid {
194+
readsMetric = float64(reads.Int64)
195+
}
196+
ch <- prometheus.MustNewConstMetric(
197+
statIOReadsTotal,
198+
prometheus.CounterValue,
199+
readsMetric,
200+
backendType.String, object.String, PGContext.String)
201+
202+
readTimeMetric := 0.0
203+
if readTime.Valid {
204+
readTimeMetric = readTime.Float64
205+
}
206+
ch <- prometheus.MustNewConstMetric(
207+
statIOReadTimeTotal,
208+
prometheus.CounterValue,
209+
readTimeMetric,
210+
backendType.String, object.String, PGContext.String)
211+
212+
writesMetric := 0.0
213+
if writes.Valid {
214+
writesMetric = float64(writes.Int64)
215+
}
216+
ch <- prometheus.MustNewConstMetric(
217+
statIOWritesTotal,
218+
prometheus.CounterValue,
219+
writesMetric,
220+
backendType.String, object.String, PGContext.String)
221+
222+
writeTimeMetric := 0.0
223+
if writeTime.Valid {
224+
writeTimeMetric = writeTime.Float64
225+
}
226+
ch <- prometheus.MustNewConstMetric(
227+
statIOWriteTimeTotal,
228+
prometheus.CounterValue,
229+
writeTimeMetric,
230+
backendType.String, object.String, PGContext.String)
231+
232+
writeBackMetric := 0.0
233+
if writeBacks.Valid {
234+
writeBackMetric = float64(writeBacks.Int64)
235+
}
236+
ch <- prometheus.MustNewConstMetric(
237+
statIOWriteBackTotal,
238+
prometheus.CounterValue,
239+
writeBackMetric,
240+
backendType.String, object.String, PGContext.String)
241+
242+
writeBackTimeMetric := 0.0
243+
if writeBackTime.Valid {
244+
writeBackTimeMetric = writeBackTime.Float64
245+
}
246+
ch <- prometheus.MustNewConstMetric(
247+
statIOWriteBackTimeTotal,
248+
prometheus.CounterValue,
249+
writeBackTimeMetric,
250+
backendType.String, object.String, PGContext.String)
251+
252+
extendsMetric := 0.0
253+
if extends.Valid {
254+
extendsMetric = float64(extends.Int64)
255+
}
256+
ch <- prometheus.MustNewConstMetric(
257+
statIOExtendsTotal,
258+
prometheus.CounterValue,
259+
extendsMetric,
260+
backendType.String, object.String, PGContext.String)
261+
262+
extendsTimeMetric := 0.0
263+
if extendsTime.Valid {
264+
extendsTimeMetric = extendsTime.Float64
265+
}
266+
ch <- prometheus.MustNewConstMetric(
267+
statIOExtendsTimeTotal,
268+
prometheus.CounterValue,
269+
extendsTimeMetric,
270+
backendType.String, object.String, PGContext.String)
271+
272+
hitsMetric := 0.0
273+
if hits.Valid {
274+
hitsMetric = float64(hits.Int64)
275+
}
276+
ch <- prometheus.MustNewConstMetric(
277+
statIOHitsTotal,
278+
prometheus.CounterValue,
279+
hitsMetric,
280+
backendType.String, object.String, PGContext.String)
281+
282+
evictionsMetric := 0.0
283+
if evictions.Valid {
284+
evictionsMetric = float64(evictions.Int64)
285+
}
286+
ch <- prometheus.MustNewConstMetric(
287+
statIOEvictionsTotal,
288+
prometheus.CounterValue,
289+
evictionsMetric,
290+
backendType.String, object.String, PGContext.String)
291+
292+
reusesMetric := 0.0
293+
if reuses.Valid {
294+
reusesMetric = float64(reuses.Int64)
295+
}
296+
ch <- prometheus.MustNewConstMetric(
297+
statIOReusesTotal,
298+
prometheus.CounterValue,
299+
reusesMetric,
300+
backendType.String, object.String, PGContext.String)
301+
302+
fsyncsMetric := 0.0
303+
if fsyncs.Valid {
304+
fsyncsMetric = float64(fsyncs.Int64)
305+
}
306+
ch <- prometheus.MustNewConstMetric(
307+
statIOFsyncsTotal,
308+
prometheus.CounterValue,
309+
fsyncsMetric,
310+
backendType.String, object.String, PGContext.String)
311+
312+
fsyncTimeMetric := 0.0
313+
if fsyncTime.Valid {
314+
fsyncTimeMetric = fsyncTime.Float64
315+
}
316+
ch <- prometheus.MustNewConstMetric(
317+
statIOFsyncTimeTotal,
318+
prometheus.CounterValue,
319+
fsyncTimeMetric,
320+
backendType.String, object.String, PGContext.String)
321+
322+
}
323+
324+
return rows.Err()
325+
}

0 commit comments

Comments
 (0)