Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions collector/diskstats_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"fmt"
"log/slog"
"os"
"path/filepath"
"strconv"
"strings"

Expand Down Expand Up @@ -84,6 +85,8 @@ type diskstatsCollector struct {
filesystemInfoDesc typedFactorDesc
deviceMapperInfoDesc typedFactorDesc
ataDescs map[string]typedFactorDesc
ioErrDesc typedFactorDesc
ioDoneDesc typedFactorDesc
logger *slog.Logger
getUdevDeviceProperties func(uint32, uint32) (udevInfo, error)
}
Expand Down Expand Up @@ -256,6 +259,20 @@ func NewDiskstatsCollector(logger *slog.Logger) (Collector, error) {
), valueType: prometheus.GaugeValue,
},
},
ioErrDesc: typedFactorDesc{
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "ioerr_total"),
"Number of IO commands that completed with an error.",
[]string{"device"},
nil,
), valueType: prometheus.CounterValue,
},
ioDoneDesc: typedFactorDesc{
desc: prometheus.NewDesc(prometheus.BuildFQName(namespace, diskSubsystem, "iodone_total"),
"Number of completed or rejected IO commands.",
[]string{"device"},
nil,
), valueType: prometheus.CounterValue,
},
logger: logger,
}

Expand Down Expand Up @@ -372,6 +389,37 @@ func (c *diskstatsCollector) Update(ch chan<- prometheus.Metric) error {
}
}
}

// Read IO error counts if available
iodoneCnt, err := os.ReadFile(filepath.Join(*sysPath, "block", dev, "device/iodone_cnt"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to be implemented in the procfs library first.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Naively, it looks like prometheus/procfs#651 has been merged. Is this PR in need of a new owner? (I found this PR via the associated issue 😅)

if err != nil {
// Skip if file doesn't exist
if !os.IsNotExist(err) {
c.logger.Debug("Error reading IO errors count", "collector", "diskstats", "err", err)
}
} else {
iodone, err := strconv.ParseUint(strings.TrimSpace(string(iodoneCnt)), 10, 64)
if err != nil {
c.logger.Debug("Error parsing iodone count", "collector", "diskstats", "err", err)
} else {
ch <- c.ioDoneDesc.mustNewConstMetric(float64(iodone), dev)
}
}

ioerrCnt, err := os.ReadFile(filepath.Join(*sysPath, "block", dev, "device/ioerr_cnt"))
if err != nil {
// Skip if file doesn't exist
if !os.IsNotExist(err) {
c.logger.Debug("Error reading IO errors count", "collector", "diskstats", "err", err)
}
} else {
ioerr, err := strconv.ParseUint(strings.TrimSpace(string(ioerrCnt)), 10, 64)
if err != nil {
c.logger.Debug("Error parsing ioerr count", "collector", "diskstats", "err", err)
} else {
ch <- c.ioErrDesc.mustNewConstMetric(float64(ioerr), dev)
}
}
}
return nil
}
Expand Down
8 changes: 8 additions & 0 deletions collector/diskstats_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
# TYPE node_disk_iodone_total counter
node_disk_iodone_total{device="sda"} 307
node_disk_iodone_total{device="sr0"} 2767
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
# TYPE node_disk_ioerr_total counter
node_disk_ioerr_total{device="sda"} 3
node_disk_ioerr_total{device="sr0"} 29
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
# TYPE node_disk_read_bytes_total counter
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11
Expand Down
110 changes: 110 additions & 0 deletions collector/ext4_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Copyright 2017 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !noext4
// +build !noext4

package collector

import (
"fmt"
"log/slog"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs/ext4"
)

// An ext4Collector is a Collector which gathers metrics from ext4 filesystems.
type ext4Collector struct {
// fs is the ext4.FS handle created in NewExt4Collector; Update calls its
// ProcStat method on every collection.
fs ext4.FS
// logger is the logger handed to NewExt4Collector.
logger *slog.Logger
}

// init registers this collector under the name "ext4", passing
// defaultEnabled as its state and NewExt4Collector as its constructor.
func init() {
registerCollector("ext4", defaultEnabled, NewExt4Collector)
}

// NewExt4Collector constructs the ext4 Collector.
//
// It opens an ext4.FS handle from the configured proc and sys paths and
// stores it, together with logger, in the returned collector. If the handle
// cannot be opened, the underlying error is wrapped and returned with a nil
// Collector.
func NewExt4Collector(logger *slog.Logger) (Collector, error) {
	handle, openErr := ext4.NewFS(*procPath, *sysPath)
	if openErr != nil {
		return nil, fmt.Errorf("failed to open sysfs: %w", openErr)
	}

	collector := &ext4Collector{fs: handle, logger: logger}
	return collector, nil
}

// Update implements Collector.
//
// It fetches the current ext4 statistics through the collector's ext4.FS
// handle and forwards every returned entry to updateExt4Stats, which writes
// the resulting metrics to ch. A failure to fetch the statistics is wrapped
// and returned; otherwise Update returns nil.
func (c *ext4Collector) Update(ch chan<- prometheus.Metric) error {
	perFS, err := c.fs.ProcStat()
	if err != nil {
		return fmt.Errorf("failed to retrieve ext4 stats: %w", err)
	}

	for _, entry := range perFS {
		c.updateExt4Stats(ch, entry)
	}
	return nil
}

// updateExt4Stats publishes the counters of a single ext4 filesystem.
//
// Three counter metrics are sent on ch, in this order: errors, warnings and
// messages. Each carries one "device" label whose value is s.Name.
func (c *ext4Collector) updateExt4Stats(ch chan<- prometheus.Metric, s *ext4.Stats) {
	const subsystem = "ext4"

	deviceLabel := []string{"device"}

	// Convert every value up front so that all fields of s are read before
	// the first metric is sent.
	errorCount := float64(s.Errors)
	warningCount := float64(s.Warnings)
	messageCount := float64(s.Messages)

	// emit builds the descriptor for one metric and sends a counter sample
	// for it, using s.Name as the label value.
	emit := func(name, help string, value float64) {
		fqName := prometheus.BuildFQName(namespace, subsystem, name)
		desc := prometheus.NewDesc(fqName, help, deviceLabel, nil)
		ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, value, s.Name)
	}

	emit("errors", "Number of ext4 filesystem errors.", errorCount)
	emit("warnings", "Number of ext4 filesystem warnings.", warningCount)
	emit("messages", "Number of ext4 filesystem log messages.", messageCount)
}
9 changes: 9 additions & 0 deletions collector/fixtures/e2e-64k-page-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
# TYPE node_disk_iodone_total counter
node_disk_iodone_total{device="sda"} 307
node_disk_iodone_total{device="sr0"} 2767
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
# TYPE node_disk_ioerr_total counter
node_disk_ioerr_total{device="sda"} 3
node_disk_ioerr_total{device="sr0"} 29
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
# TYPE node_disk_read_bytes_total counter
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11
Expand Down Expand Up @@ -2971,6 +2979,7 @@ node_scrape_collector_success{collector="dmi"} 1
node_scrape_collector_success{collector="drbd"} 1
node_scrape_collector_success{collector="edac"} 1
node_scrape_collector_success{collector="entropy"} 1
node_scrape_collector_success{collector="ext4"} 1
node_scrape_collector_success{collector="fibrechannel"} 1
node_scrape_collector_success{collector="filefd"} 1
node_scrape_collector_success{collector="hwmon"} 1
Expand Down
9 changes: 9 additions & 0 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -576,6 +576,14 @@ node_disk_io_time_weighted_seconds_total{device="sdb"} 67.07000000000001
node_disk_io_time_weighted_seconds_total{device="sdc"} 17.07
node_disk_io_time_weighted_seconds_total{device="sr0"} 0
node_disk_io_time_weighted_seconds_total{device="vda"} 2.0778722280000001e+06
# HELP node_disk_iodone_total Number of completed or rejected IO commands.
# TYPE node_disk_iodone_total counter
node_disk_iodone_total{device="sda"} 307
node_disk_iodone_total{device="sr0"} 2767
# HELP node_disk_ioerr_total Number of IO commands that completed with an error.
# TYPE node_disk_ioerr_total counter
node_disk_ioerr_total{device="sda"} 3
node_disk_ioerr_total{device="sr0"} 29
# HELP node_disk_read_bytes_total The total number of bytes read successfully.
# TYPE node_disk_read_bytes_total counter
node_disk_read_bytes_total{device="dm-0"} 5.13708655616e+11
Expand Down Expand Up @@ -2993,6 +3001,7 @@ node_scrape_collector_success{collector="dmi"} 1
node_scrape_collector_success{collector="drbd"} 1
node_scrape_collector_success{collector="edac"} 1
node_scrape_collector_success{collector="entropy"} 1
node_scrape_collector_success{collector="ext4"} 1
node_scrape_collector_success{collector="fibrechannel"} 1
node_scrape_collector_success{collector="filefd"} 1
node_scrape_collector_success{collector="hwmon"} 1
Expand Down
26 changes: 26 additions & 0 deletions collector/fixtures/sys.ttar
Original file line number Diff line number Diff line change
Expand Up @@ -803,6 +803,32 @@ Lines: 1
in_sync
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sda/device
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sda/device/iodone_cnt
Lines: 1
307
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sda/device/ioerr_cnt
Lines: 1
3
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/sr0/device
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sr0/device/iodone_cnt
Lines: 1
2767
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/block/sr0/device/ioerr_cnt
Lines: 1
29
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/block/md6/md/rd3
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Expand Down
1 change: 1 addition & 0 deletions end-to-end-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ enabled_collectors=$(cat << COLLECTORS
drbd
edac
entropy
ext4
fibrechannel
filefd
hwmon
Expand Down