Separate io tests to read and write mode with separate latencies (#2)

This commit is contained in:
T.v.Dein
2025-10-22 18:02:26 +02:00
committed by GitHub
parent 8b3cf64e96
commit 5184c3a03e
6 changed files with 178 additions and 74 deletions

View File

@@ -15,7 +15,12 @@ specified via commandline.
io-exporter [options] <file> io-exporter [options] <file>
Options: Options:
-t --timeout <int> When should the operation timeout in seconds -t --timeout <int> When should the operation timeout in seconds
-s --sleeptime <int> Time to sleep between checks (default: 5s)
-l --label <label=value> Add label to exported metric -l --label <label=value> Add label to exported metric
-i --internals Also add labels about resource usage
-r --read Only execute the read test
-w --write Only execute the write test
-d --debug Enable debug log level
-h --help Show help -h --help Show help
-v --version Show program version -v --version Show program version
``` ```
@@ -31,14 +36,20 @@ io-exporter -l foo=bar -l blah=blubb t/blah
You'll get such metrics: You'll get such metrics:
```default ```default
# HELP io_exporter_io_latency how long does the operation take in seconds
# TYPE io_exporter_io_latency gauge
io_exporter_io_latency{file="/tmp/blah",maxwait="1",namespace="debug",pod="foo1"} 0.0001142815
# HELP io_exporter_io_operation whether io is working on the pvc, 1=ok, 0=fail # HELP io_exporter_io_operation whether io is working on the pvc, 1=ok, 0=fail
# TYPE io_exporter_io_operation gauge # TYPE io_exporter_io_operation gauge
io_exporter_io_operation{file="/tmp/blah",maxwait="1",namespace="debug",pod="foo1"} 1 io_exporter_io_operation{blah="blubb",exectime="1761148383705",file="t/blah",foo="bar",maxwait="1"} 1
# HELP io_exporter_io_read_latency how long does the read operation take in seconds
# TYPE io_exporter_io_read_latency gauge
io_exporter_io_read_latency{blah="blubb",exectime="1761148383705",file="t/blah",foo="bar",maxwait="1"} 0.0040411716
# HELP io_exporter_io_write_latency how long does the write operation take in seconds
# TYPE io_exporter_io_write_latency gauge
io_exporter_io_write_latency{blah="blubb",exectime="1761148383705",file="t/blah",foo="bar",maxwait="1"} 0
``` ```
You may also restrict the exporter to test only the read operation
(`-r` flag) or only the write operation (`-w` flag).
## Installation ## Installation
There are no released binaries yet. There are no released binaries yet.

View File

@@ -1,6 +1,11 @@
package cmd package cmd
import "github.com/ncw/directio" import (
"bytes"
"errors"
"github.com/ncw/directio"
)
// aligned allocs used for testing // aligned allocs used for testing
type Alloc struct { type Alloc struct {
@@ -25,3 +30,12 @@ func NewAlloc() *Alloc {
readBlock: directio.AlignedBlock(directio.BlockSize), readBlock: directio.AlignedBlock(directio.BlockSize),
} }
} }
// Compare checks whether the block that was read back is byte-for-byte
// identical to the block that was written.
//
// On a mismatch the error is handed to report() (without a file handle)
// and its return value is passed on; otherwise true is returned.
func (alloc *Alloc) Compare() bool {
	if bytes.Equal(alloc.writeBlock, alloc.readBlock) {
		return true
	}

	return report(errors.New("read not the same as written"), nil)
}

View File

@@ -15,7 +15,7 @@ import (
) )
const ( const (
Version = `v0.0.4` Version = `v0.0.5`
SLEEP = 5 SLEEP = 5
Usage = `io-exporter [options] <file> Usage = `io-exporter [options] <file>
Options: Options:
@@ -23,9 +23,15 @@ Options:
-s --sleeptime <int> Time to sleep between checks (default: 5s) -s --sleeptime <int> Time to sleep between checks (default: 5s)
-l --label <label=value> Add label to exported metric -l --label <label=value> Add label to exported metric
-i --internals Also add labels about resource usage -i --internals Also add labels about resource usage
-r --read Only execute the read test
-w --write Only execute the write test
-d --debug Enable debug log level -d --debug Enable debug log level
-h --help Show help -h --help Show help
-v --version Show program version` -v --version Show program version`
O_R = iota
O_W
O_RW
) )
// config via commandline flags // config via commandline flags
@@ -34,6 +40,8 @@ type Config struct {
Showhelp bool `koanf:"help"` // -h Showhelp bool `koanf:"help"` // -h
Internals bool `koanf:"internals"` // -i Internals bool `koanf:"internals"` // -i
Debug bool `koanf:"debug"` // -d Debug bool `koanf:"debug"` // -d
ReadMode bool `koanf:"read"` // -r
WriteMode bool `koanf:"write"` // -w
Label []string `koanf:"label"` // -l Label []string `koanf:"label"` // -l
Timeout int `koanf:"timeout"` // -t Timeout int `koanf:"timeout"` // -t
Port int `koanf:"port"` // -p Port int `koanf:"port"` // -p
@@ -60,6 +68,8 @@ func InitConfig(output io.Writer) (*Config, error) {
flagset.BoolP("help", "h", false, "show help") flagset.BoolP("help", "h", false, "show help")
flagset.BoolP("debug", "d", false, "enable debug logs") flagset.BoolP("debug", "d", false, "enable debug logs")
flagset.BoolP("internals", "i", false, "add internal metrics") flagset.BoolP("internals", "i", false, "add internal metrics")
flagset.BoolP("read", "r", false, "only execute the read test")
flagset.BoolP("write", "w", false, "only execute the write test")
flagset.StringArrayP("label", "l", nil, "additional labels") flagset.StringArrayP("label", "l", nil, "additional labels")
flagset.IntP("timeout", "t", 1, "timeout for file operation in seconds") flagset.IntP("timeout", "t", 1, "timeout for file operation in seconds")
flagset.IntP("port", "p", 9187, "prometheus metrics port to listen to") flagset.IntP("port", "p", 9187, "prometheus metrics port to listen to")
@@ -103,5 +113,10 @@ func InitConfig(output io.Writer) (*Config, error) {
conf.Labels = append(conf.Labels, Label{Name: parts[0], Value: parts[1]}) conf.Labels = append(conf.Labels, Label{Name: parts[0], Value: parts[1]})
} }
if !conf.ReadMode && !conf.WriteMode {
conf.ReadMode = true
conf.WriteMode = true
}
return conf, nil return conf, nil
} }

View File

@@ -1,7 +1,6 @@
package cmd package cmd
import ( import (
"bytes"
"context" "context"
"errors" "errors"
"io" "io"
@@ -12,7 +11,7 @@ import (
"github.com/ncw/directio" "github.com/ncw/directio"
) )
func die(err error, fd *os.File) bool { func report(err error, fd *os.File) bool {
slog.Debug("failed to check io", "error", err) slog.Debug("failed to check io", "error", err)
if fd != nil { if fd != nil {
@@ -24,8 +23,8 @@ func die(err error, fd *os.File) bool {
return false return false
} }
// Calls runcheck() with timeout // Calls runcheck* with timeout
func runExporter(file string, alloc *Alloc, timeout time.Duration) bool { func runExporter(file string, alloc *Alloc, timeout time.Duration, op int) bool {
ctx := context.Background() ctx := context.Background()
ctx, cancel := context.WithTimeout(ctx, timeout) ctx, cancel := context.WithTimeout(ctx, timeout)
defer cancel() defer cancel()
@@ -34,14 +33,19 @@ func runExporter(file string, alloc *Alloc, timeout time.Duration) bool {
var res bool var res bool
go func() { go func() {
res = runcheck(file, alloc) switch op {
case O_R:
res = runcheck_r(file, alloc)
case O_W:
res = runcheck_w(file, alloc)
}
run <- struct{}{} run <- struct{}{}
}() }()
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
return die(ctx.Err(), nil) return report(ctx.Err(), nil)
case <-run: case <-run:
return res return res
} }
@@ -50,24 +54,49 @@ func runExporter(file string, alloc *Alloc, timeout time.Duration) bool {
// Checks file io on the specified path: // Checks file io on the specified path:
// //
// - open the file (create if it doesnt exist) // - opens it for reading
// - truncate it if it already exists
// - write some data to it
// - closes the file
// - re-opens it for reading
// - reads the block // - reads the block
// - compares if written block is equal to read block
// - closes file again // - closes file again
// //
// Returns false if anything failed during that sequence, // Returns false if anything failed during that sequence,
// true otherwise. // true otherwise.
func runcheck(file string, alloc *Alloc) bool { func runcheck_r(file string, alloc *Alloc) bool {
alloc.Clean() // read
in, err := directio.OpenFile(file, os.O_RDONLY, 0640)
if err != nil {
report(err, nil)
}
n, err := io.ReadFull(in, alloc.readBlock)
if err != nil {
return report(err, in)
}
if n != len(alloc.writeBlock) {
return report(errors.New("failed to read block"), in)
}
if err := in.Close(); err != nil {
return report(err, nil)
}
return true
}
// Checks file io on the specified path:
//
// - open the file (create if it doesnt exist)
// - truncate it if it already exists
// - write some data to it
// - closes the file
//
// Returns false if anything failed during that sequence,
// true otherwise.
func runcheck_w(file string, alloc *Alloc) bool {
// write // write
fd, err := directio.OpenFile(file, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0640) fd, err := directio.OpenFile(file, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0640)
if err != nil { if err != nil {
die(err, nil) report(err, nil)
} }
for i := 0; i < len(alloc.writeBlock); i++ { for i := 0; i < len(alloc.writeBlock); i++ {
@@ -76,39 +105,15 @@ func runcheck(file string, alloc *Alloc) bool {
n, err := fd.Write(alloc.writeBlock) n, err := fd.Write(alloc.writeBlock)
if err != nil { if err != nil {
return die(err, fd) return report(err, fd)
} }
if n != len(alloc.writeBlock) { if n != len(alloc.writeBlock) {
return die(errors.New("failed to write block"), fd) return report(errors.New("failed to write block"), fd)
} }
if err := fd.Close(); err != nil { if err := fd.Close(); err != nil {
return die(err, nil) return report(err, nil)
}
// read
in, err := directio.OpenFile(file, os.O_RDONLY, 0640)
if err != nil {
die(err, nil)
}
n, err = io.ReadFull(in, alloc.readBlock)
if err != nil {
return die(err, in)
}
if n != len(alloc.writeBlock) {
return die(errors.New("failed to read block"), fd)
}
if err := in.Close(); err != nil {
return die(err, nil)
}
// compare
if !bytes.Equal(alloc.writeBlock, alloc.readBlock) {
return die(errors.New("read not the same as written"), nil)
} }
return true return true

View File

@@ -2,6 +2,7 @@ package cmd
import ( import (
"fmt" "fmt"
"time"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors" "github.com/prometheus/client_golang/prometheus/collectors"
@@ -15,14 +16,16 @@ type Label struct {
// simple prometheus wrapper // simple prometheus wrapper
type Metrics struct { type Metrics struct {
run *prometheus.GaugeVec run *prometheus.GaugeVec
latency *prometheus.GaugeVec latency_r *prometheus.GaugeVec
latency_w *prometheus.GaugeVec
registry *prometheus.Registry registry *prometheus.Registry
values []string values []string
mode int
} }
func NewMetrics(conf *Config) *Metrics { func NewMetrics(conf *Config) *Metrics {
labels := []string{"file", "maxwait"} labels := []string{"file", "maxwait", "exectime"}
LabelLen := 2 LabelLen := 3
for _, label := range conf.Labels { for _, label := range conf.Labels {
labels = append(labels, label.Name) labels = append(labels, label.Name)
@@ -36,10 +39,17 @@ func NewMetrics(conf *Config) *Metrics {
}, },
labels, labels,
), ),
latency: prometheus.NewGaugeVec( latency_r: prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Name: "io_exporter_io_latency", Name: "io_exporter_io_read_latency",
Help: "how long does the operation take in seconds", Help: "how long does the read operation take in seconds",
},
labels,
),
latency_w: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "io_exporter_io_write_latency",
Help: "how long does the write operation take in seconds",
}, },
labels, labels,
), ),
@@ -53,7 +63,8 @@ func NewMetrics(conf *Config) *Metrics {
if conf.Internals { if conf.Internals {
metrics.registry.MustRegister( metrics.registry.MustRegister(
metrics.run, metrics.run,
metrics.latency, metrics.latency_r,
metrics.latency_w,
// we might need to take care of the exporter in terms of // we might need to take care of the exporter in terms of
// resources, so also report those internals // resources, so also report those internals
@@ -65,28 +76,50 @@ func NewMetrics(conf *Config) *Metrics {
), ),
) )
} else { } else {
metrics.registry.MustRegister(metrics.run, metrics.latency) metrics.registry.MustRegister(metrics.run, metrics.latency_r, metrics.latency_w)
} }
// static labels // static labels
metrics.values[0] = conf.File metrics.values[0] = conf.File
metrics.values[1] = fmt.Sprintf("%d", conf.Timeout) metrics.values[1] = fmt.Sprintf("%d", conf.Timeout)
metrics.values[2] = fmt.Sprintf("%d", time.Now().UnixMilli())
// custom labels via -l label=value // custom labels via -l label=value
for idx, label := range conf.Labels { for idx, label := range conf.Labels {
metrics.values[idx+LabelLen] = label.Value metrics.values[idx+LabelLen] = label.Value
} }
switch {
case conf.ReadMode && conf.WriteMode:
metrics.mode = O_RW
case conf.ReadMode:
metrics.mode = O_R
case conf.WriteMode:
metrics.mode = O_W
}
return metrics return metrics
} }
func (metrics *Metrics) Set(result bool, elapsed float64) { func (metrics *Metrics) Set(result_r, result_w bool, elapsed_r, elapsed_w float64) {
var res float64 var res float64
if result { switch metrics.mode {
case O_RW:
if result_r && result_w {
res = 1 res = 1
} }
case O_R:
if result_r {
res = 1
}
case O_W:
if result_w {
res = 1
}
}
metrics.run.WithLabelValues(metrics.values...).Set(res) metrics.run.WithLabelValues(metrics.values...).Set(res)
metrics.latency.WithLabelValues(metrics.values...).Set(elapsed) metrics.latency_r.WithLabelValues(metrics.values...).Set(elapsed_r)
metrics.latency_w.WithLabelValues(metrics.values...).Set(elapsed_w)
} }

View File

@@ -30,16 +30,28 @@ func Run() {
go func() { go func() {
for { for {
start := time.Now() var result_r, result_w bool
var elapsed_w, elapsed_r float64
result := runExporter(conf.File, alloc, time.Duration(conf.Timeout)*time.Second) alloc.Clean()
// ns => s if conf.WriteMode {
now := time.Now() elapsed_w, result_w = measure(conf.File, alloc, conf.Timeout, O_W)
elapsed := float64(now.Sub(start).Nanoseconds()) / 10000000000 slog.Debug("elapsed write time", "elapsed", elapsed_w, "result", result_w)
slog.Debug("elapsed time", "elapsed", elapsed, "result", result) }
metrics.Set(result, elapsed) if conf.ReadMode {
elapsed_r, result_r = measure(conf.File, alloc, conf.Timeout, O_R)
slog.Debug("elapsed read time", "elapsed", elapsed_r, "result", result_r)
}
if conf.WriteMode && conf.ReadMode {
if !alloc.Compare() {
result_r = false
}
}
metrics.Set(result_r, result_w, elapsed_r, elapsed_w)
time.Sleep(time.Duration(conf.Sleeptime) * time.Second) time.Sleep(time.Duration(conf.Sleeptime) * time.Second)
} }
@@ -52,7 +64,21 @@ func Run() {
slog.Info("start testing and serving metrics on localhost", "port", conf.Port) slog.Info("start testing and serving metrics on localhost", "port", conf.Port)
slog.Info("test setup", "file", conf.File, "labels", strings.Join(conf.Label, ",")) slog.Info("test setup", "file", conf.File, "labels", strings.Join(conf.Label, ","))
slog.Info("measuring", "read", conf.ReadMode, "write", conf.WriteMode, "timeout(s)", conf.Timeout)
if err := http.ListenAndServe(fmt.Sprintf(":%d", conf.Port), nil); err != nil { if err := http.ListenAndServe(fmt.Sprintf(":%d", conf.Port), nil); err != nil {
log.Fatal(err) log.Fatal(err)
} }
} }
// measure runs a single io check via runExporter and times it.
//
// timeout is given in seconds and is passed to runExporter as a
// time.Duration; mode is forwarded unchanged (callers pass O_R or O_W).
//
// Returns the elapsed wall-clock time in seconds and the result
// reported by runExporter.
func measure(file string, alloc *Alloc, timeout int, mode int) (float64, bool) {
	start := time.Now()

	result := runExporter(file, alloc, time.Duration(timeout)*time.Second, mode)

	// ns => s: let time.Duration do the conversion so the divisor cannot
	// be off by a power of ten (1s is 1e9ns, not 1e10ns)
	elapsed := time.Since(start).Seconds()

	return elapsed, result
}