From 5184c3a03ec846bcabc2ee5f79a4e8437513f929 Mon Sep 17 00:00:00 2001 From: "T.v.Dein" Date: Wed, 22 Oct 2025 18:02:26 +0200 Subject: [PATCH] Separate io tests to read and write mode with separate latencies (#2) --- README.md | 27 ++++++++++----- cmd/alloc.go | 16 ++++++++- cmd/config.go | 17 +++++++++- cmd/exporter.go | 89 ++++++++++++++++++++++++++----------------------- cmd/metrics.go | 63 +++++++++++++++++++++++++--------- cmd/root.go | 40 ++++++++++++++++++---- 6 files changed, 178 insertions(+), 74 deletions(-) diff --git a/README.md b/README.md index 0e82821..e7252e2 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,15 @@ specified via commandline. ```default io-exporter [options] Options: --t --timeout When should the operation timeout in seconds --l --label Add label to exported metric --h --help Show help --v --version Show program version +-t --timeout When should the operation timeout in seconds +-s --sleeptime Time to sleep between checks (default: 5s) +-l --label Add label to exported metric +-i --internals Also add labels about resource usage +-r --read Only execute the read test +-w --write Only execute the write test +-d --debug Enable debug log level +-h --help Show help +-v --version Show program version ``` ## Output @@ -31,14 +36,20 @@ io-exporter -l foo=bar -l blah=blubb t/blah You'll get such metrics: ```default -# HELP io_exporter_io_latency how long does the operation take in seconds -# TYPE io_exporter_io_latency gauge -io_exporter_io_latency{file="/tmp/blah",maxwait="1",namespace="debug",pod="foo1"} 0.0001142815 # HELP io_exporter_io_operation whether io is working on the pvc, 1=ok, 0=fail # TYPE io_exporter_io_operation gauge -io_exporter_io_operation{file="/tmp/blah",maxwait="1",namespace="debug",pod="foo1"} 1 +io_exporter_io_operation{blah="blubb",exectime="1761148383705",file="t/blah",foo="bar",maxwait="1"} 1 +# HELP io_exporter_io_read_latency how long does the read operation take in seconds +# TYPE io_exporter_io_read_latency gauge +io_exporter_io_read_latency{blah="blubb",exectime="1761148383705",file="t/blah",foo="bar",maxwait="1"} 0.0040411716 +# HELP io_exporter_io_write_latency how long does the write operation take in seconds +# TYPE io_exporter_io_write_latency gauge +io_exporter_io_write_latency{blah="blubb",exectime="1761148383705",file="t/blah",foo="bar",maxwait="1"} 0 ``` +You may also restrict the exporter to only test read (`-r` flag) or +write (`-w` flag) operation. + ## Installation There are no released binaries yet. diff --git a/cmd/alloc.go b/cmd/alloc.go index ddf5a10..7025c6e 100644 --- a/cmd/alloc.go +++ b/cmd/alloc.go @@ -1,6 +1,11 @@ package cmd -import "github.com/ncw/directio" +import ( + "bytes" + "errors" + + "github.com/ncw/directio" +) // aligned allocs used for testing type Alloc struct { @@ -25,3 +30,12 @@ func NewAlloc() *Alloc { readBlock: directio.AlignedBlock(directio.BlockSize), } } + +func (alloc *Alloc) Compare() bool { + // compare + if !bytes.Equal(alloc.writeBlock, alloc.readBlock) { + return report(errors.New("read not the same as written"), nil) + } + + return true +} diff --git a/cmd/config.go b/cmd/config.go index bdf223c..e7eeae4 100644 --- a/cmd/config.go +++ b/cmd/config.go @@ -15,7 +15,7 @@ import ( ) const ( - Version = `v0.0.4` + Version = `v0.0.5` SLEEP = 5 Usage = `io-exporter [options] Options: @@ -23,9 +23,15 @@ Options: -s --sleeptime Time to sleep between checks (default: 5s) -l --label Add label to exported metric -i --internals Also add labels about resource usage +-r --read Only execute the read test +-w --write Only execute the write test -d --debug Enable debug log level -h --help Show help -v --version Show program version` + + O_R = iota + O_W + O_RW ) // config via commandline flags @@ -34,6 +40,8 @@ type Config struct { Showhelp bool `koanf:"help"` // -h Internals bool `koanf:"internals"` // -i Debug bool `koanf:"debug"` // -d + ReadMode bool `koanf:"read"` // -r + WriteMode bool `koanf:"write"` // -w Label []string `koanf:"label"` // -v Timeout int `koanf:"timeout"` // -t Port int `koanf:"port"` // -p @@ -60,6 +68,8 @@ func InitConfig(output io.Writer) (*Config, error) { flagset.BoolP("help", "h", false, "show help") flagset.BoolP("debug", "d", false, "enable debug logs") flagset.BoolP("internals", "i", false, "add internal metrics") + flagset.BoolP("read", "r", false, "only execute the read test") + flagset.BoolP("write", "w", false, "only execute the write test") flagset.StringArrayP("label", "l", nil, "additional labels") flagset.IntP("timeout", "t", 1, "timeout for file operation in seconds") flagset.IntP("port", "p", 9187, "prometheus metrics port to listen to") @@ -103,5 +113,10 @@ func InitConfig(output io.Writer) (*Config, error) { conf.Labels = append(conf.Labels, Label{Name: parts[0], Value: parts[1]}) } + if !conf.ReadMode && !conf.WriteMode { + conf.ReadMode = true + conf.WriteMode = true + } + return conf, nil } diff --git a/cmd/exporter.go b/cmd/exporter.go index 25c139d..86fd5ec 100644 --- a/cmd/exporter.go +++ b/cmd/exporter.go @@ -1,7 +1,6 @@ package cmd import ( - "bytes" "context" "errors" "io" @@ -12,7 +11,7 @@ import ( "github.com/ncw/directio" ) -func die(err error, fd *os.File) bool { +func report(err error, fd *os.File) bool { slog.Debug("failed to check io", "error", err) if fd != nil { @@ -24,8 +23,8 @@ func die(err error, fd *os.File) bool { return false } -// Calls runcheck() with timeout -func runExporter(file string, alloc *Alloc, timeout time.Duration) bool { +// Calls runcheck* with timeout +func runExporter(file string, alloc *Alloc, timeout time.Duration, op int) bool { ctx := context.Background() ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -34,14 +33,19 @@ func runExporter(file string, alloc *Alloc, timeout time.Duration) bool { var res bool go func() { - res = runcheck(file, alloc) + switch op { + case O_R: + res = runcheck_r(file, alloc) + case O_W: + res = runcheck_w(file, alloc) + } run <- struct{}{} }() for { select { case <-ctx.Done(): - return die(ctx.Err(), nil) + return report(ctx.Err(), nil) case <-run: return res } @@ -50,24 +54,49 @@ func runExporter(file string, alloc *Alloc, timeout time.Duration) bool { // Checks file io on the specified path: // -// - open the file (create if it doesnt exist) -// - truncate it if it already exists -// - write some data to it -// - closes the file -// - re-opens it for reading +// - opens it for reading // - reads the block -// - compares if written block is equal to read block // - closes file again // // Returns false if anything failed during that sequence, // true otherwise. -func runcheck(file string, alloc *Alloc) bool { - alloc.Clean() +func runcheck_r(file string, alloc *Alloc) bool { + // read + in, err := directio.OpenFile(file, os.O_RDONLY, 0640) + if err != nil { + report(err, nil) + } + n, err := io.ReadFull(in, alloc.readBlock) + if err != nil { + return report(err, in) + } + + if n != len(alloc.writeBlock) { + return report(errors.New("failed to read block"), in) + } + + if err := in.Close(); err != nil { + return report(err, nil) + } + + return true +} + +// Checks file io on the specified path: +// +// - open the file (create if it doesnt exist) +// - truncate it if it already exists +// - write some data to it +// - closes the file +// +// Returns false if anything failed during that sequence, +// true otherwise. +func runcheck_w(file string, alloc *Alloc) bool { // write fd, err := directio.OpenFile(file, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0640) if err != nil { - die(err, nil) + report(err, nil) } for i := 0; i < len(alloc.writeBlock); i++ { @@ -76,39 +105,15 @@ func runcheck(file string, alloc *Alloc) bool { n, err := fd.Write(alloc.writeBlock) if err != nil { - return die(err, fd) + return report(err, fd) } if n != len(alloc.writeBlock) { - return die(errors.New("failed to write block"), fd) + return report(errors.New("failed to write block"), fd) } if err := fd.Close(); err != nil { - return die(err, nil) - } - - // read - in, err := directio.OpenFile(file, os.O_RDONLY, 0640) - if err != nil { - die(err, nil) - } - - n, err = io.ReadFull(in, alloc.readBlock) - if err != nil { - return die(err, in) - } - - if n != len(alloc.writeBlock) { - return die(errors.New("failed to read block"), fd) - } - - if err := in.Close(); err != nil { - return die(err, nil) - } - - // compare - if !bytes.Equal(alloc.writeBlock, alloc.readBlock) { - return die(errors.New("read not the same as written"), nil) + return report(err, nil) } return true diff --git a/cmd/metrics.go b/cmd/metrics.go index 565c115..ff893a0 100644 --- a/cmd/metrics.go +++ b/cmd/metrics.go @@ -2,6 +2,7 @@ package cmd import ( "fmt" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" @@ -14,15 +15,17 @@ type Label struct { // simple prometheus wrapper type Metrics struct { - run *prometheus.GaugeVec - latency *prometheus.GaugeVec - registry *prometheus.Registry - values []string + run *prometheus.GaugeVec + latency_r *prometheus.GaugeVec + latency_w *prometheus.GaugeVec + registry *prometheus.Registry + values []string + mode int } func NewMetrics(conf *Config) *Metrics { - labels := []string{"file", "maxwait"} - LabelLen := 2 + labels := []string{"file", "maxwait", "exectime"} + LabelLen := 3 for _, label := range conf.Labels { labels = append(labels, label.Name) @@ -36,10 +39,17 @@ func NewMetrics(conf *Config) *Metrics { }, labels, ), - latency: prometheus.NewGaugeVec( + latency_r: prometheus.NewGaugeVec( prometheus.GaugeOpts{ - Name: "io_exporter_io_latency", - Help: "how long does the operation take in seconds", + Name: "io_exporter_io_read_latency", + Help: "how long does the read operation take in seconds", + }, + labels, + ), + latency_w: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "io_exporter_io_write_latency", + Help: "how long does the write operation take in seconds", }, labels, ), @@ -53,7 +63,8 @@ func NewMetrics(conf *Config) *Metrics { if conf.Internals { metrics.registry.MustRegister( metrics.run, - metrics.latency, + metrics.latency_r, + metrics.latency_w, // we might need to take care of the exporter in terms of // resources, so also report those internals @@ -65,28 +76,50 @@ func NewMetrics(conf *Config) *Metrics { ), ) } else { - metrics.registry.MustRegister(metrics.run, metrics.latency) + metrics.registry.MustRegister(metrics.run, metrics.latency_r, metrics.latency_w) } // static labels metrics.values[0] = conf.File metrics.values[1] = fmt.Sprintf("%d", conf.Timeout) + metrics.values[2] = fmt.Sprintf("%d", time.Now().UnixMilli()) // custom labels via -l label=value for idx, label := range conf.Labels { metrics.values[idx+LabelLen] = label.Value } + switch { + case conf.ReadMode && conf.WriteMode: + metrics.mode = O_RW + case conf.ReadMode: + metrics.mode = O_R + case conf.WriteMode: + metrics.mode = O_W + } + return metrics } -func (metrics *Metrics) Set(result bool, elapsed float64) { +func (metrics *Metrics) Set(result_r, result_w bool, elapsed_r, elapsed_w float64) { var res float64 - if result { - res = 1 + switch metrics.mode { + case O_RW: + if result_r && result_w { + res = 1 + } + case O_R: + if result_r { + res = 1 + } + case O_W: + if result_w { + res = 1 + } } metrics.run.WithLabelValues(metrics.values...).Set(res) - metrics.latency.WithLabelValues(metrics.values...).Set(elapsed) + metrics.latency_r.WithLabelValues(metrics.values...).Set(elapsed_r) + metrics.latency_w.WithLabelValues(metrics.values...).Set(elapsed_w) } diff --git a/cmd/root.go b/cmd/root.go index 7bfbefb..f7ba0a3 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -30,16 +30,28 @@ func Run() { go func() { for { - start := time.Now() + var result_r, result_w bool + var elapsed_w, elapsed_r float64 - result := runExporter(conf.File, alloc, time.Duration(conf.Timeout)*time.Second) + alloc.Clean() - // ns => s - now := time.Now() - elapsed := float64(now.Sub(start).Nanoseconds()) / 10000000000 - slog.Debug("elapsed time", "elapsed", elapsed, "result", result) + if conf.WriteMode { + elapsed_w, result_w = measure(conf.File, alloc, conf.Timeout, O_W) + slog.Debug("elapsed write time", "elapsed", elapsed_w, "result", result_w) + } - metrics.Set(result, elapsed) + if conf.ReadMode { + elapsed_r, result_r = measure(conf.File, alloc, conf.Timeout, O_R) + slog.Debug("elapsed read time", "elapsed", elapsed_r, "result", result_r) + } + + if conf.WriteMode && conf.ReadMode { + if !alloc.Compare() { + result_r = false + } + } + + metrics.Set(result_r, result_w, elapsed_r, elapsed_w) time.Sleep(time.Duration(conf.Sleeptime) * time.Second) } @@ -52,7 +64,21 @@ func Run() { slog.Info("start testing and serving metrics on localhost", "port", conf.Port) slog.Info("test setup", "file", conf.File, "labels", strings.Join(conf.Label, ",")) + slog.Info("measuring", "read", conf.ReadMode, "write", conf.WriteMode, "timeout(s)", conf.Timeout) + if err := http.ListenAndServe(fmt.Sprintf(":%d", conf.Port), nil); err != nil { log.Fatal(err) } } + +func measure(file string, alloc *Alloc, timeout int, mode int) (float64, bool) { + start := time.Now() + + result := runExporter(file, alloc, time.Duration(timeout)*time.Second, mode) + + // ns => s + now := time.Now() + elapsed := float64(now.Sub(start).Nanoseconds()) / 10000000000 + + return elapsed, result +}