Separate io tests to read and write mode with separate latencies (#2)

2026-02-04 17:50:58 +01:00 · 2025-10-22 18:02:26 +02:00
parent 8b3cf64e96
commit 5184c3a03e
6 changed files with 178 additions and 74 deletions
--- a/README.md
+++ b/README.md
@@ -15,7 +15,12 @@ specified via commandline.
 io-exporter [options] <file>
 Options:
 -t --timeout   <int>          When should the operation timeout in seconds
 -s --sleeptime <int>          Time to sleep between checks (default: 5s)
 -l --label     <label=value>  Add label to exported metric
 -i --internals                Also add labels about resource usage
 -r --read                     Only execute the read test
 -w --write                    Only execute the write test
 -d --debug                    Enable debug log level
 -h --help                     Show help
 -v --version                  Show program version
 ```
@@ -31,14 +36,20 @@ io-exporter -l foo=bar -l blah=blubb t/blah
 You'll get such metrics:
 ```default
 # HELP io_exporter_io_latency how long does the operation take in seconds
 # TYPE io_exporter_io_latency gauge
 io_exporter_io_latency{file="/tmp/blah",maxwait="1",namespace="debug",pod="foo1"} 0.0001142815
 # HELP io_exporter_io_operation whether io is working on the pvc, 1=ok, 0=fail
 # TYPE io_exporter_io_operation gauge
-io_exporter_io_operation{file="/tmp/blah",maxwait="1",namespace="debug",pod="foo1"} 1
+io_exporter_io_operation{blah="blubb",exectime="1761148383705",file="t/blah",foo="bar",maxwait="1"} 1
 # HELP io_exporter_io_read_latency how long does the read operation take in seconds
 # TYPE io_exporter_io_read_latency gauge
 io_exporter_io_read_latency{blah="blubb",exectime="1761148383705",file="t/blah",foo="bar",maxwait="1"} 0.0040411716
 # HELP io_exporter_io_write_latency how long does the write operation take in seconds
 # TYPE io_exporter_io_write_latency gauge
 io_exporter_io_write_latency{blah="blubb",exectime="1761148383705",file="t/blah",foo="bar",maxwait="1"} 0
 ```
 You may  also restrict the exporter  to only test read  (`-r` flag) or
 write (`-w` flag) operation.
 ## Installation
 There are no released binaries yet.
--- a/cmd/alloc.go
+++ b/cmd/alloc.go
@@ -1,6 +1,11 @@
 package cmd
-import "github.com/ncw/directio"
+import (
 	"bytes"
 	"errors"
 	"github.com/ncw/directio"
 )
 // aligned allocs used for testing
 type Alloc struct {
@@ -25,3 +30,12 @@ func NewAlloc() *Alloc {
 		readBlock:  directio.AlignedBlock(directio.BlockSize),
 	}
 }
 func (alloc *Alloc) Compare() bool {
 	// compare
 	if !bytes.Equal(alloc.writeBlock, alloc.readBlock) {
 		return report(errors.New("read not the same as written"), nil)
 	}
 	return true
 }
--- a/cmd/config.go
+++ b/cmd/config.go
@@ -15,7 +15,7 @@ import (
 )
 const (
-	Version = `v0.0.4`
+	Version = `v0.0.5`
 	SLEEP   = 5
 	Usage   = `io-exporter [options] <file>
 Options:
@@ -23,9 +23,15 @@ Options:
 -s --sleeptime <int>          Time to sleep between checks (default: 5s)
 -l --label     <label=value>  Add label to exported metric
 -i --internals                Also add labels about resource usage
 -r --read                     Only execute the read test
 -w --write                    Only execute the write test
 -d --debug                    Enable debug log level
 -h --help                     Show help
 -v --version                  Show program version`
 	O_R = iota
 	O_W
 	O_RW
 )
 // config via commandline flags
@@ -34,6 +40,8 @@ type Config struct {
 	Showhelp    bool     `koanf:"help"`      // -h
 	Internals   bool     `koanf:"internals"` // -i
 	Debug       bool     `koanf:"debug"`     // -d
 	ReadMode    bool     `koanf:"read"`      // -r
 	WriteMode   bool     `koanf:"write"`     // -w
 	Label       []string `koanf:"label"`     // -v
 	Timeout     int      `koanf:"timeout"`   // -t
 	Port        int      `koanf:"port"`      // -p
@@ -60,6 +68,8 @@ func InitConfig(output io.Writer) (*Config, error) {
 	flagset.BoolP("help", "h", false, "show help")
 	flagset.BoolP("debug", "d", false, "enable debug logs")
 	flagset.BoolP("internals", "i", false, "add internal metrics")
 	flagset.BoolP("read", "r", false, "only execute the read test")
 	flagset.BoolP("write", "w", false, "only execute the write test")
 	flagset.StringArrayP("label", "l", nil, "additional labels")
 	flagset.IntP("timeout", "t", 1, "timeout for file operation in seconds")
 	flagset.IntP("port", "p", 9187, "prometheus metrics port to listen to")
@@ -103,5 +113,10 @@ func InitConfig(output io.Writer) (*Config, error) {
 		conf.Labels = append(conf.Labels, Label{Name: parts[0], Value: parts[1]})
 	}
 	if !conf.ReadMode && !conf.WriteMode {
 		conf.ReadMode = true
 		conf.WriteMode = true
 	}
 	return conf, nil
 }
--- a/cmd/exporter.go
+++ b/cmd/exporter.go
@@ -1,7 +1,6 @@
 package cmd
 import (
 	"bytes"
 	"context"
 	"errors"
 	"io"
@@ -12,7 +11,7 @@ import (
 	"github.com/ncw/directio"
 )
-func die(err error, fd *os.File) bool {
+func report(err error, fd *os.File) bool {
 	slog.Debug("failed to check io", "error", err)
 	if fd != nil {
@@ -24,8 +23,8 @@ func die(err error, fd *os.File) bool {
 	return false
 }
-// Calls runcheck() with timeout
+// Calls runcheck* with timeout
-func runExporter(file string, alloc *Alloc, timeout time.Duration) bool {
+func runExporter(file string, alloc *Alloc, timeout time.Duration, op int) bool {
 	ctx := context.Background()
 	ctx, cancel := context.WithTimeout(ctx, timeout)
 	defer cancel()
@@ -34,14 +33,19 @@ func runExporter(file string, alloc *Alloc, timeout time.Duration) bool {
 	var res bool
 	go func() {
-		res = runcheck(file, alloc)
+		switch op {
 		case O_R:
 			res = runcheck_r(file, alloc)
 		case O_W:
 			res = runcheck_w(file, alloc)
 		}
 		run <- struct{}{}
 	}()
 	for {
 		select {
 		case <-ctx.Done():
-			return die(ctx.Err(), nil)
+			return report(ctx.Err(), nil)
 		case <-run:
 			return res
 		}
@@ -50,24 +54,49 @@ func runExporter(file string, alloc *Alloc, timeout time.Duration) bool {
 // Checks file io on the specified path:
 //
-// - open the file (create if it doesnt exist)
+// - opens it for reading
 // - truncate it if it already exists
 // - write some data to it
 // - closes the file
 // - re-opens it for reading
 // - reads the block
 // - compares if written block is equal to read block
 // - closes file again
 //
 // Returns false if anything failed during that sequence,
 // true otherwise.
-func runcheck(file string, alloc *Alloc) bool {
+func runcheck_r(file string, alloc *Alloc) bool {
-	alloc.Clean()
+	// read
 	in, err := directio.OpenFile(file, os.O_RDONLY, 0640)
 	if err != nil {
 		report(err, nil)
 	}
 	n, err := io.ReadFull(in, alloc.readBlock)
 	if err != nil {
 		return report(err, in)
 	}
 	if n != len(alloc.writeBlock) {
 		return report(errors.New("failed to read block"), in)
 	}
 	if err := in.Close(); err != nil {
 		return report(err, nil)
 	}
 	return true
 }
 // Checks file io on the specified path:
 //
 // - open the file (create if it doesnt exist)
 // - truncate it if it already exists
 // - write some data to it
 // - closes the file
 //
 // Returns false if anything failed during that sequence,
 // true otherwise.
 func runcheck_w(file string, alloc *Alloc) bool {
 	// write
 	fd, err := directio.OpenFile(file, os.O_RDWR|os.O_TRUNC|os.O_CREATE, 0640)
 	if err != nil {
-		die(err, nil)
+		report(err, nil)
 	}
 	for i := 0; i < len(alloc.writeBlock); i++ {
@@ -76,39 +105,15 @@ func runcheck(file string, alloc *Alloc) bool {
 	n, err := fd.Write(alloc.writeBlock)
 	if err != nil {
-		return die(err, fd)
+		return report(err, fd)
 	}
 	if n != len(alloc.writeBlock) {
-		return die(errors.New("failed to write block"), fd)
+		return report(errors.New("failed to write block"), fd)
 	}
 	if err := fd.Close(); err != nil {
-		return die(err, nil)
+		return report(err, nil)
 	}
 	// read
 	in, err := directio.OpenFile(file, os.O_RDONLY, 0640)
 	if err != nil {
 		die(err, nil)
 	}
 	n, err = io.ReadFull(in, alloc.readBlock)
 	if err != nil {
 		return die(err, in)
 	}
 	if n != len(alloc.writeBlock) {
 		return die(errors.New("failed to read block"), fd)
 	}
 	if err := in.Close(); err != nil {
 		return die(err, nil)
 	}
 	// compare
 	if !bytes.Equal(alloc.writeBlock, alloc.readBlock) {
 		return die(errors.New("read not the same as written"), nil)
 	}
 	return true
--- a/cmd/metrics.go
+++ b/cmd/metrics.go
@@ -2,6 +2,7 @@ package cmd
 import (
 	"fmt"
 	"time"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/collectors"
@@ -15,14 +16,16 @@ type Label struct {
 // simple prometheus wrapper
 type Metrics struct {
 	run       *prometheus.GaugeVec
-	latency  *prometheus.GaugeVec
+	latency_r *prometheus.GaugeVec
 	latency_w *prometheus.GaugeVec
 	registry  *prometheus.Registry
 	values    []string
 	mode      int
 }
 func NewMetrics(conf *Config) *Metrics {
-	labels := []string{"file", "maxwait"}
+	labels := []string{"file", "maxwait", "exectime"}
-	LabelLen := 2
+	LabelLen := 3
 	for _, label := range conf.Labels {
 		labels = append(labels, label.Name)
@@ -36,10 +39,17 @@ func NewMetrics(conf *Config) *Metrics {
 			},
 			labels,
 		),
-		latency: prometheus.NewGaugeVec(
+		latency_r: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
-				Name: "io_exporter_io_latency",
+				Name: "io_exporter_io_read_latency",
-				Help: "how long does the operation take in seconds",
+				Help: "how long does the read operation take in seconds",
 			},
 			labels,
 		),
 		latency_w: prometheus.NewGaugeVec(
 			prometheus.GaugeOpts{
 				Name: "io_exporter_io_write_latency",
 				Help: "how long does the write operation take in seconds",
 			},
 			labels,
 		),
@@ -53,7 +63,8 @@ func NewMetrics(conf *Config) *Metrics {
 	if conf.Internals {
 		metrics.registry.MustRegister(
 			metrics.run,
-			metrics.latency,
+			metrics.latency_r,
 			metrics.latency_w,
 			// we  might need  to take  care of the  exporter in  terms of
 			// resources, so also report those internals
@@ -65,28 +76,50 @@ func NewMetrics(conf *Config) *Metrics {
 			),
 		)
 	} else {
-		metrics.registry.MustRegister(metrics.run, metrics.latency)
+		metrics.registry.MustRegister(metrics.run, metrics.latency_r, metrics.latency_w)
 	}
 	// static labels
 	metrics.values[0] = conf.File
 	metrics.values[1] = fmt.Sprintf("%d", conf.Timeout)
 	metrics.values[2] = fmt.Sprintf("%d", time.Now().UnixMilli())
 	// custom labels via -l label=value
 	for idx, label := range conf.Labels {
 		metrics.values[idx+LabelLen] = label.Value
 	}
 	switch {
 	case conf.ReadMode && conf.WriteMode:
 		metrics.mode = O_RW
 	case conf.ReadMode:
 		metrics.mode = O_R
 	case conf.WriteMode:
 		metrics.mode = O_W
 	}
 	return metrics
 }
-func (metrics *Metrics) Set(result bool, elapsed float64) {
+func (metrics *Metrics) Set(result_r, result_w bool, elapsed_r, elapsed_w float64) {
 	var res float64
-	if result {
+	switch metrics.mode {
 	case O_RW:
 		if result_r && result_w {
 			res = 1
 		}
 	case O_R:
 		if result_r {
 			res = 1
 		}
 	case O_W:
 		if result_w {
 			res = 1
 		}
 	}
 	metrics.run.WithLabelValues(metrics.values...).Set(res)
-	metrics.latency.WithLabelValues(metrics.values...).Set(elapsed)
+	metrics.latency_r.WithLabelValues(metrics.values...).Set(elapsed_r)
 	metrics.latency_w.WithLabelValues(metrics.values...).Set(elapsed_w)
 }
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -30,16 +30,28 @@ func Run() {
 	go func() {
 		for {
-			start := time.Now()
+			var result_r, result_w bool
 			var elapsed_w, elapsed_r float64
-			result := runExporter(conf.File, alloc, time.Duration(conf.Timeout)*time.Second)
+			alloc.Clean()
-			// ns => s
+			if conf.WriteMode {
-			now := time.Now()
+				elapsed_w, result_w = measure(conf.File, alloc, conf.Timeout, O_W)
-			elapsed := float64(now.Sub(start).Nanoseconds()) / 10000000000
+				slog.Debug("elapsed write time", "elapsed", elapsed_w, "result", result_w)
-			slog.Debug("elapsed time", "elapsed", elapsed, "result", result)
+			}
-			metrics.Set(result, elapsed)
+			if conf.ReadMode {
 				elapsed_r, result_r = measure(conf.File, alloc, conf.Timeout, O_R)
 				slog.Debug("elapsed read time", "elapsed", elapsed_r, "result", result_r)
 			}
 			if conf.WriteMode && conf.ReadMode {
 				if !alloc.Compare() {
 					result_r = false
 				}
 			}
 			metrics.Set(result_r, result_w, elapsed_r, elapsed_w)
 			time.Sleep(time.Duration(conf.Sleeptime) * time.Second)
 		}
@@ -52,7 +64,21 @@ func Run() {
 	slog.Info("start testing and serving metrics on localhost", "port", conf.Port)
 	slog.Info("test setup", "file", conf.File, "labels", strings.Join(conf.Label, ","))
 	slog.Info("measuring", "read", conf.ReadMode, "write", conf.WriteMode, "timeout(s)", conf.Timeout)
 	if err := http.ListenAndServe(fmt.Sprintf(":%d", conf.Port), nil); err != nil {
 		log.Fatal(err)
 	}
 }
 func measure(file string, alloc *Alloc, timeout int, mode int) (float64, bool) {
 	start := time.Now()
 	result := runExporter(file, alloc, time.Duration(timeout)*time.Second, mode)
 	// ns => s
 	now := time.Now()
 	elapsed := float64(now.Sub(start).Nanoseconds()) / 10000000000
 	return elapsed, result
 }