added arithmetic mean

2026-02-04 11:10:57 +01:00 · 2024-10-13 13:31:57 +02:00
parent a56f76acdc
commit aff1194a89
4 changed files with 85 additions and 9 deletions
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ with go as a reusable module.
 ## Features

 - standalone module without external dependencies
- uses 3 different metrics to measure password quality
+- uses 5 different metrics to measure password quality
 - you can configure which metric to use
 - you can also configure the quality thresholds
 - there's support for dictionary lookup, but you need to provide the dictionary yourself 
@@ -88,6 +88,21 @@ Of course we do not use RLE. We measure compression
 using the [Flate algorithm](
 https://en.m.wikipedia.org/wiki/Deflate).

+### Optional: arithmetic mean value
+
+This is simply the sum of the byte values of all characters divided by
+the password length. The ideal value would be ~80, because most normal
+letters hang out in the upper area between 32 (space) and 126 (tilde).
+We consider a password ok if its mean lies within 5 of this value. If
+the mean departs further from it, the characters are consistently high
+or low (e.g. only lower case letters, or mostly numbers and upper case
+letters). The tolerance, 5, can be tweaked: the larger the number, the
+laxer the result.
+
+Please be  warned, that this  metric will in  most cases give  you bad
+results on otherwise good passwords,  such as diceware passwords. Only
+use it if you know what you're doing.
+
 ### Optional: dictionary check

 You can supply a dictionary of words of your
@@ -103,12 +118,8 @@ you can tune the quality thresholds as needed.
 ### Future/ ToDo

 - checksum test using supplied checksum list, e.g. of leaked passwords
- fuzzy testing against dictionary to catch variations
- chi square test (see  http://www.fourmilab.ch/random/)
- Arithmetic mean value test
- Monte Carlo value test
- Serial correlation
- maybe some dieharder tests
+-  fuzzy  testing  against   dictionary  to  catch  variations,  using
+  Levenshtein or something similar.


 ## Usage
--- a/example/test.go
+++ b/example/test.go
@@ -16,7 +16,8 @@ Compression rate         0%%             min %d%%     %d%%
 Character distribution   100%%           min %0.2f%%  %0.2f%%
 Character entropy        8.0 bits/char  min %0.2f    %0.2f bits/char
 Character redundancy     0.0%%           max %0.2f%%  %0.2f%%
-Dictionary match         false          false       %t 
+Dictionary match         false          false       %t
+Arithmetic mean             80          true        %0.2f
 ------------------------------------------------------------------
 Validation response                                 %t
 `
@@ -28,6 +29,7 @@ func main() {
 		Entropy:          valpass.MIN_ENTROPY,
 		Dictionary:       &valpass.Dictionary{Words: ReadDict("t/american-english")},
 		UTF8:             false,
+		Mean:             20, //valpass.LIMIT_MEAN,
 	}

 	res, err := valpass.Validate(os.Args[1], opts)
@@ -46,6 +48,7 @@ func main() {
 		100-opts.CharDistribution,
 		100-res.CharDistribution,
 		res.DictionaryMatch,
+		res.Mean,
 		res.Ok,
 	)

--- a/lib.go
+++ b/lib.go
@@ -26,6 +26,7 @@ type Options struct {
 	Entropy          float64     // minimum entropy value in bits/char
 	Dictionary       *Dictionary // if set, lookup given dictionary, the caller provides it
 	UTF8             bool        // if true work on unicode utf-8 space, not just bytes
+	Mean             float64     // if >0, check the arithmetic mean; the value is the allowed deviation from MIDDLE_MEAN
 }

 /*
@@ -43,6 +44,14 @@ const (
 	//  we start  our ascii  arrays  at char(32),  so to  have max  95
 	// elements in the slice, we subtract 32 from each ascii code
 	MIN_ASCII byte = 32
+
+	// arithmetic mean limits: we work on chr(32) til chr(126) in
+	// ascii. The mean value, however, is not 63 (half of 126) as one
+	// might suppose, but about 80: the range starts at 32 and most used
+	// printable ascii chars sit in its upper area. So we take 80 as the
+	// middle ground and use 5 up or down as the tolerated deviation.
+	MIDDLE_MEAN float64 = 80
+	LIMIT_MEAN  float64 = 5
 )

 /*
@@ -54,6 +63,7 @@ type Result struct {
 	Compress         int     // actual compression rate in percent
 	CharDistribution float64 // actual character distribution in percent
 	Entropy          float64 // actual entropy value in bits/chars
+	Mean             float64 // actual arithmetic mean, close to MIDDLE_MEAN (80) is best
 }

 /*
@@ -70,7 +80,8 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
 		MIN_DIST,
 		MIN_ENTROPY,
 		nil,
-		false,
+		false, // dict: default off
+		0,     // mean: default off
 	}

 	if len(opts) == 1 {
@@ -144,6 +155,16 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
 		}
 	}

+	if options.Mean > 0 {
+		mean := GetArithmeticMean(passphrase)
+
+		if mean > (MIDDLE_MEAN+options.Mean) || mean < (MIDDLE_MEAN-options.Mean) {
+			result.Ok = false
+		}
+
+		result.Mean = mean
+	}
+
 	return result, nil
 }

@@ -316,3 +337,27 @@ func GetDictMatch(passphrase string, dict *Dictionary) (bool, error) {

 	return false, nil
 }
+
+// GetArithmeticMean returns the arithmetic mean of the byte values of
+// passphrase, i.e. the sum of all bytes divided by the number of bytes.
+//
+// The string is processed byte-wise (US-ASCII space), so a multi-byte
+// UTF-8 character contributes each of its bytes separately. For input
+// made of printable ASCII only, the result lies between 32 and 126.
+//
+// An empty passphrase has no mean. 0 is returned in that case rather
+// than the NaN that 0/0 would yield, because NaN compares false
+// against every limit and would slip through range checks unnoticed.
+func GetArithmeticMean(passphrase string) float64 {
+	if len(passphrase) == 0 {
+		return 0
+	}
+
+	sum := 0.0
+	// index the string directly: ranging over a string would yield runes
+	for i := 0; i < len(passphrase); i++ {
+		sum += float64(passphrase[i])
+	}
+
+	return sum / float64(len(passphrase))
+}
--- a/lib_test.go
+++ b/lib_test.go
@@ -182,6 +182,10 @@ var opts_dict = valpass.Options{
 	UTF8:             false,
 }

+var opts_mean = valpass.Options{
+	Mean: 15, // very lax in order to succeed!
+}
+
 var goodtests = []Tests{
 	{
 		name: "checkgood",
@@ -195,6 +199,14 @@ var goodtests = []Tests{
 	},
 }

+var meantests = []Tests{
+	{
+		name: "checkgood-mean",
+		want: true,
+		opts: opts_mean,
+	},
+}
+
 var badtests = []Tests{
 	{
 		name: "checkbad",
@@ -231,6 +243,11 @@ func TestValidate(t *testing.T) {
 		}
 	}

+	for _, tt := range meantests {
+		for _, pass := range pass_random_good {
+			CheckPassword(t, pass, tt.name, tt.want, tt.opts)
+		}
+	}
 }

 func CheckPassword(t *testing.T, password string,