added arithmetic mean

2026-02-04 11:10:57 +01:00 · 2024-10-13 13:31:57 +02:00
parent a56f76acdc
commit aff1194a89
4 changed files with 85 additions and 9 deletions
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ with go as a reusable module.
 ## Features
 - standalone module without external dependencies
- uses 3 different metrics to measure password quality
+- uses 5 different metrics to measure password quality
 - you can configure which metric to use
 - you can also configure the quality thresholds
 - there's support for dictionary lookup, but you need to provide the dictionary yourself 
@@ -88,6 +88,21 @@ Of course we do not use RLE. We measure compression
 using the [Flate algorithm](
 https://en.m.wikipedia.org/wiki/Deflate).
 ### Optional: arithmetic mean value
 This is simply the result of summing the codes of all the printable
 ASCII characters in the password, divided by the password length. The
 ideal value would be ~80, because most normal letters hang out in the
 upper area between 32 (space) and 126 (tilde). We consider a password
 ok if its mean lies around this value, give or take 5. If the mean
 departs further from this value, the characters are consistently high
 or low (e.g. more numbers and upper case letters, or only lower case
 letters). The tolerance of 5 can be tweaked: the larger the number,
 the laxer the result.
 Please be warned that this metric will in most cases give you bad
 results on otherwise good passwords, such as diceware passwords. Only
 use it if you know what you're doing.
 ### Optional: dictionary check
 You can supply a dictionary of words of your
@@ -103,12 +118,8 @@ you can tune the quality thresholds as needed.
 ### Future/ ToDo
 - checksum test using supplied checksum list, e.g. of leaked passwords
- fuzzy testing against dictionary to catch variations
+-  fuzzy  testing  against   dictionary  to  catch  variations,  using
- chi square test (see  http://www.fourmilab.ch/random/)
+  Levenshtein or something similar.
 - Arithmetic mean value test
 - Monte Carlo value test
 - Serial correlation
 - maybe some dieharder tests
 ## Usage
--- a/example/test.go
+++ b/example/test.go
@@ -16,7 +16,8 @@ Compression rate         0%%             min %d%%     %d%%
 Character distribution   100%%           min %0.2f%%  %0.2f%%
 Character entropy        8.0 bits/char  min %0.2f    %0.2f bits/char
 Character redundancy     0.0%%           max %0.2f%%  %0.2f%%
-Dictionary match         false          false       %t 
+Dictionary match         false          false       %t
 Arithmetic mean             80          true        %0.2f
 ------------------------------------------------------------------
 Validation response                                 %t
 `
@@ -28,6 +29,7 @@ func main() {
 		Entropy:          valpass.MIN_ENTROPY,
 		Dictionary:       &valpass.Dictionary{Words: ReadDict("t/american-english")},
 		UTF8:             false,
 		Mean:             20, //valpass.LIMIT_MEAN,
 	}
 	res, err := valpass.Validate(os.Args[1], opts)
@@ -46,6 +48,7 @@ func main() {
 		100-opts.CharDistribution,
 		100-res.CharDistribution,
 		res.DictionaryMatch,
 		res.Mean,
 		res.Ok,
 	)
--- a/lib.go
+++ b/lib.go
@@ -26,6 +26,7 @@ type Options struct {
 	Entropy          float64     // minimum entropy value in bits/char
 	Dictionary       *Dictionary // if set, lookup given dictionary, the caller provides it
 	UTF8             bool        // if true work on unicode utf-8 space, not just bytes
 	Mean             float64     // if >0, calculate the arithmetic mean
 }
 /*
@@ -43,6 +44,14 @@ const (
 	//  we start  our ascii  arrays  at char(32),  so to  have max  95
 	// elements in the slice, we subtract 32 from each ascii code
 	MIN_ASCII byte = 32
 	//  arithmetic  mean limits:  we work on  chr(32) til  chr(126) in
 	// ascii. The mean value, however, is not 63 as one would suppose,
 	// but  80, because most used  printable ascii chars exist  in the
 	// upper area  of the space. So,  we take 80 as  the middle ground
 	// and go beyond 5 up or down
 	MIDDLE_MEAN float64 = 80
 	LIMIT_MEAN  float64 = 5
 )
 /*
@@ -54,6 +63,7 @@ type Result struct {
 	Compress         int     // actual compression rate in percent
 	CharDistribution float64 // actual character distribution in percent
 	Entropy          float64 // actual entropy value in bits/chars
 	Mean             float64 // actual arithmetic mean, close to MIDDLE_MEAN (80) is best
 }
 /*
@@ -70,7 +80,8 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
 		MIN_DIST,
 		MIN_ENTROPY,
 		nil,
-		false,
+		false, // dict: default off
 		0,     // mean: default off
 	}
 	if len(opts) == 1 {
@@ -144,6 +155,16 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
 		}
 	}
 	if options.Mean > 0 {
 		mean := GetArithmeticMean(passphrase)
 		if mean > (MIDDLE_MEAN+options.Mean) || mean < (MIDDLE_MEAN-options.Mean) {
 			result.Ok = false
 		}
 		result.Mean = mean
 	}
 	return result, nil
 }
@@ -316,3 +337,27 @@ func GetDictMatch(passphrase string, dict *Dictionary) (bool, error) {
 	return false, nil
 }
 /*
  * GetArithmeticMean returns the arithmetic mean of the byte values of
  * the given passphrase: the sum of all its bytes divided by its length
  * in bytes.
  *
  * The function works on raw bytes (US-ASCII space), so every byte of a
  * multi-byte UTF-8 character is counted on its own.
  *
  * Validate() compares the result against MIDDLE_MEAN give or take the
  * configured Mean option. If the mean departs from that range, the
  * characters are consistently high or low.
  *
  * An empty passphrase yields 0. Without this guard the division 0/0
  * would produce NaN, which compares false against every limit and
  * would therefore slip through the range check unnoticed.
  */
 func GetArithmeticMean(passphrase string) float64 {
 	length := len(passphrase)
 	if length == 0 {
 		return 0
 	}
 	// byte values are small integers, so an integer sum is exact
 	sum := 0
 	for i := 0; i < length; i++ {
 		sum += int(passphrase[i])
 	}
 	return float64(sum) / float64(length)
 }
--- a/lib_test.go
+++ b/lib_test.go
@@ -182,6 +182,10 @@ var opts_dict = valpass.Options{
 	UTF8:             false,
 }
 // opts_mean sets only the Mean option, to a tolerance of 15, which is
 // very lax in order to succeed!
 var opts_mean = valpass.Options{Mean: 15}
 var goodtests = []Tests{
 	{
 		name: "checkgood",
@@ -195,6 +199,14 @@ var goodtests = []Tests{
 	},
 }
 // meantests holds the test cases that TestValidate runs with the
 // opts_mean options; want is handed to CheckPassword along with them.
 var meantests = []Tests{
 	{opts: opts_mean, want: true, name: "checkgood-mean"},
 }
 var badtests = []Tests{
 	{
 		name: "checkbad",
@@ -231,6 +243,11 @@ func TestValidate(t *testing.T) {
 		}
 	}
 	for _, tt := range meantests {
 		for _, pass := range pass_random_good {
 			CheckPassword(t, pass, tt.name, tt.want, tt.opts)
 		}
 	}
 }
 func CheckPassword(t *testing.T, password string,