diff --git a/README.md b/README.md
index 62dbd9d..adbc504 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ with go as a reusable module.
 ## Features
 
 - standalone module without external dependencies
-- uses 3 different metrics to measure password quality
+- uses 5 different metrics to measure password quality
 - you can configure which metric to use
 - you can also configure the quality thresholds
 - there's support for dictionary lookup, but you need to provide the dictionary yourself
@@ -88,6 +88,21 @@ Of course we do not use RLE.
 
 We measure compression using the
 [Flate algorithm](https://en.m.wikipedia.org/wiki/Deflate).
 
+### Optional: arithmetic mean value
+
+This is simply the sum of all the printable ASCII characters divided
+by the password length. The ideal value is ~80, because most common
+letters sit in the upper area between 32 (space) and 126 (tilde). We
+consider a password ok if its mean lies within this area, give or
+take 5. If the mean departs further, the characters are consistently
+high or low (e.g. mostly numbers and upper case letters, or only
+lower case letters). The tolerance of 5 can be tweaked: the larger
+the number, the laxer the check.
+
+Be warned that this metric will in most cases give you bad results
+on otherwise good passwords, such as diceware passwords. Only use it
+if you know what you're doing.
+
 ### Optional: dictionary check
 
 You can supply a dictionary of words of your
@@ -103,12 +118,8 @@ you can tune the quality thresholds as needed.
 
 ### Future/ ToDo
 
 - checksum test using supplied checksum list, e.g. of leaked passwords
-- fuzzy testing against dictionary to catch variations
-- chi square test (see http://www.fourmilab.ch/random/)
-- Arithmetic mean value test
-- Monte Carlo value test
-- Serial correlation
-- maybe some dieharder tests
+- fuzzy testing against dictionary to catch variations, using
+  Levenshtein distance or something similar
 
 ## Usage
diff --git a/example/test.go b/example/test.go
index 2e828ac..fb2ada7 100644
--- a/example/test.go
+++ b/example/test.go
@@ -16,7 +16,8 @@ Compression rate           0%%            min %d%%      %d%%
 Character distribution    100%%           min %0.2f%%   %0.2f%%
 Character entropy         8.0 bits/char   min %0.2f     %0.2f bits/char
 Character redundancy      0.0%%           max %0.2f%%   %0.2f%%
-Dictionary match          false               false     %t
+Dictionary match          false               false     %t
+Arithmetic mean           80                  true      %0.2f
 ------------------------------------------------------------------
 Validation response %t
 `
@@ -28,6 +29,7 @@ func main() {
 		Entropy:    valpass.MIN_ENTROPY,
 		Dictionary: &valpass.Dictionary{Words: ReadDict("t/american-english")},
 		UTF8:       false,
+		Mean:       20, // much laxer than valpass.LIMIT_MEAN (5)
 	}
 
 	res, err := valpass.Validate(os.Args[1], opts)
@@ -46,6 +48,7 @@ func main() {
 		100-opts.CharDistribution,
 		100-res.CharDistribution,
 		res.DictionaryMatch,
+		res.Mean,
 		res.Ok,
 	)
diff --git a/lib.go b/lib.go
index eb52869..f86020d 100644
--- a/lib.go
+++ b/lib.go
@@ -26,6 +26,7 @@ type Options struct {
 	Entropy          float64     // minimum entropy value in bits/char
 	Dictionary       *Dictionary // if set, lookup given dictionary, the caller provides it
 	UTF8             bool        // if true work on unicode utf-8 space, not just bytes
+	Mean             float64     // if >0, check the arithmetic mean with this tolerance
 }
 
 /*
@@ -43,6 +44,14 @@ const (
 	// we start our ascii arrays at char(32), so to have max 95
 	// elements in the slice, we subtract 32 from each ascii code
 	MIN_ASCII byte = 32
+
+	// arithmetic mean limits: we work on chr(32) to chr(126) in ascii.
+	// The expected mean, however, is not the midpoint of that range
+	// (79), but 80, because the most used printable ascii chars live
+	// in the upper area of the space. So we take 80 as the middle
+	// ground and allow a deviation of 5 up or down by default
+	MIDDLE_MEAN float64 = 80
+	LIMIT_MEAN  float64 = 5
 )
 
 /*
@@ -54,6 +63,7 @@ type Result struct {
 	Compress         int     // actual compression rate in percent
 	CharDistribution float64 // actual character distribution in percent
 	Entropy          float64 // actual entropy value in bits/chars
+	Mean             float64 // actual arithmetic mean, close to 80 is best
 }
 
 /*
@@ -70,7 +80,8 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
 		MIN_DIST,
 		MIN_ENTROPY,
 		nil,
-		false,
+		false, // dict: default off
+		0,     // mean: default off
 	}
 
 	if len(opts) == 1 {
@@ -144,6 +155,16 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
 		}
 	}
 
+	if options.Mean > 0 {
+		mean := GetArithmeticMean(passphrase)
+
+		if mean > (MIDDLE_MEAN+options.Mean) || mean < (MIDDLE_MEAN-options.Mean) {
+			result.Ok = false
+		}
+
+		result.Mean = mean
+	}
+
 	return result, nil
 }
@@ -316,3 +337,27 @@ func GetDictMatch(passphrase string, dict *Dictionary) (bool, error) {
 
 	return false, nil
 }
+
+/*
+Return the arithmetic mean value:
+
+This is simply the result of summing all the bytes of the passphrase
+and dividing by its length. We work on the printable US-ASCII space
+(chr(32) to chr(126)), where a typical password should come out at
+around 80 (MIDDLE_MEAN). If the mean departs from this value, the
+characters are consistently high or low (e.g. only lower case
+letters).
+
+Works on raw bytes, not on utf-8 runes.
+*/
+func GetArithmeticMean(passphrase string) float64 {
+	sum := 0.0
+	count := 0.0
+
+	for _, char := range []byte(passphrase) {
+		sum += float64(char)
+		count++
+	}
+
+	return sum / count
+}
diff --git a/lib_test.go b/lib_test.go
index 6b2ad7f..e7c2f1c 100644
--- a/lib_test.go
+++ b/lib_test.go
@@ -182,6 +182,10 @@ var opts_dict = valpass.Options{
 	UTF8: false,
 }
 
+var opts_mean = valpass.Options{
+	Mean: 15, // very lax in order to succeed!
+}
+
 var goodtests = []Tests{
 	{
 		name: "checkgood",
@@ -195,6 +199,14 @@ var goodtests = []Tests{
 		want: true,
 		opts: opts,
 	},
 }
 
+var meantests = []Tests{
+	{
+		name: "checkgood-mean",
+		want: true,
+		opts: opts_mean,
+	},
+}
+
 var badtests = []Tests{
 	{
 		name: "checkbad",
@@ -231,6 +243,11 @@ func TestValidate(t *testing.T) {
 		}
 	}
 
+	for _, tt := range meantests {
+		for _, pass := range pass_random_good {
+			CheckPassword(t, pass, tt.name, tt.want, tt.opts)
+		}
+	}
 }
 
 func CheckPassword(t *testing.T, password string,
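
---

For reviewers, a quick illustration of how the new metric behaves. This is a minimal standalone sketch, not part of the patch: `meanOK` is a hypothetical helper that restates the gate `Validate` applies (`MIDDLE_MEAN` of 80 plus or minus the configured tolerance) together with the byte-summing loop from `GetArithmeticMean`; the sample passphrases are made up for illustration:

```go
package main

import "fmt"

// meanOK restates the patch's check: a passphrase passes when the
// arithmetic mean of its bytes lies within middle +/- limit.
func meanOK(pass string, limit float64) (float64, bool) {
	const middle = 80.0 // MIDDLE_MEAN in the patch
	sum := 0.0
	for _, c := range []byte(pass) {
		sum += float64(c)
	}
	// note: like GetArithmeticMean, this yields NaN for empty input
	mean := sum / float64(len(pass))
	return mean, mean >= middle-limit && mean <= middle+limit
}

func main() {
	// "abc" sums to 97+98+99 = 294, and 294/3 = 98: outside the
	// 75..85 window, so the check fails.
	fmt.Println(meanOK("abc", 5))

	// A lowercase diceware-style phrase lands around 101.6, also a
	// failure: the README's diceware caveat in action.
	fmt.Println(meanOK("correct-horse-battery-staple", 5))

	// Mixing digits and punctuation pulls the mean down: this one
	// sums to 960 over 12 chars, exactly 80, so it passes.
	fmt.Println(meanOK("Tr0ub4dor&3!", 5))
}
```

The same behavior is reachable through the public API by setting `Options.Mean` to a tolerance (e.g. `valpass.LIMIT_MEAN`) and reading back `Result.Mean` and `Result.Ok`, as the updated `example/test.go` does.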