added arithmetic mean

This commit is contained in:
2024-10-13 13:31:57 +02:00
parent a56f76acdc
commit aff1194a89
4 changed files with 85 additions and 9 deletions

View File

@@ -21,7 +21,7 @@ with go as a reusable module.
## Features ## Features
- standalone module without external dependencies - standalone module without external dependencies
- uses 3 different metrics to measure password quality - uses 5 different metrics to measure password quality
- you can configure which metric to use - you can configure which metric to use
- you can also configure the quality thresholds - you can also configure the quality thresholds
- there's support for dictionary lookup, but you need to provide the dictionary yourself - there's support for dictionary lookup, but you need to provide the dictionary yourself
@@ -88,6 +88,21 @@ Of course we do not use RLE. We measure compression
using the [Flate algorithm]( using the [Flate algorithm](
https://en.m.wikipedia.org/wiki/Deflate). https://en.m.wikipedia.org/wiki/Deflate).
### Optional: arithmetic mean value
This is simply the sum of the ASCII codes of all characters in the
password, divided by the password length. The ideal value would be ~80,
because most normal letters hang out in the upper area between 32
(space) and 126 (tilde). We consider a password ok if its mean lies
within this area, give or take 5. If the mean departs further from this
value, the characters are consistently high or low (e.g. more numbers
and upper case letters, or only lower case letters). The tolerance, 5,
can be tweaked. The larger the number, the laxer the result.
Please be warned, that this metric will in most cases give you bad
results on otherwise good passwords, such as diceware passwords. Only
use it if you know what you're doing.
### Optional: dictionary check ### Optional: dictionary check
You can supply a dictionary of words of your You can supply a dictionary of words of your
@@ -103,12 +118,8 @@ you can tune the quality thresholds as needed.
### Future/ ToDo ### Future/ ToDo
- checksum test using supplied checksum list, e.g. of leaked passwords - checksum test using supplied checksum list, e.g. of leaked passwords
- fuzzy testing against dictionary to catch variations - fuzzy testing against dictionary to catch variations, using
- chi square test (see http://www.fourmilab.ch/random/) Levenshtein or something similar.
- Arithmetic mean value test
- Monte Carlo value test
- Serial correlation
- maybe some dieharder tests
## Usage ## Usage

View File

@@ -16,7 +16,8 @@ Compression rate 0%% min %d%% %d%%
Character distribution 100%% min %0.2f%% %0.2f%% Character distribution 100%% min %0.2f%% %0.2f%%
Character entropy 8.0 bits/char min %0.2f %0.2f bits/char Character entropy 8.0 bits/char min %0.2f %0.2f bits/char
Character redundancy 0.0%% max %0.2f%% %0.2f%% Character redundancy 0.0%% max %0.2f%% %0.2f%%
Dictionary match false false %t Dictionary match false false %t
Arithmetic mean 80 true %0.2f
------------------------------------------------------------------ ------------------------------------------------------------------
Validation response %t Validation response %t
` `
@@ -28,6 +29,7 @@ func main() {
Entropy: valpass.MIN_ENTROPY, Entropy: valpass.MIN_ENTROPY,
Dictionary: &valpass.Dictionary{Words: ReadDict("t/american-english")}, Dictionary: &valpass.Dictionary{Words: ReadDict("t/american-english")},
UTF8: false, UTF8: false,
Mean: 20, //valpass.LIMIT_MEAN,
} }
res, err := valpass.Validate(os.Args[1], opts) res, err := valpass.Validate(os.Args[1], opts)
@@ -46,6 +48,7 @@ func main() {
100-opts.CharDistribution, 100-opts.CharDistribution,
100-res.CharDistribution, 100-res.CharDistribution,
res.DictionaryMatch, res.DictionaryMatch,
res.Mean,
res.Ok, res.Ok,
) )

47
lib.go
View File

@@ -26,6 +26,7 @@ type Options struct {
Entropy float64 // minimum entropy value in bits/char Entropy float64 // minimum entropy value in bits/char
Dictionary *Dictionary // if set, lookup given dictionary, the caller provides it Dictionary *Dictionary // if set, lookup given dictionary, the caller provides it
UTF8 bool // if true work on unicode utf-8 space, not just bytes UTF8 bool // if true work on unicode utf-8 space, not just bytes
Mean float64 // if >0, calculate the arithmetic mean
} }
/* /*
@@ -43,6 +44,14 @@ const (
// we start our ascii arrays at char(32), so to have max 95 // we start our ascii arrays at char(32), so to have max 95
// elements in the slice, we subtract 32 from each ascii code // elements in the slice, we subtract 32 from each ascii code
MIN_ASCII byte = 32 MIN_ASCII byte = 32
// arithmetic mean limits: we work on chr(32) til chr(126) in
// ascii. The mean value, however, is not 63 as one would suppose,
// but 80, because most used printable ascii chars exist in the
// upper area of the space. So, we take 80 as the middle ground
// and go beyond 5 up or down
MIDDLE_MEAN float64 = 80
LIMIT_MEAN float64 = 5
) )
/* /*
@@ -54,6 +63,7 @@ type Result struct {
Compress int // actual compression rate in percent Compress int // actual compression rate in percent
CharDistribution float64 // actual character distribution in percent CharDistribution float64 // actual character distribution in percent
Entropy float64 // actual entropy value in bits/chars Entropy float64 // actual entropy value in bits/chars
Mean float64 // actual arithmetic mean, close to MIDDLE_MEAN (80) is best
} }
/* /*
@@ -70,7 +80,8 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
MIN_DIST, MIN_DIST,
MIN_ENTROPY, MIN_ENTROPY,
nil, nil,
false, false, // dict: default off
0, // mean: default off
} }
if len(opts) == 1 { if len(opts) == 1 {
@@ -144,6 +155,16 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
} }
} }
if options.Mean > 0 {
mean := GetArithmeticMean(passphrase)
if mean > (MIDDLE_MEAN+options.Mean) || mean < (MIDDLE_MEAN-options.Mean) {
result.Ok = false
}
result.Mean = mean
}
return result, nil return result, nil
} }
@@ -316,3 +337,27 @@ func GetDictMatch(passphrase string, dict *Dictionary) (bool, error) {
return false, nil return false, nil
} }
/*
 * Return the arithmetic mean value:

GetArithmeticMean adds up the numeric value of every byte of the
passphrase and divides the sum by the number of bytes.

The calculation works on raw bytes (US-ASCII space), so a multi-byte
UTF-8 character contributes each of its bytes separately.

Validate compares the returned value against MIDDLE_MEAN plus/minus
the configured tolerance. If the mean departs from that area, the
characters are consistently high or low.

An empty passphrase has no bytes to average; 0 is returned in that
case instead of the NaN which a 0/0 division would produce. NaN would
compare false against both limits and thus slip through the check.
*/
func GetArithmeticMean(passphrase string) float64 {
	if len(passphrase) == 0 {
		return 0
	}

	sum := 0.0

	// index the string directly: this iterates bytes, not runes
	for i := 0; i < len(passphrase); i++ {
		sum += float64(passphrase[i])
	}

	return sum / float64(len(passphrase))
}

View File

@@ -182,6 +182,10 @@ var opts_dict = valpass.Options{
UTF8: false, UTF8: false,
} }
// opts_mean switches on the arithmetic mean check (Mean > 0) with a
// tolerance of 15; no other option is set explicitly.
var opts_mean = valpass.Options{
Mean: 15, // very lax in order to succeed!
}
var goodtests = []Tests{ var goodtests = []Tests{
{ {
name: "checkgood", name: "checkgood",
@@ -195,6 +199,14 @@ var goodtests = []Tests{
}, },
} }
// meantests holds the cases which TestValidate runs against the
// pass_random_good passwords using opts_mean; each one is expected to
// validate successfully (want: true).
var meantests = []Tests{
{
name: "checkgood-mean",
want: true,
opts: opts_mean,
},
}
var badtests = []Tests{ var badtests = []Tests{
{ {
name: "checkbad", name: "checkbad",
@@ -231,6 +243,11 @@ func TestValidate(t *testing.T) {
} }
} }
for _, tt := range meantests {
for _, pass := range pass_random_good {
CheckPassword(t, pass, tt.name, tt.want, tt.opts)
}
}
} }
func CheckPassword(t *testing.T, password string, func CheckPassword(t *testing.T, password string,