no utf8 anymore, better unit tests

This commit is contained in:
2024-10-14 17:54:00 +02:00
parent aff1194a89
commit 1e0f22f0e7
4 changed files with 218 additions and 201 deletions

View File

@@ -25,7 +25,6 @@ with go as a reusable module.
- you can configure which metric to use
- you can also configure the quality thresholds
- there's support for dictionary lookup, but you need to provide the dictionary yourself
- different metrics for ASCII and UTF-8 character space
- it's reasonably fast
- the code is small enough to just copy it into your code
@@ -53,7 +52,7 @@ strength of the password. In non-technical words:
it checks how scrambled the password looks or how
many different bits it uses.
By default we only look for printable US-ASCII characters. But you can switch to UTF-8 as well.
We only look for printable US-ASCII characters.
### Character diffusion
@@ -146,11 +145,11 @@ parameters. To do this, just supply a second argument, which must be a
```go
type Options struct {
Compress int // minimum compression rate in percent
CharDistribution float64 // minimum char distribution in percent
Entropy float64 // minimum entropy value in bits/char
Dictionary *Dictionary // if set, lookup given dictionary, the caller provides it
UTF8 bool // if true work on unicode utf-8 space, not just bytes
Compress int // minimum compression rate in percent, default 10%
CharDistribution float64 // minimum character distribution in percent, default 10%
Entropy float64 // minimum entropy value in bits/char, default 3 bits/char
Dictionary *Dictionary // lookup given dictionary, the caller has to provide it
MeanDeviation float64 // maximum allowed deviation of the arithmetic mean from 80, by default disabled, standard 5
}
```

View File

@@ -28,8 +28,7 @@ func main() {
CharDistribution: valpass.MIN_DIST,
Entropy: valpass.MIN_ENTROPY,
Dictionary: &valpass.Dictionary{Words: ReadDict("t/american-english")},
UTF8: false,
Mean: 20, //valpass.LIMIT_MEAN,
MeanDeviation: 20, //valpass.LIMIT_MEAN,
}
res, err := valpass.Validate(os.Args[1], opts)

180
lib.go
View File

@@ -1,3 +1,4 @@
// Package valpass can be used to validate password quality using different metrics.
package valpass
import (
@@ -8,55 +9,47 @@ import (
"strings"
)
/*
* Contains the raw dictionary data and some flags. Must be provided
* by the user
*/
// Dictionary is a container struct to store and submit a dictionary of words.
type Dictionary struct {
Words []string // the actual dictionary
Submatch bool // if true 'foo' would match 'foobar'
Words []string // Contains the actual dictionary.
Submatch bool // Set to true to enable submatches, e.g. 'foo' would match 'foobar', default is false.
Fuzzy bool // Set to true to enable more lax dictionary checks, default is false.
}
/*
* Options define how to operate the validation
*/
// Options struct can be used to configure the validator, turn on/off
// certain validator functions and tune the thresholds when to flag a
// password as valid.
//
// Set option to zero or false to disable the feature.
type Options struct {
Compress int // minimum compression rate in percent
CharDistribution float64 // minimum char distribution in percent
Entropy float64 // minimum entropy value in bits/char
Dictionary *Dictionary // if set, lookup given dictionary, the caller provides it
UTF8 bool // if true work on unicode utf-8 space, not just bytes
Mean float64 // if >0, calculate the arithmetic mean
Compress int // minimum compression rate in percent, default 10%
CharDistribution float64 // minimum character distribution in percent, default 10%
Entropy float64 // minimum entropy value in bits/char, default 3 bits/char
Dictionary *Dictionary // lookup given dictionary, the caller has to provide it
MeanDeviation float64 // maximum allowed deviation of the arithmetic mean from 80, by default disabled, standard 5
}
/*
* Default validation config, a compromise of comfort and security, as always.
*/
const (
MIN_ENTROPY float64 = 3.0
MIN_COMPRESS int = 10
MIN_DICT bool = false
MIN_DIST float64 = 10.0
MAX_UTF8 int = 2164864 // max characters encodable with utf8
MAX_CHARS int = 95 // maximum printable US ASCII chars
MIN_DICT_LEN int = 5000
MIN_COMPRESS int = 10
MIN_DIST float64 = 10.0
MIN_ENTROPY float64 = 3.0
MIN_DICT_LEN int = 5000
MAX_CHARS int = 95 // maximum printable US ASCII chars
LIMIT_MEAN_DEVIATION float64 = 20
// we start our ascii arrays at char(32), so to have max 95
// elements in the slice, we subtract 32 from each ascii code
MIN_ASCII byte = 32
ascii_base byte = 32
// arithmetic mean limits: we work on chr(32) through chr(126) in
// ascii. The mean value, however, is not 63 as one would suppose,
// but 80, because the most commonly used printable ascii chars sit
// in the upper part of that range. So, we take 80 as the middle
// ground and allow a deviation of 5 up or down
MIDDLE_MEAN float64 = 80
LIMIT_MEAN float64 = 5
mean_base float64 = 80
)
/*
Stores the results of all validations.
*/
// Result stores the results of all validations.
type Result struct {
Ok bool // overall result
DictionaryMatch bool // true if the password matched a dictionary entry
@@ -66,22 +59,21 @@ type Result struct {
Mean float64 // actual arithmetic mean, close to 80 is best
}
/*
* Generic validation function. You should only call this function and
* tune it using the Options struct. However, options are optional,
* there are sensible defaults builtin
*/
// Validate validates a given password. You can tune its behavior
// using the Options struct. Options are optional, though: sensible
// defaults are built in.
//
// The returned Result struct describes the password quality.
func Validate(passphrase string, opts ...Options) (Result, error) {
result := Result{Ok: true}
// defaults, see above
options := Options{
MIN_COMPRESS,
MIN_DIST,
MIN_ENTROPY,
nil,
false, // dict: default off
0, // mean: default off
Compress: MIN_COMPRESS,
CharDistribution: MIN_DIST,
Entropy: MIN_ENTROPY,
Dictionary: nil,
MeanDeviation: 0,
}
if len(opts) == 1 {
@@ -94,17 +86,9 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
var entropy float64
var err error
switch options.UTF8 {
case true:
entropy, err = GetEntropyUTF8(passphrase)
if err != nil {
return result, err
}
default:
entropy, err = GetEntropyAscii(passphrase)
if err != nil {
return result, err
}
entropy, err = getEntropy(passphrase)
if err != nil {
return result, err
}
if entropy <= options.Entropy {
@@ -115,7 +99,7 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
}
if options.Compress > 0 {
compression, err := GetCompression([]byte(passphrase))
compression, err := getCompression([]byte(passphrase))
if err != nil {
return result, err
}
@@ -128,14 +112,8 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
}
if options.CharDistribution > 0 {
var dist float64
var dist = getDistribution(passphrase)
switch options.UTF8 {
case true:
dist = GetDistributionUTF8(passphrase)
default:
dist = GetDistributionAscii(passphrase)
}
if dist <= options.CharDistribution {
result.Ok = false
}
@@ -144,7 +122,7 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
}
if options.Dictionary != nil {
match, err := GetDictMatch(passphrase, options.Dictionary)
match, err := getDictMatch(passphrase, options.Dictionary)
if err != nil {
return result, err
}
@@ -155,10 +133,10 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
}
}
if options.Mean > 0 {
mean := GetArithmeticMean(passphrase)
if options.MeanDeviation > 0 {
mean := getArithmeticMean(passphrase)
if mean > (MIDDLE_MEAN+options.Mean) || mean < (MIDDLE_MEAN-options.Mean) {
if mean > (mean_base+options.MeanDeviation) || mean < (mean_base-options.MeanDeviation) {
result.Ok = false
}
@@ -174,7 +152,7 @@ func Validate(passphrase string, opts ...Options) (Result, error) {
* contains repeating characters; OR it is larger than the password,
* in which case it could NOT be compressed, which is what we want.
*/
func GetCompression(passphrase []byte) (int, error) {
func getCompression(passphrase []byte) (int, error) {
var b bytes.Buffer
flater, _ := flate.NewWriter(&b, 9)
@@ -203,44 +181,11 @@ func GetCompression(passphrase []byte) (int, error) {
return int(percent), nil
}
/*
* Return the entropy as bits/rune, where rune is a unicode char in
* utf8 space.
*/
func GetEntropyUTF8(passphrase string) (float64, error) {
var entropy float64
length := len(passphrase)
wherechar := make([]int, MAX_UTF8)
hist := make([]int, length)
var histlen int
for i := 0; i < MAX_UTF8; i++ {
wherechar[i] = -1
}
for _, char := range passphrase {
if wherechar[char] == -1 {
wherechar[char] = histlen
histlen++
}
hist[wherechar[char]]++
}
for i := 0; i < histlen; i++ {
diff := float64(hist[i]) / float64(length)
entropy -= diff * math.Log2(diff)
}
return entropy, nil
}
/*
Return the entropy as bits/char, where char is a printable char in
US-ASCII space. Returns error if a char is non-printable.
*/
func GetEntropyAscii(passphrase string) (float64, error) {
func getEntropy(passphrase string) (float64, error) {
var entropy float64
length := len(passphrase)
@@ -253,15 +198,15 @@ func GetEntropyAscii(passphrase string) (float64, error) {
}
for _, char := range []byte(passphrase) {
if char < MIN_ASCII || char > 126 {
if char < ascii_base || char > 126 {
return 0, fmt.Errorf("non-printable ASCII character encountered: %c", char)
}
if wherechar[char-MIN_ASCII] == -1 {
wherechar[char-MIN_ASCII] = histlen
if wherechar[char-ascii_base] == -1 {
wherechar[char-ascii_base] = histlen
histlen++
}
hist[wherechar[char-MIN_ASCII]]++
hist[wherechar[char-ascii_base]]++
}
for i := 0; i < histlen; i++ {
@@ -272,34 +217,15 @@ func GetEntropyAscii(passphrase string) (float64, error) {
return entropy, nil
}
/*
* Return character distribution in utf8 space
*/
func GetDistributionUTF8(passphrase string) float64 {
hash := make([]int, MAX_UTF8)
var chars float64
for _, char := range passphrase {
hash[char]++
}
for i := 0; i < MAX_UTF8; i++ {
if hash[i] > 0 {
chars++
}
}
return chars / (float64(MAX_UTF8) / 100)
}
/*
* Return character distribution in US-ASCII space
*/
func GetDistributionAscii(passphrase string) float64 {
func getDistribution(passphrase string) float64 {
hash := make([]int, MAX_CHARS)
var chars float64
for _, char := range []byte(passphrase) {
hash[char-MIN_ASCII]++
hash[char-ascii_base]++
}
for i := 0; i < MAX_CHARS; i++ {
@@ -314,7 +240,7 @@ func GetDistributionAscii(passphrase string) float64 {
* Return true if password can be found in given dictionary. This has
* to be supplied by the user, we do NOT ship with a dictionary!
*/
func GetDictMatch(passphrase string, dict *Dictionary) (bool, error) {
func getDictMatch(passphrase string, dict *Dictionary) (bool, error) {
if len(dict.Words) < MIN_DICT_LEN {
return false, fmt.Errorf("provided dictionary is too small")
}
@@ -350,7 +276,7 @@ values are consistently high or low.
Working on US-ASCII space
*/
func GetArithmeticMean(passphrase string) float64 {
func getArithmeticMean(passphrase string) float64 {
sum := 0.0
count := 0.0

View File

@@ -11,10 +11,14 @@ import (
"github.com/tlinden/valpass"
)
type Tests struct {
name string
want bool
opts valpass.Options
type Passwordlist [][]string
type Test struct {
name string
want bool
wanterr bool
opts valpass.Options
passwords Passwordlist
}
var pass_random_good = []string{
@@ -145,6 +149,7 @@ var pass_worst_bad = []string{
`monkey`, `Daniel`, `andrea`, `chelsea`, `william`,
`654321`, `Hannah`, `1qaz2wsx`, `ranger`, `soccer`,
`!@#$%^&*`, `Thomas`, `starwars`, `trustno1`, `london`,
`aaaaaaaaaaaaaaaaaaaaa`, // compression fail test
}
var pass_dict_bad = []string{
@@ -166,106 +171,194 @@ var pass_dict_bad = []string{
`effected`, `ministry`,
}
// pass_mean_bad holds 32-character passwords made up of upper-case
// letters and digits only.
//
// NOTE(review): presumably meant as the failing counterpart to the
// "checkgood-mean" case (arithmetic mean too far from the expected
// base); no entry of the tests table visible here references it yet -
// confirm and wire it up or drop it.
var pass_mean_bad = []string{
`UT6RTLTNAK3JN2UVWJGXSLHKT4P3ECXJ`,
`L4HENABMJR0UZBFSFV0GPSXWZ4HEMOHO`,
`YTYPHSGR8XHP4C85T3YZFF4TG2OLMQVF`,
`TWAGHNVLMYR5RW67RNKUO8K3SPYAJID2`,
`MU0OCIE9ZUYBFLMSKWKCLTSWKZ6GBTLM`,
`GHBSLIVXCJCVUNTJBSPHXZUSE906QGZH`,
`PZWQMRNG8LDRTY9GVELRALXCO181O8AK`,
`KZYKWCUZWDG4OSREEKCKOA58JQMRUUBZ`,
`CKZWG3H6A2TJKJDPEFX2CESMPYTA7WBF`,
`RT8HGYUBUNUJMF0SLWKW8JISCRSG6L6M`,
`368WCV4PGAWE1MWZJWZU8JPEQILMEBHV`,
`W6HVUTBNAGJN4ABMWEKK5OHTIXUYTPDG`,
`GZXQAEWMNSKJDYVQRPYIQXJTPIDHMF9T`,
`AWTJNUFOTML7GC2OC04K74F30AO9A2VJ`,
`MTHJUGOHCTYNWICVVNEMETRYA2L2QHBE`,
`XHTUQVYNSBPTH8TWCRMMV6BILHV6KYOP`,
`MTNAROLNNZZBARVNKGGVLL8VR682GQUP`,
`3VDYD0CJGFQ1UQKTRQOUQ5FZ4PROITVQ`,
`JWOFUTKGTVG035HUFTTWHGLECAX5IYMX`,
`DVVMB6XXZPALLFMEFJRMSZUZIRU7CLNF`,
`QCNKZ82LGDHT97LGJKLEVUSU1MSX7FNH`,
`HWNZDPHHFIDO88FB4KMJSTBI35FEJUCN`,
`1MJ7DRGDQ9BETU5JJ3NPUEWVSLZB9WGP`,
`TCVC1RLXKIKGIVYGGWOEQXDRSHQJCJUA`,
`BYMT86DO8VNU0UF0FFOC3EPLMLANAYY5`,
`OPEBVIMRKAAGURO3BQAGFSZQ0MV9OBAJ`,
`BKZUICCERVRZCFPSMFZPY1UHPFEDJLUH`,
`ECWSDOGFI1PXHI2ZAP06O1CT8USL7HLM`,
`ZRNFW4CWXP5HHYBETZQFTNOL6AJ8ZMXZ`,
`UDV3CHYM4YJUFMIS9QCHWEO1DIZ7PH59`,
`KS7FYTZ12TAZ8J3MTZAPT7TGXMYNABGX`,
`BFNAM5SRZQGO9ENP1E14GGJR8HDZZUHS`,
`34IIW3TPK2IUDTYVSEGNHNR0RLI1TL7B`,
`7TMGYVOA4NRHSY6TF6MRHHFJ07GOW2YR`,
`SDS0RTQUPVAGDMNYXYCVJEV2MDT4IH5S`,
`IQMMSGHI5JNG5VIV5K6N11WCGGGCSBWP`,
`11LMWSI2YPRMOJ9MBIA4IPKFPOJPS71U`,
`CPMXAMBOTBQ6AHXJ1FRHWBWZUX8TENST`,
`LEHQVCBRSSHY482UU1MZJZGFHWKWE716`,
`KMCGTBIYSJXDURAX5F1QQQB3Y1UU2EF6`,
`VPPZ8UFNTXAANQWDIDIAQJACVZPQIQ94`,
`CQ3GOBWGX91FT1SVVLOLCDX54HWUYLKO`,
`DKRJ7CX5JCKHEKI2JKMVPCHRCT3IKKUK`,
`XILAMTWXXGAHHMEUPNXBP5HQEGKCFH8X`,
`OGJ7A3RNOCSGPPUXSPOING6AYUNZ8OSR`,
`LB1XL9YWUXX6Q7GJBDI0BISHG7V1PAXY`,
`YRUJYIOYDNYBUBQK0YY02WA45YNGTKMS`,
`UTPTMOILT9WI3O2ZPPASMHQYCJPO2HTT`,
`J6NXVXG5FN9CTWYEYQBLFVZSSALFDJEF`,
`CQC84VGBZMJ65I8XLRF2PBMK5X86BVMC`,
}
// pass_dictsub_bad holds ten-character lower-case strings that look
// like fragments cut out of longer English words. The
// "checkbad-dictsub" case runs them against opts_dictsub (dictionary
// lookup with Submatch enabled) and expects every one to be rejected.
//
// NOTE(review): presumably each entry is a substring of some word in
// t/american-english - confirm against the dictionary file.
var pass_dictsub_bad = []string{
`regational`, `iminalizat`, `rconductiv`, `substantia`,
`oritativen`, `trocardiog`, `communicat`, `aracterist`,
`rofluoroca`, `trocardiog`, `scendental`, `terintelli`,
`ercializat`, `nsideraten`, `scendental`, `troencepha`,
`rehensibil`, `nspicuousn`, `aconservat`, `troencepha`,
`rehensiven`, `strializat`, `ianampoini`, `ianampoini`,
`eptualizat`, `rdenominat`, `rofluoroca`, `terrevolut`,
`cientiousn`, `rrelations`, `terrevolut`, `terrevolut`,
`titutional`, `nterpretat`, `nfranchise`, `troencepha`,
`radistinct`, `epresentat`, `trocardiog`, `troencepha`,
`ersational`, `epresentat`, `troencepha`, `troencepha`,
`terintelli`, `simplifica`, `simplifica`,
`terrevolut`, `icularizat`, `communicat`,
}
// pass_invalid holds a single password built from raw bytes. Bytes 12
// and 16 lie below 32 and byte 145 lies above 126, i.e. outside the
// printable US-ASCII range the entropy calculation accepts, so it is
// used by the test cases that expect Validate to return an error
// (wanterr: true).
var pass_invalid = []string{
string([]byte{12, 16, 45, 65, 96, 145}),
}
var opts_std = valpass.Options{
Compress: valpass.MIN_COMPRESS,
CharDistribution: valpass.MIN_DIST,
Entropy: valpass.MIN_ENTROPY,
Dictionary: nil,
UTF8: false,
}
// opts_dict configures a dictionary lookup against t/american-english
// (exact matches only, Submatch is left at its zero value). The
// compression, distribution and entropy thresholds are all set to
// zero, which the Options documentation describes as disabling the
// respective feature.
var opts_dict = valpass.Options{
Compress: 0,
CharDistribution: 0,
Entropy: 0,
Dictionary: &valpass.Dictionary{Words: ReadDict("t/american-english")},
}
var opts_dictsub = valpass.Options{
Compress: valpass.MIN_COMPRESS,
CharDistribution: valpass.MIN_DIST,
Entropy: valpass.MIN_ENTROPY,
Dictionary: &valpass.Dictionary{Words: ReadDict("t/american-english")},
UTF8: false,
Dictionary: &valpass.Dictionary{Words: ReadDict("t/american-english"), Submatch: true},
}
// opts_invaliddict supplies a three-word dictionary, far below
// valpass.MIN_DICT_LEN, so the dictionary lookup reports "provided
// dictionary is too small". It is used by the "checkinvalid-dict"
// case, which expects Validate to return an error.
var opts_invaliddict = valpass.Options{
Compress: 0,
CharDistribution: 0,
Entropy: 0,
Dictionary: &valpass.Dictionary{Words: []string{"eins", "zwei", "drei"}},
}
var opts_mean = valpass.Options{
Mean: 15, // very lax in order to succeed!
MeanDeviation: 15, // very lax in order to succeed!
}
var goodtests = []Tests{
var tests = []Test{
{
name: "checkgood",
want: true,
opts: opts_std,
name: "checkgood",
want: true,
opts: opts_std,
passwords: Passwordlist{pass_random_good, pass_diceware_good},
},
{
name: "checkgood-dict",
want: true,
opts: opts_dict,
},
}
var meantests = []Tests{
{
name: "checkgood-mean",
want: true,
opts: opts_mean,
},
}
var badtests = []Tests{
{
name: "checkbad",
want: false,
opts: opts_std,
name: "checkgood-dict",
want: true,
opts: opts_dict,
passwords: Passwordlist{pass_random_good, pass_diceware_good},
},
{
name: "checkbad-dict",
want: false,
opts: opts_dict,
name: "checkbad-dictsub",
want: false,
opts: opts_dictsub,
passwords: Passwordlist{pass_dictsub_bad},
},
{
name: "checkgood-mean",
want: true,
opts: opts_mean,
passwords: Passwordlist{pass_random_good},
},
{
name: "checkbad",
want: false,
opts: opts_std,
passwords: Passwordlist{pass_worst_bad, pass_dict_bad},
},
{
name: "checkbad-dict",
want: false,
opts: opts_dict,
passwords: Passwordlist{pass_dict_bad},
},
{
name: "checkinvalid",
want: false,
wanterr: true,
opts: opts_std,
passwords: Passwordlist{pass_invalid},
},
{
name: "checkinvalid-dict",
want: false,
wanterr: true,
opts: opts_invaliddict,
passwords: Passwordlist{pass_invalid},
},
}
func TestValidate(t *testing.T) {
t.Parallel()
for _, tt := range goodtests {
for _, pass := range pass_random_good {
CheckPassword(t, pass, tt.name, tt.want, tt.opts)
}
for _, pass := range pass_diceware_good {
CheckPassword(t, pass, tt.name, tt.want, tt.opts)
}
}
for _, tt := range badtests {
for _, pass := range pass_worst_bad {
CheckPassword(t, pass, tt.name, tt.want, tt.opts)
}
for _, pass := range pass_dict_bad {
CheckPassword(t, pass, tt.name, tt.want, tt.opts)
}
}
for _, tt := range meantests {
for _, pass := range pass_random_good {
CheckPassword(t, pass, tt.name, tt.want, tt.opts)
for _, tt := range tests {
for _, passlist := range tt.passwords {
for _, pass := range passlist {
CheckPassword(t, pass, tt)
}
}
}
}
func CheckPassword(t *testing.T, password string,
name string, want bool, opts valpass.Options) {
func CheckPassword(t *testing.T, password string, tt Test) {
result, err := valpass.Validate(password, opts)
result, err := valpass.Validate(password, tt.opts)
if err != nil {
t.Errorf("test %s failed with error: %s\n", name, err)
if tt.wanterr {
return
}
t.Errorf("test %s failed with error: %s. wanterr: %t\n", tt.name, err, tt.wanterr)
}
if want && !result.Ok {
if tt.want && !result.Ok {
t.Errorf("test %s failed. pass: %s, want: %t, got: %t, dict: %t\nresult: %v\n",
name, password, want, result.Ok, result.DictionaryMatch, result)
tt.name, password, tt.want, result.Ok, result.DictionaryMatch, result)
}
if !want && result.Ok {
if !tt.want && result.Ok {
t.Errorf("test %s failed. pass: %s, want: %t, got: %t, dict: %t\nresult: %v\n",
name, password, want, result.Ok, result.DictionaryMatch, result)
tt.name, password, tt.want, result.Ok, result.DictionaryMatch, result)
}
}