// Package valpass rates the quality of a passphrase using a set of
// configurable checks: entropy, compressibility, character distribution,
// an optional dictionary lookup and an optional arithmetic mean test.
package valpass

import (
	"bytes"
	"compress/flate"
	"fmt"
	"math"
	"strings"
)

/*
 * Dictionary contains the raw dictionary data and some flags. It must
 * be provided by the user.
 */
type Dictionary struct {
	Words    []string // the actual dictionary
	Submatch bool     // if true, the passphrase 'foo' matches the dictionary word 'foobar' (substring lookup)
}

/*
 * Options define how to operate the validation. A numeric field set
 * to 0 (or a nil Dictionary) disables the corresponding check.
 */
type Options struct {
	Compress         int         // compression rate in percent; a passphrase compressing by this much or more is rejected
	CharDistribution float64     // minimum char distribution in percent; values at or below it are rejected
	Entropy          float64     // minimum entropy value in bits/char; values at or below it are rejected
	Dictionary       *Dictionary // if set, lookup given dictionary, the caller provides it
	UTF8             bool        // if true work on unicode utf-8 space, not just bytes
	Mean             float64     // if >0, enable the arithmetic mean check; the value is the allowed deviation from MIDDLE_MEAN
}

/*
 * Default validation config, a compromise of comfort and security, as always.
 */
const (
	MIN_ENTROPY  float64 = 3.0     // default for Options.Entropy
	MIN_COMPRESS int     = 10      // default for Options.Compress
	MIN_DICT     bool    = false   // NOTE(review): not referenced by the code visible here; presumably "dictionary check off by default"
	MIN_DIST     float64 = 10.0    // default for Options.CharDistribution
	MAX_UTF8     int     = 2164864 // max characters encodable with utf8
	MAX_CHARS    int     = 95      // maximum printable US ASCII chars
	MIN_DICT_LEN int     = 5000    // minimum number of words a dictionary must contain to be accepted

	// we start our ascii arrays at char(32), so to have max 95
	// elements in the slice, we subtract 32 from each ascii code
	MIN_ASCII byte = 32

	// arithmetic mean limits: we work on chr(32) til chr(126) in
	// ascii. The mean value, however, is not 63 as one would suppose,
	// but 80, because most used printable ascii chars exist in the
	// upper area of the space. So, we take 80 as the middle ground
	// and go beyond 5 up or down.
	// NOTE(review): LIMIT_MEAN is not applied automatically by the code
	// visible here; the tolerance comes from Options.Mean. Presumably
	// callers are expected to pass LIMIT_MEAN there - confirm.
	MIDDLE_MEAN float64 = 80
	LIMIT_MEAN  float64 = 5
)

/*
 * Result stores the results of all validations.
 */
type Result struct {
	Ok               bool    // overall result
	DictionaryMatch  bool    // true if the password matched a dictionary entry
	Compress         int     // actual compression rate in percent
	CharDistribution float64 // actual character distribution in percent
	Entropy          float64 // actual entropy value in bits/chars
	Mean             float64 // actual arithmetic mean of the passphrase bytes; compared against MIDDLE_MEAN
}
You should only call this function and * tune it using the Options struct. However, options are optional, * there are sensible defaults builtin */ func Validate(passphrase string, opts ...Options) (Result, error) { result := Result{Ok: true} // defaults, see above options := Options{ MIN_COMPRESS, MIN_DIST, MIN_ENTROPY, nil, false, // dict: default off 0, // mean: default off } if len(opts) == 1 { options = opts[0] } // execute the actual validation checks if options.Entropy > 0 { var entropy float64 var err error switch options.UTF8 { case true: entropy, err = GetEntropyUTF8(passphrase) if err != nil { return result, err } default: entropy, err = GetEntropyAscii(passphrase) if err != nil { return result, err } } if entropy <= options.Entropy { result.Ok = false } result.Entropy = entropy } if options.Compress > 0 { compression, err := GetCompression([]byte(passphrase)) if err != nil { return result, err } if compression >= options.Compress { result.Ok = false } result.Compress = compression } if options.CharDistribution > 0 { var dist float64 switch options.UTF8 { case true: dist = GetDistributionUTF8(passphrase) default: dist = GetDistributionAscii(passphrase) } if dist <= options.CharDistribution { result.Ok = false } result.CharDistribution = dist } if options.Dictionary != nil { match, err := GetDictMatch(passphrase, options.Dictionary) if err != nil { return result, err } if match { result.Ok = false result.DictionaryMatch = true } } if options.Mean > 0 { mean := GetArithmeticMean(passphrase) if mean > (MIDDLE_MEAN+options.Mean) || mean < (MIDDLE_MEAN-options.Mean) { result.Ok = false } result.Mean = mean } return result, nil } /* * we compress with Flate level 9 (max) and see if the result is * smaller than the password, in which case it could be compressed and * contains repeating characters; OR it is larger than the password, * in which case it could NOT be compressed, which is what we want. 
/*
 * GetCompression returns by how many percent the passphrase shrinks
 * when compressed. We compress with Flate at the maximum level and see
 * if the result is smaller than the password, in which case it could
 * be compressed and contains repeating characters; OR it is at least
 * as large as the password, in which case it could NOT be compressed,
 * which is what we want. In the latter case 0 is returned.
 *
 * An error is returned if the flate writer cannot be created or fails.
 */
func GetCompression(passphrase []byte) (int, error) {
	var b bytes.Buffer

	flater, err := flate.NewWriter(&b, flate.BestCompression)
	if err != nil {
		return 0, fmt.Errorf("failed to create flate writer: %w", err)
	}

	if _, err := flater.Write(passphrase); err != nil {
		return 0, fmt.Errorf("failed to write to flate writer: %w", err)
	}

	// The explicit Flush is kept on purpose: whatever it writes counts
	// towards the measured size, so dropping it would shift the
	// reported rates.
	if err := flater.Flush(); err != nil {
		return 0, fmt.Errorf("failed to flush flate writer: %w", err)
	}

	if err := flater.Close(); err != nil {
		return 0, fmt.Errorf("failed to close flate writer: %w", err)
	}

	// use floats to avoid division by zero panic
	length := float32(len(passphrase))
	compressed := float32(b.Len())

	if compressed >= length {
		return 0, nil
	}

	percent := 100 - (compressed / (length / 100))

	return int(percent), nil
}

/*
 * GetEntropyUTF8 returns the entropy as bits/rune, where rune is a
 * unicode char in utf8 space.
 *
 * The probability of each rune is its count divided by the number of
 * runes (not bytes) in the passphrase. An empty passphrase yields 0.
 * The returned error is always nil; it exists for symmetry with
 * GetEntropyAscii.
 */
func GetEntropyUTF8(passphrase string) (float64, error) {
	// slot maps a rune to the index of its counter in hist. Counters
	// are created in order of first appearance, which keeps the
	// floating point summation below deterministic.
	slot := make(map[rune]int)

	var hist []int

	total := 0

	for _, char := range passphrase {
		idx, known := slot[char]
		if !known {
			idx = len(hist)
			slot[char] = idx
			hist = append(hist, 0)
		}

		hist[idx]++
		total++
	}

	var entropy float64

	for _, count := range hist {
		p := float64(count) / float64(total)
		entropy -= p * math.Log2(p)
	}

	return entropy, nil
}
*/ func GetEntropyAscii(passphrase string) (float64, error) { var entropy float64 length := len(passphrase) wherechar := make([]int, MAX_CHARS) hist := make([]int, length) var histlen int for i := 0; i < MAX_CHARS; i++ { wherechar[i] = -1 } for _, char := range []byte(passphrase) { if char < MIN_ASCII || char > 126 { return 0, fmt.Errorf("non-printable ASCII character encountered: %c", char) } if wherechar[char-MIN_ASCII] == -1 { wherechar[char-MIN_ASCII] = histlen histlen++ } hist[wherechar[char-MIN_ASCII]]++ } for i := 0; i < histlen; i++ { diff := float64(hist[i]) / float64(length) entropy -= diff * math.Log2(diff) } return entropy, nil } /* * Return character distribution in utf8 space */ func GetDistributionUTF8(passphrase string) float64 { hash := make([]int, MAX_UTF8) var chars float64 for _, char := range passphrase { hash[char]++ } for i := 0; i < MAX_UTF8; i++ { if hash[i] > 0 { chars++ } } return chars / (float64(MAX_UTF8) / 100) } /* * Return character distribution in US-ASCII space */ func GetDistributionAscii(passphrase string) float64 { hash := make([]int, MAX_CHARS) var chars float64 for _, char := range []byte(passphrase) { hash[char-MIN_ASCII]++ } for i := 0; i < MAX_CHARS; i++ { if hash[i] > 0 { chars++ } } return chars / (float64(MAX_CHARS) / 100) } /* * Return true if password can be found in given dictionary. This has * to be supplied by the user, we do NOT ship with a dictionary! 
*/ func GetDictMatch(passphrase string, dict *Dictionary) (bool, error) { if len(dict.Words) < MIN_DICT_LEN { return false, fmt.Errorf("provided dictionary is too small") } lcpass := strings.ToLower(passphrase) if dict.Submatch { for _, word := range dict.Words { if strings.Contains(strings.ToLower(word), lcpass) { return true, nil } } } else { for _, word := range dict.Words { if lcpass == strings.ToLower(word) { return true, nil } } } return false, nil } /* * Return the arithmetic mean value: This is simply the result of summing the all the bytes (bits if the -b option is specified) in the file and dividing by the file length. If the data are close to random, this should be about 127.5 (0.5 for -b option output). If the mean departs from this value, the values are consistently high or low. Working on US-ASCII space */ func GetArithmeticMean(passphrase string) float64 { sum := 0.0 count := 0.0 for _, char := range []byte(passphrase) { sum += float64(char) count++ } return sum / count }