Added -f/--force to override the d-hash duplicate-image check; better debug output and error handling

This commit is contained in:
2024-01-22 14:30:54 +01:00
committed by T.v.Dein
parent e971070f9f
commit e2afc1350b
6 changed files with 30 additions and 17 deletions

View File

@@ -62,6 +62,7 @@ Options:
-l --limit <num> Limit the ads to download to <num>, default: load all. -l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck). -c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-m --manual Show manual. -m --manual Show manual.
-h --help Show usage. -h --help Show usage.
-V --version Show program version. -V --version Show program version.
@@ -82,6 +83,7 @@ type Config struct {
Loglevel string `koanf:"loglevel"` Loglevel string `koanf:"loglevel"`
Limit int `koanf:"limit"` Limit int `koanf:"limit"`
IgnoreErrors bool `koanf:"ignoreerrors"` IgnoreErrors bool `koanf:"ignoreerrors"`
ForceDownload bool `koanf:"force"`
Adlinks []string Adlinks []string
StatsCountAds int StatsCountAds int
StatsCountImages int StatsCountImages int
@@ -133,6 +135,7 @@ func InitConfig(w io.Writer) (*Config, error) {
f.BoolP("version", "V", false, "show program version") f.BoolP("version", "V", false, "show program version")
f.BoolP("help", "h", false, "show usage") f.BoolP("help", "h", false, "show usage")
f.BoolP("manual", "m", false, "show manual") f.BoolP("manual", "m", false, "show manual")
f.BoolP("force", "f", false, "force")
if err := f.Parse(os.Args[1:]); err != nil { if err := f.Parse(os.Args[1:]); err != nil {
return nil, err return nil, err

View File

@@ -46,7 +46,7 @@ func (img *Image) LogValue() slog.Value {
} }
// holds all images of an ad // holds all images of an ad
type Images []*Image type Cache []*goimagehash.ImageHash
func NewImage(buf *bytes.Buffer, filename string, uri string) *Image { func NewImage(buf *bytes.Buffer, filename string, uri string) *Image {
img := &Image{ img := &Image{
@@ -76,15 +76,16 @@ func (img *Image) CalcHash() error {
} }
// checks if 2 images are similar enough to be considered the same // checks if 2 images are similar enough to be considered the same
func (img *Image) Similar(otherimg *Image) bool { func (img *Image) Similar(hash *goimagehash.ImageHash) bool {
distance, err := img.Hash.Distance(otherimg.Hash) distance, err := img.Hash.Distance(hash)
if err != nil { if err != nil {
slog.Debug("failed to compute diff hash distance", "error", err) slog.Debug("failed to compute diff hash distance", "error", err)
return false return false
} }
if distance < MaxDistance { if distance < MaxDistance {
slog.Debug("distance computation", "image-A", img, "image-B", otherimg, "distance", distance) slog.Debug("distance computation", "image-A", img.Hash.ToString(),
"image-B", hash.ToString(), "distance", distance)
return true return true
} else { } else {
return false return false
@@ -92,8 +93,8 @@ func (img *Image) Similar(otherimg *Image) bool {
} }
// check current image against all known hashes. // check current image against all known hashes.
func (img *Image) SimilarExists(images Images) bool { func (img *Image) SimilarExists(cache Cache) bool {
for _, otherimg := range images { for _, otherimg := range cache {
if img.Similar(otherimg) { if img.Similar(otherimg) {
return true return true
} }
@@ -104,13 +105,18 @@ func (img *Image) SimilarExists(images Images) bool {
// read all JPG images in an ad directory, compute diff hashes and // read all JPG images in an ad directory, compute diff hashes and
// store the results in the slice Images // store the results in the slice Images
func ReadImages(addir string) (Images, error) { func ReadImages(addir string, dont bool) (Cache, error) {
files, err := os.ReadDir(addir) files, err := os.ReadDir(addir)
if err != nil { if err != nil {
return nil, err return nil, err
} }
imgs := Images{} cache := Cache{}
if dont {
// forced download, -f given
return cache, nil
}
for _, file := range files { for _, file := range files {
ext := filepath.Ext(file.Name()) ext := filepath.Ext(file.Name())
@@ -127,10 +133,10 @@ func ReadImages(addir string) (Images, error) {
} }
slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString()) slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString())
imgs = append(imgs, img) cache = append(cache, img.Hash)
} }
} }
//return nil, errors.New("ende") //return nil, errors.New("ende")
return imgs, nil return cache, nil
} }

View File

@@ -133,7 +133,7 @@
.\" ======================================================================== .\" ========================================================================
.\" .\"
.IX Title "KLEINGEBAECK 1" .IX Title "KLEINGEBAECK 1"
.TH KLEINGEBAECK 1 "2024-01-17" "1" "User Commands" .TH KLEINGEBAECK 1 "2024-01-22" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents. .\" way too many mistakes in technical documents.
.if n .ad l .if n .ad l
@@ -142,7 +142,7 @@
kleingebaeck \- kleinanzeigen.de backup tool kleingebaeck \- kleinanzeigen.de backup tool
.SH "SYNOPSYS" .SH "SYNOPSYS"
.IX Header "SYNOPSYS" .IX Header "SYNOPSYS"
.Vb 12 .Vb 10
\& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...] \& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...]
\& Options: \& Options:
\& \-u \-\-user <uid> Backup ads from user with uid <uid>. \& \-u \-\-user <uid> Backup ads from user with uid <uid>.
@@ -152,6 +152,7 @@ kleingebaeck \- kleinanzeigen.de backup tool
\& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all. \& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all.
\& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck). \& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck).
\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. \& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
\& \-f \-\-force Download images even if they already exist.
\& \-m \-\-manual Show manual. \& \-m \-\-manual Show manual.
\& \-h \-\-help Show usage. \& \-h \-\-help Show usage.
\& \-V \-\-version Show program version. \& \-V \-\-version Show program version.

View File

@@ -14,6 +14,7 @@ SYNOPSYS
-l --limit <num> Limit the ads to download to <num>, default: load all. -l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck). -c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-m --manual Show manual. -m --manual Show manual.
-h --help Show usage. -h --help Show usage.
-V --version Show program version. -V --version Show program version.

View File

@@ -13,6 +13,7 @@ kleingebaeck - kleinanzeigen.de backup tool
-l --limit <num> Limit the ads to download to <num>, default: load all. -l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck). -c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-m --manual Show manual. -m --manual Show manual.
-h --help Show usage. -h --help Show usage.
-V --version Show program version. -V --version Show program version.

View File

@@ -136,11 +136,10 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error { func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
// fetch images // fetch images
img := 1 img := 1
adpath := filepath.Join(fetch.Config.Outdir, addir) adpath := filepath.Join(fetch.Config.Outdir, addir)
// scan existing images, if any // scan existing images, if any
images, err := ReadImages(adpath) cache, err := ReadImages(adpath, fetch.Config.ForceDownload)
if err != nil { if err != nil {
return err return err
} }
@@ -167,10 +166,12 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
return err return err
} }
if image.SimilarExists(images) { if !fetch.Config.ForceDownload {
slog.Debug("similar image exists, not written", "image", image) if image.SimilarExists(cache) {
slog.Debug("similar image exists, not written", "uri", image.Uri)
return nil return nil
} }
}
err = WriteImage(file, buf2) err = WriteImage(file, buf2)
if err != nil { if err != nil {