diff --git a/config.go b/config.go index 8227c9f..bf026e8 100644 --- a/config.go +++ b/config.go @@ -62,6 +62,7 @@ Options: -l --limit Limit the ads to download to , default: load all. -c --config Use config file (default: ~/.kleingebaeck). --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. +-f --force Download images even if they already exist. -m --manual Show manual. -h --help Show usage. -V --version Show program version. @@ -82,6 +83,7 @@ type Config struct { Loglevel string `koanf:"loglevel"` Limit int `koanf:"limit"` IgnoreErrors bool `koanf:"ignoreerrors"` + ForceDownload bool `koanf:"force"` Adlinks []string StatsCountAds int StatsCountImages int @@ -133,6 +135,7 @@ func InitConfig(w io.Writer) (*Config, error) { f.BoolP("version", "V", false, "show program version") f.BoolP("help", "h", false, "show usage") f.BoolP("manual", "m", false, "show manual") + f.BoolP("force", "f", false, "force") if err := f.Parse(os.Args[1:]); err != nil { return nil, err diff --git a/image.go b/image.go index f162534..5582567 100644 --- a/image.go +++ b/image.go @@ -46,7 +46,7 @@ func (img *Image) LogValue() slog.Value { } // holds all images of an ad -type Images []*Image +type Cache []*goimagehash.ImageHash func NewImage(buf *bytes.Buffer, filename string, uri string) *Image { img := &Image{ @@ -76,15 +76,16 @@ func (img *Image) CalcHash() error { } // checks if 2 images are similar enough to be considered the same -func (img *Image) Similar(otherimg *Image) bool { - distance, err := img.Hash.Distance(otherimg.Hash) +func (img *Image) Similar(hash *goimagehash.ImageHash) bool { + distance, err := img.Hash.Distance(hash) if err != nil { slog.Debug("failed to compute diff hash distance", "error", err) return false } if distance < MaxDistance { - slog.Debug("distance computation", "image-A", img, "image-B", otherimg, "distance", distance) + slog.Debug("distance computation", "image-A", img.Hash.ToString(), + "image-B", hash.ToString(), "distance", distance) return true } else { return false @@ -92,8 +93,8 @@ func (img *Image) Similar(otherimg *Image) bool { } // check current image against all known hashes. -func (img *Image) SimilarExists(images Images) bool { - for _, otherimg := range images { +func (img *Image) SimilarExists(cache Cache) bool { + for _, otherimg := range cache { if img.Similar(otherimg) { return true } @@ -104,13 +105,18 @@ func (img *Image) SimilarExists(images Images) bool { // read all JPG images in a ad directory, compute diff hashes and // store the results in the slice Images -func ReadImages(addir string) (Images, error) { +func ReadImages(addir string, dont bool) (Cache, error) { files, err := os.ReadDir(addir) if err != nil { return nil, err } - imgs := Images{} + cache := Cache{} + + if dont { + // forced download, -f given + return cache, nil + } for _, file := range files { ext := filepath.Ext(file.Name()) @@ -127,10 +133,10 @@ func ReadImages(addir string) (Images, error) { } slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString()) - imgs = append(imgs, img) + cache = append(cache, img.Hash) } } //return nil, errors.New("ende") - return imgs, nil + return cache, nil } diff --git a/kleingebaeck.1 b/kleingebaeck.1 index 2709a6b..d8b4bec 100644 --- a/kleingebaeck.1 +++ b/kleingebaeck.1 @@ -133,7 +133,7 @@ .\" ======================================================================== .\" .IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2024-01-17" "1" "User Commands" +.TH KLEINGEBAECK 1 "2024-01-22" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -142,7 +142,7 @@ kleingebaeck \- kleinanzeigen.de backup tool .SH "SYNOPSYS" .IX Header "SYNOPSYS" -.Vb 12 +.Vb 10 \& Usage: kleingebaeck [\-dvVhmoc] [,...] \& Options: \& \-u \-\-user Backup ads from user with uid . @@ -152,6 +152,7 @@ kleingebaeck \- kleinanzeigen.de backup tool \& \-l \-\-limit Limit the ads to download to , default: load all. \& \-c \-\-config Use config file (default: ~/.kleingebaeck). \& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. +\& \-f \-\-force Download images even if they already exist. \& \-m \-\-manual Show manual. \& \-h \-\-help Show usage. \& \-V \-\-version Show program version. diff --git a/kleingebaeck.go b/kleingebaeck.go index 7488b98..bf510b1 100644 --- a/kleingebaeck.go +++ b/kleingebaeck.go @@ -14,6 +14,7 @@ SYNOPSYS -l --limit Limit the ads to download to , default: load all. -c --config Use config file (default: ~/.kleingebaeck). --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. + -f --force Download images even if they already exist. -m --manual Show manual. -h --help Show usage. -V --version Show program version. diff --git a/kleingebaeck.pod b/kleingebaeck.pod index cad4423..664056b 100644 --- a/kleingebaeck.pod +++ b/kleingebaeck.pod @@ -13,6 +13,7 @@ kleingebaeck - kleinanzeigen.de backup tool -l --limit Limit the ads to download to , default: load all. -c --config Use config file (default: ~/.kleingebaeck). --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. + -f --force Download images even if they already exist. -m --manual Show manual. -h --help Show usage. -V --version Show program version. diff --git a/scrape.go b/scrape.go index 6b38a13..3c4735b 100644 --- a/scrape.go +++ b/scrape.go @@ -136,11 +136,10 @@ func ScrapeAd(fetch *Fetcher, uri string) error { func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error { // fetch images img := 1 - adpath := filepath.Join(fetch.Config.Outdir, addir) // scan existing images, if any - images, err := ReadImages(adpath) + cache, err := ReadImages(adpath, fetch.Config.ForceDownload) if err != nil { return err } @@ -167,9 +166,11 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error { return err } - if image.SimilarExists(images) { - slog.Debug("similar image exists, not written", "image", image) - return nil + if !fetch.Config.ForceDownload { + if image.SimilarExists(cache) { + slog.Debug("similar image exists, not written", "uri", image.Uri) + return nil + } } err = WriteImage(file, buf2)