mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 04:21:00 +01:00
added -f to override d-hash, better debug and error handling
This commit is contained in:
@@ -62,6 +62,7 @@ Options:
|
|||||||
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
|
-f --force Download images even if they already exist.
|
||||||
-m --manual Show manual.
|
-m --manual Show manual.
|
||||||
-h --help Show usage.
|
-h --help Show usage.
|
||||||
-V --version Show program version.
|
-V --version Show program version.
|
||||||
@@ -82,6 +83,7 @@ type Config struct {
|
|||||||
Loglevel string `koanf:"loglevel"`
|
Loglevel string `koanf:"loglevel"`
|
||||||
Limit int `koanf:"limit"`
|
Limit int `koanf:"limit"`
|
||||||
IgnoreErrors bool `koanf:"ignoreerrors"`
|
IgnoreErrors bool `koanf:"ignoreerrors"`
|
||||||
|
ForceDownload bool `koanf:"force"`
|
||||||
Adlinks []string
|
Adlinks []string
|
||||||
StatsCountAds int
|
StatsCountAds int
|
||||||
StatsCountImages int
|
StatsCountImages int
|
||||||
@@ -133,6 +135,7 @@ func InitConfig(w io.Writer) (*Config, error) {
|
|||||||
f.BoolP("version", "V", false, "show program version")
|
f.BoolP("version", "V", false, "show program version")
|
||||||
f.BoolP("help", "h", false, "show usage")
|
f.BoolP("help", "h", false, "show usage")
|
||||||
f.BoolP("manual", "m", false, "show manual")
|
f.BoolP("manual", "m", false, "show manual")
|
||||||
|
f.BoolP("force", "f", false, "force")
|
||||||
|
|
||||||
if err := f.Parse(os.Args[1:]); err != nil {
|
if err := f.Parse(os.Args[1:]); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|||||||
26
image.go
26
image.go
@@ -46,7 +46,7 @@ func (img *Image) LogValue() slog.Value {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// holds all images of an ad
|
// holds all images of an ad
|
||||||
type Images []*Image
|
type Cache []*goimagehash.ImageHash
|
||||||
|
|
||||||
func NewImage(buf *bytes.Buffer, filename string, uri string) *Image {
|
func NewImage(buf *bytes.Buffer, filename string, uri string) *Image {
|
||||||
img := &Image{
|
img := &Image{
|
||||||
@@ -76,15 +76,16 @@ func (img *Image) CalcHash() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// checks if 2 images are similar enough to be considered the same
|
// checks if 2 images are similar enough to be considered the same
|
||||||
func (img *Image) Similar(otherimg *Image) bool {
|
func (img *Image) Similar(hash *goimagehash.ImageHash) bool {
|
||||||
distance, err := img.Hash.Distance(otherimg.Hash)
|
distance, err := img.Hash.Distance(hash)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
slog.Debug("failed to compute diff hash distance", "error", err)
|
slog.Debug("failed to compute diff hash distance", "error", err)
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if distance < MaxDistance {
|
if distance < MaxDistance {
|
||||||
slog.Debug("distance computation", "image-A", img, "image-B", otherimg, "distance", distance)
|
slog.Debug("distance computation", "image-A", img.Hash.ToString(),
|
||||||
|
"image-B", hash.ToString(), "distance", distance)
|
||||||
return true
|
return true
|
||||||
} else {
|
} else {
|
||||||
return false
|
return false
|
||||||
@@ -92,8 +93,8 @@ func (img *Image) Similar(otherimg *Image) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// check current image against all known hashes.
|
// check current image against all known hashes.
|
||||||
func (img *Image) SimilarExists(images Images) bool {
|
func (img *Image) SimilarExists(cache Cache) bool {
|
||||||
for _, otherimg := range images {
|
for _, otherimg := range cache {
|
||||||
if img.Similar(otherimg) {
|
if img.Similar(otherimg) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
@@ -104,13 +105,18 @@ func (img *Image) SimilarExists(images Images) bool {
|
|||||||
|
|
||||||
// read all JPG images in an ad directory, compute diff hashes and
|
// read all JPG images in an ad directory, compute diff hashes and
|
||||||
// store the results in the slice Images
|
// store the results in the slice Images
|
||||||
func ReadImages(addir string) (Images, error) {
|
func ReadImages(addir string, dont bool) (Cache, error) {
|
||||||
files, err := os.ReadDir(addir)
|
files, err := os.ReadDir(addir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
imgs := Images{}
|
cache := Cache{}
|
||||||
|
|
||||||
|
if dont {
|
||||||
|
// forced download, -f given
|
||||||
|
return cache, nil
|
||||||
|
}
|
||||||
|
|
||||||
for _, file := range files {
|
for _, file := range files {
|
||||||
ext := filepath.Ext(file.Name())
|
ext := filepath.Ext(file.Name())
|
||||||
@@ -127,10 +133,10 @@ func ReadImages(addir string) (Images, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString())
|
slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString())
|
||||||
imgs = append(imgs, img)
|
cache = append(cache, img.Hash)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//return nil, errors.New("ende")
|
//return nil, errors.New("ende")
|
||||||
return imgs, nil
|
return cache, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -133,7 +133,7 @@
|
|||||||
.\" ========================================================================
|
.\" ========================================================================
|
||||||
.\"
|
.\"
|
||||||
.IX Title "KLEINGEBAECK 1"
|
.IX Title "KLEINGEBAECK 1"
|
||||||
.TH KLEINGEBAECK 1 "2024-01-17" "1" "User Commands"
|
.TH KLEINGEBAECK 1 "2024-01-22" "1" "User Commands"
|
||||||
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
||||||
.\" way too many mistakes in technical documents.
|
.\" way too many mistakes in technical documents.
|
||||||
.if n .ad l
|
.if n .ad l
|
||||||
@@ -142,7 +142,7 @@
|
|||||||
kleingebaeck \- kleinanzeigen.de backup tool
|
kleingebaeck \- kleinanzeigen.de backup tool
|
||||||
.SH "SYNOPSYS"
|
.SH "SYNOPSYS"
|
||||||
.IX Header "SYNOPSYS"
|
.IX Header "SYNOPSYS"
|
||||||
.Vb 12
|
.Vb 10
|
||||||
\& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...]
|
\& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...]
|
||||||
\& Options:
|
\& Options:
|
||||||
\& \-u \-\-user <uid> Backup ads from user with uid <uid>.
|
\& \-u \-\-user <uid> Backup ads from user with uid <uid>.
|
||||||
@@ -152,6 +152,7 @@ kleingebaeck \- kleinanzeigen.de backup tool
|
|||||||
\& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all.
|
\& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
\& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck).
|
\& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
|
\& \-f \-\-force Download images even if they already exist.
|
||||||
\& \-m \-\-manual Show manual.
|
\& \-m \-\-manual Show manual.
|
||||||
\& \-h \-\-help Show usage.
|
\& \-h \-\-help Show usage.
|
||||||
\& \-V \-\-version Show program version.
|
\& \-V \-\-version Show program version.
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ SYNOPSYS
|
|||||||
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
|
-f --force Download images even if they already exist.
|
||||||
-m --manual Show manual.
|
-m --manual Show manual.
|
||||||
-h --help Show usage.
|
-h --help Show usage.
|
||||||
-V --version Show program version.
|
-V --version Show program version.
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ kleingebaeck - kleinanzeigen.de backup tool
|
|||||||
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
|
-f --force Download images even if they already exist.
|
||||||
-m --manual Show manual.
|
-m --manual Show manual.
|
||||||
-h --help Show usage.
|
-h --help Show usage.
|
||||||
-V --version Show program version.
|
-V --version Show program version.
|
||||||
|
|||||||
11
scrape.go
11
scrape.go
@@ -136,11 +136,10 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
|
|||||||
func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
||||||
// fetch images
|
// fetch images
|
||||||
img := 1
|
img := 1
|
||||||
|
|
||||||
adpath := filepath.Join(fetch.Config.Outdir, addir)
|
adpath := filepath.Join(fetch.Config.Outdir, addir)
|
||||||
|
|
||||||
// scan existing images, if any
|
// scan existing images, if any
|
||||||
images, err := ReadImages(adpath)
|
cache, err := ReadImages(adpath, fetch.Config.ForceDownload)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -167,9 +166,11 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if image.SimilarExists(images) {
|
if !fetch.Config.ForceDownload {
|
||||||
slog.Debug("similar image exists, not written", "image", image)
|
if image.SimilarExists(cache) {
|
||||||
return nil
|
slog.Debug("similar image exists, not written", "uri", image.Uri)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
err = WriteImage(file, buf2)
|
err = WriteImage(file, buf2)
|
||||||
|
|||||||
Reference in New Issue
Block a user