From 8df3ebfa6d89a5c9b2b0752ec53ac21a2ac47361 Mon Sep 17 00:00:00 2001
From: Thomas von Dein
Date: Wed, 24 Jan 2024 18:35:06 +0100
Subject: [PATCH] add throttling to image download

---
 config.go |  4 ++++
 scrape.go | 10 ++++++++--
 util.go   |  8 ++++++++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/config.go b/config.go
index c0973b9..31309ee 100644
--- a/config.go
+++ b/config.go
@@ -48,6 +48,10 @@ const (
 	Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
 		"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
 	DefaultAdNameTemplate string = "{{.Slug}}"
+
+	// inclusive bounds, in milliseconds, of the random delay before each image download
+	MinThrottle int = 2
+	MaxThrottle int = 20
 )
 
 const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
diff --git a/scrape.go b/scrape.go
index f6c2a91..0debc5a 100644
--- a/scrape.go
+++ b/scrape.go
@@ -24,6 +25,7 @@ import (
 	"log/slog"
 	"path/filepath"
 	"strings"
+	"time"
 
 	"astuart.co/goq"
 	"golang.org/x/sync/errgroup"
@@ -150,6 +151,11 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
 		imguri := imguri
 		file := filepath.Join(adpath, fmt.Sprintf("%d.jpg", img))
 		g.Go(func() error {
+			// throttle: sleep for a short random time before fetching this image
+
+			t := GetThrottleTime()
+			time.Sleep(t)
+
 			body, err := fetch.Getimage(imguri)
 			if err != nil {
 				return err
@@ -163,7 +169,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
 
 			buf2 := buf.Bytes() // needed for image writing
 
-			image := NewImage(buf, "", imguri)
+			image := NewImage(buf, file, imguri)
 			err = image.CalcHash()
 			if err != nil {
 				return err
@@ -181,7 +187,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
 				return err
 			}
 
-			slog.Debug("wrote image", "image", image, "size", len(buf2))
+			slog.Debug("wrote image", "image", image, "size", len(buf2), "throttle", t)
 			return nil
 		})
 		img++
diff --git a/util.go b/util.go
index bf06ae5..68632d8 100644
--- a/util.go
+++ b/util.go
@@ -20,9 +20,11 @@ package main
 import (
 	"bytes"
 	"errors"
+	"math/rand"
 	"os"
 	"os/exec"
 	"runtime"
+	"time"
 
 	"github.com/mattn/go-isatty"
 )
@@ -66,3 +68,9 @@ func IsNoTty() bool {
 	// it is a tty
 	return false
 }
+
+// GetThrottleTime returns a random delay between MinThrottle and
+// MaxThrottle milliseconds, both bounds inclusive.
+func GetThrottleTime() time.Duration {
+	return time.Duration(rand.Intn(MaxThrottle-MinThrottle+1)+MinThrottle) * time.Millisecond
+}