add throttling to image download

This commit is contained in:
2024-01-24 18:35:06 +01:00
committed by T.v.Dein
parent de82127223
commit 8df3ebfa6d
4 changed files with 23 additions and 2 deletions

View File

@@ -48,6 +48,10 @@ const (
Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
DefaultAdNameTemplate string = "{{.Slug}}"
// bounds (inclusive, in milliseconds) of the random delay before each image download
MinThrottle int = 2
MaxThrottle int = 20
)
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.

View File

@@ -22,8 +22,10 @@ import (
"fmt"
"io"
"log/slog"
"math/rand"
"os"
"runtime/debug"
"time"
"github.com/lmittmann/tint"
"github.com/tlinden/yadu"
@@ -113,6 +115,9 @@ func Main(w io.Writer) int {
// used for all HTTP requests
fetch := NewFetcher(conf)
// seed the global math/rand source, used e.g. for the random image download throttle
rand.Seed(time.Now().UnixNano())
if len(conf.Adlinks) >= 1 {
// directly backup ad listing[s]
for _, uri := range conf.Adlinks {

View File

@@ -24,6 +24,7 @@ import (
"log/slog"
"path/filepath"
"strings"
"time"
"astuart.co/goq"
"golang.org/x/sync/errgroup"
@@ -150,6 +151,11 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
imguri := imguri
file := filepath.Join(adpath, fmt.Sprintf("%d.jpg", img))
g.Go(func() error {
// throttle: sleep for a short random time before fetching the image
t := GetThrottleTime()
time.Sleep(t)
body, err := fetch.Getimage(imguri)
if err != nil {
return err
@@ -163,7 +169,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
buf2 := buf.Bytes() // needed for image writing
image := NewImage(buf, "", imguri)
image := NewImage(buf, file, imguri)
err = image.CalcHash()
if err != nil {
return err
@@ -181,7 +187,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
return err
}
slog.Debug("wrote image", "image", image, "size", len(buf2))
slog.Debug("wrote image", "image", image, "size", len(buf2), "throttle", t)
return nil
})
img++

View File

@@ -20,9 +20,11 @@ package main
import (
"bytes"
"errors"
"math/rand"
"os"
"os/exec"
"runtime"
"time"
"github.com/mattn/go-isatty"
)
@@ -66,3 +68,7 @@ func IsNoTty() bool {
// it is a tty
return false
}
// GetThrottleTime returns a randomly chosen pause duration. The result
// is a whole number of milliseconds between MinThrottle and MaxThrottle,
// both bounds inclusive.
func GetThrottleTime() time.Duration {
	// rand.Intn(n) yields [0, n), so widen the span by one to make
	// MaxThrottle itself reachable.
	span := MaxThrottle - MinThrottle + 1
	millis := MinThrottle + rand.Intn(span)

	return time.Millisecond * time.Duration(millis)
}