mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-16 12:01:00 +01:00
add throttling to image download
This commit is contained in:
@@ -48,6 +48,10 @@ const (
|
||||
Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
DefaultAdNameTemplate string = "{{.Slug}}"
|
||||
|
||||
// for image download throttling
|
||||
MinThrottle int = 2
|
||||
MaxThrottle int = 20
|
||||
)
|
||||
|
||||
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
|
||||
|
||||
5
main.go
5
main.go
@@ -22,8 +22,10 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"math/rand"
|
||||
"os"
|
||||
"runtime/debug"
|
||||
"time"
|
||||
|
||||
"github.com/lmittmann/tint"
|
||||
"github.com/tlinden/yadu"
|
||||
@@ -113,6 +115,9 @@ func Main(w io.Writer) int {
|
||||
// used for all HTTP requests
|
||||
fetch := NewFetcher(conf)
|
||||
|
||||
// randomization needed here and there
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
|
||||
if len(conf.Adlinks) >= 1 {
|
||||
// directly backup ad listing[s]
|
||||
for _, uri := range conf.Adlinks {
|
||||
|
||||
10
scrape.go
10
scrape.go
@@ -24,6 +24,7 @@ import (
|
||||
"log/slog"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"astuart.co/goq"
|
||||
"golang.org/x/sync/errgroup"
|
||||
@@ -150,6 +151,11 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
||||
imguri := imguri
|
||||
file := filepath.Join(adpath, fmt.Sprintf("%d.jpg", img))
|
||||
g.Go(func() error {
|
||||
// wait a little
|
||||
|
||||
t := GetThrottleTime()
|
||||
time.Sleep(t)
|
||||
|
||||
body, err := fetch.Getimage(imguri)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -163,7 +169,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
||||
|
||||
buf2 := buf.Bytes() // needed for image writing
|
||||
|
||||
image := NewImage(buf, "", imguri)
|
||||
image := NewImage(buf, file, imguri)
|
||||
err = image.CalcHash()
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -181,7 +187,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
slog.Debug("wrote image", "image", image, "size", len(buf2))
|
||||
slog.Debug("wrote image", "image", image, "size", len(buf2), "throttle", t)
|
||||
return nil
|
||||
})
|
||||
img++
|
||||
|
||||
6
util.go
6
util.go
@@ -20,9 +20,11 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"math/rand"
|
||||
"os"
|
||||
"os/exec"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
"github.com/mattn/go-isatty"
|
||||
)
|
||||
@@ -66,3 +68,7 @@ func IsNoTty() bool {
|
||||
// it is a tty
|
||||
return false
|
||||
}
|
||||
|
||||
func GetThrottleTime() time.Duration {
|
||||
return time.Duration(rand.Intn(MaxThrottle-MinThrottle+1)+MinThrottle) * time.Millisecond
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user