mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 04:21:00 +01:00
add throttling to image download
This commit is contained in:
@@ -48,6 +48,10 @@ const (
|
|||||||
Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||||
DefaultAdNameTemplate string = "{{.Slug}}"
|
DefaultAdNameTemplate string = "{{.Slug}}"
|
||||||
|
|
||||||
|
// for image download throttling
|
||||||
|
MinThrottle int = 2
|
||||||
|
MaxThrottle int = 20
|
||||||
)
|
)
|
||||||
|
|
||||||
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
|
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
|
||||||
|
|||||||
5
main.go
5
main.go
@@ -22,8 +22,10 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"math/rand"
|
||||||
"os"
|
"os"
|
||||||
"runtime/debug"
|
"runtime/debug"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/lmittmann/tint"
|
"github.com/lmittmann/tint"
|
||||||
"github.com/tlinden/yadu"
|
"github.com/tlinden/yadu"
|
||||||
@@ -113,6 +115,9 @@ func Main(w io.Writer) int {
|
|||||||
// used for all HTTP requests
|
// used for all HTTP requests
|
||||||
fetch := NewFetcher(conf)
|
fetch := NewFetcher(conf)
|
||||||
|
|
||||||
|
// randomization needed here and there
|
||||||
|
rand.Seed(time.Now().UnixNano())
|
||||||
|
|
||||||
if len(conf.Adlinks) >= 1 {
|
if len(conf.Adlinks) >= 1 {
|
||||||
// directly backup ad listing[s]
|
// directly backup ad listing[s]
|
||||||
for _, uri := range conf.Adlinks {
|
for _, uri := range conf.Adlinks {
|
||||||
|
|||||||
10
scrape.go
10
scrape.go
@@ -24,6 +24,7 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
"astuart.co/goq"
|
"astuart.co/goq"
|
||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
@@ -150,6 +151,11 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
|||||||
imguri := imguri
|
imguri := imguri
|
||||||
file := filepath.Join(adpath, fmt.Sprintf("%d.jpg", img))
|
file := filepath.Join(adpath, fmt.Sprintf("%d.jpg", img))
|
||||||
g.Go(func() error {
|
g.Go(func() error {
|
||||||
|
// wait a little
|
||||||
|
|
||||||
|
t := GetThrottleTime()
|
||||||
|
time.Sleep(t)
|
||||||
|
|
||||||
body, err := fetch.Getimage(imguri)
|
body, err := fetch.Getimage(imguri)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -163,7 +169,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
|||||||
|
|
||||||
buf2 := buf.Bytes() // needed for image writing
|
buf2 := buf.Bytes() // needed for image writing
|
||||||
|
|
||||||
image := NewImage(buf, "", imguri)
|
image := NewImage(buf, file, imguri)
|
||||||
err = image.CalcHash()
|
err = image.CalcHash()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -181,7 +187,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Debug("wrote image", "image", image, "size", len(buf2))
|
slog.Debug("wrote image", "image", image, "size", len(buf2), "throttle", t)
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
img++
|
img++
|
||||||
|
|||||||
6
util.go
6
util.go
@@ -20,9 +20,11 @@ package main
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
|
"math/rand"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/mattn/go-isatty"
|
"github.com/mattn/go-isatty"
|
||||||
)
|
)
|
||||||
@@ -66,3 +68,7 @@ func IsNoTty() bool {
|
|||||||
// it is a tty
|
// it is a tty
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func GetThrottleTime() time.Duration {
|
||||||
|
return time.Duration(rand.Intn(MaxThrottle-MinThrottle+1)+MinThrottle) * time.Millisecond
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user