diff --git a/http.go b/http.go
new file mode 100644
index 0000000..8b4d608
--- /dev/null
+++ b/http.go
@@ -0,0 +1,90 @@
+/*
+Copyright © 2023-2024 Thomas von Dein
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+// FIXME: we could also incorporate
+// https://github.com/kdkumawat/golang/blob/main/http-retry/http/retry-client.go
+
+package main
+
+import (
+	"fmt"
+	"log/slog"
+	"math/rand"
+	"net/http"
+	"net/http/httputil"
+	"os"
+)
+
+// loggingTransport is an http.RoundTripper which forwards requests to
+// http.DefaultTransport and logs each request/response pair.
+type loggingTransport struct{}
+
+// letters is the alphabet getid draws from.
+var letters = []rune("ABCDEF0123456789")
+
+// getid returns a random id of 8 characters taken from letters. It
+// is only used to correlate log lines.
+func getid() string {
+	b := make([]rune, 8)
+	for i := range b {
+		b[i] = letters[rand.Intn(len(letters))]
+	}
+	return string(b)
+}
+
+// RoundTrip implements http.RoundTripper. It hands req over to
+// http.DefaultTransport and logs the request and the response at
+// debug level. Both log lines carry the same random id. If the
+// environment variable DEBUGHTTP is non-empty, the outgoing request
+// and the response headers are printed to stdout as well.
+//
+// If the underlying transport returns an error, that error is logged
+// and handed back to the caller without looking at the response.
+func (s *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) {
+	// the id is just required for debugging
+	id := getid()
+	slog.Debug("REQUEST", "id", id, "uri", req.URL, "host", req.Host)
+
+	resp, err := http.DefaultTransport.RoundTrip(req)
+	if err != nil {
+		// no usable response on failure, so do not dereference resp
+		slog.Debug("RESPONSE", "id", id, "error", err)
+
+		return resp, err
+	}
+
+	slog.Debug("RESPONSE", "id", id, "status", resp.StatusCode, "contentlength", resp.ContentLength)
+
+	if len(os.Getenv("DEBUGHTTP")) > 0 {
+		fmt.Println("DEBUGHTTP Request ===>")
+
+		// best effort only: a dump failure must not break the request
+		dump, dumpErr := httputil.DumpRequestOut(req, true)
+		if dumpErr != nil {
+			slog.Debug("request dump failed", "id", id, "error", dumpErr)
+		}
+		fmt.Printf("%s\n", dump)
+
+		fmt.Println("<=== DEBUGHTTP Response")
+		for header, value := range resp.Header {
+			fmt.Printf("%s: %s\n", header, value)
+		}
+		fmt.Printf("Status: %s %s\nContent-Length: %d\n\n\n", resp.Proto, resp.Status, resp.ContentLength)
+	}
+
+	return resp, err
+}
diff --git a/main.go 
b/main.go index 5f334c3..b93f3c3 100644 --- a/main.go +++ b/main.go @@ -22,6 +22,7 @@ import ( "fmt" "io" "log/slog" + "net/http" "os" "runtime/debug" @@ -111,17 +112,20 @@ func Main(w io.Writer) int { return Die(err) } + // used for all HTTP requests + client := &http.Client{Transport: &loggingTransport{}} + if len(conf.Adlinks) >= 1 { // directly backup ad listing[s] for _, uri := range conf.Adlinks { - err := Scrape(conf, uri) + err := ScrapeAd(conf, uri, client) if err != nil { return Die(err) } } } else if conf.User > 0 { // backup all ads of the given user (via config or cmdline) - err := Start(conf) + err := ScrapeUser(conf, client) if err != nil { return Die(err) } diff --git a/scrape.go b/scrape.go index 6545b70..867a6fe 100644 --- a/scrape.go +++ b/scrape.go @@ -56,8 +56,7 @@ func Get(uri string, client *http.Client) (io.ReadCloser, error) { // extract links from all ad listing pages (that is: use pagination) // and scrape every page -func Start(conf *Config) error { - client := &http.Client{} +func ScrapeUser(conf *Config, client *http.Client) error { adlinks := []string{} baseuri := fmt.Sprintf("%s%s?userId=%d", Baseuri, Listuri, conf.User) @@ -96,7 +95,7 @@ func Start(conf *Config) error { } for i, adlink := range adlinks { - err := Scrape(conf, Baseuri+adlink) + err := ScrapeAd(conf, Baseuri+adlink, client) if err != nil { return err } @@ -110,8 +109,7 @@ func Start(conf *Config) error { } // scrape an ad. 
uri is the full uri of the ad, dir is the basedir -func Scrape(c *Config, uri string) error { - client := &http.Client{} +func ScrapeAd(c *Config, uri string, client *http.Client) error { ad := &Ad{} // extract slug and id from uri @@ -155,10 +153,10 @@ func Scrape(c *Config, uri string) error { c.IncrAds() - return ScrapeImages(c, ad, addir) + return ScrapeImages(c, ad, addir, client) } -func ScrapeImages(c *Config, ad *Ad, addir string) error { +func ScrapeImages(c *Config, ad *Ad, addir string, client *http.Client) error { // fetch images img := 1 g := new(errgroup.Group) @@ -167,7 +165,7 @@ func ScrapeImages(c *Config, ad *Ad, addir string) error { imguri := imguri file := filepath.Join(c.Outdir, addir, fmt.Sprintf("%d.jpg", img)) g.Go(func() error { - err := Getimage(imguri, file) + err := Getimage(imguri, file, client) if err != nil { return err } @@ -188,9 +186,16 @@ func ScrapeImages(c *Config, ad *Ad, addir string) error { } // fetch an image -func Getimage(uri, fileName string) error { +func Getimage(uri, fileName string, client *http.Client) error { slog.Debug("fetching ad image", "uri", uri) - response, err := http.Get(uri) + req, err := http.NewRequest("GET", uri, nil) + if err != nil { + return err + } + + req.Header.Set("User-Agent", Useragent) + + response, err := client.Do(req) if err != nil { return err }