From 14f8c3fd43a72d0d51b25fcc18aa295a51dc7ebd Mon Sep 17 00:00:00 2001 From: "T.v.Dein" Date: Thu, 25 Jan 2024 19:03:34 +0100 Subject: [PATCH] Fix/linter (#66) * added lint targets * fix linter errors * enhance error handling * !!BREAKING!! rename Id to ID in tpls --- Makefile | 12 +++++-- ad.go | 6 ++-- config.go | 84 +++++++++++++++++++++++++-------------------- fetch.go | 20 +++++++---- go.mod | 1 + go.sum | 2 ++ http.go | 37 ++++++++++++-------- image.go | 21 +++++++----- kleingebaeck.1 | 6 ++-- kleingebaeck.go | 4 +-- kleingebaeck.pod | 4 +-- main.go | 44 ++++++++++++++---------- main_test.go | 83 +++++++++++++++++++++++--------------------- scrape.go | 74 +++++++++++++++++++++------------------ store.go | 52 +++++++++++++++------------- store_test.go | 3 +- t/config-empty.conf | 2 +- t/fullconfig.conf | 2 +- util.go | 10 +++--- 19 files changed, 264 insertions(+), 203 deletions(-) diff --git a/Makefile b/Makefile index 1c920f0..595af28 100644 --- a/Makefile +++ b/Makefile @@ -56,6 +56,14 @@ test: clean mkdir -p t/out go test ./... $(ARGS) +testlint: test lint + +lint: + golangci-lint run + +lint-full: + golangci-lint run --enable-all --exclude-use-default --disable exhaustivestruct,exhaustruct,depguard,interfacer,deadcode,golint,structcheck,scopelint,varcheck,ifshort,maligned,nosnakecase,godot,funlen,gofumpt,cyclop,noctx,gochecknoglobals,paralleltest + testfuzzy: clean go test -fuzz ./... $(ARGS) @@ -88,5 +96,5 @@ show-versions: buildlocal @echo "### go version used for building:" @grep -m 1 go go.mod -lint: - golangci-lint run -p bugs -p unused +# lint: +# golangci-lint run -p bugs -p unused diff --git a/ad.go b/ad.go index 633af3d..8c8a09b 100644 --- a/ad.go +++ b/ad.go @@ -30,7 +30,7 @@ type Index struct { type Ad struct { Title string `goquery:"h1"` Slug string - Id string + ID string Condition string `goquery:".addetailslist--detail--value,text"` Category string CategoryTree []string `goquery:".breadcrump-link,text"` @@ -46,7 +46,7 @@ func (ad *Ad) LogValue() slog.Value { return slog.GroupValue( slog.String("title", ad.Title), slog.String("price", ad.Price), - slog.String("id", ad.Id), + slog.String("id", ad.ID), slog.Int("imagecount", len(ad.Images)), slog.Int("bodysize", len(ad.Text)), slog.String("categorytree", strings.Join(ad.CategoryTree, "+")), @@ -76,7 +76,7 @@ func (ad *Ad) CalculateExpire() { if len(ad.Created) > 0 { ts, err := time.Parse("02.01.2006", ad.Created) if err == nil { - ad.Expire = ts.AddDate(0, 2, 1).Format("02.01.2006") + ad.Expire = ts.AddDate(0, ExpireMonths, ExpireDays).Format("02.01.2006") } } } diff --git a/config.go b/config.go index e6004a8..751f253 100644 --- a/config.go +++ b/config.go @@ -17,7 +17,6 @@ along with this program. If not, see . package main import ( - "errors" "fmt" "io" "os" @@ -35,16 +34,16 @@ import ( ) const ( - VERSION string = "0.3.1" + VERSION string = "0.3.2" Baseuri string = "https://www.kleinanzeigen.de" Listuri string = "/s-bestandsliste.html" Defaultdir string = "." - DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\n" + + DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + "Category: {{.Category}}\nCondition: {{.Condition}}\n" + "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" - DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.Id}}\r\n" + + DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" + "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\n" + "Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n" @@ -56,6 +55,14 @@ const ( // for image download throttling MinThrottle int = 2 MaxThrottle int = 20 + + // we extract the slug from the uri + SlugURIPartNum int = 6 + + ExpireMonths int = 2 + ExpireDays int = 1 + + WIN string = "windows" ) const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool. @@ -107,17 +114,17 @@ func (c *Config) IncrImgs(num int) { } // load commandline flags and config file -func InitConfig(w io.Writer) (*Config, error) { - var k = koanf.New(".") +func InitConfig(output io.Writer) (*Config, error) { + var kloader = koanf.New(".") // determine template based on os template := DefaultTemplate - if runtime.GOOS == "windows" { + if runtime.GOOS == WIN { template = DefaultTemplateWin } // Load default values using the confmap provider. - if err := k.Load(confmap.Provider(map[string]interface{}{ + if err := kloader.Load(confmap.Provider(map[string]interface{}{ "template": template, "outdir": ".", "loglevel": "notice", @@ -125,37 +132,39 @@ func InitConfig(w io.Writer) (*Config, error) { "adnametemplate": DefaultAdNameTemplate, "useragent": DefaultUserAgent, }, "."), nil); err != nil { - return nil, err + return nil, fmt.Errorf("failed to load default values into koanf: %w", err) } // setup custom usage - f := flag.NewFlagSet("config", flag.ContinueOnError) - f.Usage = func() { - fmt.Fprintln(w, Usage) + flagset := flag.NewFlagSet("config", flag.ContinueOnError) + flagset.Usage = func() { + fmt.Fprintln(output, Usage) os.Exit(0) } // parse commandline flags - f.StringP("config", "c", "", "config file") - f.StringP("outdir", "o", "", "directory where to store ads") - f.IntP("user", "u", 0, "user id") - f.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)") - f.BoolP("verbose", "v", false, "be verbose") - f.BoolP("debug", "d", false, "enable debug log") - f.BoolP("version", "V", false, "show program version") - f.BoolP("help", "h", false, "show usage") - f.BoolP("manual", "m", false, "show manual") - f.BoolP("force", "f", false, "force") + flagset.StringP("config", "c", "", "config file") + flagset.StringP("outdir", "o", "", "directory where to store ads") + flagset.IntP("user", "u", 0, "user id") + flagset.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)") + flagset.BoolP("verbose", "v", false, "be verbose") + flagset.BoolP("debug", "d", false, "enable debug log") + flagset.BoolP("version", "V", false, "show program version") + flagset.BoolP("help", "h", false, "show usage") + flagset.BoolP("manual", "m", false, "show manual") + flagset.BoolP("force", "f", false, "force") - if err := f.Parse(os.Args[1:]); err != nil { - return nil, err + if err := flagset.Parse(os.Args[1:]); err != nil { + return nil, fmt.Errorf("failed to parse program arguments: %w", err) } // generate a list of config files to try to load, including the // one provided via -c, if any var configfiles []string - configfile, _ := f.GetString("config") + + configfile, _ := flagset.GetString("config") home, _ := os.UserHomeDir() + if configfile != "" { configfiles = []string{configfile} } else { @@ -171,31 +180,30 @@ func InitConfig(w io.Writer) (*Config, error) { for _, cfgfile := range configfiles { if path, err := os.Stat(cfgfile); !os.IsNotExist(err) { if !path.IsDir() { - if err := k.Load(file.Provider(cfgfile), toml.Parser()); err != nil { - return nil, errors.New("error loading config file: " + err.Error()) + if err := kloader.Load(file.Provider(cfgfile), toml.Parser()); err != nil { + return nil, fmt.Errorf("error loading config file: %w", err) } } - } - // else: we ignore the file if it doesn't exists + } // else: we ignore the file if it doesn't exists } // env overrides config file - if err := k.Load(env.Provider("KLEINGEBAECK_", ".", func(s string) string { - return strings.Replace(strings.ToLower( - strings.TrimPrefix(s, "KLEINGEBAECK_")), "_", ".", -1) + if err := kloader.Load(env.Provider("KLEINGEBAECK_", ".", func(s string) string { + return strings.ReplaceAll(strings.ToLower( + strings.TrimPrefix(s, "KLEINGEBAECK_")), "_", ".") }), nil); err != nil { - return nil, errors.New("error loading environment: " + err.Error()) + return nil, fmt.Errorf("error loading environment: %w", err) } // command line overrides env - if err := k.Load(posflag.Provider(f, ".", k), nil); err != nil { - return nil, errors.New("error loading flags: " + err.Error()) + if err := kloader.Load(posflag.Provider(flagset, ".", kloader), nil); err != nil { + return nil, fmt.Errorf("error loading flags: %w", err) } // fetch values conf := &Config{} - if err := k.Unmarshal("", &conf); err != nil { - return nil, errors.New("error unmarshalling: " + err.Error()) + if err := kloader.Unmarshal("", &conf); err != nil { + return nil, fmt.Errorf("error unmarshalling: %w", err) } // adjust loglevel @@ -207,7 +215,7 @@ func InitConfig(w io.Writer) (*Config, error) { } // are there any args left on commandline? if so threat them as adlinks - conf.Adlinks = f.Args() + conf.Adlinks = flagset.Args() return conf, nil } diff --git a/fetch.go b/fetch.go index 2160ae7..c3fd281 100644 --- a/fetch.go +++ b/fetch.go @@ -19,6 +19,7 @@ package main import ( "errors" + "fmt" "io" "log/slog" "net/http" @@ -33,10 +34,10 @@ type Fetcher struct { Cookies []*http.Cookie } -func NewFetcher(c *Config) (*Fetcher, error) { +func NewFetcher(conf *Config) (*Fetcher, error) { jar, err := cookiejar.New(nil) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to create a cookie jar obj: %w", err) } return &Fetcher{ @@ -44,35 +45,37 @@ func NewFetcher(c *Config) (*Fetcher, error) { Transport: &loggingTransport{}, // implemented in http.go Jar: jar, }, - Config: c, + Config: conf, Cookies: []*http.Cookie{}, }, nil } func (f *Fetcher) Get(uri string) (io.ReadCloser, error) { - req, err := http.NewRequest("GET", uri, nil) + req, err := http.NewRequest(http.MethodGet, uri, nil) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to create a new HTTP request obj: %w", err) } req.Header.Set("User-Agent", f.Config.UserAgent) if len(f.Cookies) > 0 { uriobj, _ := url.Parse(Baseuri) + slog.Debug("have cookies, sending them", "sample-cookie-name", f.Cookies[0].Name, "sample-cookie-expire", f.Cookies[0].Expires, ) + f.Client.Jar.SetCookies(uriobj, f.Cookies) } res, err := f.Client.Do(req) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to initiate HTTP request to %s: %w", uri, err) } - if res.StatusCode != 200 { + if res.StatusCode != http.StatusOK { return nil, errors.New("could not get page via HTTP") } @@ -85,12 +88,15 @@ func (f *Fetcher) Get(uri string) (io.ReadCloser, error) { // fetch an image func (f *Fetcher) Getimage(uri string) (io.ReadCloser, error) { slog.Debug("fetching ad image", "uri", uri) + body, err := f.Get(uri) if err != nil { if f.Config.IgnoreErrors { slog.Info("Failed to download image, error ignored", "error", err.Error()) + return nil, nil } + return nil, err } diff --git a/go.mod b/go.mod index 172a1cf..8170540 100644 --- a/go.mod +++ b/go.mod @@ -31,6 +31,7 @@ require ( github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect github.com/pelletier/go-toml v1.9.5 // indirect + github.com/pkg/errors v0.9.1 // indirect golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect golang.org/x/sys v0.14.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect diff --git a/go.sum b/go.sum index 076c484..3cadaea 100644 --- a/go.sum +++ b/go.sum @@ -50,6 +50,8 @@ github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6 github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= diff --git a/http.go b/http.go index 0441101..e281656 100644 --- a/http.go +++ b/http.go @@ -19,6 +19,7 @@ package main import ( "bytes" + "fmt" "io" "log/slog" "math" @@ -32,17 +33,20 @@ import ( // easier associated in debug output var letters = []rune("ABCDEF0123456789") -func getid() string { - b := make([]rune, 8) - for i := range b { - b[i] = letters[rand.Intn(len(letters))] - } - return string(b) -} +const IDLEN int = 8 // retry after HTTP 50x errors or err!=nil const RetryCount = 3 +func getid() string { + b := make([]rune, IDLEN) + for i := range b { + b[i] = letters[rand.Intn(len(letters))] + } + + return string(b) +} + // used to inject debug log and implement retries type loggingTransport struct{} @@ -75,6 +79,7 @@ func drainBody(resp *http.Response) { // unable to copy data? uff! panic(err) } + resp.Body.Close() } } @@ -82,8 +87,8 @@ func drainBody(resp *http.Response) { // the actual logging transport with retries func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) { - // just requred for debugging - id := getid() + // just required for debugging + requestid := getid() // clone the request body, put into request on retry var bodyBytes []byte @@ -92,16 +97,16 @@ func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) } - slog.Debug("REQUEST", "id", id, "uri", req.URL, "host", req.Host) + slog.Debug("REQUEST", "id", requestid, "uri", req.URL, "host", req.Host) // first try resp, err := http.DefaultTransport.RoundTrip(req) if err == nil { - slog.Debug("RESPONSE", "id", id, "status", resp.StatusCode, + slog.Debug("RESPONSE", "id", requestid, "status", resp.StatusCode, "contentlength", resp.ContentLength) } - // enter retry check and loop, if first req were successfull, leave loop immediately + // enter retry check and loop, if first req were successful, leave loop immediately retries := 0 for shouldRetry(err, resp) && retries < RetryCount { time.Sleep(backoff(retries)) @@ -118,12 +123,16 @@ func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) resp, err = http.DefaultTransport.RoundTrip(req) if err == nil { - slog.Debug("RESPONSE", "id", id, "status", resp.StatusCode, + slog.Debug("RESPONSE", "id", requestid, "status", resp.StatusCode, "contentlength", resp.ContentLength, "retry", retries) } retries++ } - return resp, err + if err != nil { + return resp, fmt.Errorf("failed to get HTTP response for %s: %w", req.URL, err) + } + + return resp, nil } diff --git a/image.go b/image.go index 5582567..dd4ae6e 100644 --- a/image.go +++ b/image.go @@ -19,6 +19,7 @@ package main import ( "bytes" + "fmt" "image/jpeg" "log/slog" "os" @@ -33,14 +34,14 @@ type Image struct { Filename string Hash *goimagehash.ImageHash Data *bytes.Buffer - Uri string + URI string } // used for logging to avoid printing Data func (img *Image) LogValue() slog.Value { return slog.GroupValue( slog.String("filename", img.Filename), - slog.String("uri", img.Uri), + slog.String("uri", img.URI), slog.String("hash", img.Hash.ToString()), ) } @@ -51,7 +52,7 @@ type Cache []*goimagehash.ImageHash func NewImage(buf *bytes.Buffer, filename string, uri string) *Image { img := &Image{ Filename: filename, - Uri: uri, + URI: uri, Data: buf, } @@ -62,12 +63,12 @@ func NewImage(buf *bytes.Buffer, filename string, uri string) *Image { func (img *Image) CalcHash() error { jpgdata, err := jpeg.Decode(img.Data) if err != nil { - return err + return fmt.Errorf("failed to decode JPEG image: %w", err) } hash1, err := goimagehash.DifferenceHash(jpgdata) if err != nil { - return err + return fmt.Errorf("failed to calculate diff hash of image: %w", err) } img.Hash = hash1 @@ -80,16 +81,18 @@ func (img *Image) Similar(hash *goimagehash.ImageHash) bool { distance, err := img.Hash.Distance(hash) if err != nil { slog.Debug("failed to compute diff hash distance", "error", err) + return false } if distance < MaxDistance { slog.Debug("distance computation", "image-A", img.Hash.ToString(), "image-B", hash.ToString(), "distance", distance) + return true - } else { - return false } + + return false } // check current image against all known hashes. @@ -108,7 +111,7 @@ func (img *Image) SimilarExists(cache Cache) bool { func ReadImages(addir string, dont bool) (Cache, error) { files, err := os.ReadDir(addir) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to read ad directory contents: %w", err) } cache := Cache{} @@ -122,6 +125,7 @@ func ReadImages(addir string, dont bool) (Cache, error) { ext := filepath.Ext(file.Name()) if !file.IsDir() && (ext == ".jpg" || ext == ".jpeg" || ext == ".JPG" || ext == ".JPEG") { filename := filepath.Join(addir, file.Name()) + data, err := ReadImage(filename) if err != nil { return nil, err @@ -137,6 +141,5 @@ func ReadImages(addir string, dont bool) (Cache, error) { } } - //return nil, errors.New("ende") return cache, nil } diff --git a/kleingebaeck.1 b/kleingebaeck.1 index b38c4f5..350063b 100644 --- a/kleingebaeck.1 +++ b/kleingebaeck.1 @@ -133,7 +133,7 @@ .\" ======================================================================== .\" .IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2024-01-24" "1" "User Commands" +.TH KLEINGEBAECK 1 "2024-01-25" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -182,7 +182,7 @@ Format is pretty simple: \& template = """ \& Title: {{.Title}} \& Price: {{.Price}} -\& Id: {{.Id}} +\& Id: {{.ID}} \& Category: {{.Category}} \& Condition: {{.Condition}} \& Created: {{.Created}} @@ -191,7 +191,7 @@ Format is pretty simple: \& """ .Ve .PP -Be carefull if you want to change the template. The variable is a +Be careful if you want to change the template. The variable is a multiline string surrounded by three double quotes. You can left out certain fields and use any formatting you like. Refer to for details how to write a diff --git a/kleingebaeck.go b/kleingebaeck.go index 5da890d..992232d 100644 --- a/kleingebaeck.go +++ b/kleingebaeck.go @@ -43,7 +43,7 @@ CONFIGURATION template = """ Title: {{.Title}} Price: {{.Price}} - Id: {{.Id}} + Id: {{.ID}} Category: {{.Category}} Condition: {{.Condition}} Created: {{.Created}} @@ -51,7 +51,7 @@ CONFIGURATION {{.Text}} """ - Be carefull if you want to change the template. The variable is a + Be careful if you want to change the template. The variable is a multiline string surrounded by three double quotes. You can left out certain fields and use any formatting you like. Refer to for details how to write a template. diff --git a/kleingebaeck.pod b/kleingebaeck.pod index a3cf39b..b8fdd99 100644 --- a/kleingebaeck.pod +++ b/kleingebaeck.pod @@ -43,7 +43,7 @@ Format is pretty simple: template = """ Title: {{.Title}} Price: {{.Price}} - Id: {{.Id}} + Id: {{.ID}} Category: {{.Category}} Condition: {{.Condition}} Created: {{.Created}} @@ -51,7 +51,7 @@ Format is pretty simple: {{.Text}} """ -Be carefull if you want to change the template. The variable is a +Be careful if you want to change the template. The variable is a multiline string surrounded by three double quotes. You can left out certain fields and use any formatting you like. Refer to L for details how to write a diff --git a/main.go b/main.go index 93857e8..c489634 100644 --- a/main.go +++ b/main.go @@ -22,10 +22,8 @@ import ( "fmt" "io" "log/slog" - "math/rand" "os" "runtime/debug" - "time" "github.com/lmittmann/tint" "github.com/tlinden/yadu" @@ -37,38 +35,43 @@ func main() { os.Exit(Main(os.Stdout)) } -func Main(w io.Writer) int { +func Main(output io.Writer) int { logLevel := &slog.LevelVar{} opts := &tint.Options{ Level: logLevel, AddSource: false, - ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr { + ReplaceAttr: func(groups []string, attr slog.Attr) slog.Attr { // Remove time from the output - if a.Key == slog.TimeKey { + if attr.Key == slog.TimeKey { return slog.Attr{} } - return a + + return attr }, NoColor: IsNoTty(), } logLevel.Set(LevelNotice) - handler := tint.NewHandler(w, opts) + + handler := tint.NewHandler(output, opts) logger := slog.New(handler) + slog.SetDefault(logger) - conf, err := InitConfig(w) + conf, err := InitConfig(output) if err != nil { return Die(err) } if conf.Showversion { - fmt.Fprintf(w, "This is kleingebaeck version %s\n", VERSION) + fmt.Fprintf(output, "This is kleingebaeck version %s\n", VERSION) + return 0 } if conf.Showhelp { - fmt.Fprintln(w, Usage) + fmt.Fprintln(output, Usage) + return 0 } @@ -77,6 +80,7 @@ func Main(w io.Writer) int { if err != nil { return Die(err) } + return 0 } @@ -94,7 +98,8 @@ func Main(w io.Writer) int { } logLevel.Set(slog.LevelDebug) - handler := yadu.NewHandler(w, opts) + + handler := yadu.NewHandler(output, opts) debuglogger := slog.New(handler).With( slog.Group("program_info", slog.Int("pid", os.Getpid()), @@ -118,10 +123,8 @@ func Main(w io.Writer) int { return Die(err) } - // randomization needed here and there - rand.Seed(time.Now().UnixNano()) - - if len(conf.Adlinks) >= 1 { + switch { + case len(conf.Adlinks) >= 1: // directly backup ad listing[s] for _, uri := range conf.Adlinks { err := ScrapeAd(fetch, uri) @@ -129,25 +132,27 @@ func Main(w io.Writer) int { return Die(err) } } - } else if conf.User > 0 { + case conf.User > 0: // backup all ads of the given user (via config or cmdline) err := ScrapeUser(fetch) if err != nil { return Die(err) } - } else { + default: return Die(errors.New("invalid or no user id or no ad link specified")) } if conf.StatsCountAds > 0 { adstr := "ads" + if conf.StatsCountAds == 1 { adstr = "ad" } - fmt.Fprintf(w, "Successfully downloaded %d %s with %d images to %s.\n", + + fmt.Fprintf(output, "Successfully downloaded %d %s with %d images to %s.\n", conf.StatsCountAds, adstr, conf.StatsCountImages, conf.Outdir) } else { - fmt.Fprintf(w, "No ads found.") + fmt.Fprintf(output, "No ads found.") } return 0 @@ -155,5 +160,6 @@ func Main(w io.Writer) int { func Die(err error) int { slog.Error("Failure", "error", err.Error()) + return 1 } diff --git a/main_test.go b/main_test.go index 20c1aa8..9c61081 100644 --- a/main_test.go +++ b/main_test.go @@ -43,7 +43,7 @@ const LISTTPL string = ` {{ range . }}

{{ .Title }} + href="/s-anzeige/{{ .Slug }}/{{ .ID }}">{{ .Title }}

{{ end }} @@ -247,7 +247,7 @@ var invalidtests = []Tests{ type AdConfig struct { Title string Slug string - Id string + ID string Price string Category string Condition string @@ -259,7 +259,7 @@ type AdConfig struct { var adsrc = []AdConfig{ { Title: "First Ad", - Id: "1", Price: "5€", + ID: "1", Price: "5€", Category: "Klimbim", Text: "Thing to sale", Slug: "first-ad", @@ -269,7 +269,7 @@ var adsrc = []AdConfig{ }, { Title: "Secnd Ad", - Id: "2", Price: "5€", + ID: "2", Price: "5€", Category: "Kram", Text: "Thing to sale", Slug: "second-ad", @@ -279,7 +279,7 @@ var adsrc = []AdConfig{ }, { Title: "Third Ad", - Id: "3", + ID: "3", Price: "5€", Category: "Kuddelmuddel", Text: "Thing to sale", @@ -290,7 +290,7 @@ var adsrc = []AdConfig{ }, { Title: "Forth Ad", - Id: "4", + ID: "4", Price: "5€", Category: "Krempel", Text: "Thing to sale", @@ -301,7 +301,7 @@ var adsrc = []AdConfig{ }, { Title: "Fifth Ad", - Id: "5", + ID: "5", Price: "5€", Category: "Kladderadatsch", Text: "Thing to sale", @@ -312,7 +312,7 @@ var adsrc = []AdConfig{ }, { Title: "Sixth Ad", - Id: "6", + ID: "6", Price: "5€", Category: "Klunker", Text: "Thing to sale", @@ -334,17 +334,17 @@ type Adsource struct { } // Render a HTML template for an adlisting or an ad -func GetTemplate(l []AdConfig, a AdConfig, htmltemplate string) string { +func GetTemplate(adconfigs []AdConfig, adconfig AdConfig, htmltemplate string) string { tmpl, err := tpl.New("template").Parse(htmltemplate) if err != nil { panic(err) } var out bytes.Buffer - if len(a.Id) == 0 { - err = tmpl.Execute(&out, l) + if len(adconfig.ID) == 0 { + err = tmpl.Execute(&out, adconfigs) } else { - err = tmpl.Execute(&out, a) + err = tmpl.Execute(&out, adconfig) } if err != nil { @@ -391,10 +391,9 @@ func InitValidSources() []Adsource { // prepare urls for the ads for _, ad := range adsrc { ads = append(ads, Adsource{ - uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.Id), + uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.ID), content: GetTemplate(nil, ad, ADTPL), }) - //panic(GetTemplate(nil, ad, ADTPL)) } return ads @@ -447,46 +446,48 @@ func GetImage(path string) []byte { // setup httpmock func SetIntercept(ads []Adsource) { - ch := http.Header{} - ch.Add("Set-Cookie", "session=permanent") + headers := http.Header{} + headers.Add("Set-Cookie", "session=permanent") - for _, ad := range ads { - if ad.status == 0 { - ad.status = 200 + for _, advertisement := range ads { + if advertisement.status == 0 { + advertisement.status = 200 } - httpmock.RegisterResponder("GET", ad.uri, - httpmock.NewStringResponder(ad.status, ad.content).HeaderAdd(ch)) + httpmock.RegisterResponder("GET", advertisement.uri, + httpmock.NewStringResponder(advertisement.status, advertisement.content).HeaderAdd(headers)) } // we just use 2 images, put this here for _, image := range []string{"t/1.jpg", "t/2.jpg"} { httpmock.RegisterResponder("GET", image, - httpmock.NewBytesResponder(200, GetImage(image)).HeaderAdd(ch)) + httpmock.NewBytesResponder(200, GetImage(image)).HeaderAdd(headers)) } - } -func VerifyAd(ad AdConfig) error { - body := ad.Title + ad.Price + ad.Id + "Kleinanzeigen => " + - ad.Category + ad.Condition + ad.Created +func VerifyAd(advertisement AdConfig) error { + body := advertisement.Title + advertisement.Price + advertisement.ID + "Kleinanzeigen => " + + advertisement.Category + advertisement.Condition + advertisement.Created // prepare ad dir name using DefaultAdNameTemplate c := Config{Adnametemplate: "{{ .Slug }}"} - adstruct := Ad{Slug: ad.Slug, Id: ad.Id} + adstruct := Ad{Slug: advertisement.Slug, ID: advertisement.ID} + addir, err := AdDirName(&c, &adstruct) if err != nil { return err } file := fmt.Sprintf("t/out/%s/Adlisting.txt", addir) + content, err := os.ReadFile(file) if err != nil { - return err + return fmt.Errorf("unable to read adlisting file: %w", err) } if body != strings.TrimSpace(string(content)) { msg := fmt.Sprintf("ad content doesn't match.\nExpect: %s\n Got: %s\n", body, content) + return errors.New(msg) } @@ -504,20 +505,21 @@ func TestMain(t *testing.T) { SetIntercept(InitValidSources()) // run commandline tests - for _, tt := range tests { + for _, test := range tests { var buf bytes.Buffer - os.Args = strings.Split(tt.args, " ") + + os.Args = strings.Split(test.args, " ") ret := Main(&buf) - if ret != tt.exitcode { + if ret != test.exitcode { t.Errorf("%s with cmd <%s> did not exit with %d but %d", - tt.name, tt.args, tt.exitcode, ret) + test.name, test.args, test.exitcode, ret) } - if !strings.Contains(buf.String(), tt.expect) { + if !strings.Contains(buf.String(), test.expect) { t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n", - tt.name, tt.args, tt.expect, buf.String()) + test.name, test.args, test.expect, buf.String()) } } @@ -540,20 +542,21 @@ func TestMainInvalids(t *testing.T) { SetIntercept(InitInvalidSources()) // run commandline tests - for _, tt := range invalidtests { + for _, test := range invalidtests { var buf bytes.Buffer - os.Args = strings.Split(tt.args, " ") + + os.Args = strings.Split(test.args, " ") ret := Main(&buf) - if ret != tt.exitcode { + if ret != test.exitcode { t.Errorf("%s with cmd <%s> did not exit with %d but %d", - tt.name, tt.args, tt.exitcode, ret) + test.name, test.args, test.exitcode, ret) } - if !strings.Contains(buf.String(), tt.expect) { + if !strings.Contains(buf.String(), test.expect) { t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n", - tt.name, tt.args, tt.expect, buf.String()) + test.name, test.args, test.expect, buf.String()) } } } diff --git a/scrape.go b/scrape.go index 0debc5a..1f91e42 100644 --- a/scrape.go +++ b/scrape.go @@ -19,10 +19,10 @@ package main import ( "bytes" - "errors" "fmt" "log/slog" "path/filepath" + "strconv" "strings" "time" @@ -43,7 +43,9 @@ func ScrapeUser(fetch *Fetcher) error { for { var index Index + slog.Debug("fetching page", "uri", uri) + body, err := fetch.Get(uri) if err != nil { return err @@ -52,7 +54,7 @@ func ScrapeUser(fetch *Fetcher) error { err = goq.NewDecoder(body).Decode(&index) if err != nil { - return err + return fmt.Errorf("failed to goquery decode HTML index body: %w", err) } if len(index.Links) == 0 { @@ -67,16 +69,16 @@ func ScrapeUser(fetch *Fetcher) error { } page++ - uri = baseuri + "&pageNum=" + fmt.Sprintf("%d", page) + uri = baseuri + "&pageNum=" + strconv.Itoa(page) } - for i, adlink := range adlinks { + for index, adlink := range adlinks { err := ScrapeAd(fetch, Baseuri+adlink) if err != nil { return err } - if fetch.Config.Limit > 0 && i == fetch.Config.Limit-1 { + if fetch.Config.Limit > 0 && index == fetch.Config.Limit-1 { break } } @@ -86,18 +88,20 @@ func ScrapeUser(fetch *Fetcher) error { // scrape an ad. uri is the full uri of the ad, dir is the basedir func ScrapeAd(fetch *Fetcher, uri string) error { - ad := &Ad{} + advertisement := &Ad{} // extract slug and id from uri uriparts := strings.Split(uri, "/") - if len(uriparts) < 6 { - return errors.New("invalid uri: " + uri) + if len(uriparts) < SlugURIPartNum { + return fmt.Errorf("invalid uri: %s", uri) } - ad.Slug = uriparts[4] - ad.Id = uriparts[5] + + advertisement.Slug = uriparts[4] + advertisement.ID = uriparts[5] // get the ad slog.Debug("fetching ad page", "uri", uri) + body, err := fetch.Get(uri) if err != nil { return err @@ -105,36 +109,37 @@ func ScrapeAd(fetch *Fetcher, uri string) error { defer body.Close() // extract ad contents with goquery/goq - err = goq.NewDecoder(body).Decode(&ad) + err = goq.NewDecoder(body).Decode(&advertisement) if err != nil { - return err + return fmt.Errorf("failed to goquery decode HTML ad body: %w", err) } - if len(ad.CategoryTree) > 0 { - ad.Category = strings.Join(ad.CategoryTree, " => ") + if len(advertisement.CategoryTree) > 0 { + advertisement.Category = strings.Join(advertisement.CategoryTree, " => ") } - if ad.Incomplete() { - slog.Debug("got ad", "ad", ad) - return errors.New("could not extract ad data from page, got empty struct") + if advertisement.Incomplete() { + slog.Debug("got ad", "ad", advertisement) + + return fmt.Errorf("could not extract ad data from page, got empty struct") } - ad.CalculateExpire() + advertisement.CalculateExpire() // write listing - addir, err := WriteAd(fetch.Config, ad) + addir, err := WriteAd(fetch.Config, advertisement) if err != nil { return err } - slog.Debug("extracted ad listing", "ad", ad) + slog.Debug("extracted ad listing", "ad", advertisement) fetch.Config.IncrAds() - return ScrapeImages(fetch, ad, addir) + return ScrapeImages(fetch, advertisement, addir) } -func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error { +func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error { // fetch images img := 1 adpath := filepath.Join(fetch.Config.Outdir, addir) @@ -145,16 +150,17 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error { return err } - g := new(errgroup.Group) + egroup := new(errgroup.Group) - for _, imguri := range ad.Images { + for _, imguri := range advertisement.Images { imguri := imguri file := filepath.Join(adpath, fmt.Sprintf("%d.jpg", img)) - g.Go(func() error { + + egroup.Go(func() error { // wait a little - t := GetThrottleTime() - time.Sleep(t) + throttle := GetThrottleTime() + time.Sleep(throttle) body, err := fetch.Getimage(imguri) if err != nil { @@ -164,7 +170,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error { buf := new(bytes.Buffer) _, err = buf.ReadFrom(body) if err != nil { - return err + return fmt.Errorf("failed to read from image buffer: %w", err) } buf2 := buf.Bytes() // needed for image writing @@ -177,7 +183,8 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error { if !fetch.Config.ForceDownload { if image.SimilarExists(cache) { - slog.Debug("similar image exists, not written", "uri", image.Uri) + slog.Debug("similar image exists, not written", "uri", image.URI) + return nil } } @@ -187,17 +194,18 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error { return err } - slog.Debug("wrote image", "image", image, "size", len(buf2), "throttle", t) + slog.Debug("wrote image", "image", image, "size", len(buf2), "throttle", throttle) + return nil }) img++ } - if err := g.Wait(); err != nil { - return err + if err := egroup.Wait(); err != nil { + return fmt.Errorf("failed to finalize error waitgroup: %w", err) } - fetch.Config.IncrImgs(len(ad.Images)) + fetch.Config.IncrImgs(len(advertisement.Images)) return nil } diff --git a/store.go b/store.go index 90d6249..66e6e7d 100644 --- a/store.go +++ b/store.go @@ -28,30 +28,32 @@ import ( tpl "text/template" ) -func AdDirName(c *Config, ad *Ad) (string, error) { - tmpl, err := tpl.New("adname").Parse(c.Adnametemplate) +func AdDirName(conf *Config, advertisement *Ad) (string, error) { + tmpl, err := tpl.New("adname").Parse(conf.Adnametemplate) if err != nil { - return "", err + return "", fmt.Errorf("failed to parse adname template: %w", err) } buf := bytes.Buffer{} - err = tmpl.Execute(&buf, ad) + + err = tmpl.Execute(&buf, advertisement) if err != nil { - return "", err + return "", fmt.Errorf("failed to execute adname template: %w", err) } return buf.String(), nil } -func WriteAd(c *Config, ad *Ad) (string, error) { +func WriteAd(conf *Config, advertisement *Ad) (string, error) { // prepare ad dir name - addir, err := AdDirName(c, ad) + addir, err := AdDirName(conf, advertisement) if err != nil { return "", err } // prepare output dir - dir := filepath.Join(c.Outdir, addir) + dir := filepath.Join(conf.Outdir, addir) + err = Mkdir(dir) if err != nil { return "", err @@ -59,26 +61,27 @@ func WriteAd(c *Config, ad *Ad) (string, error) { // write ad file listingfile := filepath.Join(dir, "Adlisting.txt") - f, err := os.Create(listingfile) - if err != nil { - return "", err - } - defer f.Close() - if runtime.GOOS == "windows" { - ad.Text = strings.ReplaceAll(ad.Text, "
", "\r\n") + listingfd, err := os.Create(listingfile) + if err != nil { + return "", fmt.Errorf("failed to create Adlisting.txt: %w", err) + } + defer listingfd.Close() + + if runtime.GOOS == WIN { + advertisement.Text = strings.ReplaceAll(advertisement.Text, "
", "\r\n") } else { - ad.Text = strings.ReplaceAll(ad.Text, "
", "\n") + advertisement.Text = strings.ReplaceAll(advertisement.Text, "
", "\n") } - tmpl, err := tpl.New("adlisting").Parse(c.Template) + tmpl, err := tpl.New("adlisting").Parse(conf.Template) if err != nil { - return "", err + return "", fmt.Errorf("failed to parse adlisting template: %w", err) } - err = tmpl.Execute(f, ad) + err = tmpl.Execute(listingfd, advertisement) if err != nil { - return "", err + return "", fmt.Errorf("failed to execute adlisting template: %w", err) } slog.Info("wrote ad listing", "listingfile", listingfile) @@ -89,14 +92,14 @@ func WriteAd(c *Config, ad *Ad) (string, error) { func WriteImage(filename string, buf []byte) error { file, err := os.Create(filename) if err != nil { - return err + return fmt.Errorf("failed to open image file: %w", err) } defer file.Close() _, err = file.Write(buf) if err != nil { - return err + return fmt.Errorf("failed to write to image file: %w", err) } return nil @@ -111,12 +114,12 @@ func ReadImage(filename string) (*bytes.Buffer, error) { data, err := os.ReadFile(filename) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to read image file: %w", err) } _, err = buf.Write(data) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to write image into buffer: %w", err) } return &buf, nil @@ -127,5 +130,6 @@ func fileExists(filename string) bool { if os.IsNotExist(err) { return false } + return !info.IsDir() } diff --git a/store_test.go b/store_test.go index 90a04b0..d870578 100644 --- a/store_test.go +++ b/store_test.go @@ -26,6 +26,8 @@ import ( // doesn't show up in the coverage report for unknown reasons, so // here's a single test for it func TestWriteImage(t *testing.T) { + t.Parallel() + buf := []byte{1, 2, 3, 4, 5, 6, 7, 8} file := "t/out/t.jpg" @@ -33,5 +35,4 @@ func TestWriteImage(t *testing.T) { if err != nil { t.Errorf("Could not write mock image to %s: %s", file, err.Error()) } - } diff --git a/t/config-empty.conf b/t/config-empty.conf index 107a152..4f1dc27 100644 --- a/t/config-empty.conf +++ b/t/config-empty.conf @@ -1,6 +1,6 @@ # empty config for Main() unit tests to force unit tests NOT to use an # eventually existing ~/.kleingebaeck! template=""" -{{.Title}}{{.Price}}{{.Id}}{{.Category}}{{.Condition}}{{.Created}} +{{.Title}}{{.Price}}{{.ID}}{{.Category}}{{.Condition}}{{.Created}} """ diff --git a/t/fullconfig.conf b/t/fullconfig.conf index 9265883..b49e634 100644 --- a/t/fullconfig.conf +++ b/t/fullconfig.conf @@ -2,5 +2,5 @@ user = 1 loglevel = "verbose" outdir = "t/out" template=""" -{{.Title}}{{.Price}}{{.Id}}{{.Category}}{{.Condition}}{{.Created}} +{{.Title}}{{.Price}}{{.ID}}{{.Category}}{{.Condition}}{{.Created}} """ diff --git a/util.go b/util.go index 68632d8..d730f27 100644 --- a/util.go +++ b/util.go @@ -20,6 +20,7 @@ package main import ( "bytes" "errors" + "fmt" "math/rand" "os" "os/exec" @@ -33,7 +34,7 @@ func Mkdir(dir string) error { if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) { err := os.Mkdir(dir, os.ModePerm) if err != nil { - return err + return fmt.Errorf("failed to create directory %s: %w", dir, err) } } @@ -44,7 +45,8 @@ func man() error { man := exec.Command("less", "-") var b bytes.Buffer - b.Write([]byte(manpage)) + + b.WriteString(manpage) man.Stdout = os.Stdout man.Stdin = &b @@ -53,7 +55,7 @@ func man() error { err := man.Run() if err != nil { - return err + return fmt.Errorf("failed to execute 'less': %w", err) } return nil @@ -61,7 +63,7 @@ func man() error { // returns TRUE if stdout is NOT a tty or windows func IsNoTty() bool { - if runtime.GOOS == "windows" || !isatty.IsTerminal(os.Stdout.Fd()) { + if runtime.GOOS == WIN || !isatty.IsTerminal(os.Stdout.Fd()) { return true }