diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index e383eeb..4066875 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -21,6 +21,9 @@ jobs:
- name: build
run: go build
+ - name: test
+ run: make test
+
golangci:
name: lint
runs-on: ubuntu-latest
diff --git a/Makefile b/Makefile
index 5812f8f..7b4e1ca 100644
--- a/Makefile
+++ b/Makefile
@@ -50,9 +50,10 @@ install: buildlocal
install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/
clean:
- rm -rf $(tool) coverage.out testdata
+ rm -rf $(tool) coverage.out testdata t/out
test: clean
+ mkdir -p t/out
go test ./... $(ARGS)
testfuzzy: clean
diff --git a/go.mod b/go.mod
index c395625..5a4e11a 100644
--- a/go.mod
+++ b/go.mod
@@ -18,6 +18,7 @@ require (
github.com/PuerkitoBio/goquery v1.5.0 // indirect
github.com/andybalholm/cascadia v1.0.0 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
+ github.com/jarcoal/httpmock v1.3.1 // indirect
github.com/knadh/koanf/maps v0.1.1 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
diff --git a/go.sum b/go.sum
index 5a71b9d..fa9fff7 100644
--- a/go.sum
+++ b/go.sum
@@ -9,6 +9,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
+github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww=
+github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg=
github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs=
github.com/knadh/koanf/maps v0.1.1/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI=
github.com/knadh/koanf/parsers/toml v0.1.0 h1:S2hLqS4TgWZYj4/7mI5m1CQQcWurxUz6ODgOub/6LCI=
diff --git a/scrape.go b/scrape.go
index 25127c2..fc3c3c1 100644
--- a/scrape.go
+++ b/scrape.go
@@ -140,7 +140,7 @@ func Scrape(c *Config, uri string) error {
// extract slug and id from uri
uriparts := strings.Split(uri, "/")
if len(uriparts) < 6 {
- return errors.New("invalid uri")
+ return errors.New("invalid uri: " + uri)
}
ad.Slug = uriparts[4]
ad.Id = uriparts[5]
diff --git a/scrape_test.go b/scrape_test.go
new file mode 100644
index 0000000..f96506a
--- /dev/null
+++ b/scrape_test.go
@@ -0,0 +1,268 @@
+/*
+Copyright © 2023 Thomas von Dein
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package main
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "strings"
+ "testing"
+ tpl "text/template"
+
+ "github.com/jarcoal/httpmock"
+)
+
+// AdConfig holds the values used to fill an ad template; a slice of
+// AdConfig values is used to fill the ad listing page template.
+type AdConfig struct {
+ Title string
+ // Slug and Id together form the path of the mocked ad url
+ Slug string
+ // an empty Id tells GetTemplate() to render a listing instead of an ad
+ Id string
+ Price string
+ Category string
+ Condition string
+ Created string
+ Text string
+ Images []string // files in ./t/
+}
+
+// LISTTPL is the template for the ad list page, aka:
+// https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
+// Note that this HTML code is reduced to the bare minimum, so that
+// it only contains the stuff required to satisfy goquery. The
+// template ranges over the data it is executed with.
+const LISTTPL string = `
+
+
+ Ads
+
+
+{{ range . }}
+
+{{ end }}
+
+
+`
+
+// ADTPL is the template for an actual ad page, aka:
+// https://www.kleinanzeigen.de/s-anzeige/ad-text-slug/1010101010
+// Note that this HTML code is reduced to the bare minimum, so that
+// it only contains the stuff required to satisfy goquery. The
+// template reads the fields Images, Title, Price, Category and
+// Condition of the data it is executed with.
+const ADTPL string = `DOCTYPE html>
+
+
+ Ad Listing
+
+
+
+ {{ range $image := .Images }}
+
+

+
+ {{ end }}
+
+
+ {{ .Title }}
+
+
+ {{ .Price }}
+
+
+
+
+
+
+ -
+ Art
+ {{ .Category }}
+
+ -
+ Zustand
+ {{ .Condition }}
+
+
+
+
+
+
+
+`
+
+// An Adsource is used to construct a httpmock responder for a
+// particular url. So, the code (scrape.go) scrapes
+// https://kleinanzeigen.de, but in reality httpmock captures the
+// request and responds with our mock data.
+type Adsource struct {
+ uri string // the url the responder gets registered for
+ content string // the response body served for that url
+}
+
+// GetTemplate renders htmltemplate and returns the result as a
+// string. If the ad a carries no Id, the template is executed with
+// the listing l, otherwise with the ad a itself. Parse and execution
+// errors lead to a panic.
+func GetTemplate(l []AdConfig, a AdConfig, htmltemplate string) string {
+	parsed, err := tpl.New("template").Parse(htmltemplate)
+	if err != nil {
+		panic(err)
+	}
+
+	// pick the data to render: an ad without an id signals a listing
+	var data interface{} = a
+	if len(a.Id) == 0 {
+		data = l
+	}
+
+	var rendered bytes.Buffer
+	if err := parsed.Execute(&rendered, data); err != nil {
+		panic(err)
+	}
+
+	return rendered.String()
+}
+
+// InitAds returns the six mock ads used by the test. The ads only
+// differ in title, id, category and slug, all remaining fields carry
+// the same values.
+func InitAds() []AdConfig {
+	variants := [...]struct{ title, id, category, slug string }{
+		{"First Ad", "1", "Klimbim", "first-ad"},
+		{"Secnd Ad", "2", "Kram", "second-ad"},
+		{"Third Ad", "3", "Kuddelmuddel", "third-ad"},
+		{"Forth Ad", "4", "Krempel", "fourth-ad"},
+		{"Fifth Ad", "5", "Kladderadatsch", "fifth-ad"},
+		{"Sixth Ad", "6", "Klunker", "sixth-ad"},
+	}
+
+	ads := make([]AdConfig, len(variants))
+	for i, v := range variants {
+		ads[i] = AdConfig{
+			Title:     v.title,
+			Id:        v.id,
+			Price:     "5€",
+			Category:  v.category,
+			Text:      "Thing to sale",
+			Slug:      v.slug,
+			Condition: "works",
+			Created:   "Yesterday",
+			// a fresh slice per ad, so that no two ads share one
+			Images: []string{"t/1.jpg", "t/2.jpg"},
+		}
+	}
+
+	return ads
+}
+
+// Initialize the valid sources for the httpmock responder
+func InitValidSources(conf *Config) []Adsource {
+ // all our valid ads
+ adsrc := InitAds()
+
+ // valid ad listing page 1
+ list1 := []AdConfig{
+ adsrc[0], adsrc[1], adsrc[2],
+ }
+
+ // valid ad listing page 2
+ list2 := []AdConfig{
+ adsrc[3], adsrc[4], adsrc[5],
+ }
+
+ // valid ad listing page 3, which is empty
+ list3 := []AdConfig{}
+
+ // used to signal GetTemplate() to render a listing
+ empty := AdConfig{}
+
+ // prepare urls for the listing pages
+ ads := []Adsource{
+ {
+ uri: fmt.Sprintf("%s%s?userId=%d", Baseuri, Listuri, conf.User),
+ content: GetTemplate(list1, empty, LISTTPL),
+ },
+ {
+ uri: fmt.Sprintf("%s%s?userId=%d&pageNum=2", Baseuri, Listuri, conf.User),
+ content: GetTemplate(list2, empty, LISTTPL),
+ },
+ {
+ uri: fmt.Sprintf("%s%s?userId=%d&pageNum=3", Baseuri, Listuri, conf.User),
+ content: GetTemplate(list3, empty, LISTTPL),
+ },
+ }
+
+ // prepare urls for the ads
+ for _, ad := range adsrc {
+ ads = append(ads, Adsource{
+ uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.Id),
+ content: GetTemplate(nil, ad, ADTPL),
+ })
+ //panic(GetTemplate(nil, ad, ADTPL))
+ }
+
+ return ads
+}
+
+// GetImage reads the test image stored at path and returns its raw
+// bytes. It panics if the file cannot be read.
+func GetImage(path string) []byte {
+	raw, readErr := os.ReadFile(path)
+	if readErr == nil {
+		return raw
+	}
+
+	panic(readErr)
+}
+
+// SetIntercept sets up httpmock: it registers a GET responder for
+// every source returned by InitValidSources() and one for each of
+// the two test images.
+func SetIntercept(conf *Config) {
+	for _, src := range InitValidSources(conf) {
+		responder := httpmock.NewStringResponder(200, src.content)
+		httpmock.RegisterResponder("GET", src.uri, responder)
+	}
+
+	// all ads use the same 2 images, so they are listed here
+	images := [...]string{"t/1.jpg", "t/2.jpg"}
+	for i := range images {
+		payload := GetImage(images[i])
+		httpmock.RegisterResponder("GET", images[i], httpmock.NewBytesResponder(200, payload))
+	}
+}
+
+// TestStart is the actual test: it calls Start() from scrape, which
+// recursively scrapes ads from a user, while httpmock answers all
+// requests with the mock data registered by SetIntercept(). It then
+// verifies that an Adlisting.txt containing the category and the
+// title of the ad has been written for every ad of InitAds().
+func TestStart(t *testing.T) {
+	httpmock.Activate()
+	defer httpmock.DeactivateAndReset()
+
+	// fake config
+	conf := &Config{User: 1, Outdir: "t/out", Template: DefaultTemplate}
+
+	// prepare httpmock responders
+	SetIntercept(conf)
+
+	// run; there is nothing to verify if scraping failed, so stop
+	// here instead of reporting a misleading error for every ad
+	if err := Start(conf); err != nil {
+		t.Fatalf("failed to scrape: %s", err.Error())
+	}
+
+	// verify
+	for _, ad := range InitAds() {
+		file := fmt.Sprintf("t/out/%s/Adlisting.txt", ad.Slug)
+		content, err := os.ReadFile(file)
+		if err != nil {
+			t.Errorf("failed to read adlisting: %s", err.Error())
+			// without content there is nothing to compare
+			continue
+		}
+
+		// both values must be present, a single missing one is a failure
+		listing := string(content)
+		if !strings.Contains(listing, ad.Category) || !strings.Contains(listing, ad.Title) {
+			t.Errorf("failed to verify: %s content doesn't contain expected data", file)
+		}
+	}
+
+	// uncomment to see slogs
+	//t.Errorf("debug")
+}
diff --git a/store.go b/store.go
index 4d52cbe..e159f95 100644
--- a/store.go
+++ b/store.go
@@ -41,6 +41,7 @@ func WriteAd(dir string, ad *Ad, template string) error {
if err != nil {
return err
}
+ defer f.Close()
if runtime.GOOS == "windows" {
ad.Text = strings.ReplaceAll(ad.Text, "
", "\r\n")
@@ -52,6 +53,7 @@ func WriteAd(dir string, ad *Ad, template string) error {
if err != nil {
return err
}
+
err = tmpl.Execute(f, ad)
if err != nil {
return err
diff --git a/t/1.jpg b/t/1.jpg
new file mode 100644
index 0000000..ef2a54b
Binary files /dev/null and b/t/1.jpg differ
diff --git a/t/2.jpg b/t/2.jpg
new file mode 100644
index 0000000..31ee515
Binary files /dev/null and b/t/2.jpg differ