diff --git a/.gitignore b/.gitignore
index d1b1031..b992fd1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
test
kleingebaeck
releases
+t/out
diff --git a/main_test.go b/main_test.go
index a66a9f0..7fbd98f 100644
--- a/main_test.go
+++ b/main_test.go
@@ -19,67 +19,502 @@ package main
import (
"bytes"
+ "errors"
+ "fmt"
"os"
"strings"
"testing"
+ tpl "text/template"
+
+ "github.com/jarcoal/httpmock"
)
-type Cmdline struct {
+// the ad list, aka:
+// https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
+// Note, that this HTML code is reduced to the max, so that it only
+// contains the stuff required to satisfy goquery
+const LISTTPL string = `
+
+
+ Ads
+
+
+{{ range . }}
+
+{{ end }}
+
+
+`
+
+// an actual ad listing, aka:
+// https://www.kleinanzeigen.de/s-anzeige/ad-text-slug/1010101010
+// Note, that this HTML code is reduced to the max, so that it only
+// contains the stuff required to satisfy goquery
+const ADTPL string = `DOCTYPE html>
+
+
+ Ad Listing
+
+
+
+ {{ range $image := .Images }}
+
+

+
+ {{ end }}
+
+
+ {{ .Title }}
+
+
+ {{ .Price }}
+
+
+
+
+
+
+ -
+ Art
+ {{ .Category }}
+
+ -
+ Zustand
+ {{ .Condition }}
+
+
+
+
+
+
+
+`
+
+const EMPTYPAGE string = `DOCTYPE html>
+
+
+
+
+`
+
+const (
+ EMPTYURI string = `https://www.kleinanzeigen.de/s-anzeige/empty/1`
+ INVALIDPATHURI string = `https://www.kleinanzeigen.de/anzeige/name/1`
+ INVALID404URI string = `https://www.kleinanzeigen.de/anzeige/name/1/foo/bar`
+ INVALIDURI string = `https://foo.bar/weird/things`
+)
+
+var base = "kleingebaeck -c t/config-empty.conf"
+
+type Tests struct {
name string
args string
expect string
exitcode int
}
-func TestMain(t *testing.T) {
- base := "kleingebaeck -c t/config-empty.conf"
- cmdlines := []Cmdline{
- {
- name: "version",
- args: base + " -V",
- expect: "This is",
- exitcode: 0,
- },
- {
- name: "help",
- args: base + " -h",
- expect: "Usage:",
- exitcode: 0,
- },
- {
- name: "debug",
- args: base + " -d",
- expect: "program_info",
- exitcode: 1,
- },
- {
- name: "no-args-no-user",
- args: base,
- expect: "invalid or no user id",
- exitcode: 1,
- },
- // FIXME: add scrape tests as well, re-use scrape_test.go
- // stuff or simply move all of it to here
+var tests = []Tests{
+ {
+ name: "version",
+ args: base + " -V",
+ expect: "This is",
+ exitcode: 0,
+ },
+ {
+ name: "help",
+ args: base + " -h",
+ expect: "Usage:",
+ exitcode: 0,
+ },
+ {
+ name: "debug",
+ args: base + " -d",
+ expect: "program_info",
+ exitcode: 1,
+ },
+ {
+ name: "no-args-no-user",
+ args: base,
+ expect: "invalid or no user id",
+ exitcode: 1,
+ },
+ {
+ name: "download-single-ad",
+ args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1",
+ expect: "Successfully downloaded 1 ad with 2 images to t/out",
+ exitcode: 0,
+ },
+ {
+ name: "download-single-ad-verbose",
+ args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -v",
+ expect: "wrote ad listing",
+ exitcode: 0,
+ },
+ {
+ name: "download-single-ad-debug",
+ args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -d",
+ expect: "extracted ad listing program_info.pid=",
+ exitcode: 0,
+ },
+ {
+ name: "download-all-ads",
+ args: base + " -o t/out -u 1",
+ expect: "Successfully downloaded 6 ads with 12 images to t/out",
+ exitcode: 0,
+ },
+ {
+ name: "download-all-ads-using-config",
+ args: "kleingebaeck -c t/fullconfig.conf",
+ expect: "Successfully downloaded 6 ads with 12 images to t/out",
+ exitcode: 0,
+ },
+}
+
+var invalidtests = []Tests{
+ {
+ name: "empty-ad",
+ args: base + " " + EMPTYURI,
+ expect: "could not extract ad data from page, got empty struct",
+ exitcode: 1,
+ },
+ {
+ name: "invalid-ad",
+ args: base + " " + INVALIDURI,
+ expect: "invalid uri",
+ exitcode: 1,
+ },
+ {
+ name: "invalid-path",
+ args: base + " " + INVALIDPATHURI,
+ expect: "could not extract ad data from page, got empty struct",
+ exitcode: 1,
+ },
+ {
+ name: "404",
+ args: base + " " + INVALID404URI,
+ expect: "could not get page via HTTP",
+ exitcode: 1,
+ },
+ {
+ name: "outdir-no-exists",
+ args: base + " -o t/foo/bar/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -v",
+ expect: "mkdir t/foo/bar/out: no such file or directory",
+ exitcode: 1,
+ },
+ {
+ name: "wrong-flag",
+ args: base + " -X",
+ expect: "unknown shorthand flag: 'X' in -X",
+ exitcode: 1,
+ },
+ {
+ name: "no-config",
+ args: "kleingebaeck -c t/invalid.conf",
+ expect: "error loading config file",
+ exitcode: 1,
+ },
+}
+
+type AdConfig struct {
+ Title string
+ Slug string
+ Id string
+ Price string
+ Category string
+ Condition string
+ Created string
+ Text string
+ Images []string // files in ./t/
+}
+
+var adsrc = []AdConfig{
+ {
+ Title: "First Ad",
+ Id: "1", Price: "5€",
+ Category: "Klimbim",
+ Text: "Thing to sale",
+ Slug: "first-ad",
+ Condition: "works",
+ Created: "Yesterday",
+ Images: []string{"t/1.jpg", "t/2.jpg"},
+ },
+ {
+ Title: "Secnd Ad",
+ Id: "2", Price: "5€",
+ Category: "Kram",
+ Text: "Thing to sale",
+ Slug: "second-ad",
+ Condition: "works",
+ Created: "Yesterday",
+ Images: []string{"t/1.jpg", "t/2.jpg"},
+ },
+ {
+ Title: "Third Ad",
+ Id: "3",
+ Price: "5€",
+ Category: "Kuddelmuddel",
+ Text: "Thing to sale",
+ Slug: "third-ad",
+ Condition: "works",
+ Created: "Yesterday",
+ Images: []string{"t/1.jpg", "t/2.jpg"},
+ },
+ {
+ Title: "Forth Ad",
+ Id: "4",
+ Price: "5€",
+ Category: "Krempel",
+ Text: "Thing to sale",
+ Slug: "fourth-ad",
+ Condition: "works",
+ Created: "Yesterday",
+ Images: []string{"t/1.jpg", "t/2.jpg"},
+ },
+ {
+ Title: "Fifth Ad",
+ Id: "5",
+ Price: "5€",
+ Category: "Kladderadatsch",
+ Text: "Thing to sale",
+ Slug: "fifth-ad",
+ Condition: "works",
+ Created: "Yesterday",
+ Images: []string{"t/1.jpg", "t/2.jpg"},
+ },
+ {
+ Title: "Sixth Ad",
+ Id: "6",
+ Price: "5€",
+ Category: "Klunker",
+ Text: "Thing to sale",
+ Slug: "sixth-ad",
+ Condition: "works",
+ Created: "Yesterday",
+ Images: []string{"t/1.jpg", "t/2.jpg"},
+ },
+}
+
+// An Adsource is used to construct a httpmock responder for a
+// particular url. So, the code (scrape.go) scrapes
+// https://kleinanzeigen.de, but in reality httpmock captures the
+// request and responds with our mock data
+type Adsource struct {
+ uri string
+ content string
+ status int
+}
+
+// Render a HTML template for an adlisting or an ad
+func GetTemplate(l []AdConfig, a AdConfig, htmltemplate string) string {
+ tmpl, err := tpl.New("template").Parse(htmltemplate)
+ if err != nil {
+ panic(err)
}
- oldargs := os.Args
- defer func() { os.Args = oldargs }()
+ var out bytes.Buffer
+ if len(a.Id) == 0 {
+ err = tmpl.Execute(&out, l)
+ } else {
+ err = tmpl.Execute(&out, a)
+ }
- for _, c := range cmdlines {
- var buf bytes.Buffer
- os.Args = strings.Split(c.args, " ")
+ if err != nil {
+ panic(err)
+ }
- ret := Main(&buf)
+ return out.String()
+}
- if ret != c.exitcode {
- t.Errorf("%s with cmd <%s> did not exit with %d but %d",
- c.name, c.args, c.exitcode, ret)
+// Initialize the valid sources for the httpmock responder
+func InitValidSources() []Adsource {
+ // valid ad listing page 1
+ list1 := []AdConfig{
+ adsrc[0], adsrc[1], adsrc[2],
+ }
+
+ // valid ad listing page 2
+ list2 := []AdConfig{
+ adsrc[3], adsrc[4], adsrc[5],
+ }
+
+ // valid ad listing page 3, which is empty
+ list3 := []AdConfig{}
+
+ // used to signal GetTemplate() to render a listing
+ empty := AdConfig{}
+
+ // prepare urls for the listing pages
+ ads := []Adsource{
+ {
+ uri: fmt.Sprintf("%s%s?userId=1", Baseuri, Listuri),
+ content: GetTemplate(list1, empty, LISTTPL),
+ },
+ {
+ uri: fmt.Sprintf("%s%s?userId=1&pageNum=2", Baseuri, Listuri),
+ content: GetTemplate(list2, empty, LISTTPL),
+ },
+ {
+ uri: fmt.Sprintf("%s%s?userId=1&pageNum=3", Baseuri, Listuri),
+ content: GetTemplate(list3, empty, LISTTPL),
+ },
+ }
+
+ // prepare urls for the ads
+ for _, ad := range adsrc {
+ ads = append(ads, Adsource{
+ uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.Id),
+ content: GetTemplate(nil, ad, ADTPL),
+ })
+ //panic(GetTemplate(nil, ad, ADTPL))
+ }
+
+ return ads
+}
+
+func InitInvalidSources() []Adsource {
+ empty := AdConfig{}
+ ads := []Adsource{
+ {
+ // valid ad page but without content
+ uri: fmt.Sprintf("%s/s-anzeige/empty/1", Baseuri),
+ content: GetTemplate(nil, empty, EMPTYPAGE),
+ },
+ {
+ // some random foreign webpage
+ uri: INVALIDURI,
+ content: GetTemplate(nil, empty, "foo"),
+ },
+ {
+ // some invalid page path
+ uri: fmt.Sprintf("%s/anzeige/name/1", Baseuri),
+ content: GetTemplate(nil, empty, ""),
+ },
+ {
+ // some none-ad page
+ uri: fmt.Sprintf("%s/anzeige/name/1/foo/bar", Baseuri),
+ content: GetTemplate(nil, empty, "HTTP 404: /eine-anzeige/ does not exist!"),
+ status: 404,
+ },
+ }
+
+ return ads
+}
+
+// load a test image from disk
+func GetImage(path string) []byte {
+ dat, err := os.ReadFile(path)
+ if err != nil {
+ panic(err)
+ }
+
+ return dat
+}
+
+// setup httpmock
+func SetIntercept(ads []Adsource) {
+ for _, ad := range ads {
+ if ad.status == 0 {
+ ad.status = 200
}
- if !strings.Contains(buf.String(), c.expect) {
- t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n",
- c.name, c.args, c.expect, buf.String())
- }
+ httpmock.RegisterResponder("GET", ad.uri,
+ httpmock.NewStringResponder(ad.status, ad.content))
+ }
+
+ // we just use 2 images, put this here
+ for _, image := range []string{"t/1.jpg", "t/2.jpg"} {
+ httpmock.RegisterResponder("GET", image, httpmock.NewBytesResponder(200, GetImage(image)))
}
}
+
+func VerifyAd(ad AdConfig) error {
+ body := ad.Title + ad.Price + ad.Id + ad.Category + ad.Condition + ad.Created
+
+ file := fmt.Sprintf("t/out/%s/Adlisting.txt", ad.Slug)
+ content, err := os.ReadFile(file)
+ if err != nil {
+ return err
+ }
+
+ if body != strings.TrimSpace(string(content)) {
+ msg := fmt.Sprintf("ad content doesn't match.\nExpect: %s\n Got: %s\n", body, content)
+ return errors.New(msg)
+ }
+
+ return nil
+}
+
+func TestMain(t *testing.T) {
+ oldargs := os.Args
+ defer func() { os.Args = oldargs }()
+
+ httpmock.Activate()
+ defer httpmock.DeactivateAndReset()
+
+ // prepare httpmock responders
+ SetIntercept(InitValidSources())
+
+ // run commandline tests
+ for _, tt := range tests {
+ var buf bytes.Buffer
+ os.Args = strings.Split(tt.args, " ")
+
+ ret := Main(&buf)
+
+ if ret != tt.exitcode {
+ t.Errorf("%s with cmd <%s> did not exit with %d but %d",
+ tt.name, tt.args, tt.exitcode, ret)
+ }
+
+ if !strings.Contains(buf.String(), tt.expect) {
+ t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n",
+ tt.name, tt.args, tt.expect, buf.String())
+ }
+ }
+
+ // verify if downloaded ads match
+ for _, ad := range adsrc {
+ if err := VerifyAd(ad); err != nil {
+ t.Errorf(err.Error())
+ }
+ }
+}
+
+func TestMainInvalids(t *testing.T) {
+ oldargs := os.Args
+ defer func() { os.Args = oldargs }()
+
+ httpmock.Activate()
+ defer httpmock.DeactivateAndReset()
+
+ // prepare httpmock responders
+ SetIntercept(InitInvalidSources())
+
+ // run commandline tests
+ for _, tt := range invalidtests {
+ var buf bytes.Buffer
+ os.Args = strings.Split(tt.args, " ")
+
+ ret := Main(&buf)
+
+ if ret != tt.exitcode {
+ t.Errorf("%s with cmd <%s> did not exit with %d but %d",
+ tt.name, tt.args, tt.exitcode, ret)
+ }
+
+ if !strings.Contains(buf.String(), tt.expect) {
+ t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n",
+ tt.name, tt.args, tt.expect, buf.String())
+ }
+ }
+}
diff --git a/t/config-empty.conf b/t/config-empty.conf
index 26f68c3..107a152 100644
--- a/t/config-empty.conf
+++ b/t/config-empty.conf
@@ -1,2 +1,6 @@
# empty config for Main() unit tests to force unit tests NOT to use an
# eventually existing ~/.kleingebaeck!
+template="""
+{{.Title}}{{.Price}}{{.Id}}{{.Category}}{{.Condition}}{{.Created}}
+"""
+