From f893f9c3d75d5f073874887034522928abe3cfe7 Mon Sep 17 00:00:00 2001 From: "T.v.Dein" Date: Fri, 29 Dec 2023 13:47:18 +0100 Subject: [PATCH] Test/add mock tests (#24) * add scrape unit test using httpmock lib --- .github/workflows/ci.yaml | 3 + Makefile | 3 +- go.mod | 1 + go.sum | 2 + scrape.go | 2 +- scrape_test.go | 268 ++++++++++++++++++++++++++++++++++++++ store.go | 2 + t/1.jpg | Bin 0 -> 1001 bytes t/2.jpg | Bin 0 -> 1002 bytes 9 files changed, 279 insertions(+), 2 deletions(-) create mode 100644 scrape_test.go create mode 100644 t/1.jpg create mode 100644 t/2.jpg diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index e383eeb..4066875 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -21,6 +21,9 @@ jobs: - name: build run: go build + - name: test + run: make test + golangci: name: lint runs-on: ubuntu-latest diff --git a/Makefile b/Makefile index 5812f8f..7b4e1ca 100644 --- a/Makefile +++ b/Makefile @@ -50,9 +50,10 @@ install: buildlocal install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/ clean: - rm -rf $(tool) coverage.out testdata + rm -rf $(tool) coverage.out testdata t/out test: clean + mkdir -p t/out go test ./... $(ARGS) testfuzzy: clean diff --git a/go.mod b/go.mod index c395625..5a4e11a 100644 --- a/go.mod +++ b/go.mod @@ -18,6 +18,7 @@ require ( github.com/PuerkitoBio/goquery v1.5.0 // indirect github.com/andybalholm/cascadia v1.0.0 // indirect github.com/fsnotify/fsnotify v1.6.0 // indirect + github.com/jarcoal/httpmock v1.3.1 // indirect github.com/knadh/koanf/maps v0.1.1 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect diff --git a/go.sum b/go.sum index 5a71b9d..fa9fff7 100644 --- a/go.sum +++ b/go.sum @@ -9,6 +9,8 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww= +github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg= github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs= github.com/knadh/koanf/maps v0.1.1/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI= github.com/knadh/koanf/parsers/toml v0.1.0 h1:S2hLqS4TgWZYj4/7mI5m1CQQcWurxUz6ODgOub/6LCI= diff --git a/scrape.go b/scrape.go index 25127c2..fc3c3c1 100644 --- a/scrape.go +++ b/scrape.go @@ -140,7 +140,7 @@ func Scrape(c *Config, uri string) error { // extract slug and id from uri uriparts := strings.Split(uri, "/") if len(uriparts) < 6 { - return errors.New("invalid uri") + return errors.New("invalid uri: " + uri) } ad.Slug = uriparts[4] ad.Id = uriparts[5] diff --git a/scrape_test.go b/scrape_test.go new file mode 100644 index 0000000..f96506a --- /dev/null +++ b/scrape_test.go @@ -0,0 +1,268 @@ +/* +Copyright © 2023 Thomas von Dein + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +package main + +import ( + "bytes" + "fmt" + "os" + "strings" + "testing" + tpl "text/template" + + "github.com/jarcoal/httpmock" +) + +// used to fill an ad template and the ad listing page template +type AdConfig struct { + Title string + Slug string + Id string + Price string + Category string + Condition string + Created string + Text string + Images []string // files in ./t/ +} + +// the ad list, aka: +// https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX +// Note, that this HTML code is reduced to the max, so that it only +// contains the stuff required to satisfy goquery +const LISTTPL string = ` + + + Ads + + +{{ range . }} +

+ {{ .Title }} +

+{{ end }} + + +` + +// an actual ad listing, aka: +// https://www.kleinanzeigen.de/s-anzeige/ad-text-slug/1010101010 +// Note, that this HTML code is reduced to the max, so that it only +// contains the stuff required to satisfy goquery +const ADTPL string = `DOCTYPE html> + + + Ad Listing + + + + {{ range $image := .Images }} +
+ +
+ {{ end }} + +

+ {{ .Title }}

+
+

+ {{ .Price }}

+
+ +
+
{{ .Created }}
+
+ +
+
    +
  • + Art + {{ .Category }} +
  • +
  • + Zustand + {{ .Condition }} +
  • +
+
+ +
+

+ {{ .Text }} +

+
+ + +` + +// An Adsource is used to construct a httpmock responder for a +// particular url. So, the code (scrape.go) scrapes +// https://kleinanzeigen.de, but in reality httpmock captures the +// request and responds with our mock data +type Adsource struct { + uri string + content string +} + +// Render a HTML template for an adlisting or an ad +func GetTemplate(l []AdConfig, a AdConfig, htmltemplate string) string { + tmpl, err := tpl.New("template").Parse(htmltemplate) + if err != nil { + panic(err) + } + + var out bytes.Buffer + if len(a.Id) == 0 { + err = tmpl.Execute(&out, l) + } else { + err = tmpl.Execute(&out, a) + } + + if err != nil { + panic(err) + } + + return out.String() +} + +func InitAds() []AdConfig { + return []AdConfig{ + {Title: "First Ad", Id: "1", Price: "5€", Category: "Klimbim", Text: "Thing to sale", Slug: "first-ad", + Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}}, + {Title: "Secnd Ad", Id: "2", Price: "5€", Category: "Kram", Text: "Thing to sale", Slug: "second-ad", + Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}}, + {Title: "Third Ad", Id: "3", Price: "5€", Category: "Kuddelmuddel", Text: "Thing to sale", Slug: "third-ad", + Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}}, + {Title: "Forth Ad", Id: "4", Price: "5€", Category: "Krempel", Text: "Thing to sale", Slug: "fourth-ad", + Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}}, + {Title: "Fifth Ad", Id: "5", Price: "5€", Category: "Kladderadatsch", Text: "Thing to sale", Slug: "fifth-ad", + Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}}, + {Title: "Sixth Ad", Id: "6", Price: "5€", Category: "Klunker", Text: "Thing to sale", Slug: "sixth-ad", + Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}}, + } +} + +// Initialize the valid sources for the httpmock responder +func InitValidSources(conf *Config) []Adsource { + // all our valid ads + adsrc := InitAds() + + // valid ad listing page 1 + list1 := []AdConfig{ + adsrc[0], adsrc[1], adsrc[2], + } + + // valid ad listing page 2 + list2 := []AdConfig{ + adsrc[3], adsrc[4], adsrc[5], + } + + // valid ad listing page 3, which is empty + list3 := []AdConfig{} + + // used to signal GetTemplate() to render a listing + empty := AdConfig{} + + // prepare urls for the listing pages + ads := []Adsource{ + { + uri: fmt.Sprintf("%s%s?userId=%d", Baseuri, Listuri, conf.User), + content: GetTemplate(list1, empty, LISTTPL), + }, + { + uri: fmt.Sprintf("%s%s?userId=%d&pageNum=2", Baseuri, Listuri, conf.User), + content: GetTemplate(list2, empty, LISTTPL), + }, + { + uri: fmt.Sprintf("%s%s?userId=%d&pageNum=3", Baseuri, Listuri, conf.User), + content: GetTemplate(list3, empty, LISTTPL), + }, + } + + // prepare urls for the ads + for _, ad := range adsrc { + ads = append(ads, Adsource{ + uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.Id), + content: GetTemplate(nil, ad, ADTPL), + }) + //panic(GetTemplate(nil, ad, ADTPL)) + } + + return ads +} + +// load a test image from disk +func GetImage(path string) []byte { + dat, err := os.ReadFile(path) + if err != nil { + panic(err) + } + + return dat +} + +// setup httpmock +func SetIntercept(conf *Config) { + ads := InitValidSources(conf) + + for _, ad := range ads { + httpmock.RegisterResponder("GET", ad.uri, + httpmock.NewStringResponder(200, ad.content)) + } + + // we just use 2 images, put this here + for _, image := range []string{"t/1.jpg", "t/2.jpg"} { + httpmock.RegisterResponder("GET", image, httpmock.NewBytesResponder(200, GetImage(image))) + } + +} + +// the actual test, calls Start() from scrape, which recursively +// scrapes ads from a user +func TestStart(t *testing.T) { + httpmock.Activate() + defer httpmock.DeactivateAndReset() + + // fake config + conf := &Config{User: 1, Outdir: "t/out", Template: DefaultTemplate} + + // prepare httpmock responders + SetIntercept(conf) + + // run + if err := Start(conf); err != nil { + t.Errorf("failed to scrape: %s", err.Error()) + } + + // verify + for _, ad := range InitAds() { + file := fmt.Sprintf("t/out/%s/Adlisting.txt", ad.Slug) + content, err := os.ReadFile(file) + if err != nil { + t.Errorf("failed to read adlisting: %s", err.Error()) + } + + if !strings.Contains(string(content), ad.Category) && !strings.Contains(string(content), ad.Title) { + t.Errorf("failed to verify: %s content doesn't contain expected data", file) + } + } + + // uncomment to see slogs + //t.Errorf("debug") +} diff --git a/store.go b/store.go index 4d52cbe..e159f95 100644 --- a/store.go +++ b/store.go @@ -41,6 +41,7 @@ func WriteAd(dir string, ad *Ad, template string) error { if err != nil { return err } + defer f.Close() if runtime.GOOS == "windows" { ad.Text = strings.ReplaceAll(ad.Text, "
", "\r\n") @@ -52,6 +53,7 @@ func WriteAd(dir string, ad *Ad, template string) error { if err != nil { return err } + err = tmpl.Execute(f, ad) if err != nil { return err diff --git a/t/1.jpg b/t/1.jpg new file mode 100644 index 0000000000000000000000000000000000000000..ef2a54bd616d107793e86decc635f96b1d947345 GIT binary patch literal 1001 zcmb7@PiWI{6vw~6r0bG2U6Zwfh_W22Ufe}Ryg6HKsdO0XgiQra)4$MkCC%)`qu_Pm zLFdJ(2QMBx>2-%)1wojbiij64o)o>v{P}+AXs1}g7xK#|-}k;R`MnUn$v?n2mn-DJ zn1t(efd51?>sOtiTCsY~pkd_;vo`-IJuPIjSMAdIT%mZDssU-W<$B%74B)i`zm(5d z^9z@(@lVik05MGAu;X?+r)|54k@0;4)CqVsEqaYC`+Fi@sdim}S(Kh}JAOcXjrc?_ z=m@?`oN^mZh4=~a5uaX=cvI-*K7STm4_P8a%AYG`X>TcFtGD-Sz3Ydir3uP>!i53D()3r@X7bVJPM)>!I zew$onay8+S;|vzz!3q5k-+*c;hO_O~jvWmpre}b$_ndz{2#ntbwwF2o@qqK~hk*PJ zSZfdci>tuNZ=^2|we)=;aT9p9KGe#0fakY>y;~i}cf#k$v`V!a@Z~WOUjULX0qt^s zZj=*wJbDex(X5IufR%f|p*r=xGGgBf83uFvXU)$1_*-NlOCrQ7Q$(T>5lS>V7F88Z zQx#R!^n|X(CSs}@H{%lt!!Qj^pG-~~Nn%4ZVZtd#6fznyHAd=x2491zK!YYT6Ozef ylks)z3I8E6he0SLrwlf@-vLp~#1HpdgvI8cD)<}dGNZ@HCQe~xL+SxjeCroIyP#MA literal 0 HcmV?d00001 diff --git a/t/2.jpg b/t/2.jpg new file mode 100644 index 0000000000000000000000000000000000000000..31ee515599e8e5ebe18bf01fe1da9e09dc1458f0 GIT binary patch literal 1002 zcmb7@O=#0#7{~u_(sfCiuE|2-%)1wqixFT{%%Pl{e-zCLd{+9_7>33>C&|9SpT@;)KF%Rj(4HJzUZ zV-haY0sa%otY2}0O4(X!1a&KypRxHz>2W@ry=WItPv;9KsTz=0ny%N0OaWdi@Qb;O zH8+3G8vO(vyAi`4?04Kw`?zfvFf_iu1a$&lOo(1X%l@8-mn$6?U>2pP+_oPOUm`xT z6to53Bu=??r%e2i_<&C@NZb{AsmGrM*Fu&Ek@9DYS=w8Q*s5*e(iV2>zDG5OsU}_a zTGU^f>v37|6~SI}k=`u~u|vJmI(vq$X*zcyhdgFr!$Jv*XrPGz6Lf8n(m|2(nIZn2 zq2DG~iChi1MJXKZCErRFFVIW+o() z$tL4#*cSdnVh;UKNKP57f4>Q&Zf6v~>(Q{<{6nSR_ce5x(Q{-IlX%p9cMO>1>%Rcv CPN72p literal 0 HcmV?d00001