mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 04:21:00 +01:00
Merge pull request #27 from TLINDEN/test/add-main-tests
Test/add main tests
This commit is contained in:
11
.github/workflows/ci.yaml
vendored
11
.github/workflows/ci.yaml
vendored
@@ -24,6 +24,17 @@ jobs:
|
||||
- name: test
|
||||
run: make test
|
||||
|
||||
- name: Update coverage report
|
||||
uses: ncruces/go-coverage-report@main
|
||||
with:
|
||||
report: true
|
||||
chart: true
|
||||
amend: true
|
||||
if: |
|
||||
matrix.os == 'ubuntu-latest' &&
|
||||
github.event_name == 'push'
|
||||
continue-on-error: true
|
||||
|
||||
golangci:
|
||||
name: lint
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
||||
test
|
||||
kleingebaeck
|
||||
releases
|
||||
t/out
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
[](https://github.com/tlinden/kleingebaeck/blob/master/LICENSE)
|
||||
[](https://goreportcard.com/report/github.com/tlinden/kleingebaeck)
|
||||
[](https://github.com/tlinden/kleingebaeck/actions)
|
||||
[](https://raw.githack.com/wiki/tlinden/kleingebaeck/coverage.html)
|
||||

|
||||
[](https://github.com/TLINDEN/kleingebaeck/releases/latest)
|
||||
|
||||
|
||||
@@ -19,6 +19,7 @@ package main
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
@@ -87,7 +88,7 @@ func (c *Config) IncrImgs(num int) {
|
||||
}
|
||||
|
||||
// load commandline flags and config file
|
||||
func InitConfig() (*Config, error) {
|
||||
func InitConfig(w io.Writer) (*Config, error) {
|
||||
var k = koanf.New(".")
|
||||
|
||||
// determine template based on os
|
||||
@@ -109,7 +110,7 @@ func InitConfig() (*Config, error) {
|
||||
// setup custom usage
|
||||
f := flag.NewFlagSet("config", flag.ContinueOnError)
|
||||
f.Usage = func() {
|
||||
fmt.Println(Usage)
|
||||
fmt.Fprintln(w, Usage)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
|
||||
21
main.go
21
main.go
@@ -20,6 +20,7 @@ package main
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"runtime/debug"
|
||||
@@ -30,10 +31,10 @@ import (
|
||||
const LevelNotice = slog.Level(2)
|
||||
|
||||
func main() {
|
||||
os.Exit(Main())
|
||||
os.Exit(Main(os.Stdout))
|
||||
}
|
||||
|
||||
func Main() int {
|
||||
func Main(w io.Writer) int {
|
||||
logLevel := &slog.LevelVar{}
|
||||
opts := &tint.Options{
|
||||
Level: logLevel,
|
||||
@@ -49,22 +50,22 @@ func Main() int {
|
||||
}
|
||||
|
||||
logLevel.Set(LevelNotice)
|
||||
var handler slog.Handler = tint.NewHandler(os.Stdout, opts)
|
||||
handler := tint.NewHandler(w, opts)
|
||||
logger := slog.New(handler)
|
||||
slog.SetDefault(logger)
|
||||
|
||||
conf, err := InitConfig()
|
||||
conf, err := InitConfig(w)
|
||||
if err != nil {
|
||||
return Die(err)
|
||||
}
|
||||
|
||||
if conf.Showversion {
|
||||
fmt.Printf("This is kleingebaeck version %s\n", VERSION)
|
||||
fmt.Fprintf(w, "This is kleingebaeck version %s\n", VERSION)
|
||||
return 0
|
||||
}
|
||||
|
||||
if conf.Showhelp {
|
||||
fmt.Println(Usage)
|
||||
fmt.Fprintln(w, Usage)
|
||||
return 0
|
||||
}
|
||||
|
||||
@@ -90,7 +91,7 @@ func Main() int {
|
||||
}
|
||||
|
||||
logLevel.Set(slog.LevelDebug)
|
||||
var handler slog.Handler = tint.NewHandler(os.Stdout, opts)
|
||||
handler := tint.NewHandler(w, opts)
|
||||
debuglogger := slog.New(handler).With(
|
||||
slog.Group("program_info",
|
||||
slog.Int("pid", os.Getpid()),
|
||||
@@ -100,6 +101,8 @@ func Main() int {
|
||||
slog.SetDefault(debuglogger)
|
||||
}
|
||||
|
||||
// defaultlogger := log.Default()
|
||||
// defaultlogger.SetOutput(w)
|
||||
slog.Debug("config", "conf", conf)
|
||||
|
||||
// prepare output dir
|
||||
@@ -131,10 +134,10 @@ func Main() int {
|
||||
if conf.StatsCountAds == 1 {
|
||||
adstr = "ad"
|
||||
}
|
||||
fmt.Printf("Successfully downloaded %d %s with %d images to %s.\n",
|
||||
fmt.Fprintf(w, "Successfully downloaded %d %s with %d images to %s.\n",
|
||||
conf.StatsCountAds, adstr, conf.StatsCountImages, conf.Outdir)
|
||||
} else {
|
||||
fmt.Printf("No ads found.")
|
||||
fmt.Fprintf(w, "No ads found.")
|
||||
}
|
||||
|
||||
return 0
|
||||
|
||||
520
main_test.go
Normal file
520
main_test.go
Normal file
@@ -0,0 +1,520 @@
|
||||
/*
|
||||
Copyright © 2023 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
tpl "text/template"
|
||||
|
||||
"github.com/jarcoal/httpmock"
|
||||
)
|
||||
|
||||
// the ad list, aka:
|
||||
// https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
|
||||
// Note, that this HTML code is reduced to the max, so that it only
|
||||
// contains the stuff required to satisfy goquery
|
||||
const LISTTPL string = `<!DOCTYPE html>
|
||||
<html lang="de" >
|
||||
<head>
|
||||
<title>Ads</title>
|
||||
</head>
|
||||
<body>
|
||||
{{ range . }}
|
||||
<h2 class="text-module-begin">
|
||||
<a class="ellipsis"
|
||||
href="/s-anzeige/{{ .Slug }}/{{ .Id }}">{{ .Title }}</a>
|
||||
</h2>
|
||||
{{ end }}
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
// an actual ad listing, aka:
|
||||
// https://www.kleinanzeigen.de/s-anzeige/ad-text-slug/1010101010
|
||||
// Note, that this HTML code is reduced to the max, so that it only
|
||||
// contains the stuff required to satisfy goquery
|
||||
const ADTPL string = `DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<title>Ad Listing</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
{{ range $image := .Images }}
|
||||
<div class="galleryimage-element" data-ix="3">
|
||||
<img src="{{ $image }}"/>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
<h1 id="viewad-title" class="boxedarticle--title" itemprop="name" data-soldlabel="Verkauft">
|
||||
{{ .Title }}</h1>
|
||||
<div class="boxedarticle--flex--container">
|
||||
<h2 class="boxedarticle--price" id="viewad-price">
|
||||
{{ .Price }}</h2>
|
||||
</div>
|
||||
|
||||
<div id="viewad-extra-info" class="boxedarticle--details--full">
|
||||
<div><i class="icon icon-small icon-calendar-gray-simple"></i><span>{{ .Created }}</span></div>
|
||||
</div>
|
||||
|
||||
<div class="splitlinebox l-container-row" id="viewad-details">
|
||||
<ul class="addetailslist">
|
||||
<li class="addetailslist--detail">
|
||||
Art<span class="addetailslist--detail--value" >
|
||||
{{ .Category }}</span>
|
||||
</li>
|
||||
<li class="addetailslist--detail">
|
||||
Zustand<span class="addetailslist--detail--value" >
|
||||
{{ .Condition }}</span>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="l-container last-paragraph-no-margin-bottom">
|
||||
<p id="viewad-description-text" class="text-force-linebreak " itemprop="description">
|
||||
{{ .Text }}
|
||||
</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
const EMPTYPAGE string = `DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head></head>
|
||||
<body></body>
|
||||
</html>
|
||||
`
|
||||
|
||||
const (
|
||||
EMPTYURI string = `https://www.kleinanzeigen.de/s-anzeige/empty/1`
|
||||
INVALIDPATHURI string = `https://www.kleinanzeigen.de/anzeige/name/1`
|
||||
INVALID404URI string = `https://www.kleinanzeigen.de/anzeige/name/1/foo/bar`
|
||||
INVALIDURI string = `https://foo.bar/weird/things`
|
||||
)
|
||||
|
||||
var base = "kleingebaeck -c t/config-empty.conf"
|
||||
|
||||
type Tests struct {
|
||||
name string
|
||||
args string
|
||||
expect string
|
||||
exitcode int
|
||||
}
|
||||
|
||||
var tests = []Tests{
|
||||
{
|
||||
name: "version",
|
||||
args: base + " -V",
|
||||
expect: "This is",
|
||||
exitcode: 0,
|
||||
},
|
||||
{
|
||||
name: "help",
|
||||
args: base + " -h",
|
||||
expect: "Usage:",
|
||||
exitcode: 0,
|
||||
},
|
||||
{
|
||||
name: "debug",
|
||||
args: base + " -d",
|
||||
expect: "program_info",
|
||||
exitcode: 1,
|
||||
},
|
||||
{
|
||||
name: "no-args-no-user",
|
||||
args: base,
|
||||
expect: "invalid or no user id",
|
||||
exitcode: 1,
|
||||
},
|
||||
{
|
||||
name: "download-single-ad",
|
||||
args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1",
|
||||
expect: "Successfully downloaded 1 ad with 2 images to t/out",
|
||||
exitcode: 0,
|
||||
},
|
||||
{
|
||||
name: "download-single-ad-verbose",
|
||||
args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -v",
|
||||
expect: "wrote ad listing",
|
||||
exitcode: 0,
|
||||
},
|
||||
{
|
||||
name: "download-single-ad-debug",
|
||||
args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -d",
|
||||
expect: "extracted ad listing program_info.pid=",
|
||||
exitcode: 0,
|
||||
},
|
||||
{
|
||||
name: "download-all-ads",
|
||||
args: base + " -o t/out -u 1",
|
||||
expect: "Successfully downloaded 6 ads with 12 images to t/out",
|
||||
exitcode: 0,
|
||||
},
|
||||
{
|
||||
name: "download-all-ads-using-config",
|
||||
args: "kleingebaeck -c t/fullconfig.conf",
|
||||
expect: "Successfully downloaded 6 ads with 12 images to t/out",
|
||||
exitcode: 0,
|
||||
},
|
||||
}
|
||||
|
||||
var invalidtests = []Tests{
|
||||
{
|
||||
name: "empty-ad",
|
||||
args: base + " " + EMPTYURI,
|
||||
expect: "could not extract ad data from page, got empty struct",
|
||||
exitcode: 1,
|
||||
},
|
||||
{
|
||||
name: "invalid-ad",
|
||||
args: base + " " + INVALIDURI,
|
||||
expect: "invalid uri",
|
||||
exitcode: 1,
|
||||
},
|
||||
{
|
||||
name: "invalid-path",
|
||||
args: base + " " + INVALIDPATHURI,
|
||||
expect: "could not extract ad data from page, got empty struct",
|
||||
exitcode: 1,
|
||||
},
|
||||
{
|
||||
name: "404",
|
||||
args: base + " " + INVALID404URI,
|
||||
expect: "could not get page via HTTP",
|
||||
exitcode: 1,
|
||||
},
|
||||
{
|
||||
name: "outdir-no-exists",
|
||||
args: base + " -o t/foo/bar/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -v",
|
||||
expect: "Failure",
|
||||
exitcode: 1,
|
||||
},
|
||||
{
|
||||
name: "wrong-flag",
|
||||
args: base + " -X",
|
||||
expect: "unknown shorthand flag: 'X' in -X",
|
||||
exitcode: 1,
|
||||
},
|
||||
{
|
||||
name: "no-config",
|
||||
args: "kleingebaeck -c t/invalid.conf",
|
||||
expect: "error loading config file",
|
||||
exitcode: 1,
|
||||
},
|
||||
}
|
||||
|
||||
type AdConfig struct {
|
||||
Title string
|
||||
Slug string
|
||||
Id string
|
||||
Price string
|
||||
Category string
|
||||
Condition string
|
||||
Created string
|
||||
Text string
|
||||
Images []string // files in ./t/
|
||||
}
|
||||
|
||||
var adsrc = []AdConfig{
|
||||
{
|
||||
Title: "First Ad",
|
||||
Id: "1", Price: "5€",
|
||||
Category: "Klimbim",
|
||||
Text: "Thing to sale",
|
||||
Slug: "first-ad",
|
||||
Condition: "works",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
{
|
||||
Title: "Secnd Ad",
|
||||
Id: "2", Price: "5€",
|
||||
Category: "Kram",
|
||||
Text: "Thing to sale",
|
||||
Slug: "second-ad",
|
||||
Condition: "works",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
{
|
||||
Title: "Third Ad",
|
||||
Id: "3",
|
||||
Price: "5€",
|
||||
Category: "Kuddelmuddel",
|
||||
Text: "Thing to sale",
|
||||
Slug: "third-ad",
|
||||
Condition: "works",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
{
|
||||
Title: "Forth Ad",
|
||||
Id: "4",
|
||||
Price: "5€",
|
||||
Category: "Krempel",
|
||||
Text: "Thing to sale",
|
||||
Slug: "fourth-ad",
|
||||
Condition: "works",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
{
|
||||
Title: "Fifth Ad",
|
||||
Id: "5",
|
||||
Price: "5€",
|
||||
Category: "Kladderadatsch",
|
||||
Text: "Thing to sale",
|
||||
Slug: "fifth-ad",
|
||||
Condition: "works",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
{
|
||||
Title: "Sixth Ad",
|
||||
Id: "6",
|
||||
Price: "5€",
|
||||
Category: "Klunker",
|
||||
Text: "Thing to sale",
|
||||
Slug: "sixth-ad",
|
||||
Condition: "works",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
}
|
||||
|
||||
// An Adsource is used to construct a httpmock responder for a
|
||||
// particular url. So, the code (scrape.go) scrapes
|
||||
// https://kleinanzeigen.de, but in reality httpmock captures the
|
||||
// request and responds with our mock data
|
||||
type Adsource struct {
|
||||
uri string
|
||||
content string
|
||||
status int
|
||||
}
|
||||
|
||||
// Render a HTML template for an adlisting or an ad
|
||||
func GetTemplate(l []AdConfig, a AdConfig, htmltemplate string) string {
|
||||
tmpl, err := tpl.New("template").Parse(htmltemplate)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
var out bytes.Buffer
|
||||
if len(a.Id) == 0 {
|
||||
err = tmpl.Execute(&out, l)
|
||||
} else {
|
||||
err = tmpl.Execute(&out, a)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return out.String()
|
||||
}
|
||||
|
||||
// Initialize the valid sources for the httpmock responder
|
||||
func InitValidSources() []Adsource {
|
||||
// valid ad listing page 1
|
||||
list1 := []AdConfig{
|
||||
adsrc[0], adsrc[1], adsrc[2],
|
||||
}
|
||||
|
||||
// valid ad listing page 2
|
||||
list2 := []AdConfig{
|
||||
adsrc[3], adsrc[4], adsrc[5],
|
||||
}
|
||||
|
||||
// valid ad listing page 3, which is empty
|
||||
list3 := []AdConfig{}
|
||||
|
||||
// used to signal GetTemplate() to render a listing
|
||||
empty := AdConfig{}
|
||||
|
||||
// prepare urls for the listing pages
|
||||
ads := []Adsource{
|
||||
{
|
||||
uri: fmt.Sprintf("%s%s?userId=1", Baseuri, Listuri),
|
||||
content: GetTemplate(list1, empty, LISTTPL),
|
||||
},
|
||||
{
|
||||
uri: fmt.Sprintf("%s%s?userId=1&pageNum=2", Baseuri, Listuri),
|
||||
content: GetTemplate(list2, empty, LISTTPL),
|
||||
},
|
||||
{
|
||||
uri: fmt.Sprintf("%s%s?userId=1&pageNum=3", Baseuri, Listuri),
|
||||
content: GetTemplate(list3, empty, LISTTPL),
|
||||
},
|
||||
}
|
||||
|
||||
// prepare urls for the ads
|
||||
for _, ad := range adsrc {
|
||||
ads = append(ads, Adsource{
|
||||
uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.Id),
|
||||
content: GetTemplate(nil, ad, ADTPL),
|
||||
})
|
||||
//panic(GetTemplate(nil, ad, ADTPL))
|
||||
}
|
||||
|
||||
return ads
|
||||
}
|
||||
|
||||
func InitInvalidSources() []Adsource {
|
||||
empty := AdConfig{}
|
||||
ads := []Adsource{
|
||||
{
|
||||
// valid ad page but without content
|
||||
uri: fmt.Sprintf("%s/s-anzeige/empty/1", Baseuri),
|
||||
content: GetTemplate(nil, empty, EMPTYPAGE),
|
||||
},
|
||||
{
|
||||
// some random foreign webpage
|
||||
uri: INVALIDURI,
|
||||
content: GetTemplate(nil, empty, "<html>foo</html>"),
|
||||
},
|
||||
{
|
||||
// some invalid page path
|
||||
uri: fmt.Sprintf("%s/anzeige/name/1", Baseuri),
|
||||
content: GetTemplate(nil, empty, "<html></html>"),
|
||||
},
|
||||
{
|
||||
// some none-ad page
|
||||
uri: fmt.Sprintf("%s/anzeige/name/1/foo/bar", Baseuri),
|
||||
content: GetTemplate(nil, empty, "<html>HTTP 404: /eine-anzeige/ does not exist!</html>"),
|
||||
status: 404,
|
||||
},
|
||||
}
|
||||
|
||||
return ads
|
||||
}
|
||||
|
||||
// load a test image from disk
|
||||
func GetImage(path string) []byte {
|
||||
dat, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return dat
|
||||
}
|
||||
|
||||
// setup httpmock
|
||||
func SetIntercept(ads []Adsource) {
|
||||
for _, ad := range ads {
|
||||
if ad.status == 0 {
|
||||
ad.status = 200
|
||||
}
|
||||
|
||||
httpmock.RegisterResponder("GET", ad.uri,
|
||||
httpmock.NewStringResponder(ad.status, ad.content))
|
||||
}
|
||||
|
||||
// we just use 2 images, put this here
|
||||
for _, image := range []string{"t/1.jpg", "t/2.jpg"} {
|
||||
httpmock.RegisterResponder("GET", image, httpmock.NewBytesResponder(200, GetImage(image)))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func VerifyAd(ad AdConfig) error {
|
||||
body := ad.Title + ad.Price + ad.Id + ad.Category + ad.Condition + ad.Created
|
||||
|
||||
file := fmt.Sprintf("t/out/%s/Adlisting.txt", ad.Slug)
|
||||
content, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if body != strings.TrimSpace(string(content)) {
|
||||
msg := fmt.Sprintf("ad content doesn't match.\nExpect: %s\n Got: %s\n", body, content)
|
||||
return errors.New(msg)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func TestMain(t *testing.T) {
|
||||
oldargs := os.Args
|
||||
defer func() { os.Args = oldargs }()
|
||||
|
||||
httpmock.Activate()
|
||||
defer httpmock.DeactivateAndReset()
|
||||
|
||||
// prepare httpmock responders
|
||||
SetIntercept(InitValidSources())
|
||||
|
||||
// run commandline tests
|
||||
for _, tt := range tests {
|
||||
var buf bytes.Buffer
|
||||
os.Args = strings.Split(tt.args, " ")
|
||||
|
||||
ret := Main(&buf)
|
||||
|
||||
if ret != tt.exitcode {
|
||||
t.Errorf("%s with cmd <%s> did not exit with %d but %d",
|
||||
tt.name, tt.args, tt.exitcode, ret)
|
||||
}
|
||||
|
||||
if !strings.Contains(buf.String(), tt.expect) {
|
||||
t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n",
|
||||
tt.name, tt.args, tt.expect, buf.String())
|
||||
}
|
||||
}
|
||||
|
||||
// verify if downloaded ads match
|
||||
for _, ad := range adsrc {
|
||||
if err := VerifyAd(ad); err != nil {
|
||||
t.Errorf(err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMainInvalids(t *testing.T) {
|
||||
oldargs := os.Args
|
||||
defer func() { os.Args = oldargs }()
|
||||
|
||||
httpmock.Activate()
|
||||
defer httpmock.DeactivateAndReset()
|
||||
|
||||
// prepare httpmock responders
|
||||
SetIntercept(InitInvalidSources())
|
||||
|
||||
// run commandline tests
|
||||
for _, tt := range invalidtests {
|
||||
var buf bytes.Buffer
|
||||
os.Args = strings.Split(tt.args, " ")
|
||||
|
||||
ret := Main(&buf)
|
||||
|
||||
if ret != tt.exitcode {
|
||||
t.Errorf("%s with cmd <%s> did not exit with %d but %d",
|
||||
tt.name, tt.args, tt.exitcode, ret)
|
||||
}
|
||||
|
||||
if !strings.Contains(buf.String(), tt.expect) {
|
||||
t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n",
|
||||
tt.name, tt.args, tt.expect, buf.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -196,7 +196,7 @@ func Getimage(uri, fileName string) error {
|
||||
defer response.Body.Close()
|
||||
|
||||
if response.StatusCode != 200 {
|
||||
return errors.New("received non 200 response code")
|
||||
return errors.New("could not get image via HTTP")
|
||||
}
|
||||
|
||||
err = WriteImage(fileName, response.Body)
|
||||
|
||||
345
scrape_test.go
345
scrape_test.go
@@ -1,345 +0,0 @@
|
||||
/*
|
||||
Copyright © 2023 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
tpl "text/template"
|
||||
|
||||
"github.com/jarcoal/httpmock"
|
||||
)
|
||||
|
||||
// used to fill an ad template and the ad listing page template
|
||||
type AdConfig struct {
|
||||
Title string
|
||||
Slug string
|
||||
Id string
|
||||
Price string
|
||||
Category string
|
||||
Condition string
|
||||
Created string
|
||||
Text string
|
||||
Images []string // files in ./t/
|
||||
}
|
||||
|
||||
// the ad list, aka:
|
||||
// https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
|
||||
// Note, that this HTML code is reduced to the max, so that it only
|
||||
// contains the stuff required to satisfy goquery
|
||||
const LISTTPL string = `<!DOCTYPE html>
|
||||
<html lang="de" >
|
||||
<head>
|
||||
<title>Ads</title>
|
||||
</head>
|
||||
<body>
|
||||
{{ range . }}
|
||||
<h2 class="text-module-begin">
|
||||
<a class="ellipsis"
|
||||
href="/s-anzeige/{{ .Slug }}/{{ .Id }}">{{ .Title }}</a>
|
||||
</h2>
|
||||
{{ end }}
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
// an actual ad listing, aka:
|
||||
// https://www.kleinanzeigen.de/s-anzeige/ad-text-slug/1010101010
|
||||
// Note, that this HTML code is reduced to the max, so that it only
|
||||
// contains the stuff required to satisfy goquery
|
||||
const ADTPL string = `DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head>
|
||||
<title>Ad Listing</title>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
{{ range $image := .Images }}
|
||||
<div class="galleryimage-element" data-ix="3">
|
||||
<img src="{{ $image }}"/>
|
||||
</div>
|
||||
{{ end }}
|
||||
|
||||
<h1 id="viewad-title" class="boxedarticle--title" itemprop="name" data-soldlabel="Verkauft">
|
||||
{{ .Title }}</h1>
|
||||
<div class="boxedarticle--flex--container">
|
||||
<h2 class="boxedarticle--price" id="viewad-price">
|
||||
{{ .Price }}</h2>
|
||||
</div>
|
||||
|
||||
<div id="viewad-extra-info" class="boxedarticle--details--full">
|
||||
<div><i class="icon icon-small icon-calendar-gray-simple"></i><span>{{ .Created }}</span></div>
|
||||
</div>
|
||||
|
||||
<div class="splitlinebox l-container-row" id="viewad-details">
|
||||
<ul class="addetailslist">
|
||||
<li class="addetailslist--detail">
|
||||
Art<span class="addetailslist--detail--value" >
|
||||
{{ .Category }}</span>
|
||||
</li>
|
||||
<li class="addetailslist--detail">
|
||||
Zustand<span class="addetailslist--detail--value" >
|
||||
{{ .Condition }}</span>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
<div class="l-container last-paragraph-no-margin-bottom">
|
||||
<p id="viewad-description-text" class="text-force-linebreak " itemprop="description">
|
||||
{{ .Text }}
|
||||
</p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
`
|
||||
|
||||
const EMPTYPAGE string = `DOCTYPE html>
|
||||
<html lang="de">
|
||||
<head></head>
|
||||
<body></body>
|
||||
</html>
|
||||
`
|
||||
|
||||
const EMPTYURI string = `https://www.kleinanzeigen.de/s-anzeige/empty/1`
|
||||
const INVALIDURI string = `https://foo.bar/weird/things`
|
||||
|
||||
// An Adsource is used to construct a httpmock responder for a
|
||||
// particular url. So, the code (scrape.go) scrapes
|
||||
// https://kleinanzeigen.de, but in reality httpmock captures the
|
||||
// request and responds with our mock data
|
||||
type Adsource struct {
|
||||
uri string
|
||||
content string
|
||||
status int
|
||||
}
|
||||
|
||||
// Render a HTML template for an adlisting or an ad
|
||||
func GetTemplate(l []AdConfig, a AdConfig, htmltemplate string) string {
|
||||
tmpl, err := tpl.New("template").Parse(htmltemplate)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
var out bytes.Buffer
|
||||
if len(a.Id) == 0 {
|
||||
err = tmpl.Execute(&out, l)
|
||||
} else {
|
||||
err = tmpl.Execute(&out, a)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return out.String()
|
||||
}
|
||||
|
||||
func InitAds() []AdConfig {
|
||||
return []AdConfig{
|
||||
{Title: "First Ad", Id: "1", Price: "5€", Category: "Klimbim", Text: "Thing to sale", Slug: "first-ad",
|
||||
Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}},
|
||||
{Title: "Secnd Ad", Id: "2", Price: "5€", Category: "Kram", Text: "Thing to sale", Slug: "second-ad",
|
||||
Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}},
|
||||
{Title: "Third Ad", Id: "3", Price: "5€", Category: "Kuddelmuddel", Text: "Thing to sale", Slug: "third-ad",
|
||||
Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}},
|
||||
{Title: "Forth Ad", Id: "4", Price: "5€", Category: "Krempel", Text: "Thing to sale", Slug: "fourth-ad",
|
||||
Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}},
|
||||
{Title: "Fifth Ad", Id: "5", Price: "5€", Category: "Kladderadatsch", Text: "Thing to sale", Slug: "fifth-ad",
|
||||
Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}},
|
||||
{Title: "Sixth Ad", Id: "6", Price: "5€", Category: "Klunker", Text: "Thing to sale", Slug: "sixth-ad",
|
||||
Condition: "works", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}},
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the valid sources for the httpmock responder
|
||||
func InitValidSources(conf *Config) []Adsource {
|
||||
// all our valid ads
|
||||
adsrc := InitAds()
|
||||
|
||||
// valid ad listing page 1
|
||||
list1 := []AdConfig{
|
||||
adsrc[0], adsrc[1], adsrc[2],
|
||||
}
|
||||
|
||||
// valid ad listing page 2
|
||||
list2 := []AdConfig{
|
||||
adsrc[3], adsrc[4], adsrc[5],
|
||||
}
|
||||
|
||||
// valid ad listing page 3, which is empty
|
||||
list3 := []AdConfig{}
|
||||
|
||||
// used to signal GetTemplate() to render a listing
|
||||
empty := AdConfig{}
|
||||
|
||||
// prepare urls for the listing pages
|
||||
ads := []Adsource{
|
||||
{
|
||||
uri: fmt.Sprintf("%s%s?userId=%d", Baseuri, Listuri, conf.User),
|
||||
content: GetTemplate(list1, empty, LISTTPL),
|
||||
},
|
||||
{
|
||||
uri: fmt.Sprintf("%s%s?userId=%d&pageNum=2", Baseuri, Listuri, conf.User),
|
||||
content: GetTemplate(list2, empty, LISTTPL),
|
||||
},
|
||||
{
|
||||
uri: fmt.Sprintf("%s%s?userId=%d&pageNum=3", Baseuri, Listuri, conf.User),
|
||||
content: GetTemplate(list3, empty, LISTTPL),
|
||||
},
|
||||
}
|
||||
|
||||
// prepare urls for the ads
|
||||
for _, ad := range adsrc {
|
||||
ads = append(ads, Adsource{
|
||||
uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.Id),
|
||||
content: GetTemplate(nil, ad, ADTPL),
|
||||
})
|
||||
//panic(GetTemplate(nil, ad, ADTPL))
|
||||
}
|
||||
|
||||
return ads
|
||||
}
|
||||
|
||||
func InitInvalidSources(conf *Config) []Adsource {
|
||||
empty := AdConfig{}
|
||||
ads := []Adsource{
|
||||
{
|
||||
// valid ad page but without content
|
||||
uri: fmt.Sprintf("%s/s-anzeige/empty/1", Baseuri),
|
||||
content: GetTemplate(nil, empty, EMPTYPAGE),
|
||||
},
|
||||
{
|
||||
// some random foreign webpage
|
||||
uri: INVALIDURI,
|
||||
content: GetTemplate(nil, empty, "<html>foo</html>"),
|
||||
},
|
||||
{
|
||||
// some invalid page path
|
||||
uri: fmt.Sprintf("%s/anzeige/name/1", Baseuri),
|
||||
content: GetTemplate(nil, empty, "<html></html>"),
|
||||
},
|
||||
{
|
||||
// some none-ad page
|
||||
uri: fmt.Sprintf("%s/anzeige/name/1/foo/bar", Baseuri),
|
||||
content: GetTemplate(nil, empty, "<html>HTTP 404: /eine-anzeige/ does not exist!</html>"),
|
||||
status: 404,
|
||||
},
|
||||
}
|
||||
|
||||
return ads
|
||||
}
|
||||
|
||||
// load a test image from disk
|
||||
func GetImage(path string) []byte {
|
||||
dat, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return dat
|
||||
}
|
||||
|
||||
// setup httpmock
|
||||
func SetIntercept(conf *Config) {
|
||||
ads := InitValidSources(conf)
|
||||
eads := InitInvalidSources(conf)
|
||||
|
||||
ads = append(ads, eads...)
|
||||
|
||||
for _, ad := range ads {
|
||||
if ad.status == 0 {
|
||||
ad.status = 200
|
||||
}
|
||||
|
||||
httpmock.RegisterResponder("GET", ad.uri,
|
||||
httpmock.NewStringResponder(ad.status, ad.content))
|
||||
}
|
||||
|
||||
// we just use 2 images, put this here
|
||||
for _, image := range []string{"t/1.jpg", "t/2.jpg"} {
|
||||
httpmock.RegisterResponder("GET", image, httpmock.NewBytesResponder(200, GetImage(image)))
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// the actual test, calls Start() from scrape, which recursively
|
||||
// scrapes ads from a user
|
||||
func TestStart(t *testing.T) {
|
||||
httpmock.Activate()
|
||||
defer httpmock.DeactivateAndReset()
|
||||
|
||||
// fake config
|
||||
conf := &Config{User: 1, Outdir: "t/out", Template: DefaultTemplate}
|
||||
|
||||
// prepare httpmock responders
|
||||
SetIntercept(conf)
|
||||
|
||||
// run
|
||||
if err := Start(conf); err != nil {
|
||||
t.Errorf("failed to scrape: %s", err.Error())
|
||||
}
|
||||
|
||||
// verify
|
||||
for _, ad := range InitAds() {
|
||||
file := fmt.Sprintf("t/out/%s/Adlisting.txt", ad.Slug)
|
||||
content, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
t.Errorf("failed to read adlisting: %s", err.Error())
|
||||
}
|
||||
|
||||
if !strings.Contains(string(content), ad.Category) && !strings.Contains(string(content), ad.Title) {
|
||||
t.Errorf("failed to verify: %s content doesn't contain expected data", file)
|
||||
}
|
||||
}
|
||||
|
||||
// uncomment to see slogs
|
||||
//t.Errorf("debug")
|
||||
}
|
||||
|
||||
func TestSingleFail(t *testing.T) {
|
||||
httpmock.Activate()
|
||||
defer httpmock.DeactivateAndReset()
|
||||
|
||||
// fake config
|
||||
conf := &Config{Outdir: "t/out", Template: DefaultTemplate, Adlinks: []string{EMPTYURI}}
|
||||
|
||||
SetIntercept(conf)
|
||||
|
||||
// check empty ad
|
||||
if err := Scrape(conf, EMPTYURI); err == nil {
|
||||
t.Errorf("scrape returned empty ad")
|
||||
}
|
||||
|
||||
// wrong uri
|
||||
if err := Scrape(conf, INVALIDURI); err == nil {
|
||||
t.Errorf("scrape returned ad from invalid web site")
|
||||
}
|
||||
|
||||
// wrong path
|
||||
if err := Scrape(conf, fmt.Sprintf("%s/anzeige/name/1", Baseuri)); err == nil {
|
||||
t.Errorf("scrape returned ad from invalid page")
|
||||
}
|
||||
|
||||
// wrong path
|
||||
if err := Scrape(conf, fmt.Sprintf("%s/anzeige/name/1/foo/bar", Baseuri)); err == nil {
|
||||
t.Errorf("scrape returned ad from 404 page")
|
||||
}
|
||||
}
|
||||
6
t/config-empty.conf
Normal file
6
t/config-empty.conf
Normal file
@@ -0,0 +1,6 @@
|
||||
# empty config for Main() unit tests to force unit tests NOT to use an
|
||||
# eventually existing ~/.kleingebaeck!
|
||||
template="""
|
||||
{{.Title}}{{.Price}}{{.Id}}{{.Category}}{{.Condition}}{{.Created}}
|
||||
"""
|
||||
|
||||
6
t/fullconfig.conf
Normal file
6
t/fullconfig.conf
Normal file
@@ -0,0 +1,6 @@
|
||||
user = 1
|
||||
loglevel = "verbose"
|
||||
outdir = "t/out"
|
||||
template="""
|
||||
{{.Title}}{{.Price}}{{.Id}}{{.Category}}{{.Condition}}{{.Created}}
|
||||
"""
|
||||
1
t/invalid.conf
Normal file
1
t/invalid.conf
Normal file
@@ -0,0 +1 @@
|
||||
user = "
|
||||
Reference in New Issue
Block a user