mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 12:31:03 +01:00
Compare commits
10 Commits
fix/check-
...
v0.3.6
| Author | SHA1 | Date | |
|---|---|---|---|
| 834fbcd9c5 | |||
| 4c10ae89f8 | |||
| df6fc47ca3 | |||
| 5956a68e72 | |||
| eee0167574 | |||
| a3b2748479 | |||
| 8cc5a9e3ed | |||
| d2bcd7b505 | |||
| c59c2e2931 | |||
| 2288806105 |
1
Makefile
1
Makefile
@@ -63,6 +63,7 @@ lint:
|
|||||||
|
|
||||||
lint-full:
|
lint-full:
|
||||||
golangci-lint run --enable-all --exclude-use-default --disable exhaustivestruct,exhaustruct,depguard,interfacer,deadcode,golint,structcheck,scopelint,varcheck,ifshort,maligned,nosnakecase,godot,funlen,gofumpt,cyclop,noctx,gochecknoglobals,paralleltest
|
golangci-lint run --enable-all --exclude-use-default --disable exhaustivestruct,exhaustruct,depguard,interfacer,deadcode,golint,structcheck,scopelint,varcheck,ifshort,maligned,nosnakecase,godot,funlen,gofumpt,cyclop,noctx,gochecknoglobals,paralleltest
|
||||||
|
gocritic check -enableAll *.go
|
||||||
|
|
||||||
testfuzzy: clean
|
testfuzzy: clean
|
||||||
go test -fuzz ./... $(ARGS)
|
go test -fuzz ./... $(ARGS)
|
||||||
|
|||||||
43
README-de.md
43
README-de.md
@@ -222,6 +222,49 @@ Sowie alle Bilder.
|
|||||||
Das Format kann man mit der Variable `template` in der Konfiguration
|
Das Format kann man mit der Variable `template` in der Konfiguration
|
||||||
ändern. Die `example.conf` enthält ein Beispiel für das Standard Template.
|
ändern. Die `example.conf` enthält ein Beispiel für das Standard Template.
|
||||||
|
|
||||||
|
## Verhalten des Tools
|
||||||
|
|
||||||
|
Es gibt einige Dinge über das Verhalten von kleingebäck, über die Du
|
||||||
|
Bescheid wissen solltest:
|
||||||
|
|
||||||
|
- alle HTML Seiten und Bilder werden immer heruntergeladen
|
||||||
|
- es wird ein (konfigurierbarer) Useragent verwendet
|
||||||
|
- HTTP Cookies werden beachtet
|
||||||
|
- bei Fehlern wird dreimal mit unterschiedlichem Abstand erneut
|
||||||
|
versucht
|
||||||
|
- Bilder Downloads laufen parallelisiert mit leicht unterschiedlichen
|
||||||
|
zeitlichen Abständen ab
|
||||||
|
- Gleich aussehende Bilder werden nicht überschrieben
|
||||||
|
|
||||||
|
Der letzte Punkt muss genauer erläutert werden:
|
||||||
|
|
||||||
|
Wenn man bei Kleinanzeigen.de eine Anzeige einstellt und Bilder
|
||||||
|
postet, werden diese dort in ihrer Grösse reduziert (durch Kompression
|
||||||
|
und Verkleinerung der Bilder usw.). Diese reduzierten Bilder werden
|
||||||
|
dann von kleingebäck heruntergeladen. Falls Du Deine Originalbilder
|
||||||
|
behalten hast, kannst Du diese danach in das Backupverzeichnis
|
||||||
|
kopieren. Bei einem erneuten kleingebäck-Lauf werden diese Bilder dann
|
||||||
|
nicht überschrieben.
|
||||||
|
|
||||||
|
Wir verwenden dafür einen Algorithmus namens [distance
|
||||||
|
hashing](https://github.com/corona10/goimagehash). Dieser Algorithmus
|
||||||
|
prüft die Ähnlichkeit von Bildern. Diese können in ihrer Auflösung,
|
||||||
|
Kompression, Farbtiefe und vielem mehr manipuliert worden sein und
|
||||||
|
trotzdem als das "gleiche Bild" erkannt werden (wohlgemerkt nicht "das
|
||||||
|
selbe": die Dateien sind durchaus unterschiedlich!). Bis zu einer
|
||||||
|
Distance von 5 überschreiben wir keine Bilder, weil wir dann davon
|
||||||
|
ausgehen, dass das lokal Vorhandene das Original ist.
|
||||||
|
|
||||||
|
Bitte beachte aber, dass dies KEIN Cachingmechanismus ist: die Bilder
|
||||||
|
werden trotzdem immer alle heruntergeladen. Das muss so sein, da wir
|
||||||
|
uns nicht die Dateinamen anschauen können, da kleinanzeigen.de diese
|
||||||
|
nämlich zu Zahlen umbenennt. Und die Dateinamen können sich auch
|
||||||
|
ändern, wenn der User in der Anzeige die Bilder umarrangiert hat.
|
||||||
|
|
||||||
|
Du kannst dieses Verhalten mit der Option **--force** ausschalten. Du
|
||||||
|
kannst ausserdem mit der Option **--ignoreerrors** auch alle Fehler
|
||||||
|
ignorieren, die beim Bilderdownload auftreten könnten.
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
|
|
||||||
Die Dokumentation kann man
|
Die Dokumentation kann man
|
||||||
|
|||||||
42
README.md
42
README.md
@@ -207,6 +207,48 @@ variable. The supplied sample config contains the default template.
|
|||||||
|
|
||||||
All images will be stored in the same directory.
|
All images will be stored in the same directory.
|
||||||
|
|
||||||
|
## Tool Behavior
|
||||||
|
|
||||||
|
There are a bunch of things you might want to know about the behavior
|
||||||
|
of the kleingebäck tool:
|
||||||
|
|
||||||
|
- all HTML pages and images are always downloaded
|
||||||
|
- we use a (customizable) user agent
|
||||||
|
- we respect HTTP cookies
|
||||||
|
- in the case of an error, the tool does 3 retries, the time it waits
|
||||||
|
between tries is longer for each retry
|
||||||
|
- image download is parallelized using small time differences to look
|
||||||
|
more natural
|
||||||
|
- identical-looking images are not overwritten on subsequent downloads
|
||||||
|
|
||||||
|
|
||||||
|
The latter needs to be elaborated a bit more:
|
||||||
|
|
||||||
|
If you publish an ad on kleinanzeigen.de and post images, those images
|
||||||
|
will be reduced in size by the site (by compressing and downsizing
|
||||||
|
them). These reduced images will be downloaded by kleingebäck. However,
|
||||||
|
you may still own the original images and may want to put them into
|
||||||
|
that backup directory so that you have all things for one ad together.
|
||||||
|
|
||||||
|
You can easily do that, because kleingebäck won't overwrite those
|
||||||
|
original images. It uses something called a distance hash using
|
||||||
|
[goimagehash](https://github.com/corona10/goimagehash). This
|
||||||
|
algorithm checks the similarity of images. If an image has been
|
||||||
|
resized it is still very similar to the original one. We accept a
|
||||||
|
maximum distance of 5; anything above that leads to an overwrite.
|
||||||
|
|
||||||
|
This works with resized, cropped and otherwise manipulated images as
|
||||||
|
long as the image still shows the original contents well enough.
|
||||||
|
|
||||||
|
Also note that this is NOT a caching mechanism: the images will be
|
||||||
|
downloaded anyway during each run. We also can't look at the file
|
||||||
|
names because kleinanzeigen.de renames all images to numbers. And
|
||||||
|
those might even change if the user re-arranges the images.
|
||||||
|
|
||||||
|
You can override this behavior using the **--force** option. Another
|
||||||
|
option, **--ignoreerrors**, can be used to ignore all kinds of image
|
||||||
|
errors.
|
||||||
|
|
||||||
## Documentation
|
## Documentation
|
||||||
|
|
||||||
You can read the documentation [online](https://github.com/TLINDEN/kleingebaeck/blob/main/kleingebaeck.pod) or locally once you have installed kleingebaeck with: `kleingebaeck --manual`.
|
You can read the documentation [online](https://github.com/TLINDEN/kleingebaeck/blob/main/kleingebaeck.pod) or locally once you have installed kleingebaeck with: `kleingebaeck --manual`.
|
||||||
|
|||||||
17
SECURITY.md
Normal file
17
SECURITY.md
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
# Security Policy
|
||||||
|
|
||||||
|
## Supported Versions
|
||||||
|
|
||||||
|
Only the latest release is supported. If you find an issue (any
|
||||||
|
issue!), please check with the latest release first.
|
||||||
|
|
||||||
|
## Reporting a Vulnerability
|
||||||
|
|
||||||
|
I don't agree with the "responsible disclosure" process most projects
|
||||||
|
(and companies) work with these days.
|
||||||
|
|
||||||
|
So, if you find a vulnerability of any kind, please just open an
|
||||||
|
[issue](https://github.com/TLINDEN/kleingebaeck/issues). Please add
|
||||||
|
all details required to reproduce the vulnerability. You won't be chased.
|
||||||
|
|
||||||
|
That's all there is to it.
|
||||||
2
ad.go
2
ad.go
@@ -73,7 +73,7 @@ func (ad *Ad) Incomplete() bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (ad *Ad) CalculateExpire() {
|
func (ad *Ad) CalculateExpire() {
|
||||||
if len(ad.Created) > 0 {
|
if ad.Created != "" {
|
||||||
ts, err := time.Parse("02.01.2006", ad.Created)
|
ts, err := time.Parse("02.01.2006", ad.Created)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
ad.Expire = ts.AddDate(0, ExpireMonths, ExpireDays).Format("02.01.2006")
|
ad.Expire = ts.AddDate(0, ExpireMonths, ExpireDays).Format("02.01.2006")
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
VERSION string = "0.3.4"
|
VERSION string = "0.3.6"
|
||||||
Baseuri string = "https://www.kleinanzeigen.de"
|
Baseuri string = "https://www.kleinanzeigen.de"
|
||||||
Listuri string = "/s-bestandsliste.html"
|
Listuri string = "/s-bestandsliste.html"
|
||||||
Defaultdir string = "."
|
Defaultdir string = "."
|
||||||
|
|||||||
2
fetch.go
2
fetch.go
@@ -52,7 +52,7 @@ func NewFetcher(conf *Config) (*Fetcher, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (f *Fetcher) Get(uri string) (io.ReadCloser, error) {
|
func (f *Fetcher) Get(uri string) (io.ReadCloser, error) {
|
||||||
req, err := http.NewRequest(http.MethodGet, uri, nil)
|
req, err := http.NewRequest(http.MethodGet, uri, http.NoBody)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create a new HTTP request obj: %w", err)
|
return nil, fmt.Errorf("failed to create a new HTTP request obj: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
1
go.mod
1
go.mod
@@ -24,6 +24,7 @@ require (
|
|||||||
github.com/corona10/goimagehash v1.1.0 // indirect
|
github.com/corona10/goimagehash v1.1.0 // indirect
|
||||||
github.com/fatih/color v1.16.0 // indirect
|
github.com/fatih/color v1.16.0 // indirect
|
||||||
github.com/fsnotify/fsnotify v1.6.0 // indirect
|
github.com/fsnotify/fsnotify v1.6.0 // indirect
|
||||||
|
github.com/inconshreveable/mousetrap v1.1.0 // indirect
|
||||||
github.com/knadh/koanf/maps v0.1.1 // indirect
|
github.com/knadh/koanf/maps v0.1.1 // indirect
|
||||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||||
github.com/mitchellh/copystructure v1.2.0 // indirect
|
github.com/mitchellh/copystructure v1.2.0 // indirect
|
||||||
|
|||||||
2
go.sum
2
go.sum
@@ -15,6 +15,8 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
|
|||||||
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
|
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
|
||||||
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
|
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
|
||||||
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
|
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
|
||||||
|
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
|
||||||
|
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
|
||||||
github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww=
|
github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww=
|
||||||
github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg=
|
github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg=
|
||||||
github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs=
|
github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs=
|
||||||
|
|||||||
4
image.go
4
image.go
@@ -49,7 +49,7 @@ func (img *Image) LogValue() slog.Value {
|
|||||||
// holds all images of an ad
|
// holds all images of an ad
|
||||||
type Cache []*goimagehash.ImageHash
|
type Cache []*goimagehash.ImageHash
|
||||||
|
|
||||||
func NewImage(buf *bytes.Reader, filename string, uri string) *Image {
|
func NewImage(buf *bytes.Reader, filename, uri string) *Image {
|
||||||
img := &Image{
|
img := &Image{
|
||||||
Filename: filename,
|
Filename: filename,
|
||||||
URI: uri,
|
URI: uri,
|
||||||
@@ -134,7 +134,7 @@ func ReadImages(addir string, dont bool) (Cache, error) {
|
|||||||
reader := bytes.NewReader(data.Bytes())
|
reader := bytes.NewReader(data.Bytes())
|
||||||
|
|
||||||
img := NewImage(reader, filename, "")
|
img := NewImage(reader, filename, "")
|
||||||
if err = img.CalcHash(); err != nil {
|
if err := img.CalcHash(); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
27
main.go
27
main.go
@@ -18,13 +18,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
|
"runtime"
|
||||||
"runtime/debug"
|
"runtime/debug"
|
||||||
|
|
||||||
|
"github.com/inconshreveable/mousetrap"
|
||||||
"github.com/lmittmann/tint"
|
"github.com/lmittmann/tint"
|
||||||
"github.com/tlinden/yadu"
|
"github.com/tlinden/yadu"
|
||||||
)
|
)
|
||||||
@@ -35,6 +38,25 @@ func main() {
|
|||||||
os.Exit(Main(os.Stdout))
|
os.Exit(Main(os.Stdout))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
// if we're running on Windows AND if the user double clicked the
|
||||||
|
// exe file from explorer, we tell them and then wait until any
|
||||||
|
// key has been hit, which will make the cmd window disappear and
|
||||||
|
// thus give the user time to read it.
|
||||||
|
if runtime.GOOS == "windows" {
|
||||||
|
if mousetrap.StartedByExplorer() {
|
||||||
|
fmt.Println("Do no double click kleingebaeck.exe!")
|
||||||
|
fmt.Println("Please open a command shell and run it from there.")
|
||||||
|
fmt.Println()
|
||||||
|
fmt.Print("Press any key to quit: ")
|
||||||
|
_, err := bufio.NewReader(os.Stdin).ReadString('\n')
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func Main(output io.Writer) int {
|
func Main(output io.Writer) int {
|
||||||
logLevel := &slog.LevelVar{}
|
logLevel := &slog.LevelVar{}
|
||||||
opts := &tint.Options{
|
opts := &tint.Options{
|
||||||
@@ -116,11 +138,6 @@ func Main(output io.Writer) int {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return Die(err)
|
return Die(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = Mkdir(outdir)
|
|
||||||
if err != nil {
|
|
||||||
return Die(err)
|
|
||||||
}
|
|
||||||
conf.Outdir = outdir
|
conf.Outdir = outdir
|
||||||
|
|
||||||
// used for all HTTP requests
|
// used for all HTTP requests
|
||||||
|
|||||||
26
main_test.go
26
main_test.go
@@ -334,14 +334,14 @@ type Adsource struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Render a HTML template for an adlisting or an ad
|
// Render a HTML template for an adlisting or an ad
|
||||||
func GetTemplate(adconfigs []AdConfig, adconfig AdConfig, htmltemplate string) string {
|
func GetTemplate(adconfigs []AdConfig, adconfig *AdConfig, htmltemplate string) string {
|
||||||
tmpl, err := tpl.New("template").Parse(htmltemplate)
|
tmpl, err := tpl.New("template").Parse(htmltemplate)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
var out bytes.Buffer
|
var out bytes.Buffer
|
||||||
if len(adconfig.ID) == 0 {
|
if adconfig.ID == "" {
|
||||||
err = tmpl.Execute(&out, adconfigs)
|
err = tmpl.Execute(&out, adconfigs)
|
||||||
} else {
|
} else {
|
||||||
err = tmpl.Execute(&out, adconfig)
|
err = tmpl.Execute(&out, adconfig)
|
||||||
@@ -376,15 +376,15 @@ func InitValidSources() []Adsource {
|
|||||||
ads := []Adsource{
|
ads := []Adsource{
|
||||||
{
|
{
|
||||||
uri: fmt.Sprintf("%s%s?userId=1", Baseuri, Listuri),
|
uri: fmt.Sprintf("%s%s?userId=1", Baseuri, Listuri),
|
||||||
content: GetTemplate(list1, empty, LISTTPL),
|
content: GetTemplate(list1, &empty, LISTTPL),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
uri: fmt.Sprintf("%s%s?userId=1&pageNum=2", Baseuri, Listuri),
|
uri: fmt.Sprintf("%s%s?userId=1&pageNum=2", Baseuri, Listuri),
|
||||||
content: GetTemplate(list2, empty, LISTTPL),
|
content: GetTemplate(list2, &empty, LISTTPL),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
uri: fmt.Sprintf("%s%s?userId=1&pageNum=3", Baseuri, Listuri),
|
uri: fmt.Sprintf("%s%s?userId=1&pageNum=3", Baseuri, Listuri),
|
||||||
content: GetTemplate(list3, empty, LISTTPL),
|
content: GetTemplate(list3, &empty, LISTTPL),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -392,7 +392,7 @@ func InitValidSources() []Adsource {
|
|||||||
for _, ad := range adsrc {
|
for _, ad := range adsrc {
|
||||||
ads = append(ads, Adsource{
|
ads = append(ads, Adsource{
|
||||||
uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.ID),
|
uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.ID),
|
||||||
content: GetTemplate(nil, ad, ADTPL),
|
content: GetTemplate(nil, &ad, ADTPL),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -405,28 +405,28 @@ func InitInvalidSources() []Adsource {
|
|||||||
{
|
{
|
||||||
// valid ad page but without content
|
// valid ad page but without content
|
||||||
uri: fmt.Sprintf("%s/s-anzeige/empty/1", Baseuri),
|
uri: fmt.Sprintf("%s/s-anzeige/empty/1", Baseuri),
|
||||||
content: GetTemplate(nil, empty, EMPTYPAGE),
|
content: GetTemplate(nil, &empty, EMPTYPAGE),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// some random foreign webpage
|
// some random foreign webpage
|
||||||
uri: INVALIDURI,
|
uri: INVALIDURI,
|
||||||
content: GetTemplate(nil, empty, "<html>foo</html>"),
|
content: GetTemplate(nil, &empty, "<html>foo</html>"),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// some invalid page path
|
// some invalid page path
|
||||||
uri: fmt.Sprintf("%s/anzeige/name/1", Baseuri),
|
uri: fmt.Sprintf("%s/anzeige/name/1", Baseuri),
|
||||||
content: GetTemplate(nil, empty, "<html></html>"),
|
content: GetTemplate(nil, &empty, "<html></html>"),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// some none-ad page
|
// some none-ad page
|
||||||
uri: fmt.Sprintf("%s/anzeige/name/1/foo/bar", Baseuri),
|
uri: fmt.Sprintf("%s/anzeige/name/1/foo/bar", Baseuri),
|
||||||
content: GetTemplate(nil, empty, "<html>HTTP 404: /eine-anzeige/ does not exist!</html>"),
|
content: GetTemplate(nil, &empty, "<html>HTTP 404: /eine-anzeige/ does not exist!</html>"),
|
||||||
status: 404,
|
status: 404,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
// valid ad page but 503
|
// valid ad page but 503
|
||||||
uri: fmt.Sprintf("%s/s-anzeige/503/1", Baseuri),
|
uri: fmt.Sprintf("%s/s-anzeige/503/1", Baseuri),
|
||||||
content: GetTemplate(nil, empty, "<html>HTTP 503: service unavailable</html>"),
|
content: GetTemplate(nil, &empty, "<html>HTTP 503: service unavailable</html>"),
|
||||||
status: 503,
|
status: 503,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
@@ -465,7 +465,7 @@ func SetIntercept(ads []Adsource) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func VerifyAd(advertisement AdConfig) error {
|
func VerifyAd(advertisement *AdConfig) error {
|
||||||
body := advertisement.Title + advertisement.Price + advertisement.ID + "Kleinanzeigen => " +
|
body := advertisement.Title + advertisement.Price + advertisement.ID + "Kleinanzeigen => " +
|
||||||
advertisement.Category + advertisement.Condition + advertisement.Created
|
advertisement.Category + advertisement.Condition + advertisement.Created
|
||||||
|
|
||||||
@@ -525,7 +525,7 @@ func TestMain(t *testing.T) {
|
|||||||
|
|
||||||
// verify if downloaded ads match
|
// verify if downloaded ads match
|
||||||
for _, ad := range adsrc {
|
for _, ad := range adsrc {
|
||||||
if err := VerifyAd(ad); err != nil {
|
if err := VerifyAd(&ad); err != nil {
|
||||||
t.Errorf(err.Error())
|
t.Errorf(err.Error())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
7
mkrel.sh
7
mkrel.sh
@@ -22,7 +22,12 @@ freebsd/amd64
|
|||||||
linux/amd64
|
linux/amd64
|
||||||
netbsd/amd64
|
netbsd/amd64
|
||||||
openbsd/amd64
|
openbsd/amd64
|
||||||
windows/amd64"
|
windows/amd64
|
||||||
|
freebsd/arm64
|
||||||
|
linux/arm64
|
||||||
|
netbsd/arm64
|
||||||
|
openbsd/arm64
|
||||||
|
windows/arm64"
|
||||||
|
|
||||||
tool="$1"
|
tool="$1"
|
||||||
version="$2"
|
version="$2"
|
||||||
|
|||||||
15
scrape.go
15
scrape.go
@@ -126,21 +126,32 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
|
|||||||
|
|
||||||
advertisement.CalculateExpire()
|
advertisement.CalculateExpire()
|
||||||
|
|
||||||
proceed := CheckAdVisited(fetch.Config, advertisement.Slug)
|
// prepare ad dir name
|
||||||
|
addir, err := AdDirName(fetch.Config, advertisement)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
proceed := CheckAdVisited(fetch.Config, addir)
|
||||||
if !proceed {
|
if !proceed {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// write listing
|
// write listing
|
||||||
addir, err := WriteAd(fetch.Config, advertisement)
|
err = WriteAd(fetch.Config, advertisement, addir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// tell the user
|
||||||
slog.Debug("extracted ad listing", "ad", advertisement)
|
slog.Debug("extracted ad listing", "ad", advertisement)
|
||||||
|
|
||||||
|
// stats
|
||||||
fetch.Config.IncrAds()
|
fetch.Config.IncrAds()
|
||||||
|
|
||||||
|
// register for later checks
|
||||||
|
DirsVisited[addir] = 1
|
||||||
|
|
||||||
return ScrapeImages(fetch, advertisement, addir)
|
return ScrapeImages(fetch, advertisement, addir)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
23
store.go
23
store.go
@@ -72,19 +72,13 @@ func AdDirName(conf *Config, advertisement *Ad) (string, error) {
|
|||||||
return buf.String(), nil
|
return buf.String(), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteAd(conf *Config, advertisement *Ad) (string, error) {
|
func WriteAd(conf *Config, advertisement *Ad, addir string) error {
|
||||||
// prepare ad dir name
|
|
||||||
addir, err := AdDirName(conf, advertisement)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
|
|
||||||
// prepare output dir
|
// prepare output dir
|
||||||
dir := filepath.Join(conf.Outdir, addir)
|
dir := filepath.Join(conf.Outdir, addir)
|
||||||
|
|
||||||
err = Mkdir(dir)
|
err := Mkdir(dir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// write ad file
|
// write ad file
|
||||||
@@ -92,7 +86,7 @@ func WriteAd(conf *Config, advertisement *Ad) (string, error) {
|
|||||||
|
|
||||||
listingfd, err := os.Create(listingfile)
|
listingfd, err := os.Create(listingfile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("failed to create Adlisting.txt: %w", err)
|
return fmt.Errorf("failed to create Adlisting.txt: %w", err)
|
||||||
}
|
}
|
||||||
defer listingfd.Close()
|
defer listingfd.Close()
|
||||||
|
|
||||||
@@ -104,17 +98,17 @@ func WriteAd(conf *Config, advertisement *Ad) (string, error) {
|
|||||||
|
|
||||||
tmpl, err := tpl.New("adlisting").Parse(conf.Template)
|
tmpl, err := tpl.New("adlisting").Parse(conf.Template)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("failed to parse adlisting template: %w", err)
|
return fmt.Errorf("failed to parse adlisting template: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
err = tmpl.Execute(listingfd, advertisement)
|
err = tmpl.Execute(listingfd, advertisement)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("failed to execute adlisting template: %w", err)
|
return fmt.Errorf("failed to execute adlisting template: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Info("wrote ad listing", "listingfile", listingfile)
|
slog.Info("wrote ad listing", "listingfile", listingfile)
|
||||||
|
|
||||||
return addir, nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteImage(filename string, reader *bytes.Reader) error {
|
func WriteImage(filename string, reader *bytes.Reader) error {
|
||||||
@@ -176,9 +170,6 @@ func CheckAdVisited(conf *Config, adname string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// register
|
|
||||||
DirsVisited[adname] = 1
|
|
||||||
|
|
||||||
// overwrite
|
// overwrite
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|||||||
2
util.go
2
util.go
@@ -32,7 +32,7 @@ import (
|
|||||||
|
|
||||||
func Mkdir(dir string) error {
|
func Mkdir(dir string) error {
|
||||||
if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) {
|
if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) {
|
||||||
err := os.Mkdir(dir, os.ModePerm)
|
err := os.MkdirAll(dir, os.ModePerm)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to create directory %s: %w", dir, err)
|
return fmt.Errorf("failed to create directory %s: %w", dir, err)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user