From 12329f6ae0f795abfcee1809745d1024659f60aa Mon Sep 17 00:00:00 2001 From: Thomas von Dein Date: Thu, 14 Dec 2023 19:00:04 +0100 Subject: [PATCH] initial commit --- Makefile | 88 ++++++++++++++++++++++++ README.md | 26 +++++++ go.mod | 12 ++++ go.sum | 19 +++++ kleingebaeck.1 | 143 ++++++++++++++++++++++++++++++++++++++ kleingebaeck.go | 7 ++ kleingebaeck.pod | 5 ++ main.go | 96 ++++++++++++++++++++++++++ scrape.go | 175 +++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 571 insertions(+) create mode 100644 Makefile create mode 100644 README.md create mode 100644 go.mod create mode 100644 go.sum create mode 100644 kleingebaeck.1 create mode 100644 kleingebaeck.go create mode 100644 kleingebaeck.pod create mode 100644 main.go create mode 100644 scrape.go diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2cbebc2 --- /dev/null +++ b/Makefile @@ -0,0 +1,88 @@ +# Copyright © 2023 Thomas von Dein + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. 
+
+
+#
+# no need to modify anything below
+tool = kleingebaeck
+VERSION = $(shell grep VERSION main.go | head -1 | cut -d '"' -f2)
+archs = darwin freebsd linux windows
+PREFIX = /usr/local
+UID = root
+GID = 0
+HAVE_POD := $(shell pod2text -h 2>/dev/null)
+
+# none of these targets produce a file of the same name
+.PHONY: all buildlocal install clean test testfuzzy singletest cover-report goupdate
+
+all: $(tool).1 $(tool).go buildlocal
+
+# regenerate the manpage from the pod source (only if pod2man is available)
+%.1: %.pod
+ifdef HAVE_POD
+	pod2man -c "User Commands" -r 1 -s 1 $*.pod > $*.1
+endif
+
+# embed the rendered manual into a go source file as the variable "manpage"
+%.go: %.pod
+ifdef HAVE_POD
+	echo "package main" > $*.go
+	echo >> $*.go
+	echo "var manpage = \`" >> $*.go
+	pod2text $*.pod >> $*.go
+	echo "\`" >> $*.go
+endif
+
+buildlocal:
+	CGO_LDFLAGS='-static' go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o $(tool)
+
+# install binary and manpage below $(PREFIX); the binary goes into the
+# bin directory created on the first line (it used to be copied to
+# sbin/, which was never created)
+install: buildlocal
+	install -d -o $(UID) -g $(GID) $(PREFIX)/bin
+	install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1
+	install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/bin/
+	install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/
+
+clean:
+	rm -rf $(tool) coverage.out testdata
+
+test: clean
+	go test ./... $(ARGS)
+
+testfuzzy: clean
+	go test -fuzz ./... $(ARGS)
+
+singletest:
+	@echo "Call like this: make singletest TEST=TestPrepareColumns ARGS=-v"
+	go test -run $(TEST) $(ARGS)
+
+cover-report:
+	go test ./... -cover -coverprofile=coverage.out
+	go tool cover -html=coverage.out
+
+goupdate:
+	go get -t -u=patch ./...
+ +buildall: + ./mkrel.sh $(tool) $(VERSION) + +release: buildall + gh release create v$(VERSION) --generate-notes releases/* + +show-versions: buildlocal + @echo "### kleingebaeck version:" + @./kleingebaeck -v + + @echo + @echo "### go module versions:" + @go list -m all + + @echo + @echo "### go version used for building:" + @grep -m 1 go go.mod diff --git a/README.md b/README.md new file mode 100644 index 0000000..0452e4d --- /dev/null +++ b/README.md @@ -0,0 +1,26 @@ +## kleinanzeigen.de Backup + +[![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://github.com/tlinden/kleingebaeck/blob/master/LICENSE) +[![Go Report Card](https://goreportcard.com/badge/github.com/tlinden/kleingebaeck)](https://goreportcard.com/report/github.com/tlinden/kleingebaeck) + +Mit diesem kleinen aber feinen Tool kann man seine +[Anzeigen bei kleinanzeigen.de](https://kleinanzeigen.de) sichern. Das +Problem ist ja bekanntlich, dass Kleinanzeigen nach einer Weile (2 +Monate?) automatisch gelöscht werden. Wenn man keine Sicherung hat, +wird es schwierig, die erneut einzustellen. Mit dem Tool braucht man +sich keine Texte zu merken. Man kann auch einfach Änderungen +(z.B. Preis runter) durchführen oder den Text anpassen und dann ein +neues Backup anfertigen. + +Es wird pro Anzeige ein Verzeichnis erstellt. In der Datei +`Anzeige.txt` werden der Titel, die Beschreibung sowie der Preis +eingetragen. Außerdem werden alle Bilder heruntergeladen. + +## Copyright und Lizenz + +Lizenziert unter der GNU GENERAL PUBLIC LICENSE version 3. 
+ +## Author + +T.v.Dein + diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..b7ee371 --- /dev/null +++ b/go.mod @@ -0,0 +1,12 @@ +module kleingebaeck + +go 1.20 + +require ( + astuart.co/goq v1.0.0 // indirect + github.com/PuerkitoBio/goquery v1.5.0 // indirect + github.com/andybalholm/cascadia v1.0.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect + +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..8571d31 --- /dev/null +++ b/go.sum @@ -0,0 +1,19 @@ +astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw= +astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno= +github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= +github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= +github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= +github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190606173856-1492cefac77f 
h1:IWHgpgFqnL5AhBUBZSgBdjl2vkQUEzcY+JNKWfcgAU0= +golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/kleingebaeck.1 b/kleingebaeck.1 new file mode 100644 index 0000000..2f5056a --- /dev/null +++ b/kleingebaeck.1 @@ -0,0 +1,143 @@ +.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +. ds C` +. ds C' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is >0, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. 
Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.\" +.\" Avoid warning from groff about undefined register 'F'. +.de IX +.. +.nr rF 0 +.if \n(.g .if rF .nr rF 1 +.if (\n(rF:(\n(.g==0)) \{\ +. if \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. if !\nF==2 \{\ +. nr % 0 +. nr F 2 +. \} +. \} +.\} +.rr rF +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. 
ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "KLEINGEBAECK 1" +.TH KLEINGEBAECK 1 "2023-12-14" "1" "User Commands" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SS "kleingebaeck" +.IX Subsection "kleingebaeck" +Backup of kleinanzeigen.de diff --git a/kleingebaeck.go b/kleingebaeck.go new file mode 100644 index 0000000..ea296d1 --- /dev/null +++ b/kleingebaeck.go @@ -0,0 +1,7 @@ +package main + +var manpage = ` + kleingebaeck + Backup of kleinanzeigen.de + +` diff --git a/kleingebaeck.pod b/kleingebaeck.pod new file mode 100644 index 0000000..8c66148 --- /dev/null +++ b/kleingebaeck.pod @@ -0,0 +1,5 @@ +=head2 kleingebaeck + +Backup of kleinanzeigen.de + +=cut diff --git a/main.go b/main.go new file mode 100644 index 0000000..d6dfed4 --- /dev/null +++ b/main.go @@ -0,0 +1,96 @@ +/* +Copyright © 2023 Thomas von Dein + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+*/
+
+package main
+
+import (
+	"errors"
+	"fmt"
+	"os"
+
+	flag "github.com/spf13/pflag"
+)
+
+// Program version. The Makefile greps this file for the constant's
+// name and uses the first hit, so never spell that name in a comment
+// above this line.
+const VERSION string = "0.0.1"
+
+// Useragent is sent as User-Agent header with every page request.
+const Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+
+// Baseuri is the site root; ad links are appended to it.
+const Baseuri string = "https://www.kleinanzeigen.de"
+
+// Listuri is the path of a user's ad listing, relative to Baseuri.
+const Listuri string = "/s-bestandsliste.html"
+
+// Defaultdir is the output directory used when -o is not given.
+const Defaultdir string = "."
+
+func main() {
+	os.Exit(Main())
+}
+
+// Main parses the command line, makes sure the output directory exists
+// and backs up the ads of the user id given as the only positional
+// argument. It returns the process exit code: 0 on success, 1 if any
+// step failed.
+func Main() int {
+	showversion := false
+	showhelp := false
+	showmanual := false
+	enabledebug := false
+	configfile := ""
+	dir := Defaultdir
+
+	// NOTE: --debug and --config are accepted but not acted upon yet.
+	flag.BoolVarP(&enabledebug, "debug", "d", false, "debug mode")
+	flag.BoolVarP(&showversion, "version", "v", false, "show version")
+	flag.BoolVarP(&showhelp, "help", "h", false, "show usage")
+	flag.BoolVarP(&showmanual, "manual", "m", false, "show manual")
+	flag.StringVarP(&dir, "output-dir", "o", dir, "where to store ads")
+	flag.StringVarP(&configfile, "config", "c",
+		os.Getenv("HOME")+"/.kleingebaeck", "config file")
+
+	flag.Parse()
+
+	if showversion {
+		fmt.Printf("This is kleingebaeck version %s\n", VERSION)
+		return 0
+	}
+
+	// "help" is registered explicitly above, so pflag does not print
+	// the usage on its own; do it here.
+	if showhelp {
+		flag.Usage()
+		return 0
+	}
+
+	// manpage is the rendered manual generated into kleingebaeck.go
+	if showmanual {
+		fmt.Print(manpage)
+		return 0
+	}
+
+	// MkdirAll so that a nested output path like "backup/ads" works
+	if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) {
+		if err := os.MkdirAll(dir, os.ModePerm); err != nil {
+			return Die(err)
+		}
+	}
+
+	if len(flag.Args()) == 1 {
+		// a failed backup must be visible in the exit code
+		if err := Start(flag.Args()[0], dir); err != nil {
+			return Die(err)
+		}
+	}
+
+	return 0
+}
+
+// Die reports err on stderr and returns the exit code 1, so callers
+// can write "return Die(err)".
+func Die(err error) int {
+	fmt.Fprintln(os.Stderr, err)
+	return 1
+}
diff --git a/scrape.go b/scrape.go
new file mode 100644
index 0000000..598924a
--- /dev/null
+++ b/scrape.go
@@ -0,0 +1,175 @@
+/*
+Copyright © 2023 Thomas von Dein
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package main
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"astuart.co/goq"
+)
+
+// Index holds the ad links found on one page of a user's ad listing.
+type Index struct {
+	Links []string `goquery:".text-module-begin a,[href]"`
+}
+
+// Get fetches uri using client and sends the configured User-Agent.
+// On success the caller owns the returned body and must close it. The
+// HTTP status is intentionally not checked here: the pagination loop
+// in Start ends when a page decodes to zero links, whatever its
+// status was.
+func Get(uri string, client *http.Client) (io.ReadCloser, error) {
+	req, err := http.NewRequest("GET", uri, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	req.Header.Set("User-Agent", Useragent)
+
+	res, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+
+	return res.Body, nil
+}
+
+// fetchIndex downloads one listing page and returns the ad links on
+// it. The response body is closed before returning, so paging through
+// many listing pages does not pile up open connections.
+func fetchIndex(uri string, client *http.Client) ([]string, error) {
+	body, err := Get(uri, client)
+	if err != nil {
+		return nil, err
+	}
+	defer body.Close()
+
+	var index Index
+	if err := goq.NewDecoder(body).Decode(&index); err != nil {
+		return nil, err
+	}
+
+	return index.Links, nil
+}
+
+// Start collects the ad links from all listing pages of user uid
+// (following the pagination until a page yields no links), prints
+// each link and then scrapes every ad into dir. It stops at the first
+// error.
+func Start(uid string, dir string) error {
+	client := &http.Client{}
+	var ads []string
+
+	// uid comes straight from the command line, escape it
+	baseuri := Baseuri + Listuri + "?userId=" + url.QueryEscape(uid)
+
+	for page := 1; ; page++ {
+		// the first page is requested without a pageNum parameter
+		uri := baseuri
+		if page > 1 {
+			uri = fmt.Sprintf("%s&pageNum=%d", baseuri, page)
+		}
+
+		links, err := fetchIndex(uri, client)
+		if err != nil {
+			return err
+		}
+
+		if len(links) == 0 {
+			break
+		}
+
+		for _, href := range links {
+			ads = append(ads, href)
+			fmt.Println(href)
+		}
+	}
+
+	// scrape every ad, not just the first one
+	for _, ad := range ads {
+		if err := Scrape(ad, dir); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Ad holds the fields extracted from a single ad page. Text receives
+// the inner HTML of the description element.
+type Ad struct {
+	Title  string   `goquery:"h1"`
+	Text   string   `goquery:"p#viewad-description-text,html"`
+	Images []string `goquery:".galleryimage-element img,[src]"`
+	Price  string   `goquery:"h2#viewad-price"`
+}
+
+// adSlug derives the name of the per-ad directory from a site-relative
+// ad link. It assumes links look like /s-anzeige/<slug>/<id> and uses
+// the second path segment (TODO confirm against live listings); links
+// with a single segment fall back to that segment. Names that are
+// empty or would escape the output directory are rejected, since the
+// link comes from a downloaded page.
+func adSlug(link string) (string, error) {
+	u, err := url.Parse(link)
+	if err != nil {
+		return "", fmt.Errorf("parsing ad link %q: %w", link, err)
+	}
+
+	parts := strings.Split(strings.Trim(u.Path, "/"), "/")
+	slug := parts[len(parts)-1]
+	if len(parts) > 1 {
+		slug = parts[1]
+	}
+
+	if slug == "" || slug == "." || slug == ".." || slug != filepath.Base(slug) {
+		return "", fmt.Errorf("cannot derive a directory name from ad link %q", link)
+	}
+
+	return slug, nil
+}
+
+// writeAd stores title, price and text of ad in the file path and
+// closes the file; a failing Close is reported if the write itself
+// succeeded.
+func writeAd(path string, ad Ad) (err error) {
+	f, err := os.Create(path)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		if cerr := f.Close(); err == nil {
+			err = cerr
+		}
+	}()
+
+	_, err = fmt.Fprintf(f, "Title: %s\nPrice: %s\n\n%s", ad.Title, ad.Price, ad.Text)
+	return err
+}
+
+// Scrape downloads the ad page behind link (a path relative to
+// Baseuri) and stores it in its own sub directory of dir: the file
+// Anzeige.txt with title, price and description, plus all gallery
+// images as 1.jpg, 2.jpg and so on.
+func Scrape(link string, dir string) error {
+	slug, err := adSlug(link)
+	if err != nil {
+		return err
+	}
+
+	client := &http.Client{}
+	body, err := Get(Baseuri+link, client)
+	if err != nil {
+		return err
+	}
+	defer body.Close()
+
+	var ad Ad
+	if err := goq.NewDecoder(body).Decode(&ad); err != nil {
+		return err
+	}
+
+	// one directory per ad, otherwise every ad overwrites the last one
+	addir := filepath.Join(dir, slug)
+	if err := os.MkdirAll(addir, os.ModePerm); err != nil {
+		return err
+	}
+
+	// the description is inner HTML, turn line break tags into newlines
+	ad.Text = strings.ReplaceAll(ad.Text, "<br/>", "\n")
+	if err := writeAd(filepath.Join(addir, "Anzeige.txt"), ad); err != nil {
+		return err
+	}
+
+	for i, imguri := range ad.Images {
+		file := filepath.Join(addir, fmt.Sprintf("%d.jpg", i+1))
+		if err := Getimage(imguri, file); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Getimage downloads uri and writes the response body to fileName.
+// Any status other than 200 is an error and no file is created.
+func Getimage(uri, fileName string) error {
+	response, err := http.Get(uri)
+	if err != nil {
+		return err
+	}
+	defer response.Body.Close()
+
+	if response.StatusCode != 200 {
+		return errors.New("received non 200 response code")
+	}
+
+	file, err := os.Create(fileName)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	_, err = io.Copy(file, response.Body)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}