diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..2cbebc2
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,97 @@
+# Copyright © 2023 Thomas von Dein
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+
+#
+# no need to modify anything below
+tool = kleingebaeck
+# expanded once at parse time: text between the first pair of double quotes on the first line of main.go that matches
+VERSION := $(shell grep VERSION main.go | head -1 | cut -d '"' -f2)
+archs = darwin freebsd linux windows
+PREFIX = /usr/local
+UID = root
+GID = 0
+# output of `pod2text -h`; empty when pod2text cannot be run, in which case the doc rules below get no recipe
+HAVE_POD := $(shell pod2text -h 2>/dev/null)
+
+# command targets, none of them produces a file of the same name
+.PHONY: all buildlocal install clean test testfuzzy singletest \
+        cover-report goupdate buildall release show-versions
+
+all: $(tool).1 $(tool).go buildlocal
+
+# render the man page from the POD source
+%.1: %.pod
+ifdef HAVE_POD
+	pod2man -c "User Commands" -r 1 -s 1 $*.pod > $*.1
+endif
+
+# wrap the plain text manual into a go source file (variable manpage)
+%.go: %.pod
+ifdef HAVE_POD
+	echo "package main" > $*.go
+	echo >> $*.go
+	echo "var manpage = \`" >> $*.go
+	pod2text $*.pod >> $*.go
+	echo "\`" >> $*.go
+endif
+
+buildlocal:
+	CGO_LDFLAGS='-static' go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o $(tool)
+
+# the binary goes into the bin directory created by the first recipe line
+install: buildlocal
+	install -d -o $(UID) -g $(GID) $(PREFIX)/bin
+	install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1
+	install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/bin/
+	install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/
+
+clean:
+	rm -rf $(tool) coverage.out testdata
+
+test: clean
+	go test ./... $(ARGS)
+
+testfuzzy: clean
+	go test -fuzz ./... $(ARGS)
+
+singletest:
+	@echo "Call like this: make singletest TEST=TestPrepareColumns ARGS=-v"
+	go test -run $(TEST) $(ARGS)
+
+cover-report:
+	go test ./... -cover -coverprofile=coverage.out
+	go tool cover -html=coverage.out
+
+goupdate:
+	go get -t -u=patch ./...
+
+buildall:
+	./mkrel.sh $(tool) $(VERSION)
+
+release: buildall
+	gh release create v$(VERSION) --generate-notes releases/*
+
+show-versions: buildlocal
+	@echo "### kleingebaeck version:"
+	@./kleingebaeck -v
+
+	@echo
+	@echo "### go module versions:"
+	@go list -m all
+
+	@echo
+	@echo "### go version used for building:"
+	@grep -m 1 go go.mod
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0452e4d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,26 @@
+## kleinanzeigen.de Backup
+
+[![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://github.com/tlinden/kleingebaeck/blob/master/LICENSE)
+[![Go Report Card](https://goreportcard.com/badge/github.com/tlinden/kleingebaeck)](https://goreportcard.com/report/github.com/tlinden/kleingebaeck)
+
+Mit diesem kleinen aber feinen Tool kann man seine
+[Anzeigen bei kleinanzeigen.de](https://kleinanzeigen.de) sichern. Das
+Problem ist ja bekanntlich, dass Kleinanzeigen nach einer Weile (2
+Monate?) automatisch gelöscht werden. Wenn man keine Sicherung hat,
+wird es schwierig, die erneut einzustellen. Mit dem Tool braucht man
+sich keine Texte zu merken. Man kann auch einfach Änderungen
+(z.B. Preis runter) durchführen oder den Text anpassen und dann ein
+neues Backup anfertigen.
+
+Es wird pro Anzeige ein Verzeichnis erstellt. In der Datei
+`Anzeige.txt` werden der Titel, die Beschreibung sowie der Preis
+eingetragen. Ausserdem werden alle Bilder heruntergeladen.
+
+## Copyright und Lizenz
+
+Lizenziert unter der GNU GENERAL PUBLIC LICENSE version 3.
+ +## Author + +T.v.Dein + diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..b7ee371 --- /dev/null +++ b/go.mod @@ -0,0 +1,12 @@ +module kleingebaeck + +go 1.20 + +require ( + astuart.co/goq v1.0.0 // indirect + github.com/PuerkitoBio/goquery v1.5.0 // indirect + github.com/andybalholm/cascadia v1.0.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect + +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..8571d31 --- /dev/null +++ b/go.sum @@ -0,0 +1,19 @@ +astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw= +astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno= +github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= +github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= +github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= +github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190606173856-1492cefac77f 
h1:IWHgpgFqnL5AhBUBZSgBdjl2vkQUEzcY+JNKWfcgAU0= +golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/kleingebaeck.1 b/kleingebaeck.1 new file mode 100644 index 0000000..2f5056a --- /dev/null +++ b/kleingebaeck.1 @@ -0,0 +1,143 @@ +.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42) +.\" +.\" Standard preamble: +.\" ======================================================================== +.de Sp \" Vertical space (when we can't use .PP) +.if t .sp .5v +.if n .sp +.. +.de Vb \" Begin verbatim text +.ft CW +.nf +.ne \\$1 +.. +.de Ve \" End verbatim text +.ft R +.fi +.. +.\" Set up some character translations and predefined strings. \*(-- will +.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left +.\" double quote, and \*(R" will give a right double quote. \*(C+ will +.\" give a nicer C++. Capital omega is used to do unbreakable dashes and +.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, +.\" nothing in troff, for use with C<>. +.tr \(*W- +.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' +.ie n \{\ +. ds -- \(*W- +. ds PI pi +. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch +. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch +. ds L" "" +. ds R" "" +. ds C` "" +. ds C' "" +'br\} +.el\{\ +. ds -- \|\(em\| +. ds PI \(*p +. ds L" `` +. ds R" '' +. ds C` +. ds C' +'br\} +.\" +.\" Escape single quotes in literal strings from groff's Unicode transform. +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.\" +.\" If the F register is >0, we'll generate index entries on stderr for +.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index +.\" entries marked with X<> in POD. 
Of course, you'll have to process the +.\" output yourself in some meaningful fashion. +.\" +.\" Avoid warning from groff about undefined register 'F'. +.de IX +.. +.nr rF 0 +.if \n(.g .if rF .nr rF 1 +.if (\n(rF:(\n(.g==0)) \{\ +. if \nF \{\ +. de IX +. tm Index:\\$1\t\\n%\t"\\$2" +.. +. if !\nF==2 \{\ +. nr % 0 +. nr F 2 +. \} +. \} +.\} +.rr rF +.\" +.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). +.\" Fear. Run. Save yourself. No user-serviceable parts. +. \" fudge factors for nroff and troff +.if n \{\ +. ds #H 0 +. ds #V .8m +. ds #F .3m +. ds #[ \f1 +. ds #] \fP +.\} +.if t \{\ +. ds #H ((1u-(\\\\n(.fu%2u))*.13m) +. ds #V .6m +. ds #F 0 +. ds #[ \& +. ds #] \& +.\} +. \" simple accents for nroff and troff +.if n \{\ +. ds ' \& +. ds ` \& +. ds ^ \& +. ds , \& +. ds ~ ~ +. ds / +.\} +.if t \{\ +. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" +. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' +. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' +. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' +. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' +. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' +.\} +. \" troff and (daisy-wheel) nroff accents +.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' +.ds 8 \h'\*(#H'\(*b\h'-\*(#H' +.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] +.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' +.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' +.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] +.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] +.ds ae a\h'-(\w'a'u*4/10)'e +.ds Ae A\h'-(\w'A'u*4/10)'E +. \" corrections for vroff +.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' +.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' +. \" for low resolution devices (crt and lpr) +.if \n(.H>23 .if \n(.V>19 \ +\{\ +. ds : e +. ds 8 ss +. ds o a +. 
ds d- d\h'-1'\(ga +. ds D- D\h'-1'\(hy +. ds th \o'bp' +. ds Th \o'LP' +. ds ae ae +. ds Ae AE +.\} +.rm #[ #] #H #V #F C +.\" ======================================================================== +.\" +.IX Title "KLEINGEBAECK 1" +.TH KLEINGEBAECK 1 "2023-12-14" "1" "User Commands" +.\" For nroff, turn off justification. Always turn off hyphenation; it makes +.\" way too many mistakes in technical documents. +.if n .ad l +.nh +.SS "kleingebaeck" +.IX Subsection "kleingebaeck" +Backup of kleinanzeigen.de diff --git a/kleingebaeck.go b/kleingebaeck.go new file mode 100644 index 0000000..ea296d1 --- /dev/null +++ b/kleingebaeck.go @@ -0,0 +1,7 @@ +package main + +var manpage = ` + kleingebaeck + Backup of kleinanzeigen.de + +` diff --git a/kleingebaeck.pod b/kleingebaeck.pod new file mode 100644 index 0000000..8c66148 --- /dev/null +++ b/kleingebaeck.pod @@ -0,0 +1,5 @@ +=head2 kleingebaeck + +Backup of kleinanzeigen.de + +=cut diff --git a/main.go b/main.go new file mode 100644 index 0000000..d6dfed4 --- /dev/null +++ b/main.go @@ -0,0 +1,96 @@ +/* +Copyright © 2023 Thomas von Dein + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+*/
+
+package main
+
+import (
+	"errors"
+	"fmt"
+	"os"
+
+	flag "github.com/spf13/pflag"
+)
+
+const VERSION string = "0.0.1" // the Makefile greps the first such line, keep the value on it
+const Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+const Baseuri string = "https://www.kleinanzeigen.de"
+const Listuri string = "/s-bestandsliste.html"
+const Defaultdir string = "."
+
+func main() {
+	os.Exit(Main())
+}
+
+// Main runs the program and returns the exit code handed to os.Exit.
+func Main() int {
+	showversion := false
+	showhelp := false
+	showmanual := false
+	enabledebug := false
+	configfile := ""
+	dir := Defaultdir
+
+	flag.BoolVarP(&enabledebug, "debug", "d", false, "debug mode")
+	flag.BoolVarP(&showversion, "version", "v", false, "show version")
+	flag.BoolVarP(&showhelp, "help", "h", false, "show usage")
+	flag.BoolVarP(&showmanual, "manual", "m", false, "show manual")
+	flag.StringVarP(&dir, "output-dir", "o", dir, "where to store ads")
+	flag.StringVarP(&configfile, "config", "c",
+		os.Getenv("HOME")+"/.kleingebaeck", "config file")
+
+	flag.Parse()
+
+	if showversion {
+		fmt.Printf("This is kleingebaeck version %s\n", VERSION)
+		return 0
+	}
+
+	/* disabled for now:
+		if showhelp {
+			fmt.Println(Usage)
+			return 0
+		}
+		if enabledebug {
+			calc.ToggleDebug()
+		}
+		if showmanual {
+			man()
+			return 0
+		}
+	*/
+
+	if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) {
+		err := os.Mkdir(dir, os.ModePerm)
+		if err != nil {
+			return Die(err)
+		}
+	}
+
+	if len(flag.Args()) == 1 {
+		if err := Start(flag.Args()[0], dir); err != nil {
+			return Die(err)
+		}
+	}
+
+	return 0
+}
+
+// Die prints err and returns the exit code 1.
+func Die(err error) int {
+	fmt.Println(err)
+	return 1
+}
diff --git a/scrape.go b/scrape.go
new file mode 100644
index 0000000..598924a
--- /dev/null
+++ b/scrape.go
@@ -0,0 +1,220 @@
+/*
+Copyright © 2023 Thomas von Dein
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package main
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+
+	"net/http"
+
+	"astuart.co/goq"
+)
+
+// Index holds the ad links found on one page of a user's ad listing.
+type Index struct {
+	Links []string `goquery:".text-module-begin a,[href]"`
+}
+
+// Get requests uri with client, sending Useragent as User-Agent header,
+// and returns the response body, which the caller has to close. The
+// HTTP status code is not inspected.
+func Get(uri string, client *http.Client) (io.ReadCloser, error) {
+	req, err := http.NewRequest("GET", uri, nil)
+	if err != nil {
+		return nil, err
+	}
+
+	req.Header.Set("User-Agent", Useragent)
+
+	res, err := client.Do(req)
+	if err != nil {
+		return nil, err
+	}
+
+	return res.Body, nil
+}
+
+// fetchIndex downloads one listing page and returns the ad links found
+// on it. The response body is closed before returning, so that paging
+// through many listing pages does not keep all of them open.
+func fetchIndex(uri string, client *http.Client) ([]string, error) {
+	body, err := Get(uri, client)
+	if err != nil {
+		return nil, err
+	}
+	defer body.Close()
+
+	var index Index
+	if err := goq.NewDecoder(body).Decode(&index); err != nil {
+		return nil, err
+	}
+
+	return index.Links, nil
+}
+
+// Start extracts the ad links from all listing pages of user uid (that
+// is: it follows the pagination until a page yields no links), prints
+// them and scrapes every ad into dir. It stops at the first error.
+func Start(uid string, dir string) error {
+	client := &http.Client{}
+	ads := []string{}
+
+	baseuri := Baseuri + Listuri + "?userId=" + uid
+	page := 1
+	uri := baseuri
+
+	for {
+		links, err := fetchIndex(uri, client)
+		if err != nil {
+			return err
+		}
+
+		if len(links) == 0 {
+			break
+		}
+
+		for _, href := range links {
+			ads = append(ads, href)
+			fmt.Println(href)
+		}
+
+		page++
+		uri = fmt.Sprintf("%s&pageNum=%d", baseuri, page)
+	}
+
+	// no early return on success: every collected ad gets scraped
+	for _, ad := range ads {
+		if err := Scrape(ad, dir); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Ad holds the fields extracted from a single ad page.
+type Ad struct {
+	Title  string   `goquery:"h1"`
+	Text   string   `goquery:"p#viewad-description-text,html"`
+	Images []string `goquery:".galleryimage-element img,[src]"`
+	Price  string   `goquery:"h2#viewad-price"`
+}
+
+// slugFromLink derives the name of the per-ad directory from an ad
+// link: its last non-empty path segment.
+// NOTE(review): assumes this segment identifies an ad uniquely; confirm
+// against real listing links.
+func slugFromLink(link string) (string, error) {
+	parts := strings.FieldsFunc(link, func(r rune) bool { return r == '/' })
+	if len(parts) == 0 {
+		return "", fmt.Errorf("no directory name in ad link %q", link)
+	}
+
+	slug := parts[len(parts)-1]
+	if slug == "." || slug == ".." {
+		// links come from a remote page, never leave the output dir
+		return "", fmt.Errorf("unsafe directory name in ad link %q", link)
+	}
+
+	return slug, nil
+}
+
+// Scrape downloads the ad behind link (a path relative to Baseuri) and
+// stores it in a directory of its own below dir: title, price and
+// description go into Anzeige.txt, the images next to it as 1.jpg,
+// 2.jpg and so on.
+func Scrape(link string, dir string) error {
+	client := &http.Client{}
+	uri := Baseuri + link
+
+	// The name must come from link: splitting the absolute uri at "/"
+	// yields an empty second element (between the slashes of https://).
+	slug, err := slugFromLink(link)
+	if err != nil {
+		return err
+	}
+
+	body, err := Get(uri, client)
+	if err != nil {
+		return err
+	}
+	defer body.Close()
+
+	var ad Ad
+	if err := goq.NewDecoder(body).Decode(&ad); err != nil {
+		return err
+	}
+
+	addir := strings.Join([]string{dir, slug}, "/")
+	if err := os.MkdirAll(addir, os.ModePerm); err != nil {
+		return err
+	}
+
+	f, err := os.Create(addir + "/Anzeige.txt")
+	if err != nil {
+		return err
+	}
+
+	// Text is selected via the html accessor, turn line breaks into newlines
+	ad.Text = strings.ReplaceAll(ad.Text, "<br/>", "\n")
+	_, err = fmt.Fprintf(f, "Title: %s\nPrice: %s\n\n%s", ad.Title, ad.Price, ad.Text)
+	if cerr := f.Close(); err == nil {
+		// a failed close can mean lost data, report it like a failed write
+		err = cerr
+	}
+	if err != nil {
+		return err
+	}
+
+	for idx, imguri := range ad.Images {
+		file := fmt.Sprintf("%s/%d.jpg", addir, idx+1)
+		if err := Getimage(imguri, file); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// Getimage downloads uri and writes the response body to fileName. Any
+// status code other than 200 is reported as an error.
+func Getimage(uri, fileName string) error {
+	response, err := http.Get(uri)
+	if err != nil {
+		return err
+	}
+	defer response.Body.Close()
+
+	if response.StatusCode != http.StatusOK {
+		return errors.New("received non 200 response code")
+	}
+
+	file, err := os.Create(fileName)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	_, err = io.Copy(file, response.Body)
+
+	return err
+}