initial commit

This commit is contained in:
2023-12-14 19:00:04 +01:00
parent 8bd62b3c30
commit 12329f6ae0
9 changed files with 571 additions and 0 deletions

88
Makefile Normal file
View File

@@ -0,0 +1,88 @@
# Copyright © 2023 Thomas von Dein
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# no need to modify anything below
tool = kleingebaeck
VERSION = $(shell grep VERSION main.go | head -1 | cut -d '"' -f2)
archs = darwin freebsd linux windows
PREFIX = /usr/local
UID = root
GID = 0
HAVE_POD := $(shell pod2text -h 2>/dev/null)
all: $(tool).1 $(tool).go buildlocal
%.1: %.pod
ifdef HAVE_POD
pod2man -c "User Commands" -r 1 -s 1 $*.pod > $*.1
endif
%.go: %.pod
ifdef HAVE_POD
echo "package main" > $*.go
echo >> $*.go
echo "var manpage = \`" >> $*.go
pod2text $*.pod >> $*.go
echo "\`" >> $*.go
endif
buildlocal:
CGO_LDFLAGS='-static' go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o $(tool)
install: buildlocal
install -d -o $(UID) -g $(GID) $(PREFIX)/bin
install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1
install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/sbin/
install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/
clean:
rm -rf $(tool) coverage.out testdata
test: clean
go test ./... $(ARGS)
testfuzzy: clean
go test -fuzz ./... $(ARGS)
singletest:
@echo "Call like this: make singletest TEST=TestPrepareColumns ARGS=-v"
go test -run $(TEST) $(ARGS)
cover-report:
go test ./... -cover -coverprofile=coverage.out
go tool cover -html=coverage.out
goupdate:
go get -t -u=patch ./...
buildall:
./mkrel.sh $(tool) $(VERSION)
release: buildall
gh release create v$(VERSION) --generate-notes releases/*
show-versions: buildlocal
@echo "### kleingebaeck version:"
@./kleingebaeck -v
@echo
@echo "### go module versions:"
@go list -m all
@echo
@echo "### go version used for building:"
@grep -m 1 go go.mod

26
README.md Normal file
View File

@@ -0,0 +1,26 @@
## kleinanzeigen.de Backup
[![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://github.com/tlinden/kleingebaeck/blob/master/LICENSE)
[![Go Report Card](https://goreportcard.com/badge/github.com/tlinden/kleingebaeck)](https://goreportcard.com/report/github.com/tlinden/kleingebaeck)
Mit diesem kleinen aber feinen Tool kann man seine
[https://kleinanzeigen.de](Anzeigen bei kleinanzeigen.de) sichern. Das
Problem ist ja bekanntlich, dass Kleinanzeigen nach einer Weile (2
Monate?) automatisch gelöscht werden. Wenn man keine Sicherung hat,
wird es schwierig, die erneut einzustellen. Mit dem Tool braucht man
sich keine Texte zu merken. Man kann auch einfach Änderungen
(z.B. Preis runter) durchführen oder den Text anpassen und dann ein
neues Backup anfertigen.
Es wird pro Anzeige ein Verzeichnis erstellt. In der Datei
`Anzeige.txt` wird der Titel, die Beschreibung sowie der Preis
eingetragen. Ausserdem werden alle Bilder heruntergeladen.
## Copyright und Lizenz
Lizensiert unter der GNU GENERAL PUBLIC LICENSE version 3.
## Author
T.v.Dein <tom AT vondein DOT org>

12
go.mod Normal file
View File

@@ -0,0 +1,12 @@
module kleingebaeck
go 1.20
require (
astuart.co/goq v1.0.0 // indirect
github.com/PuerkitoBio/goquery v1.5.0 // indirect
github.com/andybalholm/cascadia v1.0.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect
)

19
go.sum Normal file
View File

@@ -0,0 +1,19 @@
astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw=
astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190606173856-1492cefac77f h1:IWHgpgFqnL5AhBUBZSgBdjl2vkQUEzcY+JNKWfcgAU0=
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

143
kleingebaeck.1 Normal file
View File

@@ -0,0 +1,143 @@
.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings. \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote. \*(C+ will
.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
. ds -- \(*W-
. ds PI pi
. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
. ds L" ""
. ds R" ""
. ds C` ""
. ds C' ""
'br\}
.el\{\
. ds -- \|\(em\|
. ds PI \(*p
. ds L" ``
. ds R" ''
. ds C`
. ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD. Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
. if \nF \{\
. de IX
. tm Index:\\$1\t\\n%\t"\\$2"
..
. if !\nF==2 \{\
. nr % 0
. nr F 2
. \}
. \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear. Run. Save yourself. No user-serviceable parts.
. \" fudge factors for nroff and troff
.if n \{\
. ds #H 0
. ds #V .8m
. ds #F .3m
. ds #[ \f1
. ds #] \fP
.\}
.if t \{\
. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
. ds #V .6m
. ds #F 0
. ds #[ \&
. ds #] \&
.\}
. \" simple accents for nroff and troff
.if n \{\
. ds ' \&
. ds ` \&
. ds ^ \&
. ds , \&
. ds ~ ~
. ds /
.\}
.if t \{\
. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
. \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
. \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
. \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
. ds : e
. ds 8 ss
. ds o a
. ds d- d\h'-1'\(ga
. ds D- D\h'-1'\(hy
. ds th \o'bp'
. ds Th \o'LP'
. ds ae ae
. ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "KLEINGEBAECK 1"
.TH KLEINGEBAECK 1 "2023-12-14" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SS "kleingebaeck"
.IX Subsection "kleingebaeck"
Backup of kleinanzeigen.de

7
kleingebaeck.go Normal file
View File

@@ -0,0 +1,7 @@
package main
var manpage = `
kleingebaeck
Backup of kleinanzeigen.de
`

5
kleingebaeck.pod Normal file
View File

@@ -0,0 +1,5 @@
=head2 kleingebaeck
Backup of kleinanzeigen.de
=cut

96
main.go Normal file
View File

@@ -0,0 +1,96 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"errors"
"fmt"
"os"
flag "github.com/spf13/pflag"
)
const VERSION string = "0.0.1"
const Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
const Baseuri string = "https://www.kleinanzeigen.de"
const Listuri string = "/s-bestandsliste.html"
const Defaultdir string = "."
func main() {
os.Exit(Main())
}
func Main() int {
showversion := false
showhelp := false
showmanual := false
enabledebug := false
configfile := ""
dir := Defaultdir
flag.BoolVarP(&enabledebug, "debug", "d", false, "debug mode")
flag.BoolVarP(&showversion, "version", "v", false, "show version")
flag.BoolVarP(&showhelp, "help", "h", false, "show usage")
flag.BoolVarP(&showmanual, "manual", "m", false, "show manual")
flag.StringVarP(&dir, "output-dir", "o", dir, "where to store ads")
flag.StringVarP(&configfile, "config", "c",
os.Getenv("HOME")+"/.kleingebaeck", "config file")
flag.Parse()
if showversion {
fmt.Printf("This is kleingebaeck version %s\n", VERSION)
return 0
}
/*
if showhelp {
fmt.Println(Usage)
return 0
}
if enabledebug {
calc.ToggleDebug()
}
if showmanual {
man()
return 0
}
*/
if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) {
err := os.Mkdir(dir, os.ModePerm)
if err != nil {
return Die(err)
}
}
if len(flag.Args()) == 1 {
Start(flag.Args()[0], dir)
}
return 0
}
func Die(err error) int {
fmt.Println(err)
return 1
}

175
scrape.go Normal file
View File

@@ -0,0 +1,175 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"errors"
"fmt"
"io"
"os"
"strings"
"net/http"
"astuart.co/goq"
)
type Index struct {
Links []string `goquery:".text-module-begin a,[href]"`
}
// fetch some web page content
func Get(uri string, client *http.Client) (io.ReadCloser, error) {
req, err := http.NewRequest("GET", uri, nil)
if err != nil {
return nil, err
}
req.Header.Set("User-Agent", Useragent)
// fmt.Println(uri)
res, err := client.Do(req)
if err != nil {
return nil, err
}
return res.Body, nil
}
// extract links from all ad listing pages (that is: use pagination)
// and scrape every page
func Start(uid string, dir string) error {
client := &http.Client{}
ads := []string{}
baseuri := Baseuri + Listuri + "?userId=" + uid
page := 1
uri := baseuri
for {
var index Index
body, err := Get(uri, client)
if err != nil {
return err
}
defer body.Close()
err = goq.NewDecoder(body).Decode(&index)
if err != nil {
return err
}
if len(index.Links) == 0 {
break
}
for _, href := range index.Links {
ads = append(ads, href)
fmt.Println(href)
}
page++
uri = baseuri + "&pageNum=" + fmt.Sprintf("%d", page)
}
for _, ad := range ads {
err := Scrape(ad, dir)
if err != nil {
return err
}
return nil
}
return nil
}
type Ad struct {
Title string `goquery:"h1"`
Text string `goquery:"p#viewad-description-text,html"`
Images []string `goquery:".galleryimage-element img,[src]"`
Price string `goquery:"h2#viewad-price"`
}
func Scrape(link string, dir string) error {
client := &http.Client{}
uri := Baseuri + link
slurp := strings.Split(uri, "/")[1]
var ad Ad
body, err := Get(uri, client)
if err != nil {
return err
}
defer body.Close()
err = goq.NewDecoder(body).Decode(&ad)
if err != nil {
return err
}
f, err := os.Create(strings.Join([]string{dir, slurp, "Anzeige.txt"}, "/"))
if err != nil {
return err
}
ad.Text = strings.ReplaceAll(ad.Text, "<br/>", "\n")
_, err = fmt.Fprintf(f, "Title: %s\nPrice: %s\n\n%s", ad.Title, ad.Price, ad.Text)
if err != nil {
return err
}
img := 1
for _, imguri := range ad.Images {
file := fmt.Sprintf("%s/%d.jpg", dir, img)
err := Getimage(imguri, file)
if err != nil {
return err
}
img++
}
return nil
}
// fetch an image
func Getimage(uri, fileName string) error {
response, err := http.Get(uri)
if err != nil {
return err
}
defer response.Body.Close()
if response.StatusCode != 200 {
return errors.New("received non 200 response code")
}
file, err := os.Create(fileName)
if err != nil {
return err
}
defer file.Close()
_, err = io.Copy(file, response.Body)
if err != nil {
return err
}
return nil
}