mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 04:21:00 +01:00
initial commit
This commit is contained in:
88
Makefile
Normal file
88
Makefile
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
# Copyright © 2023 Thomas von Dein
|
||||||
|
|
||||||
|
# This program is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
|
||||||
|
# This program is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# no need to modify anything below
|
||||||
|
# Name of the binary and base name of the manual page sources.
tool = kleingebaeck

# Release number, taken from the first line of main.go that mentions VERSION.
VERSION = $(shell grep VERSION main.go | head -1 | cut -d '"' -f2)
archs = darwin freebsd linux windows

# Installation prefix and ownership used by the install target.
PREFIX = /usr/local
UID = root
GID = 0

# Non-empty only when "pod2text -h" prints something on stdout; the manual
# page rules below are skipped otherwise.
HAVE_POD := $(shell pod2text -h 2>/dev/null)

# None of these targets produce a file of the same name.
.PHONY: all buildlocal install clean test testfuzzy singletest cover-report \
	goupdate buildall release show-versions

all: $(tool).1 $(tool).go buildlocal

# Render the POD manual as a man page.
%.1: %.pod
ifdef HAVE_POD
	pod2man -c "User Commands" -r 1 -s 1 $*.pod > $*.1
endif

# Embed the plain text manual into a Go source file as the variable "manpage".
%.go: %.pod
ifdef HAVE_POD
	echo "package main" > $*.go
	echo >> $*.go
	echo "var manpage = \`" >> $*.go
	pod2text $*.pod >> $*.go
	echo "\`" >> $*.go
endif

# Statically linked build for the local platform.
buildlocal:
	CGO_LDFLAGS='-static' go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o $(tool)

# The binary goes into the same bin directory that is created first
# (it used to be copied to sbin, which this target never creates).
install: buildlocal
	install -d -o $(UID) -g $(GID) $(PREFIX)/bin
	install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1
	install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/bin/
	install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/

clean:
	rm -rf $(tool) coverage.out testdata

test: clean
	go test ./... $(ARGS)

# NOTE(review): "-fuzz" takes the following argument as its pattern, so
# "./..." is most likely consumed as the fuzz regexp here rather than as
# the package list - confirm the intended invocation.
testfuzzy: clean
	go test -fuzz ./... $(ARGS)

singletest:
	@echo "Call like this: make singletest TEST=TestPrepareColumns ARGS=-v"
	go test -run $(TEST) $(ARGS)

cover-report:
	go test ./... -cover -coverprofile=coverage.out
	go tool cover -html=coverage.out

goupdate:
	go get -t -u=patch ./...

buildall:
	./mkrel.sh $(tool) $(VERSION)

release: buildall
	gh release create v$(VERSION) --generate-notes releases/*

show-versions: buildlocal
	@echo "### kleingebaeck version:"
	@./$(tool) -v

	@echo
	@echo "### go module versions:"
	@go list -m all

	@echo
	@echo "### go version used for building:"
	@grep -m 1 go go.mod
|
||||||
26
README.md
Normal file
26
README.md
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
## kleinanzeigen.de Backup
|
||||||
|
|
||||||
|
[License](https://github.com/tlinden/kleingebaeck/blob/master/LICENSE)
|
||||||
|
[Go Report Card](https://goreportcard.com/report/github.com/tlinden/kleingebaeck)
|
||||||
|
|
||||||
|
Mit diesem kleinen aber feinen Tool kann man seine
|
||||||
|
[Anzeigen bei kleinanzeigen.de](https://kleinanzeigen.de) sichern. Das
|
||||||
|
Problem ist ja bekanntlich, dass Kleinanzeigen nach einer Weile (2
|
||||||
|
Monate?) automatisch gelöscht werden. Wenn man keine Sicherung hat,
|
||||||
|
wird es schwierig, die erneut einzustellen. Mit dem Tool braucht man
|
||||||
|
sich keine Texte zu merken. Man kann auch einfach Änderungen
|
||||||
|
(z.B. Preis runter) durchführen oder den Text anpassen und dann ein
|
||||||
|
neues Backup anfertigen.
|
||||||
|
|
||||||
|
Es wird pro Anzeige ein Verzeichnis erstellt. In der Datei
|
||||||
|
`Anzeige.txt` werden der Titel, die Beschreibung sowie der Preis
|
||||||
|
eingetragen. Ausserdem werden alle Bilder heruntergeladen.
|
||||||
|
|
||||||
|
## Copyright und Lizenz
|
||||||
|
|
||||||
|
Lizenziert unter der GNU GENERAL PUBLIC LICENSE version 3.
|
||||||
|
|
||||||
|
## Author
|
||||||
|
|
||||||
|
T.v.Dein <tom AT vondein DOT org>
|
||||||
|
|
||||||
12
go.mod
Normal file
12
go.mod
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
module kleingebaeck
|
||||||
|
|
||||||
|
go 1.20
|
||||||
|
|
||||||
|
require (
|
||||||
|
astuart.co/goq v1.0.0 // indirect
|
||||||
|
github.com/PuerkitoBio/goquery v1.5.0 // indirect
|
||||||
|
github.com/andybalholm/cascadia v1.0.0 // indirect
|
||||||
|
github.com/spf13/pflag v1.0.5 // indirect
|
||||||
|
golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect
|
||||||
|
|
||||||
|
)
|
||||||
19
go.sum
Normal file
19
go.sum
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw=
|
||||||
|
astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
|
||||||
|
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
|
||||||
|
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
||||||
|
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
||||||
|
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||||
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||||
|
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||||
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
|
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
|
golang.org/x/net v0.0.0-20190606173856-1492cefac77f h1:IWHgpgFqnL5AhBUBZSgBdjl2vkQUEzcY+JNKWfcgAU0=
|
||||||
|
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||||
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
143
kleingebaeck.1
Normal file
143
kleingebaeck.1
Normal file
@@ -0,0 +1,143 @@
|
|||||||
|
.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42)
|
||||||
|
.\"
|
||||||
|
.\" Standard preamble:
|
||||||
|
.\" ========================================================================
|
||||||
|
.de Sp \" Vertical space (when we can't use .PP)
|
||||||
|
.if t .sp .5v
|
||||||
|
.if n .sp
|
||||||
|
..
|
||||||
|
.de Vb \" Begin verbatim text
|
||||||
|
.ft CW
|
||||||
|
.nf
|
||||||
|
.ne \\$1
|
||||||
|
..
|
||||||
|
.de Ve \" End verbatim text
|
||||||
|
.ft R
|
||||||
|
.fi
|
||||||
|
..
|
||||||
|
.\" Set up some character translations and predefined strings. \*(-- will
|
||||||
|
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
|
||||||
|
.\" double quote, and \*(R" will give a right double quote. \*(C+ will
|
||||||
|
.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
|
||||||
|
.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
|
||||||
|
.\" nothing in troff, for use with C<>.
|
||||||
|
.tr \(*W-
|
||||||
|
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
|
||||||
|
.ie n \{\
|
||||||
|
. ds -- \(*W-
|
||||||
|
. ds PI pi
|
||||||
|
. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
|
||||||
|
. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
|
||||||
|
. ds L" ""
|
||||||
|
. ds R" ""
|
||||||
|
. ds C` ""
|
||||||
|
. ds C' ""
|
||||||
|
'br\}
|
||||||
|
.el\{\
|
||||||
|
. ds -- \|\(em\|
|
||||||
|
. ds PI \(*p
|
||||||
|
. ds L" ``
|
||||||
|
. ds R" ''
|
||||||
|
. ds C`
|
||||||
|
. ds C'
|
||||||
|
'br\}
|
||||||
|
.\"
|
||||||
|
.\" Escape single quotes in literal strings from groff's Unicode transform.
|
||||||
|
.ie \n(.g .ds Aq \(aq
|
||||||
|
.el .ds Aq '
|
||||||
|
.\"
|
||||||
|
.\" If the F register is >0, we'll generate index entries on stderr for
|
||||||
|
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
|
||||||
|
.\" entries marked with X<> in POD. Of course, you'll have to process the
|
||||||
|
.\" output yourself in some meaningful fashion.
|
||||||
|
.\"
|
||||||
|
.\" Avoid warning from groff about undefined register 'F'.
|
||||||
|
.de IX
|
||||||
|
..
|
||||||
|
.nr rF 0
|
||||||
|
.if \n(.g .if rF .nr rF 1
|
||||||
|
.if (\n(rF:(\n(.g==0)) \{\
|
||||||
|
. if \nF \{\
|
||||||
|
. de IX
|
||||||
|
. tm Index:\\$1\t\\n%\t"\\$2"
|
||||||
|
..
|
||||||
|
. if !\nF==2 \{\
|
||||||
|
. nr % 0
|
||||||
|
. nr F 2
|
||||||
|
. \}
|
||||||
|
. \}
|
||||||
|
.\}
|
||||||
|
.rr rF
|
||||||
|
.\"
|
||||||
|
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
|
||||||
|
.\" Fear. Run. Save yourself. No user-serviceable parts.
|
||||||
|
. \" fudge factors for nroff and troff
|
||||||
|
.if n \{\
|
||||||
|
. ds #H 0
|
||||||
|
. ds #V .8m
|
||||||
|
. ds #F .3m
|
||||||
|
. ds #[ \f1
|
||||||
|
. ds #] \fP
|
||||||
|
.\}
|
||||||
|
.if t \{\
|
||||||
|
. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
|
||||||
|
. ds #V .6m
|
||||||
|
. ds #F 0
|
||||||
|
. ds #[ \&
|
||||||
|
. ds #] \&
|
||||||
|
.\}
|
||||||
|
. \" simple accents for nroff and troff
|
||||||
|
.if n \{\
|
||||||
|
. ds ' \&
|
||||||
|
. ds ` \&
|
||||||
|
. ds ^ \&
|
||||||
|
. ds , \&
|
||||||
|
. ds ~ ~
|
||||||
|
. ds /
|
||||||
|
.\}
|
||||||
|
.if t \{\
|
||||||
|
. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
|
||||||
|
. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
|
||||||
|
. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
|
||||||
|
. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
|
||||||
|
. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
|
||||||
|
. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
|
||||||
|
.\}
|
||||||
|
. \" troff and (daisy-wheel) nroff accents
|
||||||
|
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
|
||||||
|
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
|
||||||
|
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
|
||||||
|
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
|
||||||
|
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
|
||||||
|
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
|
||||||
|
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
|
||||||
|
.ds ae a\h'-(\w'a'u*4/10)'e
|
||||||
|
.ds Ae A\h'-(\w'A'u*4/10)'E
|
||||||
|
. \" corrections for vroff
|
||||||
|
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
|
||||||
|
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
|
||||||
|
. \" for low resolution devices (crt and lpr)
|
||||||
|
.if \n(.H>23 .if \n(.V>19 \
|
||||||
|
\{\
|
||||||
|
. ds : e
|
||||||
|
. ds 8 ss
|
||||||
|
. ds o a
|
||||||
|
. ds d- d\h'-1'\(ga
|
||||||
|
. ds D- D\h'-1'\(hy
|
||||||
|
. ds th \o'bp'
|
||||||
|
. ds Th \o'LP'
|
||||||
|
. ds ae ae
|
||||||
|
. ds Ae AE
|
||||||
|
.\}
|
||||||
|
.rm #[ #] #H #V #F C
|
||||||
|
.\" ========================================================================
|
||||||
|
.\"
|
||||||
|
.IX Title "KLEINGEBAECK 1"
|
||||||
|
.TH KLEINGEBAECK 1 "2023-12-14" "1" "User Commands"
|
||||||
|
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
||||||
|
.\" way too many mistakes in technical documents.
|
||||||
|
.if n .ad l
|
||||||
|
.nh
|
||||||
|
.SS "kleingebaeck"
|
||||||
|
.IX Subsection "kleingebaeck"
|
||||||
|
Backup of kleinanzeigen.de
|
||||||
7
kleingebaeck.go
Normal file
7
kleingebaeck.go
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
var manpage = `
|
||||||
|
kleingebaeck
|
||||||
|
Backup of kleinanzeigen.de
|
||||||
|
|
||||||
|
`
|
||||||
5
kleingebaeck.pod
Normal file
5
kleingebaeck.pod
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
=head2 kleingebaeck
|
||||||
|
|
||||||
|
Backup of kleinanzeigen.de
|
||||||
|
|
||||||
|
=cut
|
||||||
96
main.go
Normal file
96
main.go
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2023 Thomas von Dein
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
|
flag "github.com/spf13/pflag"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Program-wide settings.
const (
	// Release number printed by the version flag. The Makefile reads it by
	// grepping main.go for the first line that contains the constant's
	// name, so that name must not show up earlier in the file, comments
	// included.
	VERSION string = "0.0.1"

	// Useragent is the User-Agent header value sent when fetching pages.
	Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

	// Baseuri is the scheme and host every fetched page address starts with.
	Baseuri string = "https://www.kleinanzeigen.de"

	// Listuri is the path of a user's ad listing, appended to Baseuri.
	Listuri string = "/s-bestandsliste.html"

	// Defaultdir is the output directory used unless another one is given
	// on the command line.
	Defaultdir string = "."
)
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
os.Exit(Main())
|
||||||
|
}
|
||||||
|
|
||||||
|
func Main() int {
|
||||||
|
showversion := false
|
||||||
|
showhelp := false
|
||||||
|
showmanual := false
|
||||||
|
enabledebug := false
|
||||||
|
configfile := ""
|
||||||
|
dir := Defaultdir
|
||||||
|
|
||||||
|
flag.BoolVarP(&enabledebug, "debug", "d", false, "debug mode")
|
||||||
|
flag.BoolVarP(&showversion, "version", "v", false, "show version")
|
||||||
|
flag.BoolVarP(&showhelp, "help", "h", false, "show usage")
|
||||||
|
flag.BoolVarP(&showmanual, "manual", "m", false, "show manual")
|
||||||
|
flag.StringVarP(&dir, "output-dir", "o", dir, "where to store ads")
|
||||||
|
flag.StringVarP(&configfile, "config", "c",
|
||||||
|
os.Getenv("HOME")+"/.kleingebaeck", "config file")
|
||||||
|
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
if showversion {
|
||||||
|
fmt.Printf("This is kleingebaeck version %s\n", VERSION)
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
if showhelp {
|
||||||
|
fmt.Println(Usage)
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
if enabledebug {
|
||||||
|
calc.ToggleDebug()
|
||||||
|
}
|
||||||
|
|
||||||
|
if showmanual {
|
||||||
|
man()
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) {
|
||||||
|
err := os.Mkdir(dir, os.ModePerm)
|
||||||
|
if err != nil {
|
||||||
|
return Die(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(flag.Args()) == 1 {
|
||||||
|
Start(flag.Args()[0], dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
// Die reports err on standard error, so it does not mix with regular
// output on standard out, and returns 1 for use as the process exit code.
func Die(err error) int {
	fmt.Fprintln(os.Stderr, err)
	return 1
}
|
||||||
175
scrape.go
Normal file
175
scrape.go
Normal file
@@ -0,0 +1,175 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2023 Thomas von Dein
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"astuart.co/goq"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Index receives what is extracted from one page of an ad listing.
type Index struct {
	// Links holds the href attribute of every anchor located below an
	// element carrying the class "text-module-begin".
	Links []string `goquery:".text-module-begin a,[href]"`
}
|
||||||
|
|
||||||
|
// fetch some web page content
|
||||||
|
func Get(uri string, client *http.Client) (io.ReadCloser, error) {
|
||||||
|
req, err := http.NewRequest("GET", uri, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
req.Header.Set("User-Agent", Useragent)
|
||||||
|
|
||||||
|
// fmt.Println(uri)
|
||||||
|
|
||||||
|
res, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.Body, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// extract links from all ad listing pages (that is: use pagination)
|
||||||
|
// and scrape every page
|
||||||
|
func Start(uid string, dir string) error {
|
||||||
|
client := &http.Client{}
|
||||||
|
ads := []string{}
|
||||||
|
|
||||||
|
baseuri := Baseuri + Listuri + "?userId=" + uid
|
||||||
|
page := 1
|
||||||
|
uri := baseuri
|
||||||
|
|
||||||
|
for {
|
||||||
|
var index Index
|
||||||
|
body, err := Get(uri, client)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer body.Close()
|
||||||
|
|
||||||
|
err = goq.NewDecoder(body).Decode(&index)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(index.Links) == 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, href := range index.Links {
|
||||||
|
ads = append(ads, href)
|
||||||
|
fmt.Println(href)
|
||||||
|
}
|
||||||
|
|
||||||
|
page++
|
||||||
|
uri = baseuri + "&pageNum=" + fmt.Sprintf("%d", page)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, ad := range ads {
|
||||||
|
err := Scrape(ad, dir)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ad receives the parts of a single ad page that end up in the backup.
type Ad struct {
	// Title is taken from the h1 element of the page.
	Title string `goquery:"h1"`

	// Text is the markup of the paragraph with the id
	// "viewad-description-text"; Scrape turns its <br/> tags into newlines.
	Text string `goquery:"p#viewad-description-text,html"`

	// Images holds the src attribute of every img element below an
	// element carrying the class "galleryimage-element".
	Images []string `goquery:".galleryimage-element img,[src]"`

	// Price is taken from the h2 element with the id "viewad-price".
	Price string `goquery:"h2#viewad-price"`
}
|
||||||
|
|
||||||
|
func Scrape(link string, dir string) error {
|
||||||
|
client := &http.Client{}
|
||||||
|
uri := Baseuri + link
|
||||||
|
slurp := strings.Split(uri, "/")[1]
|
||||||
|
|
||||||
|
var ad Ad
|
||||||
|
body, err := Get(uri, client)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer body.Close()
|
||||||
|
|
||||||
|
err = goq.NewDecoder(body).Decode(&ad)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
f, err := os.Create(strings.Join([]string{dir, slurp, "Anzeige.txt"}, "/"))
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
ad.Text = strings.ReplaceAll(ad.Text, "<br/>", "\n")
|
||||||
|
_, err = fmt.Fprintf(f, "Title: %s\nPrice: %s\n\n%s", ad.Title, ad.Price, ad.Text)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
img := 1
|
||||||
|
for _, imguri := range ad.Images {
|
||||||
|
file := fmt.Sprintf("%s/%d.jpg", dir, img)
|
||||||
|
err := Getimage(imguri, file)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
img++
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Getimage downloads uri and stores the response body in the file fileName.
//
// An error is returned when the request fails, when the server answers with
// a status other than 200, or when the file cannot be created, written or
// closed. After a failed write or close the incomplete file is removed.
func Getimage(uri, fileName string) (err error) {
	response, err := http.Get(uri)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	if response.StatusCode != http.StatusOK {
		return fmt.Errorf("received non 200 response code: %s", response.Status)
	}

	file, err := os.Create(fileName)
	if err != nil {
		return err
	}
	defer func() {
		// Close can be the first place a write problem shows up, so its
		// error is reported together with any earlier one.
		if cerr := file.Close(); cerr != nil {
			err = errors.Join(err, cerr)
		}
		// Do not leave a truncated image behind.
		if err != nil {
			os.Remove(fileName)
		}
	}()

	_, err = io.Copy(file, response.Body)
	return err
}
|
||||||
Reference in New Issue
Block a user