mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-16 12:01:00 +01:00
initial commit
This commit is contained in:
88
Makefile
Normal file
88
Makefile
Normal file
@@ -0,0 +1,88 @@
|
||||
# Copyright © 2023 Thomas von Dein
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
#
|
||||
# no need to modify anything below
|
||||
# Name of the binary and base name of the generated manual files.
tool      = kleingebaeck

# Release string, taken from the first line of main.go that mentions it.
# Simply expanded (:=) so the grep pipeline runs once at parse time
# instead of on every reference.
VERSION  := $(shell grep VERSION main.go | head -1 | cut -d '"' -f2)

# Target operating systems for release builds.
archs     = darwin freebsd linux windows

# Installation root plus owner/group used by the install target.
PREFIX    = /usr/local
UID       = root
GID       = 0

# Non-empty only when pod2text is available; guards the manual rules.
HAVE_POD := $(shell pod2text -h 2>/dev/null)
|
||||
|
||||
# Default goal: refresh the generated manual files (when the pod tools
# are available) and build the binary for the local platform.
# Declared phony so a stray file named "all" cannot mask it.
.PHONY: all
all: $(tool).1 $(tool).go buildlocal
|
||||
|
||||
# Render a POD file into a section 1 man page. Without the pod tools
# (HAVE_POD empty) the rule has no recipe and the existing page is kept.
%.1: %.pod
ifdef HAVE_POD
	pod2man -c "User Commands" -r 1 -s 1 $< > $@
endif
|
||||
|
||||
# Wrap the plain-text rendering of a POD file in a Go source file that
# defines the raw string variable "manpage". Without the pod tools
# (HAVE_POD empty) the rule has no recipe and the existing file is kept.
%.go: %.pod
ifdef HAVE_POD
	echo "package main" > $@
	echo >> $@
	echo "var manpage = \`" >> $@
	pod2text $< >> $@
	echo "\`" >> $@
endif
|
||||
|
||||
# Build a statically linked binary for the local platform.
# Phony: the go tool does its own up-to-date checking, so always run it.
.PHONY: buildlocal
buildlocal:
	CGO_LDFLAGS='-static' go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o $(tool)
|
||||
|
||||
# Install the binary and its man page below $(PREFIX), owned by
# $(UID):$(GID). The binary goes into the same bin directory that is
# created on the first line (it used to be copied into sbin, which was
# never created).
.PHONY: install
install: buildlocal
	install -d -o $(UID) -g $(GID) $(PREFIX)/bin
	install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1
	install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/bin/
	install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/
|
||||
|
||||
# Remove the built binary, the coverage profile and the testdata directory.
# Phony so that a file named "clean" cannot disable the target.
.PHONY: clean
clean:
	rm -rf $(tool) coverage.out testdata
|
||||
|
||||
# Run the test suite of all packages from a clean tree; extra go test
# flags can be passed via ARGS. Phony so that a file or directory named
# "test" cannot disable the target.
.PHONY: test
test: clean
	go test ./... $(ARGS)
|
||||
|
||||
# Run go test in fuzzing mode from a clean tree; extra flags via ARGS.
# NOTE(review): -fuzz takes a regexp selecting the fuzz test, so "./..."
# is presumably consumed as that pattern rather than as a package list -
# confirm the intended invocation.
.PHONY: testfuzzy
testfuzzy: clean
	go test -fuzz ./... $(ARGS)
|
||||
|
||||
# Run only the tests selected by TEST (passed to go test -run); extra
# flags via ARGS. The usage hint is printed on every invocation.
.PHONY: singletest
singletest:
	@echo "Call like this: make singletest TEST=TestPrepareColumns ARGS=-v"
	go test -run $(TEST) $(ARGS)
|
||||
|
||||
# Collect a coverage profile for all packages and open it as HTML report.
.PHONY: cover-report
cover-report:
	go test ./... -cover -coverprofile=coverage.out
	go tool cover -html=coverage.out
|
||||
|
||||
# Upgrade all dependencies (including test dependencies) to their latest
# patch release.
.PHONY: goupdate
goupdate:
	go get -t -u=patch ./...
|
||||
|
||||
# Hand tool name and version to the release build script mkrel.sh.
.PHONY: buildall
buildall:
	./mkrel.sh $(tool) $(VERSION)
|
||||
|
||||
# Build all release artifacts, then publish them as GitHub release
# v$(VERSION) with generated notes, attaching everything in releases/.
.PHONY: release
release: buildall
	gh release create v$(VERSION) --generate-notes releases/*
|
||||
|
||||
# Print the version of the freshly built binary, of every go module in
# the build list, and the go directive from go.mod.
# Uses $(tool) instead of a hard-coded binary name so the target keeps
# working if the tool is renamed, and anchors the grep so only the go
# directive can match.
.PHONY: show-versions
show-versions: buildlocal
	@echo "### $(tool) version:"
	@./$(tool) -v

	@echo
	@echo "### go module versions:"
	@go list -m all

	@echo
	@echo "### go version used for building:"
	@grep -m 1 '^go ' go.mod
|
||||
26
README.md
Normal file
26
README.md
Normal file
@@ -0,0 +1,26 @@
|
||||
## kleinanzeigen.de Backup
|
||||
|
||||
[License](https://github.com/tlinden/kleingebaeck/blob/master/LICENSE)
|
||||
[Go Report Card](https://goreportcard.com/report/github.com/tlinden/kleingebaeck)
|
||||
|
||||
Mit diesem kleinen aber feinen Tool kann man seine
|
||||
[Anzeigen bei kleinanzeigen.de](https://kleinanzeigen.de) sichern. Das
|
||||
Problem ist ja bekanntlich, dass Kleinanzeigen nach einer Weile (2
|
||||
Monate?) automatisch gelöscht werden. Wenn man keine Sicherung hat,
|
||||
wird es schwierig, die erneut einzustellen. Mit dem Tool braucht man
|
||||
sich keine Texte zu merken. Man kann auch einfach Änderungen
|
||||
(z.B. Preis runter) durchführen oder den Text anpassen und dann ein
|
||||
neues Backup anfertigen.
|
||||
|
||||
Es wird pro Anzeige ein Verzeichnis erstellt. In der Datei
|
||||
`Anzeige.txt` wird der Titel, die Beschreibung sowie der Preis
|
||||
eingetragen. Ausserdem werden alle Bilder heruntergeladen.
|
||||
|
||||
## Copyright und Lizenz
|
||||
|
||||
Lizenziert unter der GNU GENERAL PUBLIC LICENSE version 3.
|
||||
|
||||
## Author
|
||||
|
||||
T.v.Dein <tom AT vondein DOT org>
|
||||
|
||||
12
go.mod
Normal file
12
go.mod
Normal file
@@ -0,0 +1,12 @@
|
||||
module kleingebaeck
|
||||
|
||||
go 1.20
|
||||
|
||||
// goq and pflag are imported directly by the program's sources, so they
// must not carry the "// indirect" marker; the remaining modules are only
// pulled in transitively.
require (
	astuart.co/goq v1.0.0
	github.com/PuerkitoBio/goquery v1.5.0 // indirect
	github.com/andybalholm/cascadia v1.0.0 // indirect
	github.com/spf13/pflag v1.0.5
	golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect
)
|
||||
19
go.sum
Normal file
19
go.sum
Normal file
@@ -0,0 +1,19 @@
|
||||
astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw=
|
||||
astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
|
||||
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
|
||||
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
||||
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
||||
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
|
||||
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190606173856-1492cefac77f h1:IWHgpgFqnL5AhBUBZSgBdjl2vkQUEzcY+JNKWfcgAU0=
|
||||
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
143
kleingebaeck.1
Normal file
143
kleingebaeck.1
Normal file
@@ -0,0 +1,143 @@
|
||||
.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42)
|
||||
.\"
|
||||
.\" Standard preamble:
|
||||
.\" ========================================================================
|
||||
.de Sp \" Vertical space (when we can't use .PP)
|
||||
.if t .sp .5v
|
||||
.if n .sp
|
||||
..
|
||||
.de Vb \" Begin verbatim text
|
||||
.ft CW
|
||||
.nf
|
||||
.ne \\$1
|
||||
..
|
||||
.de Ve \" End verbatim text
|
||||
.ft R
|
||||
.fi
|
||||
..
|
||||
.\" Set up some character translations and predefined strings. \*(-- will
|
||||
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
|
||||
.\" double quote, and \*(R" will give a right double quote. \*(C+ will
|
||||
.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
|
||||
.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
|
||||
.\" nothing in troff, for use with C<>.
|
||||
.tr \(*W-
|
||||
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
|
||||
.ie n \{\
|
||||
. ds -- \(*W-
|
||||
. ds PI pi
|
||||
. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
|
||||
. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
|
||||
. ds L" ""
|
||||
. ds R" ""
|
||||
. ds C` ""
|
||||
. ds C' ""
|
||||
'br\}
|
||||
.el\{\
|
||||
. ds -- \|\(em\|
|
||||
. ds PI \(*p
|
||||
. ds L" ``
|
||||
. ds R" ''
|
||||
. ds C`
|
||||
. ds C'
|
||||
'br\}
|
||||
.\"
|
||||
.\" Escape single quotes in literal strings from groff's Unicode transform.
|
||||
.ie \n(.g .ds Aq \(aq
|
||||
.el .ds Aq '
|
||||
.\"
|
||||
.\" If the F register is >0, we'll generate index entries on stderr for
|
||||
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
|
||||
.\" entries marked with X<> in POD. Of course, you'll have to process the
|
||||
.\" output yourself in some meaningful fashion.
|
||||
.\"
|
||||
.\" Avoid warning from groff about undefined register 'F'.
|
||||
.de IX
|
||||
..
|
||||
.nr rF 0
|
||||
.if \n(.g .if rF .nr rF 1
|
||||
.if (\n(rF:(\n(.g==0)) \{\
|
||||
. if \nF \{\
|
||||
. de IX
|
||||
. tm Index:\\$1\t\\n%\t"\\$2"
|
||||
..
|
||||
. if !\nF==2 \{\
|
||||
. nr % 0
|
||||
. nr F 2
|
||||
. \}
|
||||
. \}
|
||||
.\}
|
||||
.rr rF
|
||||
.\"
|
||||
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
|
||||
.\" Fear. Run. Save yourself. No user-serviceable parts.
|
||||
. \" fudge factors for nroff and troff
|
||||
.if n \{\
|
||||
. ds #H 0
|
||||
. ds #V .8m
|
||||
. ds #F .3m
|
||||
. ds #[ \f1
|
||||
. ds #] \fP
|
||||
.\}
|
||||
.if t \{\
|
||||
. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
|
||||
. ds #V .6m
|
||||
. ds #F 0
|
||||
. ds #[ \&
|
||||
. ds #] \&
|
||||
.\}
|
||||
. \" simple accents for nroff and troff
|
||||
.if n \{\
|
||||
. ds ' \&
|
||||
. ds ` \&
|
||||
. ds ^ \&
|
||||
. ds , \&
|
||||
. ds ~ ~
|
||||
. ds /
|
||||
.\}
|
||||
.if t \{\
|
||||
. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
|
||||
. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
|
||||
. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
|
||||
. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
|
||||
. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
|
||||
. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
|
||||
.\}
|
||||
. \" troff and (daisy-wheel) nroff accents
|
||||
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
|
||||
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
|
||||
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
|
||||
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
|
||||
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
|
||||
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
|
||||
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
|
||||
.ds ae a\h'-(\w'a'u*4/10)'e
|
||||
.ds Ae A\h'-(\w'A'u*4/10)'E
|
||||
. \" corrections for vroff
|
||||
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
|
||||
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
|
||||
. \" for low resolution devices (crt and lpr)
|
||||
.if \n(.H>23 .if \n(.V>19 \
|
||||
\{\
|
||||
. ds : e
|
||||
. ds 8 ss
|
||||
. ds o a
|
||||
. ds d- d\h'-1'\(ga
|
||||
. ds D- D\h'-1'\(hy
|
||||
. ds th \o'bp'
|
||||
. ds Th \o'LP'
|
||||
. ds ae ae
|
||||
. ds Ae AE
|
||||
.\}
|
||||
.rm #[ #] #H #V #F C
|
||||
.\" ========================================================================
|
||||
.\"
|
||||
.IX Title "KLEINGEBAECK 1"
|
||||
.TH KLEINGEBAECK 1 "2023-12-14" "1" "User Commands"
|
||||
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
||||
.\" way too many mistakes in technical documents.
|
||||
.if n .ad l
|
||||
.nh
|
||||
.SS "kleingebaeck"
|
||||
.IX Subsection "kleingebaeck"
|
||||
Backup of kleinanzeigen.de
|
||||
7
kleingebaeck.go
Normal file
7
kleingebaeck.go
Normal file
@@ -0,0 +1,7 @@
|
||||
package main
|
||||
|
||||
var manpage = `
|
||||
kleingebaeck
|
||||
Backup of kleinanzeigen.de
|
||||
|
||||
`
|
||||
5
kleingebaeck.pod
Normal file
5
kleingebaeck.pod
Normal file
@@ -0,0 +1,5 @@
|
||||
=head2 kleingebaeck
|
||||
|
||||
Backup of kleinanzeigen.de
|
||||
|
||||
=cut
|
||||
96
main.go
Normal file
96
main.go
Normal file
@@ -0,0 +1,96 @@
|
||||
/*
|
||||
Copyright © 2023 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
flag "github.com/spf13/pflag"
|
||||
)
|
||||
|
||||
// Program-wide constants. The release string must stay the first line of
// this file that carries its identifier, because the Makefile extracts
// the quoted value from that line.
const (
	VERSION string = "0.0.1"

	// Useragent is sent as User-Agent header with page requests.
	Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

	// Baseuri is the site root; Listuri is the path of a user's ad listing.
	Baseuri string = "https://www.kleinanzeigen.de"
	Listuri string = "/s-bestandsliste.html"

	// Defaultdir is the output directory used when -o is not given.
	Defaultdir string = "."
)
|
||||
|
||||
func main() {
|
||||
os.Exit(Main())
|
||||
}
|
||||
|
||||
func Main() int {
|
||||
showversion := false
|
||||
showhelp := false
|
||||
showmanual := false
|
||||
enabledebug := false
|
||||
configfile := ""
|
||||
dir := Defaultdir
|
||||
|
||||
flag.BoolVarP(&enabledebug, "debug", "d", false, "debug mode")
|
||||
flag.BoolVarP(&showversion, "version", "v", false, "show version")
|
||||
flag.BoolVarP(&showhelp, "help", "h", false, "show usage")
|
||||
flag.BoolVarP(&showmanual, "manual", "m", false, "show manual")
|
||||
flag.StringVarP(&dir, "output-dir", "o", dir, "where to store ads")
|
||||
flag.StringVarP(&configfile, "config", "c",
|
||||
os.Getenv("HOME")+"/.kleingebaeck", "config file")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
if showversion {
|
||||
fmt.Printf("This is kleingebaeck version %s\n", VERSION)
|
||||
return 0
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
if showhelp {
|
||||
fmt.Println(Usage)
|
||||
return 0
|
||||
}
|
||||
|
||||
if enabledebug {
|
||||
calc.ToggleDebug()
|
||||
}
|
||||
|
||||
if showmanual {
|
||||
man()
|
||||
return 0
|
||||
}
|
||||
|
||||
*/
|
||||
|
||||
if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) {
|
||||
err := os.Mkdir(dir, os.ModePerm)
|
||||
if err != nil {
|
||||
return Die(err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(flag.Args()) == 1 {
|
||||
Start(flag.Args()[0], dir)
|
||||
}
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
// Die reports err and returns 1, the exit status callers hand back to
// main. The message is written to standard error so that it does not get
// mixed into the ad links the program prints on standard output.
func Die(err error) int {
	fmt.Fprintln(os.Stderr, err)
	return 1
}
|
||||
175
scrape.go
Normal file
175
scrape.go
Normal file
@@ -0,0 +1,175 @@
|
||||
/*
|
||||
Copyright © 2023 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"net/http"
|
||||
|
||||
"astuart.co/goq"
|
||||
)
|
||||
|
||||
// Index is the decode target for one ad listing page. Links collects the
// href attribute of every anchor found below an element carrying the
// class "text-module-begin".
type Index struct {
	Links []string `goquery:".text-module-begin a,[href]"`
}
|
||||
|
||||
// fetch some web page content
|
||||
func Get(uri string, client *http.Client) (io.ReadCloser, error) {
|
||||
req, err := http.NewRequest("GET", uri, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Set("User-Agent", Useragent)
|
||||
|
||||
// fmt.Println(uri)
|
||||
|
||||
res, err := client.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return res.Body, nil
|
||||
}
|
||||
|
||||
// extract links from all ad listing pages (that is: use pagination)
|
||||
// and scrape every page
|
||||
func Start(uid string, dir string) error {
|
||||
client := &http.Client{}
|
||||
ads := []string{}
|
||||
|
||||
baseuri := Baseuri + Listuri + "?userId=" + uid
|
||||
page := 1
|
||||
uri := baseuri
|
||||
|
||||
for {
|
||||
var index Index
|
||||
body, err := Get(uri, client)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer body.Close()
|
||||
|
||||
err = goq.NewDecoder(body).Decode(&index)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if len(index.Links) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
for _, href := range index.Links {
|
||||
ads = append(ads, href)
|
||||
fmt.Println(href)
|
||||
}
|
||||
|
||||
page++
|
||||
uri = baseuri + "&pageNum=" + fmt.Sprintf("%d", page)
|
||||
}
|
||||
|
||||
for _, ad := range ads {
|
||||
err := Scrape(ad, dir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Ad is the decode target for a single ad page.
type Ad struct {
	// Title is taken from the h1 element.
	Title string `goquery:"h1"`
	// Text is the HTML content of the p element with id
	// viewad-description-text.
	Text string `goquery:"p#viewad-description-text,html"`
	// Images holds the src attribute of every img below an element with
	// class galleryimage-element.
	Images []string `goquery:".galleryimage-element img,[src]"`
	// Price is taken from the h2 element with id viewad-price.
	Price string `goquery:"h2#viewad-price"`
}
|
||||
|
||||
func Scrape(link string, dir string) error {
|
||||
client := &http.Client{}
|
||||
uri := Baseuri + link
|
||||
slurp := strings.Split(uri, "/")[1]
|
||||
|
||||
var ad Ad
|
||||
body, err := Get(uri, client)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer body.Close()
|
||||
|
||||
err = goq.NewDecoder(body).Decode(&ad)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f, err := os.Create(strings.Join([]string{dir, slurp, "Anzeige.txt"}, "/"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ad.Text = strings.ReplaceAll(ad.Text, "<br/>", "\n")
|
||||
_, err = fmt.Fprintf(f, "Title: %s\nPrice: %s\n\n%s", ad.Title, ad.Price, ad.Text)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
img := 1
|
||||
for _, imguri := range ad.Images {
|
||||
file := fmt.Sprintf("%s/%d.jpg", dir, img)
|
||||
err := Getimage(imguri, file)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
img++
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Getimage downloads uri with the default HTTP client and writes the
// response body to the file fileName, creating or truncating it.
//
// Any status other than 200 is reported as an error before the file is
// touched; request, create and copy errors are returned unchanged.
func Getimage(uri, fileName string) error {
	resp, err := http.Get(uri)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return errors.New("received non 200 response code")
	}

	out, err := os.Create(fileName)
	if err != nil {
		return err
	}
	defer out.Close()

	_, err = io.Copy(out, resp.Body)
	return err
}
|
||||
Reference in New Issue
Block a user