mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 04:21:00 +01:00
Make ad directory name tunable, adapt to kleinanzeigen.de site changes
Add template for ad name, adapt to kleinanzeigen.de changes
This commit is contained in:
25
ad.go
25
ad.go
@@ -19,6 +19,7 @@ package main
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type Index struct {
|
||||
@@ -26,16 +27,16 @@ type Index struct {
|
||||
}
|
||||
|
||||
type Ad struct {
|
||||
Title string `goquery:"h1"`
|
||||
Slug string
|
||||
Id string
|
||||
Condition string
|
||||
Category string
|
||||
Price string `goquery:"h2#viewad-price"`
|
||||
Created string `goquery:"#viewad-extra-info,text"`
|
||||
Text string `goquery:"p#viewad-description-text,html"`
|
||||
Images []string `goquery:".galleryimage-element img,[src]"`
|
||||
Meta []string `goquery:".addetailslist--detail--value,text"`
|
||||
Title string `goquery:"h1"`
|
||||
Slug string
|
||||
Id string
|
||||
Condition string `goquery:".addetailslist--detail--value,text"`
|
||||
Category string
|
||||
CategoryTree []string `goquery:".breadcrump-link,text"`
|
||||
Price string `goquery:"h2#viewad-price"`
|
||||
Created string `goquery:"#viewad-extra-info,text"`
|
||||
Text string `goquery:"p#viewad-description-text,html"`
|
||||
Images []string `goquery:".galleryimage-element img,[src]"`
|
||||
}
|
||||
|
||||
// Used by slog to pretty print an ad
|
||||
@@ -46,6 +47,8 @@ func (ad *Ad) LogValue() slog.Value {
|
||||
slog.String("id", ad.Id),
|
||||
slog.Int("imagecount", len(ad.Images)),
|
||||
slog.Int("bodysize", len(ad.Text)),
|
||||
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
|
||||
slog.String("condition", ad.Condition),
|
||||
)
|
||||
}
|
||||
|
||||
@@ -58,7 +61,7 @@ func (ad *Ad) LogValue() slog.Value {
|
||||
//
|
||||
// Note: we return true for "ad is incomplete" and false for "ad is complete"!
|
||||
func (ad *Ad) Incomplete() bool {
|
||||
if ad.Category == "" || ad.Condition == "" || ad.Created == "" || ad.Text == "" {
|
||||
if ad.Category == "" || ad.Created == "" || ad.Text == "" {
|
||||
return true
|
||||
}
|
||||
|
||||
|
||||
15
config.go
15
config.go
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright © 2023 Thomas von Dein
|
||||
Copyright © 2023-2024 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@@ -33,7 +33,7 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
VERSION string = "0.1.0"
|
||||
VERSION string = "0.1.1"
|
||||
Baseuri string = "https://www.kleinanzeigen.de"
|
||||
Listuri string = "/s-bestandsliste.html"
|
||||
Defaultdir string = "."
|
||||
@@ -43,6 +43,7 @@ const (
|
||||
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nCreated: {{.Created}}\r\n\r\n{{.Text}}\r\n"
|
||||
Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||
DefaultAdNameTemplate string = "{{.Slug}}-{{.Id}}"
|
||||
)
|
||||
|
||||
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
|
||||
@@ -72,6 +73,7 @@ type Config struct {
|
||||
User int `koanf:"user"`
|
||||
Outdir string `koanf:"outdir"`
|
||||
Template string `koanf:"template"`
|
||||
Adnametemplate string `koanf:"adnametemplate"`
|
||||
Loglevel string `koanf:"loglevel"`
|
||||
Limit int `koanf:"limit"`
|
||||
Adlinks []string
|
||||
@@ -99,10 +101,11 @@ func InitConfig(w io.Writer) (*Config, error) {
|
||||
|
||||
// Load default values using the confmap provider.
|
||||
if err := k.Load(confmap.Provider(map[string]interface{}{
|
||||
"template": template,
|
||||
"outdir": ".",
|
||||
"loglevel": "notice",
|
||||
"userid": 0,
|
||||
"template": template,
|
||||
"outdir": ".",
|
||||
"loglevel": "notice",
|
||||
"userid": 0,
|
||||
"adnametemplate": DefaultAdNameTemplate,
|
||||
}, "."), nil); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -133,7 +133,7 @@
|
||||
.\" ========================================================================
|
||||
.\"
|
||||
.IX Title "KLEINGEBAECK 1"
|
||||
.TH KLEINGEBAECK 1 "2023-12-19" "1" "User Commands"
|
||||
.TH KLEINGEBAECK 1 "2024-01-12" "1" "User Commands"
|
||||
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
||||
.\" way too many mistakes in technical documents.
|
||||
.if n .ad l
|
||||
@@ -142,7 +142,7 @@
|
||||
kleingebaeck \- kleinanzeigen.de backup tool
|
||||
.SH "SYNOPSYS"
|
||||
.IX Header "SYNOPSYS"
|
||||
.Vb 10
|
||||
.Vb 11
|
||||
\& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...]
|
||||
\& Options:
|
||||
\& \-\-user \-u <uid> Backup ads from user with uid <uid>.
|
||||
@@ -153,6 +153,7 @@ kleingebaeck \- kleinanzeigen.de backup tool
|
||||
\& \-\-config \-c <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||
\& \-\-manual \-m Show manual.
|
||||
\& \-\-help \-h Show usage.
|
||||
\& \-\-version \-V Show program version.
|
||||
.Ve
|
||||
.SH "DESCRIPTION"
|
||||
.IX Header "DESCRIPTION"
|
||||
@@ -235,7 +236,20 @@ Also there's currently no parallelization implemented. This will
|
||||
change in the future.
|
||||
.SH "LICENSE"
|
||||
.IX Header "LICENSE"
|
||||
Licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3.
|
||||
Copyright 2023\-2024 Thomas von Dein
|
||||
.PP
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the \s-1GNU\s0 General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
.PP
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but \s-1WITHOUT ANY WARRANTY\s0; without even the implied warranty of
|
||||
\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS FOR A PARTICULAR PURPOSE.\s0 See the
|
||||
\&\s-1GNU\s0 General Public License for more details.
|
||||
.PP
|
||||
You should have received a copy of the \s-1GNU\s0 General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
.SH "Author"
|
||||
.IX Header "Author"
|
||||
T.v.Dein <tom \s-1AT\s0 vondein \s-1DOT\s0 org>
|
||||
|
||||
@@ -15,6 +15,7 @@ SYNOPSYS
|
||||
--config -c <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||
--manual -m Show manual.
|
||||
--help -h Show usage.
|
||||
--version -V Show program version.
|
||||
|
||||
DESCRIPTION
|
||||
This tool can be used to back up ads on the German ad page
|
||||
@@ -89,7 +90,20 @@ LIMITATIONS
|
||||
in the future.
|
||||
|
||||
LICENSE
|
||||
Licensed under the GNU GENERAL PUBLIC LICENSE version 3.
|
||||
Copyright 2023-2024 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
Free Software Foundation, either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Author
|
||||
T.v.Dein <tom AT vondein DOT org>
|
||||
|
||||
@@ -96,7 +96,20 @@ change in the future.
|
||||
|
||||
=head1 LICENSE
|
||||
|
||||
Licensed under the GNU GENERAL PUBLIC LICENSE version 3.
|
||||
Copyright 2023-2024 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see L<http://www.gnu.org/licenses/>.
|
||||
|
||||
=head1 Author
|
||||
|
||||
|
||||
28
main_test.go
28
main_test.go
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright © 2023 Thomas von Dein
|
||||
Copyright © 2023-2024 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@@ -60,6 +60,16 @@ const ADTPL string = `DOCTYPE html>
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="l-container-row">
|
||||
<div id="vap-brdcrmb" class="breadcrump">
|
||||
<a class="breadcrump-link" itemprop="url" href="/" title="Kleinanzeigen ">
|
||||
<span itemprop="title">Kleinanzeigen </span>
|
||||
</a>
|
||||
<a class="breadcrump-link" itemprop="url" href="/egal">
|
||||
<span itemprop="title">{{ .Category }}</span></a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{{ range $image := .Images }}
|
||||
<div class="galleryimage-element" data-ix="3">
|
||||
<img src="{{ $image }}"/>
|
||||
@@ -79,10 +89,6 @@ const ADTPL string = `DOCTYPE html>
|
||||
|
||||
<div class="splitlinebox l-container-row" id="viewad-details">
|
||||
<ul class="addetailslist">
|
||||
<li class="addetailslist--detail">
|
||||
Art<span class="addetailslist--detail--value" >
|
||||
{{ .Category }}</span>
|
||||
</li>
|
||||
<li class="addetailslist--detail">
|
||||
Zustand<span class="addetailslist--detail--value" >
|
||||
{{ .Condition }}</span>
|
||||
@@ -438,9 +444,17 @@ func SetIntercept(ads []Adsource) {
|
||||
}
|
||||
|
||||
func VerifyAd(ad AdConfig) error {
|
||||
body := ad.Title + ad.Price + ad.Id + ad.Category + ad.Condition + ad.Created
|
||||
body := ad.Title + ad.Price + ad.Id + "Kleinanzeigen => " + ad.Category + ad.Condition + ad.Created
|
||||
|
||||
file := fmt.Sprintf("t/out/%s/Adlisting.txt", ad.Slug)
|
||||
// prepare ad dir name using DefaultAdNameTemplate
|
||||
c := Config{Adnametemplate: DefaultAdNameTemplate}
|
||||
adstruct := Ad{Slug: ad.Slug, Id: ad.Id}
|
||||
addir, err := AdDirName(&c, &adstruct)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
file := fmt.Sprintf("t/out/%s/Adlisting.txt", addir)
|
||||
content, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return err
|
||||
|
||||
17
scrape.go
17
scrape.go
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright © 2023 Thomas von Dein
|
||||
Copyright © 2023-2024 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@@ -135,36 +135,37 @@ func Scrape(c *Config, uri string) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if len(ad.Meta) == 2 {
|
||||
ad.Category = ad.Meta[0]
|
||||
ad.Condition = ad.Meta[1]
|
||||
|
||||
if len(ad.CategoryTree) > 0 {
|
||||
ad.Category = strings.Join(ad.CategoryTree, " => ")
|
||||
}
|
||||
|
||||
if ad.Incomplete() {
|
||||
slog.Debug("got ad", "ad", ad)
|
||||
return errors.New("could not extract ad data from page, got empty struct")
|
||||
}
|
||||
|
||||
slog.Debug("extracted ad listing", "ad", ad)
|
||||
|
||||
// write listing
|
||||
err = WriteAd(c.Outdir, ad, c.Template)
|
||||
addir, err := WriteAd(c, ad)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.IncrAds()
|
||||
|
||||
return ScrapeImages(c, ad)
|
||||
return ScrapeImages(c, ad, addir)
|
||||
}
|
||||
|
||||
func ScrapeImages(c *Config, ad *Ad) error {
|
||||
func ScrapeImages(c *Config, ad *Ad, addir string) error {
|
||||
// fetch images
|
||||
img := 1
|
||||
g := new(errgroup.Group)
|
||||
|
||||
for _, imguri := range ad.Images {
|
||||
imguri := imguri
|
||||
file := filepath.Join(c.Outdir, ad.Slug, fmt.Sprintf("%d.jpg", img))
|
||||
file := filepath.Join(c.Outdir, addir, fmt.Sprintf("%d.jpg", img))
|
||||
g.Go(func() error {
|
||||
err := Getimage(imguri, file)
|
||||
if err != nil {
|
||||
|
||||
44
store.go
44
store.go
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright © 2023 Thomas von Dein
|
||||
Copyright © 2023-2024 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
@@ -27,19 +28,40 @@ import (
|
||||
tpl "text/template"
|
||||
)
|
||||
|
||||
func WriteAd(dir string, ad *Ad, template string) error {
|
||||
// prepare output dir
|
||||
dir = filepath.Join(dir, ad.Slug)
|
||||
err := Mkdir(dir)
|
||||
func AdDirName(c *Config, ad *Ad) (string, error) {
|
||||
tmpl, err := tpl.New("adname").Parse(c.Adnametemplate)
|
||||
if err != nil {
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
|
||||
buf := bytes.Buffer{}
|
||||
err = tmpl.Execute(&buf, ad)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return buf.String(), nil
|
||||
}
|
||||
|
||||
func WriteAd(c *Config, ad *Ad) (string, error) {
|
||||
// prepare ad dir name
|
||||
addir, err := AdDirName(c, ad)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// prepare output dir
|
||||
dir := filepath.Join(c.Outdir, addir)
|
||||
err = Mkdir(dir)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// write ad file
|
||||
listingfile := filepath.Join(dir, "Adlisting.txt")
|
||||
f, err := os.Create(listingfile)
|
||||
if err != nil {
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
@@ -49,19 +71,19 @@ func WriteAd(dir string, ad *Ad, template string) error {
|
||||
ad.Text = strings.ReplaceAll(ad.Text, "<br/>", "\n")
|
||||
}
|
||||
|
||||
tmpl, err := tpl.New("adlisting").Parse(template)
|
||||
tmpl, err := tpl.New("adlisting").Parse(c.Template)
|
||||
if err != nil {
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
|
||||
err = tmpl.Execute(f, ad)
|
||||
if err != nil {
|
||||
return err
|
||||
return "", err
|
||||
}
|
||||
|
||||
slog.Info("wrote ad listing", "listingfile", listingfile)
|
||||
|
||||
return nil
|
||||
return addir, nil
|
||||
}
|
||||
|
||||
func WriteImage(filename string, reader io.ReadCloser) error {
|
||||
|
||||
Reference in New Issue
Block a user