mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 04:21:00 +01:00
Compare commits
4 Commits
ad-conditi
...
v0.3.17
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6675c4d232 | ||
|
|
46be48af38 | ||
|
|
09948a6b39 | ||
|
|
bc01391872 |
@@ -204,6 +204,7 @@ Price: 99 € VB
|
||||
Id: 1919191919
|
||||
Category: Sachbücher
|
||||
Condition: Sehr Gut
|
||||
Type: Buch
|
||||
Created: 10.12.2023
|
||||
|
||||
This is the description text.
|
||||
|
||||
75
ad.go
75
ad.go
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -31,9 +32,12 @@ type Ad struct {
|
||||
Title string `goquery:"h1"`
|
||||
Slug string
|
||||
ID string
|
||||
Details []string `goquery:".addetailslist--detail--value,text"`
|
||||
Condition string // post processed from details
|
||||
Type string // post processed from details
|
||||
Details string `goquery:".addetailslist--detail,text"`
|
||||
Attributes map[string]string // processed afterwards
|
||||
Condition string // post processed from details for backward compatibility
|
||||
Type string // post processed from details for backward compatibility
|
||||
Color string // post processed from details for backward compatibility
|
||||
Material string // post processed from details for backward compatibility
|
||||
Category string
|
||||
CategoryTree []string `goquery:".breadcrump-link,text"`
|
||||
Price string `goquery:"h2#viewad-price"`
|
||||
@@ -52,15 +56,11 @@ func (ad *Ad) LogValue() slog.Value {
|
||||
slog.Int("imagecount", len(ad.Images)),
|
||||
slog.Int("bodysize", len(ad.Text)),
|
||||
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
|
||||
slog.String("condition", ad.Condition),
|
||||
slog.String("created", ad.Created),
|
||||
slog.String("expire", ad.Expire),
|
||||
)
|
||||
}
|
||||
|
||||
// static set of conditions available, used for post processing details
|
||||
var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"}
|
||||
|
||||
// check for completeness. I declared these fields to be mandatory
|
||||
// (though I really don't know if they really are). I consider images
|
||||
// and meta optional. So, if either of the checked fields here is
|
||||
@@ -85,3 +85,64 @@ func (ad *Ad) CalculateExpire() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Decode attributes like color or condition. See
|
||||
https://github.com/TLINDEN/kleingebaeck/issues/117
|
||||
for more details. In short: the HTML delivered by
|
||||
kleinanzeigen.de has no css attribute for the keys
|
||||
so we cannot extract key=>value mappings of the
|
||||
ad details but have to parse them manually.
|
||||
|
||||
The ad.Details member contains this after goq run:
|
||||
|
||||
Art
|
||||
|
||||
Weitere Kinderzimmermöbel
|
||||
|
||||
Farbe
|
||||
Holz
|
||||
|
||||
Zustand
|
||||
In Ordnung
|
||||
|
||||
We parse this into ad.Attributes and fill in some
|
||||
static members for backward compatibility reasons.
|
||||
*/
|
||||
func (ad *Ad) DecodeAttributes() {
|
||||
rd := strings.NewReader(ad.Details)
|
||||
scanner := bufio.NewScanner(rd)
|
||||
|
||||
isattr := true
|
||||
attr := ""
|
||||
attrmap := map[string]string{}
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if isattr {
|
||||
attr = line
|
||||
} else {
|
||||
attrmap[attr] = line
|
||||
}
|
||||
|
||||
isattr = !isattr
|
||||
}
|
||||
|
||||
ad.Attributes = attrmap
|
||||
|
||||
switch {
|
||||
case Exists(ad.Attributes, "Zustand"):
|
||||
ad.Condition = ad.Attributes["Zustand"]
|
||||
case Exists(ad.Attributes, "Farbe"):
|
||||
ad.Color = ad.Attributes["Farbe"]
|
||||
case Exists(ad.Attributes, "Art"):
|
||||
ad.Type = ad.Attributes["Type"]
|
||||
case Exists(ad.Attributes, "Material"):
|
||||
ad.Material = ad.Attributes["Material"]
|
||||
}
|
||||
}
|
||||
|
||||
16
config.go
16
config.go
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright © 2023-2024 Thomas von Dein
|
||||
Copyright © 2023-2025 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@@ -34,17 +34,25 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
VERSION string = "0.3.14"
|
||||
VERSION string = "0.3.17"
|
||||
Baseuri string = "https://www.kleinanzeigen.de"
|
||||
Listuri string = "/s-bestandsliste.html"
|
||||
Defaultdir string = "."
|
||||
|
||||
/*
|
||||
Also possible: loop through .Attributes:
|
||||
|
||||
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
|
||||
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\n" +
|
||||
"Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" +
|
||||
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
|
||||
|
||||
*/
|
||||
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
|
||||
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" +
|
||||
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
|
||||
|
||||
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
|
||||
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\n" +
|
||||
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" +
|
||||
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
|
||||
|
||||
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
||||
|
||||
42
example.conf
42
example.conf
@@ -12,20 +12,36 @@ user = 00000000
|
||||
loglevel = "verbose"
|
||||
|
||||
# directory where to store downloaded ads. kleingebaeck will try to
|
||||
# create it. must be a quoted string.
|
||||
# create it. must be a quoted string. You can also include a couple of
|
||||
# template variables, e.g:
|
||||
# outdir = "test-{{.Year}}-{{.Month}}-{{.Day}}"
|
||||
outdir = "test"
|
||||
|
||||
# template for stored adlistings. To enable it, remove the comment
|
||||
# chars up until the last #"""
|
||||
#template="""
|
||||
#Title: {{.Title}}
|
||||
#Price: {{.Price}}
|
||||
#Id: {{.Id}}
|
||||
#Category: {{.Category}}
|
||||
#Condition: {{.Condition}}
|
||||
#Type: {{.Type}}
|
||||
#Created: {{.Created}}
|
||||
# template for stored adlistings.
|
||||
template="""
|
||||
Title: {{.Title}}
|
||||
Price: {{.Price}}
|
||||
Id: {{.ID}}
|
||||
Category: {{.Category}}
|
||||
Condition: {{.Condition}}
|
||||
Type: {{.Type}}
|
||||
Created: {{.Created}}
|
||||
|
||||
#{{.Text}}
|
||||
# """
|
||||
{{.Text}}
|
||||
"""
|
||||
|
||||
# Ads may contain more attributes than just the Condition. To print
|
||||
# all attributes, loop over all of them:
|
||||
|
||||
template="""
|
||||
Title: {{.Title}}
|
||||
Price: {{.Price}}
|
||||
Id: {{.ID}}
|
||||
Category: {{.Category}}
|
||||
{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}
|
||||
{{ end }}
|
||||
Type: {{.Type}}
|
||||
Created: {{.Created}}
|
||||
|
||||
{{.Text}}
|
||||
"""
|
||||
|
||||
@@ -133,7 +133,7 @@
|
||||
.\" ========================================================================
|
||||
.\"
|
||||
.IX Title "KLEINGEBAECK 1"
|
||||
.TH KLEINGEBAECK 1 "2025-02-06" "1" "User Commands"
|
||||
.TH KLEINGEBAECK 1 "2025-02-10" "1" "User Commands"
|
||||
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
||||
.\" way too many mistakes in technical documents.
|
||||
.if n .ad l
|
||||
@@ -174,7 +174,7 @@ well. We use \s-1TOML\s0 as our configuration language. See
|
||||
.PP
|
||||
Format is pretty simple:
|
||||
.PP
|
||||
.Vb 12
|
||||
.Vb 11
|
||||
\& user = 1010101
|
||||
\& loglevel = verbose
|
||||
\& outdir = "test"
|
||||
@@ -185,7 +185,6 @@ Format is pretty simple:
|
||||
\& Id: {{.ID}}
|
||||
\& Category: {{.Category}}
|
||||
\& Condition: {{.Condition}}
|
||||
\& Type: {{.Type}}
|
||||
\& Created: {{.Created}}
|
||||
\&
|
||||
\& {{.Text}}
|
||||
|
||||
@@ -46,7 +46,6 @@ CONFIGURATION
|
||||
Id: {{.ID}}
|
||||
Category: {{.Category}}
|
||||
Condition: {{.Condition}}
|
||||
Type: {{.Type}}
|
||||
Created: {{.Created}}
|
||||
|
||||
{{.Text}}
|
||||
|
||||
@@ -46,7 +46,6 @@ Format is pretty simple:
|
||||
Id: {{.ID}}
|
||||
Category: {{.Category}}
|
||||
Condition: {{.Condition}}
|
||||
Type: {{.Type}}
|
||||
Created: {{.Created}}
|
||||
|
||||
{{.Text}}
|
||||
|
||||
20
main_test.go
20
main_test.go
@@ -93,6 +93,10 @@ const ADTPL string = `DOCTYPE html>
|
||||
<li class="addetailslist--detail">
|
||||
Zustand<span class="addetailslist--detail--value" >
|
||||
{{ .Condition }}</span>
|
||||
Farbe<span class="addetailslist--detail--value" >
|
||||
{{ .Color }}</span>
|
||||
Art<span class="addetailslist--detail--value" >
|
||||
{{ .Type }}</span>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
@@ -251,6 +255,8 @@ type AdConfig struct {
|
||||
Price string
|
||||
Category string
|
||||
Condition string
|
||||
Type string
|
||||
Color string
|
||||
Created string
|
||||
Text string
|
||||
Images []string // files in ./t/
|
||||
@@ -265,6 +271,8 @@ var adsrc = []AdConfig{
|
||||
Text: "Thing to sale",
|
||||
Slug: "first-ad",
|
||||
Condition: "Sehr Gut",
|
||||
Color: "Grün",
|
||||
Type: "Ball",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
@@ -275,6 +283,8 @@ var adsrc = []AdConfig{
|
||||
Text: "Thing to sale",
|
||||
Slug: "second-ad",
|
||||
Condition: "Gut",
|
||||
Color: "Lila",
|
||||
Type: "Schoki",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
@@ -286,6 +296,8 @@ var adsrc = []AdConfig{
|
||||
Text: "Thing to sale",
|
||||
Slug: "third-ad",
|
||||
Condition: "In Ordnung",
|
||||
Color: "Blau",
|
||||
Type: "Auto",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
@@ -297,6 +309,8 @@ var adsrc = []AdConfig{
|
||||
Text: "Thing to sale",
|
||||
Slug: "fourth-ad",
|
||||
Condition: "Neu",
|
||||
Color: "Rot",
|
||||
Type: "Spielzeut",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
@@ -308,6 +322,8 @@ var adsrc = []AdConfig{
|
||||
Text: "Thing to sale",
|
||||
Slug: "fifth-ad",
|
||||
Condition: "Sehr Gut",
|
||||
Color: "Braun",
|
||||
Type: "Parteibuch",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
@@ -319,6 +335,8 @@ var adsrc = []AdConfig{
|
||||
Text: "Thing to sale",
|
||||
Slug: "sixth-ad",
|
||||
Condition: "Sehr Gut",
|
||||
Color: "Silber",
|
||||
Type: "Ring",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||
},
|
||||
@@ -330,6 +348,8 @@ var adsrc = []AdConfig{
|
||||
Text: "Thing to sale",
|
||||
Slug: "seventh-ad",
|
||||
Condition: "Sehr Gut",
|
||||
Color: "Gelpb",
|
||||
Type: "Schmuck",
|
||||
Created: "Yesterday",
|
||||
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
|
||||
},
|
||||
|
||||
11
scrape.go
11
scrape.go
@@ -22,7 +22,6 @@ import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -125,15 +124,7 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
|
||||
return fmt.Errorf("could not extract ad data from page, got empty struct")
|
||||
}
|
||||
|
||||
for _, detail := range advertisement.Details {
|
||||
if slices.Contains(CONDITIONS, detail) {
|
||||
advertisement.Condition = detail
|
||||
} else {
|
||||
advertisement.Type = detail
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
advertisement.DecodeAttributes()
|
||||
advertisement.CalculateExpire()
|
||||
|
||||
// prepare ad dir name
|
||||
|
||||
6
store.go
6
store.go
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright © 2023-2024 Thomas von Dein
|
||||
Copyright © 2023-2025 Thomas von Dein
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
@@ -44,8 +44,8 @@ func OutDirName(conf *Config) (string, error) {
|
||||
now := time.Now()
|
||||
data := OutdirData{
|
||||
Year: now.Format("2006"),
|
||||
Month: now.Format("02"),
|
||||
Day: now.Format("01"),
|
||||
Month: now.Format("01"),
|
||||
Day: now.Format("02"),
|
||||
}
|
||||
|
||||
err = tmpl.Execute(&buf, data)
|
||||
|
||||
Reference in New Issue
Block a user