Compare commits

..

2 Commits

Author SHA1 Message Date
T.v.Dein
09948a6b39 add color detail as well (#119)
Co-authored-by: Thomas von Dein <tom@vondein.org>
2025-02-06 20:13:08 +01:00
T.v.Dein
bc01391872 Fix ad condition parsing (#118)
* fix #117: use details slice and pre-set to properly extract condition
* also added the type part of the detail content (original de: "Art")

---------

Co-authored-by: Thomas von Dein <tom@vondein.org>
2025-02-06 13:48:20 +01:00
8 changed files with 26 additions and 7 deletions

View File

@@ -204,6 +204,7 @@ Price: 99 € VB
Id: 1919191919 Id: 1919191919
Category: Sachbücher Category: Sachbücher
Condition: Sehr Gut Condition: Sehr Gut
Type: Buch
Created: 10.12.2023 Created: 10.12.2023
This is the description text. This is the description text.

5
ad.go
View File

@@ -34,6 +34,7 @@ type Ad struct {
Details []string `goquery:".addetailslist--detail--value,text"` Details []string `goquery:".addetailslist--detail--value,text"`
Condition string // post processed from details Condition string // post processed from details
Type string // post processed from details Type string // post processed from details
Color string // post processed from details
Category string Category string
CategoryTree []string `goquery:".breadcrump-link,text"` CategoryTree []string `goquery:".breadcrump-link,text"`
Price string `goquery:"h2#viewad-price"` Price string `goquery:"h2#viewad-price"`
@@ -60,6 +61,10 @@ func (ad *Ad) LogValue() slog.Value {
// static set of conditions available, used for post processing details // static set of conditions available, used for post processing details
var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"} var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"}
// COLORS is the static set of color names an ad detail may carry. It
// is used for post processing details: a detail value that exactly
// matches one of these entries is stored as the ad's color.
// The spelling must match the detail text exactly, since the lookup
// is a plain string comparison.
var COLORS = []string{"Beige", "Blau", "Braun", "Bunt", "Burgunderrot",
	"Creme", "Gelb", "Gold", "Grau", "Grün", "Holz", "Khaki", "Lavendel",
	"Lila", "Orange", "Pink", "Print", "Rot", "Schwarz", "Silber",
	"Transparent", "Türkis", "Weiß", "Sonstige"}
// check for completeness. I erected these fields to be mandatory // check for completeness. I erected these fields to be mandatory
// (though I really don't know if they really are). I consider images // (though I really don't know if they really are). I consider images

View File

@@ -34,17 +34,17 @@ import (
) )
const ( const (
VERSION string = "0.3.14" VERSION string = "0.3.15"
Baseuri string = "https://www.kleinanzeigen.de" Baseuri string = "https://www.kleinanzeigen.de"
Listuri string = "/s-bestandsliste.html" Listuri string = "/s-bestandsliste.html"
Defaultdir string = "." Defaultdir string = "."
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\n" + "Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" + DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\n" + "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" +
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n" "Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +

View File

@@ -174,7 +174,7 @@ well. We use \s-1TOML\s0 as our configuration language. See
.PP .PP
Format is pretty simple: Format is pretty simple:
.PP .PP
.Vb 12 .Vb 10
\& user = 1010101 \& user = 1010101
\& loglevel = verbose \& loglevel = verbose
\& outdir = "test" \& outdir = "test"
@@ -186,6 +186,7 @@ Format is pretty simple:
\& Category: {{.Category}} \& Category: {{.Category}}
\& Condition: {{.Condition}} \& Condition: {{.Condition}}
\& Type: {{.Type}} \& Type: {{.Type}}
\& Color: {{.Color}}
\& Created: {{.Created}} \& Created: {{.Created}}
\& \&
\& {{.Text}} \& {{.Text}}

View File

@@ -47,6 +47,7 @@ CONFIGURATION
Category: {{.Category}} Category: {{.Category}}
Condition: {{.Condition}} Condition: {{.Condition}}
Type: {{.Type}} Type: {{.Type}}
Color: {{.Color}}
Created: {{.Created}} Created: {{.Created}}
{{.Text}} {{.Text}}

View File

@@ -47,6 +47,7 @@ Format is pretty simple:
Category: {{.Category}} Category: {{.Category}}
Condition: {{.Condition}} Condition: {{.Condition}}
Type: {{.Type}} Type: {{.Type}}
Color: {{.Color}}
Created: {{.Created}} Created: {{.Created}}
{{.Text}} {{.Text}}

View File

@@ -93,6 +93,10 @@ const ADTPL string = `DOCTYPE html>
<li class="addetailslist--detail"> <li class="addetailslist--detail">
Zustand<span class="addetailslist--detail--value" > Zustand<span class="addetailslist--detail--value" >
{{ .Condition }}</span> {{ .Condition }}</span>
Farbe<span class="addetailslist--detail--value" >
{{ .Color }}</span>
Art<span class="addetailslist--detail--value" >
{{ .Type }}</span>
</li> </li>
</ul> </ul>
</div> </div>
@@ -251,6 +255,8 @@ type AdConfig struct {
Price string Price string
Category string Category string
Condition string Condition string
Type string
Color string
Created string Created string
Text string Text string
Images []string // files in ./t/ Images []string // files in ./t/
@@ -265,6 +271,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "first-ad", Slug: "first-ad",
Condition: "Sehr Gut", Condition: "Sehr Gut",
Color: "Grün",
Type: "Ball",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },

View File

@@ -126,12 +126,14 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
} }
for _, detail := range advertisement.Details { for _, detail := range advertisement.Details {
if slices.Contains(CONDITIONS, detail) { switch {
case slices.Contains(CONDITIONS, detail):
advertisement.Condition = detail advertisement.Condition = detail
} else { case slices.Contains(COLORS, detail):
advertisement.Color = detail
default:
advertisement.Type = detail advertisement.Type = detail
} }
} }
advertisement.CalculateExpire() advertisement.CalculateExpire()