Compare commits

..

1 Commits

Author SHA1 Message Date
d5885854ec add color detail as well 2025-02-06 20:09:46 +01:00
9 changed files with 51 additions and 126 deletions

80
ad.go
View File

@@ -18,7 +18,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package main package main
import ( import (
"bufio"
"log/slog" "log/slog"
"strings" "strings"
"time" "time"
@@ -32,12 +31,10 @@ type Ad struct {
Title string `goquery:"h1"` Title string `goquery:"h1"`
Slug string Slug string
ID string ID string
Details string `goquery:".addetailslist--detail,text"` Details []string `goquery:".addetailslist--detail--value,text"`
Attributes map[string]string // processed afterwards Condition string // post processed from details
Condition string // post processed from details for backward compatibility Type string // post processed from details
Type string // post processed from details for backward compatibility Color string // post processed from details
Color string // post processed from details for backward compatibility
Material string // post processed from details for backward compatibility
Category string Category string
CategoryTree []string `goquery:".breadcrump-link,text"` CategoryTree []string `goquery:".breadcrump-link,text"`
Price string `goquery:"h2#viewad-price"` Price string `goquery:"h2#viewad-price"`
@@ -56,11 +53,19 @@ func (ad *Ad) LogValue() slog.Value {
slog.Int("imagecount", len(ad.Images)), slog.Int("imagecount", len(ad.Images)),
slog.Int("bodysize", len(ad.Text)), slog.Int("bodysize", len(ad.Text)),
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")), slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
slog.String("condition", ad.Condition),
slog.String("created", ad.Created), slog.String("created", ad.Created),
slog.String("expire", ad.Expire), slog.String("expire", ad.Expire),
) )
} }
// CONDITIONS is the static set of condition values available. It is
// used for post processing the scraped ad details: a detail that
// matches one of these strings exactly is the ad's condition.
var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"}

// COLORS is the static set of color values available. It is used for
// post processing the scraped ad details: a detail that matches one
// of these strings exactly is the ad's color. Matching is by exact
// string comparison, so every entry must be spelled exactly as the
// site delivers it.
var COLORS = []string{
	"Beige", "Blau", "Braun", "Bunt", "Burgunderrot",
	"Creme", "Gelb", "Gold", "Grau", "Grün", "Holz", "Khaki", "Lavendel",
	"Lila", "Orange", "Pink", "Print", "Rot", "Schwarz", "Silber",
	"Transparent", "Türkis", "Weiß", "Sonstige",
}
// check for completeness. I erected these fields to be mandatory // check for completeness. I erected these fields to be mandatory
// (though I really don't know if they really are). I consider images // (though I really don't know if they really are). I consider images
// and meta optional. So, if either of the checked fields here is // and meta optional. So, if either of the checked fields here is
@@ -85,64 +90,3 @@ func (ad *Ad) CalculateExpire() {
} }
} }
} }
/*
DecodeAttributes decodes ad attributes like color or condition. See
https://github.com/TLINDEN/kleingebaeck/issues/117
for more details. In short: the HTML delivered by
kleinanzeigen.de has no css attribute for the keys
so we cannot extract key=>value mappings of the
ad details but have to parse them manually.

The ad.Details member contains this after goq run:

	Art
	Weitere Kinderzimmermöbel
	Farbe
	Holz
	Zustand
	In Ordnung

Blank lines are skipped and surrounding whitespace is trimmed; the
remaining lines are read alternately as key and value. A trailing
key without a value is dropped.

We parse this into ad.Attributes and fill in some
static members (Condition, Color, Type, Material) for backward
compatibility reasons. Each static member is filled independently
of the others, and only if its attribute is present.
*/
func (ad *Ad) DecodeAttributes() {
	scanner := bufio.NewScanner(strings.NewReader(ad.Details))
	attrmap := map[string]string{}
	key := ""
	expectKey := true

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}

		if expectKey {
			key = line
		} else {
			attrmap[key] = line
		}

		expectKey = !expectKey
	}
	// scanner.Err() is deliberately not checked: this method has no
	// way to report an error, so whatever was decoded before a scan
	// failure is kept.

	ad.Attributes = attrmap

	// Independent checks instead of a switch: a switch would stop at
	// the first matching case and leave all other members empty.
	if val, ok := attrmap["Zustand"]; ok {
		ad.Condition = val
	}

	if val, ok := attrmap["Farbe"]; ok {
		ad.Color = val
	}

	// The site labels the type "Art"; read the value from that key.
	if val, ok := attrmap["Art"]; ok {
		ad.Type = val
	}

	if val, ok := attrmap["Material"]; ok {
		ad.Material = val
	}
}

View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2023-2025 Thomas von Dein Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -34,19 +34,11 @@ import (
) )
const ( const (
VERSION string = "0.3.17" VERSION string = "0.3.15"
Baseuri string = "https://www.kleinanzeigen.de" Baseuri string = "https://www.kleinanzeigen.de"
Listuri string = "/s-bestandsliste.html" Listuri string = "/s-bestandsliste.html"
Defaultdir string = "." Defaultdir string = "."
/*
Also possible: loop through .Attributes:
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
*/
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" + "Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"

View File

@@ -12,36 +12,20 @@ user = 00000000
loglevel = "verbose" loglevel = "verbose"
# directory where to store downloaded ads. kleingebaeck will try to # directory where to store downloaded ads. kleingebaeck will try to
# create it. must be a quoted string. You can also include a couple of # create it. must be a quoted string.
# template variables, e.g:
# outdir = "test-{{.Year}}-{{.Month}}-{{.Day}}"
outdir = "test" outdir = "test"
# template for stored adlistings. # template for stored adlistings. To enable it, remove the comment
template=""" # chars up until the last #"""
Title: {{.Title}} #template="""
Price: {{.Price}} #Title: {{.Title}}
Id: {{.Id}} #Price: {{.Price}}
Category: {{.Category}} #Id: {{.Id}}
Condition: {{.Condition}} #Category: {{.Category}}
Type: {{.Type}} #Condition: {{.Condition}}
Created: {{.Created}} #Type: {{.Type}}
#Created: {{.Created}}
{{.Text}} #{{.Text}}
""" # """
# Ads may contain more attributes than just the Condition. To print
# all attributes, loop over all of them:
template="""
Title: {{.Title}}
Price: {{.Price}}
Id: {{.Id}}
Category: {{.Category}}
{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}
{{ end }}
Type: {{.Type}}
Created: {{.Created}}
{{.Text}}
"""

View File

@@ -133,7 +133,7 @@
.\" ======================================================================== .\" ========================================================================
.\" .\"
.IX Title "KLEINGEBAECK 1" .IX Title "KLEINGEBAECK 1"
.TH KLEINGEBAECK 1 "2025-02-10" "1" "User Commands" .TH KLEINGEBAECK 1 "2025-02-06" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents. .\" way too many mistakes in technical documents.
.if n .ad l .if n .ad l
@@ -174,7 +174,7 @@ well. We use \s-1TOML\s0 as our configuration language. See
.PP .PP
Format is pretty simple: Format is pretty simple:
.PP .PP
.Vb 11 .Vb 10
\& user = 1010101 \& user = 1010101
\& loglevel = verbose \& loglevel = verbose
\& outdir = "test" \& outdir = "test"
@@ -185,6 +185,8 @@ Format is pretty simple:
\& Id: {{.ID}} \& Id: {{.ID}}
\& Category: {{.Category}} \& Category: {{.Category}}
\& Condition: {{.Condition}} \& Condition: {{.Condition}}
\& Type: {{.Type}}
\& Color: {{.Color}}
\& Created: {{.Created}} \& Created: {{.Created}}
\& \&
\& {{.Text}} \& {{.Text}}

View File

@@ -46,6 +46,8 @@ CONFIGURATION
Id: {{.ID}} Id: {{.ID}}
Category: {{.Category}} Category: {{.Category}}
Condition: {{.Condition}} Condition: {{.Condition}}
Type: {{.Type}}
Color: {{.Color}}
Created: {{.Created}} Created: {{.Created}}
{{.Text}} {{.Text}}

View File

@@ -46,6 +46,8 @@ Format is pretty simple:
Id: {{.ID}} Id: {{.ID}}
Category: {{.Category}} Category: {{.Category}}
Condition: {{.Condition}} Condition: {{.Condition}}
Type: {{.Type}}
Color: {{.Color}}
Created: {{.Created}} Created: {{.Created}}
{{.Text}} {{.Text}}

View File

@@ -283,8 +283,6 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "second-ad", Slug: "second-ad",
Condition: "Gut", Condition: "Gut",
Color: "Lila",
Type: "Schoki",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -296,8 +294,6 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "third-ad", Slug: "third-ad",
Condition: "In Ordnung", Condition: "In Ordnung",
Color: "Blau",
Type: "Auto",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -309,8 +305,6 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "fourth-ad", Slug: "fourth-ad",
Condition: "Neu", Condition: "Neu",
Color: "Rot",
Type: "Spielzeut",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -322,8 +316,6 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "fifth-ad", Slug: "fifth-ad",
Condition: "Sehr Gut", Condition: "Sehr Gut",
Color: "Braun",
Type: "Parteibuch",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -335,8 +327,6 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "sixth-ad", Slug: "sixth-ad",
Condition: "Sehr Gut", Condition: "Sehr Gut",
Color: "Silber",
Type: "Ring",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -348,8 +338,6 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "seventh-ad", Slug: "seventh-ad",
Condition: "Sehr Gut", Condition: "Sehr Gut",
Color: "Gelpb",
Type: "Schmuck",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"}, Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
}, },

View File

@@ -22,6 +22,7 @@ import (
"fmt" "fmt"
"log/slog" "log/slog"
"path/filepath" "path/filepath"
"slices"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@@ -124,7 +125,17 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
return fmt.Errorf("could not extract ad data from page, got empty struct") return fmt.Errorf("could not extract ad data from page, got empty struct")
} }
advertisement.DecodeAttributes() for _, detail := range advertisement.Details {
switch {
case slices.Contains(CONDITIONS, detail):
advertisement.Condition = detail
case slices.Contains(COLORS, detail):
advertisement.Color = detail
default:
advertisement.Type = detail
}
}
advertisement.CalculateExpire() advertisement.CalculateExpire()
// prepare ad dir name // prepare ad dir name

View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2023-2025 Thomas von Dein Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -44,8 +44,8 @@ func OutDirName(conf *Config) (string, error) {
now := time.Now() now := time.Now()
data := OutdirData{ data := OutdirData{
Year: now.Format("2006"), Year: now.Format("2006"),
Month: now.Format("01"), Month: now.Format("02"),
Day: now.Format("02"), Day: now.Format("01"),
} }
err = tmpl.Execute(&buf, data) err = tmpl.Execute(&buf, data)