diff --git a/ad.go b/ad.go
index eb3d1e3..94fc38c 100644
--- a/ad.go
+++ b/ad.go
@@ -18,6 +18,7 @@ along with this program. If not, see .
package main
import (
+ "bufio"
"log/slog"
"strings"
"time"
@@ -33,6 +34,10 @@ type Ad struct {
ID string
Details string `goquery:".addetailslist--detail,text"`
Attributes map[string]string // processed afterwards
+ Condition string // post processed from details for backward compatibility
+ Type string // post processed from details for backward compatibility
+ Color string // post processed from details for backward compatibility
+ Material string // post processed from details for backward compatibility
Category string
CategoryTree []string `goquery:".breadcrump-link,text"`
Price string `goquery:"h2#viewad-price"`
@@ -80,3 +85,64 @@ func (ad *Ad) CalculateExpire() {
}
}
}
+
+/*
+DecodeAttributes decodes attributes like color or condition. See
+https://github.com/TLINDEN/kleingebaeck/issues/117
+for more details. In short: the HTML delivered by
+kleinanzeigen.de has no css attribute for the keys
+so we cannot extract key=>value mappings of the
+ad details but have to parse them manually.
+
+The ad.Details member contains this after goq run:
+
+Art
+
+	Weitere Kinderzimmermöbel
+
+	Farbe
+	Holz
+
+	Zustand
+	In Ordnung
+
+Non-empty lines alternate between key and value. We parse them into
+ad.Attributes and fill in some static members for backward compatibility.
+*/
+func (ad *Ad) DecodeAttributes() {
+	scanner := bufio.NewScanner(strings.NewReader(ad.Details))
+
+	isattr := true
+	attr := ""
+	attrmap := map[string]string{}
+
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+
+		if line == "" {
+			continue
+		}
+
+		if isattr {
+			attr = line
+		} else {
+			attrmap[attr] = line
+		}
+
+		isattr = !isattr
+	}
+
+	if err := scanner.Err(); err != nil {
+		slog.Debug("could not scan all ad details", "error", err)
+	}
+
+	ad.Attributes = attrmap
+
+	// Fill every static member on its own; a switch would stop after
+	// the first matching case. A missing key yields "", so a member
+	// is empty when the ad lacks that attribute.
+	ad.Condition = attrmap["Zustand"]
+	ad.Color = attrmap["Farbe"]
+	ad.Type = attrmap["Art"]
+	ad.Material = attrmap["Material"]
+}
diff --git a/config.go b/config.go
index 2a04d8d..0b1bb84 100644
--- a/config.go
+++ b/config.go
@@ -34,17 +34,25 @@ import (
)
const (
- VERSION string = "0.3.15"
+ VERSION string = "0.3.16"
Baseuri string = "https://www.kleinanzeigen.de"
Listuri string = "/s-bestandsliste.html"
Defaultdir string = "."
+ /*
+ Also possible: loop through .Attributes:
+
+ DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
+ "Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" +
+ "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
+
+ */
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
- "Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" +
+ "Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
- "Category: {{.Category}}\r\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\r\n{{ end }}\r\n" +
+ "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" +
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
diff --git a/example.conf b/example.conf
index 450361f..d1433ac 100644
--- a/example.conf
+++ b/example.conf
@@ -15,17 +15,31 @@ loglevel = "verbose"
# create it. must be a quoted string.
outdir = "test"
-# template for stored adlistings. To enable it, remove the comment
-# chars up until the last #"""
-#template="""
-#Title: {{.Title}}
-#Price: {{.Price}}
-#Id: {{.Id}}
-#Category: {{.Category}}
-#Condition: {{.Attributes.Condition}}
-#Type: {{.Type}}
-#Created: {{.Created}}
+# template for stored adlistings.
+template="""
+Title: {{.Title}}
+Price: {{.Price}}
+Id: {{.ID}}
+Category: {{.Category}}
+Condition: {{.Condition}}
+Type: {{.Type}}
+Created: {{.Created}}
-#{{.Text}}
-# """
+{{.Text}}
+"""
+# Ads may contain more attributes than just the Condition. To print
+# all of them, loop over .Attributes. A key can only be set once, so
+# to use this variant, comment out the template above and enable this:
+#template="""
+#Title: {{.Title}}
+#Price: {{.Price}}
+#Id: {{.ID}}
+#Category: {{.Category}}
+#{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}
+#{{ end }}
+#Type: {{.Type}}
+#Created: {{.Created}}
+#
+#{{.Text}}
+#"""
diff --git a/kleingebaeck.1 b/kleingebaeck.1
index 96aaf50..0667b43 100644
--- a/kleingebaeck.1
+++ b/kleingebaeck.1
@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "KLEINGEBAECK 1"
-.TH KLEINGEBAECK 1 "2025-02-06" "1" "User Commands"
+.TH KLEINGEBAECK 1 "2025-02-10" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@@ -174,7 +174,7 @@ well. We use \s-1TOML\s0 as our configuration language. See
.PP
Format is pretty simple:
.PP
-.Vb 10
+.Vb 11
\& user = 1010101
\& loglevel = verbose
\& outdir = "test"
@@ -185,8 +185,6 @@ Format is pretty simple:
\& Id: {{.ID}}
\& Category: {{.Category}}
\& Condition: {{.Condition}}
-\& Type: {{.Type}}
-\& Color: {{.Color}}
\& Created: {{.Created}}
\&
\& {{.Text}}
diff --git a/kleingebaeck.go b/kleingebaeck.go
index 705fb4a..d15e567 100644
--- a/kleingebaeck.go
+++ b/kleingebaeck.go
@@ -46,8 +46,6 @@ CONFIGURATION
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
- Type: {{.Type}}
- Color: {{.Color}}
Created: {{.Created}}
{{.Text}}
diff --git a/kleingebaeck.pod b/kleingebaeck.pod
index 06042c6..3b8dfd4 100644
--- a/kleingebaeck.pod
+++ b/kleingebaeck.pod
@@ -45,8 +45,7 @@ Format is pretty simple:
Price: {{.Price}}
Id: {{.ID}}
Category: {{.Category}}
- {{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}
- {{ end }}
+ Condition: {{.Condition}}
Created: {{.Created}}
{{.Text}}
diff --git a/main_test.go b/main_test.go
index 07b5115..37c9da6 100644
--- a/main_test.go
+++ b/main_test.go
@@ -283,6 +283,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale",
Slug: "second-ad",
Condition: "Gut",
+ Color: "Lila",
+ Type: "Schoki",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -294,6 +296,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale",
Slug: "third-ad",
Condition: "In Ordnung",
+ Color: "Blau",
+ Type: "Auto",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -305,6 +309,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale",
Slug: "fourth-ad",
Condition: "Neu",
+ Color: "Rot",
+ Type: "Spielzeut",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -316,6 +322,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale",
Slug: "fifth-ad",
Condition: "Sehr Gut",
+ Color: "Braun",
+ Type: "Parteibuch",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -327,6 +335,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale",
Slug: "sixth-ad",
Condition: "Sehr Gut",
+ Color: "Silber",
+ Type: "Ring",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -338,6 +348,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale",
Slug: "seventh-ad",
Condition: "Sehr Gut",
+ Color: "Gelpb",
+ Type: "Schmuck",
Created: "Yesterday",
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
},
diff --git a/scrape.go b/scrape.go
index 98fba0b..2b279e0 100644
--- a/scrape.go
+++ b/scrape.go
@@ -18,7 +18,6 @@ along with this program. If not, see .
package main
import (
- "bufio"
"bytes"
"fmt"
"log/slog"
@@ -125,8 +124,7 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
return fmt.Errorf("could not extract ad data from page, got empty struct")
}
- advertisement.Attributes = DecodeAttributes(advertisement.Details)
-
+ advertisement.DecodeAttributes()
advertisement.CalculateExpire()
// prepare ad dir name
@@ -158,35 +156,6 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
return ScrapeImages(fetch, advertisement, addir)
}
-func DecodeAttributes(attributes string) map[string]string {
- rd := strings.NewReader(attributes)
- scanner := bufio.NewScanner(rd)
-
- isattr := true
- attr := ""
- attrmap := map[string]string{}
-
- for scanner.Scan() {
- line := strings.TrimSpace(scanner.Text())
-
- if line == "" {
- continue
- }
-
- if isattr {
- attr = line
- } else {
- attrmap[attr] = line
- }
-
- isattr = !isattr
- }
-
- fmt.Println(attributes)
-
- return attrmap
-}
-
func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error {
// fetch images
img := 1