diff --git a/ad.go b/ad.go index eb3d1e3..94fc38c 100644 --- a/ad.go +++ b/ad.go @@ -18,6 +18,7 @@ along with this program. If not, see . package main import ( + "bufio" "log/slog" "strings" "time" @@ -33,6 +34,10 @@ type Ad struct { ID string Details string `goquery:".addetailslist--detail,text"` Attributes map[string]string // processed afterwards + Condition string // post processed from details for backward compatibility + Type string // post processed from details for backward compatibility + Color string // post processed from details for backward compatibility + Material string // post processed from details for backward compatibility Category string CategoryTree []string `goquery:".breadcrump-link,text"` Price string `goquery:"h2#viewad-price"` @@ -80,3 +85,64 @@ func (ad *Ad) CalculateExpire() { } } } + +/* +Decode attributes like color or condition. See +https://github.com/TLINDEN/kleingebaeck/issues/117 +for more details. In short: the HTML delivered by +kleinanzeigen.de has no css attribute for the keys +so we cannot extract key=>value mappings of the +ad details but have to parse them manually. + +The ad.Details member contains this after goq run: + +Art + + Weitere Kinderzimmermöbel + + Farbe + Holz + + Zustand + In Ordnung + +We parse this into ad.Attributes and fill in some +static members for backward compatibility reasons. 
+*/ +func (ad *Ad) DecodeAttributes() { + rd := strings.NewReader(ad.Details) + scanner := bufio.NewScanner(rd) + + isattr := true + attr := "" + attrmap := map[string]string{} + + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + + if line == "" { + continue + } + + if isattr { + attr = line + } else { + attrmap[attr] = line + } + + isattr = !isattr + } + + ad.Attributes = attrmap + + switch { + case Exists(ad.Attributes, "Zustand"): + ad.Condition = ad.Attributes["Zustand"] + case Exists(ad.Attributes, "Farbe"): + ad.Color = ad.Attributes["Farbe"] + case Exists(ad.Attributes, "Art"): + ad.Type = ad.Attributes["Type"] + case Exists(ad.Attributes, "Material"): + ad.Material = ad.Attributes["Material"] + } +} diff --git a/config.go b/config.go index 2a04d8d..0b1bb84 100644 --- a/config.go +++ b/config.go @@ -34,17 +34,25 @@ import ( ) const ( - VERSION string = "0.3.15" + VERSION string = "0.3.16" Baseuri string = "https://www.kleinanzeigen.de" Listuri string = "/s-bestandsliste.html" Defaultdir string = "." 
+ /* + Also possible: loop through .Attributes: + + DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + + "Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" + + "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" + + */ DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + - "Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" + + "Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" + "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" + - "Category: {{.Category}}\r\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\r\n{{ end }}\r\n" + + "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" + "Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n" DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + diff --git a/example.conf b/example.conf index 450361f..d1433ac 100644 --- a/example.conf +++ b/example.conf @@ -15,17 +15,31 @@ loglevel = "verbose" # create it. must be a quoted string. outdir = "test" -# template for stored adlistings. To enable it, remove the comment -# chars up until the last #""" -#template=""" -#Title: {{.Title}} -#Price: {{.Price}} -#Id: {{.Id}} -#Category: {{.Category}} -#Condition: {{.Attributes.Condition}} -#Type: {{.Type}} -#Created: {{.Created}} +# template for stored adlistings. +template=""" +Title: {{.Title}} +Price: {{.Price}} +Id: {{.ID}} +Category: {{.Category}} +Condition: {{.Condition}} +Type: {{.Type}} +Created: {{.Created}} -#{{.Text}} -# """ +{{.Text}} +""" +# Ads may contain more attributes than just the Condition. 
To print +# all attributes, loop over all of them. A key may only be set once, so to use this variant remove the comment chars here and drop any other template: + +#template=""" +#Title: {{.Title}} +#Price: {{.Price}} +#Id: {{.ID}} +#Category: {{.Category}} +#{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }} +#{{ end }} +#Type: {{.Type}} +#Created: {{.Created}} + +#{{.Text}} +#""" diff --git a/kleingebaeck.1 b/kleingebaeck.1 index 96aaf50..0667b43 100644 --- a/kleingebaeck.1 +++ b/kleingebaeck.1 @@ -133,7 +133,7 @@ .\" ======================================================================== .\" .IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2025-02-06" "1" "User Commands" +.TH KLEINGEBAECK 1 "2025-02-10" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -174,7 +174,7 @@ well. We use \s-1TOML\s0 as our configuration language. See .PP Format is pretty simple: .PP -.Vb 10 +.Vb 11 \& user = 1010101 \& loglevel = verbose \& outdir = "test" @@ -185,8 +185,6 @@ Format is pretty simple: \& Id: {{.ID}} \& Category: {{.Category}} \& Condition: {{.Condition}} -\& Type: {{.Type}} -\& Color: {{.Color}} \& Created: {{.Created}} \& \& {{.Text}} diff --git a/kleingebaeck.go b/kleingebaeck.go index 705fb4a..d15e567 100644 --- a/kleingebaeck.go +++ b/kleingebaeck.go @@ -46,8 +46,6 @@ CONFIGURATION Id: {{.ID}} Category: {{.Category}} Condition: {{.Condition}} - Type: {{.Type}} - Color: {{.Color}} Created: {{.Created}} {{.Text}} diff --git a/kleingebaeck.pod b/kleingebaeck.pod index 06042c6..3b8dfd4 100644 --- a/kleingebaeck.pod +++ b/kleingebaeck.pod @@ -45,8 +45,7 @@ Format is pretty simple: Price: {{.Price}} Id: {{.ID}} Category: {{.Category}} - {{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }} - {{ end }} + Condition: {{.Condition}} Created: {{.Created}} {{.Text}} diff --git a/main_test.go b/main_test.go index 07b5115..37c9da6 100644 --- a/main_test.go +++ b/main_test.go @@ -283,6 +283,8 @@ var adsrc = []AdConfig{ Text: "Thing to sale", Slug: "second-ad", Condition: 
"Gut", + Color: "Lila", + Type: "Schoki", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -294,6 +296,8 @@ var adsrc = []AdConfig{ Text: "Thing to sale", Slug: "third-ad", Condition: "In Ordnung", + Color: "Blau", + Type: "Auto", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -305,6 +309,8 @@ var adsrc = []AdConfig{ Text: "Thing to sale", Slug: "fourth-ad", Condition: "Neu", + Color: "Rot", + Type: "Spielzeut", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -316,6 +322,8 @@ var adsrc = []AdConfig{ Text: "Thing to sale", Slug: "fifth-ad", Condition: "Sehr Gut", + Color: "Braun", + Type: "Parteibuch", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -327,6 +335,8 @@ var adsrc = []AdConfig{ Text: "Thing to sale", Slug: "sixth-ad", Condition: "Sehr Gut", + Color: "Silber", + Type: "Ring", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -338,6 +348,8 @@ var adsrc = []AdConfig{ Text: "Thing to sale", Slug: "seventh-ad", Condition: "Sehr Gut", + Color: "Gelpb", + Type: "Schmuck", Created: "Yesterday", Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"}, }, diff --git a/scrape.go b/scrape.go index 98fba0b..2b279e0 100644 --- a/scrape.go +++ b/scrape.go @@ -18,7 +18,6 @@ along with this program. If not, see . 
package main import ( - "bufio" "bytes" "fmt" "log/slog" @@ -125,8 +124,7 @@ func ScrapeAd(fetch *Fetcher, uri string) error { return fmt.Errorf("could not extract ad data from page, got empty struct") } - advertisement.Attributes = DecodeAttributes(advertisement.Details) - + advertisement.DecodeAttributes() advertisement.CalculateExpire() // prepare ad dir name @@ -158,35 +156,6 @@ func ScrapeAd(fetch *Fetcher, uri string) error { return ScrapeImages(fetch, advertisement, addir) } -func DecodeAttributes(attributes string) map[string]string { - rd := strings.NewReader(attributes) - scanner := bufio.NewScanner(rd) - - isattr := true - attr := "" - attrmap := map[string]string{} - - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - - if line == "" { - continue - } - - if isattr { - attr = line - } else { - attrmap[attr] = line - } - - isattr = !isattr - } - - fmt.Println(attributes) - - return attrmap -} - func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error { // fetch images img := 1