From 602085d15ec9f4619189dcbf9db0ff927dbf9228 Mon Sep 17 00:00:00 2001 From: Thomas von Dein Date: Mon, 10 Feb 2025 16:13:21 +0100 Subject: [PATCH] attempt to fix #117. code works, tests are still failing --- ad.go | 14 ++------------ config.go | 4 ++-- example.conf | 2 +- kleingebaeck.pod | 5 ++--- scrape.go | 42 +++++++++++++++++++++++++++++++----------- 5 files changed, 38 insertions(+), 29 deletions(-) diff --git a/ad.go b/ad.go index ac479ba..eb3d1e3 100644 --- a/ad.go +++ b/ad.go @@ -31,10 +31,8 @@ type Ad struct { Title string `goquery:"h1"` Slug string ID string - Details []string `goquery:".addetailslist--detail--value,text"` - Condition string // post processed from details - Type string // post processed from details - Color string // post processed from details + Details string `goquery:".addetailslist--detail,text"` + Attributes map[string]string // processed afterwards Category string CategoryTree []string `goquery:".breadcrump-link,text"` Price string `goquery:"h2#viewad-price"` @@ -53,19 +51,11 @@ func (ad *Ad) LogValue() slog.Value { slog.Int("imagecount", len(ad.Images)), slog.Int("bodysize", len(ad.Text)), slog.String("categorytree", strings.Join(ad.CategoryTree, "+")), - slog.String("condition", ad.Condition), slog.String("created", ad.Created), slog.String("expire", ad.Expire), ) } -// static set of conditions available, used for post processing details -var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"} -var COLORS = []string{"Beige", "Blau", "Braun", "Bunt", "Burgunderrot", - "Creme", "Gelb", "Gold", "Grau", "Grün", "Holz", "Khaki", "Lavelndel", - "Lila", "Orange", "Pink", "Print", "Rot", "Schwarz", "Silber", - "Transparent", "Türkis", "Weiß", "Sonstige"} - // check for completeness. I erected these fields to be mandatory // (though I really don't know if they really are). I consider images // and meta optional. So, if either of the checked fields here is diff --git a/config.go b/config.go index 8f6613d..2a04d8d 100644 --- a/config.go +++ b/config.go @@ -40,11 +40,11 @@ const ( Defaultdir string = "." DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + - "Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" + + "Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" + "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" + - "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" + + "Category: {{.Category}}\r\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\r\n{{ end }}\r\n" + "Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n" DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + diff --git a/example.conf b/example.conf index 9b94fe2..450361f 100644 --- a/example.conf +++ b/example.conf @@ -22,7 +22,7 @@ outdir = "test" #Price: {{.Price}} #Id: {{.Id}} #Category: {{.Category}} -#Condition: {{.Condition}} +#Condition: {{.Attributes.Condition}} #Type: {{.Type}} #Created: {{.Created}} diff --git a/kleingebaeck.pod b/kleingebaeck.pod index 7737a30..06042c6 100644 --- a/kleingebaeck.pod +++ b/kleingebaeck.pod @@ -45,9 +45,8 @@ Format is pretty simple: Price: {{.Price}} Id: {{.ID}} Category: {{.Category}} - Condition: {{.Condition}} - Type: {{.Type}} - Color: {{.Color}} + {{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }} + {{ end }} Created: {{.Created}} {{.Text}} diff --git a/scrape.go b/scrape.go index 2774125..98fba0b 100644 --- a/scrape.go +++ b/scrape.go @@ -18,11 +18,11 @@ along with this program. If not, see . package main import ( + "bufio" "bytes" "fmt" "log/slog" "path/filepath" - "slices" "strconv" "strings" "time" @@ -125,16 +125,7 @@ func ScrapeAd(fetch *Fetcher, uri string) error { return fmt.Errorf("could not extract ad data from page, got empty struct") } - for _, detail := range advertisement.Details { - switch { - case slices.Contains(CONDITIONS, detail): - advertisement.Condition = detail - case slices.Contains(COLORS, detail): - advertisement.Color = detail - default: - advertisement.Type = detail - } - } + advertisement.Attributes = DecodeAttributes(advertisement.Details) advertisement.CalculateExpire() @@ -167,6 +158,35 @@ func ScrapeAd(fetch *Fetcher, uri string) error { return ScrapeImages(fetch, advertisement, addir) } +func DecodeAttributes(attributes string) map[string]string { + rd := strings.NewReader(attributes) + scanner := bufio.NewScanner(rd) + + isattr := true + attr := "" + attrmap := map[string]string{} + + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + + if line == "" { + continue + } + + if isattr { + attr = line + } else { + attrmap[attr] = line + } + + isattr = !isattr + } + + fmt.Println(attributes) + + return attrmap +} + func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error { // fetch images img := 1