diff --git a/ad.go b/ad.go index c9da0a6..22ccdc0 100644 --- a/ad.go +++ b/ad.go @@ -31,7 +31,9 @@ type Ad struct { Title string `goquery:"h1"` Slug string ID string - Condition string `goquery:".addetailslist--detail--value,text"` + Details []string `goquery:".addetailslist--detail--value,text"` + Condition string // post processed from details + Type string // post processed from details Category string CategoryTree []string `goquery:".breadcrump-link,text"` Price string `goquery:"h2#viewad-price"` @@ -56,6 +58,9 @@ func (ad *Ad) LogValue() slog.Value { ) } +// static set of conditions available, used for post processing details +var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"} + // check for completeness. I erected these fields to be mandatory // (though I really don't know if they really are). I consider images // and meta optional. So, if either of the checked fields here is diff --git a/config.go b/config.go index 791450b..6f1bae9 100644 --- a/config.go +++ b/config.go @@ -34,17 +34,17 @@ import ( ) const ( - VERSION string = "0.3.13" + VERSION string = "0.3.14" Baseuri string = "https://www.kleinanzeigen.de" Listuri string = "/s-bestandsliste.html" Defaultdir string = "." DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + - "Category: {{.Category}}\nCondition: {{.Condition}}\n" + + "Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\n" + "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" + - "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\n" + + "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\n" + "Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n" DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + diff --git a/example.conf b/example.conf index 12ba22e..9b94fe2 100644 --- a/example.conf +++ b/example.conf @@ -23,6 +23,7 @@ outdir = "test" #Id: {{.Id}} #Category: {{.Category}} #Condition: {{.Condition}} +#Type: {{.Type}} #Created: {{.Created}} #{{.Text}} diff --git a/go.mod b/go.mod index 6efed1b..b2f9da6 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( require ( github.com/PuerkitoBio/goquery v1.5.1 // indirect + github.com/alecthomas/repr v0.4.0 // indirect github.com/andybalholm/cascadia v1.1.0 // indirect github.com/fatih/color v1.16.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect diff --git a/go.sum b/go.sum index b070bed..cc823ab 100644 --- a/go.sum +++ b/go.sum @@ -3,6 +3,8 @@ astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno= github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= +github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= +github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= diff --git a/kleingebaeck.1 b/kleingebaeck.1 index 5cc6d1d..ab38d62 100644 --- a/kleingebaeck.1 +++ b/kleingebaeck.1 @@ -133,7 +133,7 @@ .\" ======================================================================== .\" .IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2024-02-10" "1" "User Commands" +.TH KLEINGEBAECK 1 "2025-02-06" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -174,7 +174,7 @@ well. We use \s-1TOML\s0 as our configuration language. See .PP Format is pretty simple: .PP -.Vb 11 +.Vb 12 \& user = 1010101 \& loglevel = verbose \& outdir = "test" @@ -185,6 +185,7 @@ Format is pretty simple: \& Id: {{.ID}} \& Category: {{.Category}} \& Condition: {{.Condition}} +\& Type: {{.Type}} \& Created: {{.Created}} \& \& {{.Text}} @@ -267,12 +268,13 @@ variables as the ad name template above. .PP This is the default template: .PP -.Vb 7 +.Vb 8 \& Title: {{.Title}} \& Price: {{.Price}} \& Id: {{.ID}} \& Category: {{.Category}} \& Condition: {{.Condition}} +\& Type: {{.Type}} \& Created: {{.Created}} \& Expire: {{.Expire}} \& diff --git a/kleingebaeck.go b/kleingebaeck.go index 65a9f0e..4fbf9fe 100644 --- a/kleingebaeck.go +++ b/kleingebaeck.go @@ -46,6 +46,7 @@ CONFIGURATION Id: {{.ID}} Category: {{.Category}} Condition: {{.Condition}} + Type: {{.Type}} Created: {{.Created}} {{.Text}} @@ -111,6 +112,7 @@ TEMPLATES Id: {{.ID}} Category: {{.Category}} Condition: {{.Condition}} + Type: {{.Type}} Created: {{.Created}} Expire: {{.Expire}} diff --git a/kleingebaeck.pod b/kleingebaeck.pod index bff9e4d..522c84f 100644 --- a/kleingebaeck.pod +++ b/kleingebaeck.pod @@ -46,6 +46,7 @@ Format is pretty simple: Id: {{.ID}} Category: {{.Category}} Condition: {{.Condition}} + Type: {{.Type}} Created: {{.Created}} {{.Text}} @@ -131,6 +132,7 @@ This is the default template: Id: {{.ID}} Category: {{.Category}} Condition: {{.Condition}} + Type: {{.Type}} Created: {{.Created}} Expire: {{.Expire}} diff --git a/main_test.go b/main_test.go index 75db981..1a0fd7b 100644 --- a/main_test.go +++ b/main_test.go @@ -256,6 +256,7 @@ type AdConfig struct { Images []string // files in ./t/ } +// used to generate ad listings returned by httpmock using templates var adsrc = []AdConfig{ { Title: "First Ad", @@ -263,7 +264,7 @@ var adsrc = []AdConfig{ Category: "Klimbim", Text: "Thing to sale", Slug: "first-ad", - Condition: "works", + Condition: "Sehr Gut", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -273,7 +274,7 @@ var adsrc = []AdConfig{ Category: "Kram", Text: "Thing to sale", Slug: "second-ad", - Condition: "works", + Condition: "Gut", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -284,7 +285,7 @@ var adsrc = []AdConfig{ Category: "Kuddelmuddel", Text: "Thing to sale", Slug: "third-ad", - Condition: "works", + Condition: "In Ordnung", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -295,7 +296,7 @@ var adsrc = []AdConfig{ Category: "Krempel", Text: "Thing to sale", Slug: "fourth-ad", - Condition: "works", + Condition: "Neu", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -306,7 +307,7 @@ var adsrc = []AdConfig{ Category: "Kladderadatsch", Text: "Thing to sale", Slug: "fifth-ad", - Condition: "works", + Condition: "Sehr Gut", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -317,7 +318,7 @@ var adsrc = []AdConfig{ Category: "Klunker", Text: "Thing to sale", Slug: "sixth-ad", - Condition: "works", + Condition: "Sehr Gut", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -328,7 +329,7 @@ var adsrc = []AdConfig{ Category: "Klunker", Text: "Thing to sale", Slug: "seventh-ad", - Condition: "works", + Condition: "Sehr Gut", Created: "Yesterday", Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"}, }, diff --git a/scrape.go b/scrape.go index b6a454a..6289067 100644 --- a/scrape.go +++ b/scrape.go @@ -22,6 +22,7 @@ import ( "fmt" "log/slog" "path/filepath" + "slices" "strconv" "strings" "time" @@ -124,6 +125,15 @@ func ScrapeAd(fetch *Fetcher, uri string) error { return fmt.Errorf("could not extract ad data from page, got empty struct") } + for _, detail := range advertisement.Details { + if slices.Contains(CONDITIONS, detail) { + advertisement.Condition = detail + } else { + advertisement.Type = detail + } + + } + advertisement.CalculateExpire() // prepare ad dir name