diff --git a/ad.go b/ad.go index c9da0a6..dc463f9 100644 --- a/ad.go +++ b/ad.go @@ -31,7 +31,8 @@ type Ad struct { Title string `goquery:"h1"` Slug string ID string - Condition string `goquery:".addetailslist--detail--value,text"` + Details []string `goquery:".addetailslist--detail--value,text"` + Condition string // post processed Category string CategoryTree []string `goquery:".breadcrump-link,text"` Price string `goquery:"h2#viewad-price"` @@ -56,6 +57,9 @@ func (ad *Ad) LogValue() slog.Value { ) } +// static set of conditions available, used for post processing details +var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"} + // check for completeness. I erected these fields to be mandatory // (though I really don't know if they really are). I consider images // and meta optional. So, if either of the checked fields here is diff --git a/config.go b/config.go index 791450b..91a3a24 100644 --- a/config.go +++ b/config.go @@ -34,7 +34,7 @@ import ( ) const ( - VERSION string = "0.3.13" + VERSION string = "0.3.14" Baseuri string = "https://www.kleinanzeigen.de" Listuri string = "/s-bestandsliste.html" Defaultdir string = "." diff --git a/go.mod b/go.mod index 6efed1b..b2f9da6 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( require ( github.com/PuerkitoBio/goquery v1.5.1 // indirect + github.com/alecthomas/repr v0.4.0 // indirect github.com/andybalholm/cascadia v1.1.0 // indirect github.com/fatih/color v1.16.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect diff --git a/go.sum b/go.sum index b070bed..cc823ab 100644 --- a/go.sum +++ b/go.sum @@ -3,6 +3,8 @@ astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno= github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= +github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc= +github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= diff --git a/main_test.go b/main_test.go index 75db981..1a0fd7b 100644 --- a/main_test.go +++ b/main_test.go @@ -256,6 +256,7 @@ type AdConfig struct { Images []string // files in ./t/ } +// used to generate ad listings returned by httpmock using templates var adsrc = []AdConfig{ { Title: "First Ad", @@ -263,7 +264,7 @@ var adsrc = []AdConfig{ Category: "Klimbim", Text: "Thing to sale", Slug: "first-ad", - Condition: "works", + Condition: "Sehr Gut", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -273,7 +274,7 @@ var adsrc = []AdConfig{ Category: "Kram", Text: "Thing to sale", Slug: "second-ad", - Condition: "works", + Condition: "Gut", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -284,7 +285,7 @@ var adsrc = []AdConfig{ Category: "Kuddelmuddel", Text: "Thing to sale", Slug: "third-ad", - Condition: "works", + Condition: "In Ordnung", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -295,7 +296,7 @@ var adsrc = []AdConfig{ Category: "Krempel", Text: "Thing to sale", Slug: "fourth-ad", - Condition: "works", + Condition: "Neu", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -306,7 +307,7 @@ var adsrc = []AdConfig{ Category: "Kladderadatsch", Text: "Thing to sale", Slug: "fifth-ad", - Condition: "works", + Condition: "Sehr Gut", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -317,7 +318,7 @@ var adsrc = []AdConfig{ Category: "Klunker", Text: "Thing to sale", Slug: "sixth-ad", - Condition: "works", + Condition: "Sehr Gut", Created: "Yesterday", Images: []string{"t/1.jpg", "t/2.jpg"}, }, @@ -328,7 +329,7 @@ var adsrc = []AdConfig{ Category: "Klunker", Text: "Thing to sale", Slug: "seventh-ad", - Condition: "works", + Condition: "Sehr Gut", Created: "Yesterday", Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"}, }, diff --git a/scrape.go b/scrape.go index b6a454a..a37c6b4 100644 --- a/scrape.go +++ b/scrape.go @@ -22,6 +22,7 @@ import ( "fmt" "log/slog" "path/filepath" + "slices" "strconv" "strings" "time" @@ -124,6 +125,13 @@ func ScrapeAd(fetch *Fetcher, uri string) error { return fmt.Errorf("could not extract ad data from page, got empty struct") } + for _, detail := range advertisement.Details { + if slices.Contains(CONDITIONS, detail) { + advertisement.Condition = detail + } + + } + advertisement.CalculateExpire() // prepare ad dir name