From 8771ec110857610d0d992a9ada746d985141332e Mon Sep 17 00:00:00 2001
From: "T.v.Dein"
Date: Fri, 19 Jan 2024 14:41:47 +0100
Subject: [PATCH] added support to calculate and store the ad expire date (#43)

---
 ad.go     | 13 +++++++++++++
 config.go |  8 +++++---
 scrape.go |  2 ++
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/ad.go b/ad.go
index c0889ae..633af3d 100644
--- a/ad.go
+++ b/ad.go
@@ -20,6 +20,7 @@ package main
 import (
 	"log/slog"
 	"strings"
+	"time"
 )
 
 type Index struct {
@@ -37,6 +38,7 @@ type Ad struct {
 	Created string `goquery:"#viewad-extra-info,text"`
 	Text string `goquery:"p#viewad-description-text,html"`
 	Images []string `goquery:".galleryimage-element img,[src]"`
+	Expire string
 }
 
 // Used by slog to pretty print an ad
@@ -49,6 +51,8 @@ func (ad *Ad) LogValue() slog.Value {
 		slog.Int("bodysize", len(ad.Text)),
 		slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
 		slog.String("condition", ad.Condition),
+		slog.String("created", ad.Created),
+		slog.String("expire", ad.Expire),
 	)
 }
 
@@ -67,3 +71,12 @@ func (ad *Ad) Incomplete() bool {
 
 	return false
 }
+
+func (ad *Ad) CalculateExpire() {
+	if len(ad.Created) > 0 {
+		ts, err := time.Parse("02.01.2006", ad.Created)
+		if err == nil {
+			ad.Expire = ts.AddDate(0, 2, 1).Format("02.01.2006")
+		}
+	}
+}
diff --git a/config.go b/config.go
index 81882ff..8227c9f 100644
--- a/config.go
+++ b/config.go
@@ -35,14 +35,16 @@ import (
 )
 
 const (
-	VERSION string = "0.1.3"
+	VERSION string = "0.2.0"
 	Baseuri string = "https://www.kleinanzeigen.de"
 	Listuri string = "/s-bestandsliste.html"
 	Defaultdir string = "."
 	DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\n" +
-		"Category: {{.Category}}\nCondition: {{.Condition}}\nCreated: {{.Created}}\n\n{{.Text}}\n"
+		"Category: {{.Category}}\nCondition: {{.Condition}}\n" +
+		"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
 	DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.Id}}\r\n" +
-		"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nCreated: {{.Created}}\r\n\r\n{{.Text}}\r\n"
+		"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\n" +
+		"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
 	Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
 		"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
 	DefaultAdNameTemplate string = "{{.Slug}}"
diff --git a/scrape.go b/scrape.go
index 3766a94..71a6e2e 100644
--- a/scrape.go
+++ b/scrape.go
@@ -117,6 +117,8 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
 		return errors.New("could not extract ad data from page, got empty struct")
 	}
 
+	ad.CalculateExpire()
+
 	slog.Debug("extracted ad listing", "ad", ad)
 
 	// write listing