fix #117: use details slice and pre-set to properly extract condition

This commit is contained in:
2025-02-06 10:29:24 +01:00
parent cd3d00adbe
commit 22da878f27
6 changed files with 25 additions and 9 deletions

6
ad.go
View File

@@ -31,7 +31,8 @@ type Ad struct {
Title string `goquery:"h1"`
Slug string
ID string
Condition string `goquery:".addetailslist--detail--value,text"`
Details []string `goquery:".addetailslist--detail--value,text"`
Condition string // post processed
Category string
CategoryTree []string `goquery:".breadcrump-link,text"`
Price string `goquery:"h2#viewad-price"`
@@ -56,6 +57,9 @@ func (ad *Ad) LogValue() slog.Value {
)
}
// CONDITIONS is the static set of known ad condition values. It is used
// during post processing to pick the condition out of the scraped ad
// details.
var CONDITIONS = []string{
	"Neu",
	"Gut",
	"Sehr Gut",
	"In Ordnung",
}
// check for completeness. I declared these fields to be mandatory
// (though I don't know whether they really are). I consider images
// and meta optional. So, if either of the checked fields here is

View File

@@ -34,7 +34,7 @@ import (
)
const (
VERSION string = "0.3.13"
VERSION string = "0.3.14"
Baseuri string = "https://www.kleinanzeigen.de"
Listuri string = "/s-bestandsliste.html"
Defaultdir string = "."

1
go.mod
View File

@@ -23,6 +23,7 @@ require (
require (
github.com/PuerkitoBio/goquery v1.5.1 // indirect
github.com/alecthomas/repr v0.4.0 // indirect
github.com/andybalholm/cascadia v1.1.0 // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect

2
go.sum
View File

@@ -3,6 +3,8 @@ astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=

View File

@@ -256,6 +256,7 @@ type AdConfig struct {
Images []string // files in ./t/
}
// used to generate ad listings returned by httpmock using templates
var adsrc = []AdConfig{
{
Title: "First Ad",
@@ -263,7 +264,7 @@ var adsrc = []AdConfig{
Category: "Klimbim",
Text: "Thing to sale",
Slug: "first-ad",
Condition: "works",
Condition: "Sehr Gut",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -273,7 +274,7 @@ var adsrc = []AdConfig{
Category: "Kram",
Text: "Thing to sale",
Slug: "second-ad",
Condition: "works",
Condition: "Gut",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -284,7 +285,7 @@ var adsrc = []AdConfig{
Category: "Kuddelmuddel",
Text: "Thing to sale",
Slug: "third-ad",
Condition: "works",
Condition: "In Ordnung",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -295,7 +296,7 @@ var adsrc = []AdConfig{
Category: "Krempel",
Text: "Thing to sale",
Slug: "fourth-ad",
Condition: "works",
Condition: "Neu",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -306,7 +307,7 @@ var adsrc = []AdConfig{
Category: "Kladderadatsch",
Text: "Thing to sale",
Slug: "fifth-ad",
Condition: "works",
Condition: "Sehr Gut",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -317,7 +318,7 @@ var adsrc = []AdConfig{
Category: "Klunker",
Text: "Thing to sale",
Slug: "sixth-ad",
Condition: "works",
Condition: "Sehr Gut",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
@@ -328,7 +329,7 @@ var adsrc = []AdConfig{
Category: "Klunker",
Text: "Thing to sale",
Slug: "seventh-ad",
Condition: "works",
Condition: "Sehr Gut",
Created: "Yesterday",
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
},

View File

@@ -22,6 +22,7 @@ import (
"fmt"
"log/slog"
"path/filepath"
"slices"
"strconv"
"strings"
"time"
@@ -124,6 +125,13 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
return fmt.Errorf("could not extract ad data from page, got empty struct")
}
for _, detail := range advertisement.Details {
if slices.Contains(CONDITIONS, detail) {
advertisement.Condition = detail
}
}
advertisement.CalculateExpire()
// prepare ad dir name