Fixed breakage caused by changes on kleinanzeigen.de:

- Meta no longer contains both condition and category; the site
removed the category from it. Therefore fetching (that is, validation)
failed.
- Now we extract the condition and category directly.
- In addition, the category now includes the whole category tree.
- Unit tests had to be adjusted for this change.
This commit is contained in:
2024-01-12 14:11:02 +01:00
parent 110ee17091
commit cdf58efd45
3 changed files with 29 additions and 19 deletions

25
ad.go
View File

@@ -19,6 +19,7 @@ package main
import ( import (
"log/slog" "log/slog"
"strings"
) )
type Index struct { type Index struct {
@@ -26,16 +27,16 @@ type Index struct {
} }
type Ad struct { type Ad struct {
Title string `goquery:"h1"` Title string `goquery:"h1"`
Slug string Slug string
Id string Id string
Condition string Condition string `goquery:".addetailslist--detail--value,text"`
Category string Category string
Price string `goquery:"h2#viewad-price"` CategoryTree []string `goquery:".breadcrump-link,text"`
Created string `goquery:"#viewad-extra-info,text"` Price string `goquery:"h2#viewad-price"`
Text string `goquery:"p#viewad-description-text,html"` Created string `goquery:"#viewad-extra-info,text"`
Images []string `goquery:".galleryimage-element img,[src]"` Text string `goquery:"p#viewad-description-text,html"`
Meta []string `goquery:".addetailslist--detail--value,text"` Images []string `goquery:".galleryimage-element img,[src]"`
} }
// Used by slog to pretty print an ad // Used by slog to pretty print an ad
@@ -46,6 +47,8 @@ func (ad *Ad) LogValue() slog.Value {
slog.String("id", ad.Id), slog.String("id", ad.Id),
slog.Int("imagecount", len(ad.Images)), slog.Int("imagecount", len(ad.Images)),
slog.Int("bodysize", len(ad.Text)), slog.Int("bodysize", len(ad.Text)),
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
slog.String("condition", ad.Condition),
) )
} }
@@ -58,7 +61,7 @@ func (ad *Ad) LogValue() slog.Value {
// //
// Note: we return true for "ad is incomplete" and false for "ad is complete"! // Note: we return true for "ad is incomplete" and false for "ad is complete"!
func (ad *Ad) Incomplete() bool { func (ad *Ad) Incomplete() bool {
if ad.Category == "" || ad.Condition == "" || ad.Created == "" || ad.Text == "" { if ad.Category == "" || ad.Created == "" || ad.Text == "" {
return true return true
} }

View File

@@ -60,6 +60,16 @@ const ADTPL string = `DOCTYPE html>
</head> </head>
<body> <body>
<div class="l-container-row">
<div id="vap-brdcrmb" class="breadcrump">
<a class="breadcrump-link" itemprop="url" href="/" title="Kleinanzeigen ">
<span itemprop="title">Kleinanzeigen </span>
</a>
<a class="breadcrump-link" itemprop="url" href="/egal">
<span itemprop="title">{{ .Category }}</span></a>
</div>
</div>
{{ range $image := .Images }} {{ range $image := .Images }}
<div class="galleryimage-element" data-ix="3"> <div class="galleryimage-element" data-ix="3">
<img src="{{ $image }}"/> <img src="{{ $image }}"/>
@@ -79,10 +89,6 @@ const ADTPL string = `DOCTYPE html>
<div class="splitlinebox l-container-row" id="viewad-details"> <div class="splitlinebox l-container-row" id="viewad-details">
<ul class="addetailslist"> <ul class="addetailslist">
<li class="addetailslist--detail">
Art<span class="addetailslist--detail--value" >
{{ .Category }}</span>
</li>
<li class="addetailslist--detail"> <li class="addetailslist--detail">
Zustand<span class="addetailslist--detail--value" > Zustand<span class="addetailslist--detail--value" >
{{ .Condition }}</span> {{ .Condition }}</span>
@@ -438,7 +444,7 @@ func SetIntercept(ads []Adsource) {
} }
func VerifyAd(ad AdConfig) error { func VerifyAd(ad AdConfig) error {
body := ad.Title + ad.Price + ad.Id + ad.Category + ad.Condition + ad.Created body := ad.Title + ad.Price + ad.Id + "Kleinanzeigen => " + ad.Category + ad.Condition + ad.Created
// prepare ad dir name using DefaultAdNameTemplate // prepare ad dir name using DefaultAdNameTemplate
c := Config{Adnametemplate: DefaultAdNameTemplate} c := Config{Adnametemplate: DefaultAdNameTemplate}

View File

@@ -135,12 +135,13 @@ func Scrape(c *Config, uri string) error {
if err != nil { if err != nil {
return err return err
} }
if len(ad.Meta) == 2 {
ad.Category = ad.Meta[0] if len(ad.CategoryTree) > 0 {
ad.Condition = ad.Meta[1] ad.Category = strings.Join(ad.CategoryTree, " => ")
} }
if ad.Incomplete() { if ad.Incomplete() {
slog.Debug("got ad", "ad", ad)
return errors.New("could not extract ad data from page, got empty struct") return errors.New("could not extract ad data from page, got empty struct")
} }