mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 12:31:03 +01:00
Fixed breakage caused by changes on kleinanzeigen.de:
- Meta no longer contained condition and category together; they removed the category. Therefore fetching (that is, validation) failed.
- Now we extract the condition and category directly.
- On top of that, category now includes the whole category tree.
- Unit tests had to be tweaked for this change.
This commit is contained in:
9
ad.go
9
ad.go
@@ -19,6 +19,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Index struct {
|
type Index struct {
|
||||||
@@ -29,13 +30,13 @@ type Ad struct {
|
|||||||
Title string `goquery:"h1"`
|
Title string `goquery:"h1"`
|
||||||
Slug string
|
Slug string
|
||||||
Id string
|
Id string
|
||||||
Condition string
|
Condition string `goquery:".addetailslist--detail--value,text"`
|
||||||
Category string
|
Category string
|
||||||
|
CategoryTree []string `goquery:".breadcrump-link,text"`
|
||||||
Price string `goquery:"h2#viewad-price"`
|
Price string `goquery:"h2#viewad-price"`
|
||||||
Created string `goquery:"#viewad-extra-info,text"`
|
Created string `goquery:"#viewad-extra-info,text"`
|
||||||
Text string `goquery:"p#viewad-description-text,html"`
|
Text string `goquery:"p#viewad-description-text,html"`
|
||||||
Images []string `goquery:".galleryimage-element img,[src]"`
|
Images []string `goquery:".galleryimage-element img,[src]"`
|
||||||
Meta []string `goquery:".addetailslist--detail--value,text"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used by slog to pretty print an ad
|
// Used by slog to pretty print an ad
|
||||||
@@ -46,6 +47,8 @@ func (ad *Ad) LogValue() slog.Value {
|
|||||||
slog.String("id", ad.Id),
|
slog.String("id", ad.Id),
|
||||||
slog.Int("imagecount", len(ad.Images)),
|
slog.Int("imagecount", len(ad.Images)),
|
||||||
slog.Int("bodysize", len(ad.Text)),
|
slog.Int("bodysize", len(ad.Text)),
|
||||||
|
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
|
||||||
|
slog.String("condition", ad.Condition),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,7 +61,7 @@ func (ad *Ad) LogValue() slog.Value {
|
|||||||
//
|
//
|
||||||
// Note: we return true for "ad is incomplete" and false for "ad is complete"!
|
// Note: we return true for "ad is incomplete" and false for "ad is complete"!
|
||||||
func (ad *Ad) Incomplete() bool {
|
func (ad *Ad) Incomplete() bool {
|
||||||
if ad.Category == "" || ad.Condition == "" || ad.Created == "" || ad.Text == "" {
|
if ad.Category == "" || ad.Created == "" || ad.Text == "" {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
16
main_test.go
16
main_test.go
@@ -60,6 +60,16 @@ const ADTPL string = `DOCTYPE html>
|
|||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
|
|
||||||
|
<div class="l-container-row">
|
||||||
|
<div id="vap-brdcrmb" class="breadcrump">
|
||||||
|
<a class="breadcrump-link" itemprop="url" href="/" title="Kleinanzeigen ">
|
||||||
|
<span itemprop="title">Kleinanzeigen </span>
|
||||||
|
</a>
|
||||||
|
<a class="breadcrump-link" itemprop="url" href="/egal">
|
||||||
|
<span itemprop="title">{{ .Category }}</span></a>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
{{ range $image := .Images }}
|
{{ range $image := .Images }}
|
||||||
<div class="galleryimage-element" data-ix="3">
|
<div class="galleryimage-element" data-ix="3">
|
||||||
<img src="{{ $image }}"/>
|
<img src="{{ $image }}"/>
|
||||||
@@ -79,10 +89,6 @@ const ADTPL string = `DOCTYPE html>
|
|||||||
|
|
||||||
<div class="splitlinebox l-container-row" id="viewad-details">
|
<div class="splitlinebox l-container-row" id="viewad-details">
|
||||||
<ul class="addetailslist">
|
<ul class="addetailslist">
|
||||||
<li class="addetailslist--detail">
|
|
||||||
Art<span class="addetailslist--detail--value" >
|
|
||||||
{{ .Category }}</span>
|
|
||||||
</li>
|
|
||||||
<li class="addetailslist--detail">
|
<li class="addetailslist--detail">
|
||||||
Zustand<span class="addetailslist--detail--value" >
|
Zustand<span class="addetailslist--detail--value" >
|
||||||
{{ .Condition }}</span>
|
{{ .Condition }}</span>
|
||||||
@@ -438,7 +444,7 @@ func SetIntercept(ads []Adsource) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func VerifyAd(ad AdConfig) error {
|
func VerifyAd(ad AdConfig) error {
|
||||||
body := ad.Title + ad.Price + ad.Id + ad.Category + ad.Condition + ad.Created
|
body := ad.Title + ad.Price + ad.Id + "Kleinanzeigen => " + ad.Category + ad.Condition + ad.Created
|
||||||
|
|
||||||
// prepare ad dir name using DefaultAdNameTemplate
|
// prepare ad dir name using DefaultAdNameTemplate
|
||||||
c := Config{Adnametemplate: DefaultAdNameTemplate}
|
c := Config{Adnametemplate: DefaultAdNameTemplate}
|
||||||
|
|||||||
@@ -135,12 +135,13 @@ func Scrape(c *Config, uri string) error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if len(ad.Meta) == 2 {
|
|
||||||
ad.Category = ad.Meta[0]
|
if len(ad.CategoryTree) > 0 {
|
||||||
ad.Condition = ad.Meta[1]
|
ad.Category = strings.Join(ad.CategoryTree, " => ")
|
||||||
}
|
}
|
||||||
|
|
||||||
if ad.Incomplete() {
|
if ad.Incomplete() {
|
||||||
|
slog.Debug("got ad", "ad", ad)
|
||||||
return errors.New("could not extract ad data from page, got empty struct")
|
return errors.New("could not extract ad data from page, got empty struct")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user