mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-16 20:11:01 +01:00
Fix ad condition parsing (#118)
* fix #117: use the details slice and a pre-set list of known conditions to properly extract the condition * also added the type part of the detail content (original German label: "Art") --------- Co-authored-by: Thomas von Dein <tom@vondein.org>
This commit is contained in:
7
ad.go
7
ad.go
@@ -31,7 +31,9 @@ type Ad struct {
|
|||||||
Title string `goquery:"h1"`
|
Title string `goquery:"h1"`
|
||||||
Slug string
|
Slug string
|
||||||
ID string
|
ID string
|
||||||
Condition string `goquery:".addetailslist--detail--value,text"`
|
Details []string `goquery:".addetailslist--detail--value,text"`
|
||||||
|
Condition string // post processed from details
|
||||||
|
Type string // post processed from details
|
||||||
Category string
|
Category string
|
||||||
CategoryTree []string `goquery:".breadcrump-link,text"`
|
CategoryTree []string `goquery:".breadcrump-link,text"`
|
||||||
Price string `goquery:"h2#viewad-price"`
|
Price string `goquery:"h2#viewad-price"`
|
||||||
@@ -56,6 +58,9 @@ func (ad *Ad) LogValue() slog.Value {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// static set of conditions available, used for post processing details
|
||||||
|
var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"}
|
||||||
|
|
||||||
// check for completeness. I declared these fields to be mandatory
|
// check for completeness. I declared these fields to be mandatory
|
||||||
// (though I really don't know if they really are). I consider images
|
// (though I really don't know if they really are). I consider images
|
||||||
// and meta optional. So, if either of the checked fields here is
|
// and meta optional. So, if either of the checked fields here is
|
||||||
|
|||||||
@@ -34,17 +34,17 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
VERSION string = "0.3.13"
|
VERSION string = "0.3.14"
|
||||||
Baseuri string = "https://www.kleinanzeigen.de"
|
Baseuri string = "https://www.kleinanzeigen.de"
|
||||||
Listuri string = "/s-bestandsliste.html"
|
Listuri string = "/s-bestandsliste.html"
|
||||||
Defaultdir string = "."
|
Defaultdir string = "."
|
||||||
|
|
||||||
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
|
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
|
||||||
"Category: {{.Category}}\nCondition: {{.Condition}}\n" +
|
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\n" +
|
||||||
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
|
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
|
||||||
|
|
||||||
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
|
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
|
||||||
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\n" +
|
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\n" +
|
||||||
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
|
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
|
||||||
|
|
||||||
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ outdir = "test"
|
|||||||
#Id: {{.ID}}
|
#Id: {{.ID}}
|
||||||
#Category: {{.Category}}
|
#Category: {{.Category}}
|
||||||
#Condition: {{.Condition}}
|
#Condition: {{.Condition}}
|
||||||
|
#Type: {{.Type}}
|
||||||
#Created: {{.Created}}
|
#Created: {{.Created}}
|
||||||
|
|
||||||
#{{.Text}}
|
#{{.Text}}
|
||||||
|
|||||||
1
go.mod
1
go.mod
@@ -23,6 +23,7 @@ require (
|
|||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/PuerkitoBio/goquery v1.5.1 // indirect
|
github.com/PuerkitoBio/goquery v1.5.1 // indirect
|
||||||
|
github.com/alecthomas/repr v0.4.0 // indirect
|
||||||
github.com/andybalholm/cascadia v1.1.0 // indirect
|
github.com/andybalholm/cascadia v1.1.0 // indirect
|
||||||
github.com/fatih/color v1.16.0 // indirect
|
github.com/fatih/color v1.16.0 // indirect
|
||||||
github.com/fsnotify/fsnotify v1.7.0 // indirect
|
github.com/fsnotify/fsnotify v1.7.0 // indirect
|
||||||
|
|||||||
2
go.sum
2
go.sum
@@ -3,6 +3,8 @@ astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
|
|||||||
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
||||||
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
|
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
|
||||||
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
||||||
|
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
|
||||||
|
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
|
||||||
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||||
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
||||||
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||||
|
|||||||
@@ -133,7 +133,7 @@
|
|||||||
.\" ========================================================================
|
.\" ========================================================================
|
||||||
.\"
|
.\"
|
||||||
.IX Title "KLEINGEBAECK 1"
|
.IX Title "KLEINGEBAECK 1"
|
||||||
.TH KLEINGEBAECK 1 "2024-02-10" "1" "User Commands"
|
.TH KLEINGEBAECK 1 "2025-02-06" "1" "User Commands"
|
||||||
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
||||||
.\" way too many mistakes in technical documents.
|
.\" way too many mistakes in technical documents.
|
||||||
.if n .ad l
|
.if n .ad l
|
||||||
@@ -174,7 +174,7 @@ well. We use \s-1TOML\s0 as our configuration language. See
|
|||||||
.PP
|
.PP
|
||||||
Format is pretty simple:
|
Format is pretty simple:
|
||||||
.PP
|
.PP
|
||||||
.Vb 11
|
.Vb 12
|
||||||
\& user = 1010101
|
\& user = 1010101
|
||||||
\& loglevel = verbose
|
\& loglevel = verbose
|
||||||
\& outdir = "test"
|
\& outdir = "test"
|
||||||
@@ -185,6 +185,7 @@ Format is pretty simple:
|
|||||||
\& Id: {{.ID}}
|
\& Id: {{.ID}}
|
||||||
\& Category: {{.Category}}
|
\& Category: {{.Category}}
|
||||||
\& Condition: {{.Condition}}
|
\& Condition: {{.Condition}}
|
||||||
|
\& Type: {{.Type}}
|
||||||
\& Created: {{.Created}}
|
\& Created: {{.Created}}
|
||||||
\&
|
\&
|
||||||
\& {{.Text}}
|
\& {{.Text}}
|
||||||
@@ -267,12 +268,13 @@ variables as the ad name template above.
|
|||||||
.PP
|
.PP
|
||||||
This is the default template:
|
This is the default template:
|
||||||
.PP
|
.PP
|
||||||
.Vb 7
|
.Vb 8
|
||||||
\& Title: {{.Title}}
|
\& Title: {{.Title}}
|
||||||
\& Price: {{.Price}}
|
\& Price: {{.Price}}
|
||||||
\& Id: {{.ID}}
|
\& Id: {{.ID}}
|
||||||
\& Category: {{.Category}}
|
\& Category: {{.Category}}
|
||||||
\& Condition: {{.Condition}}
|
\& Condition: {{.Condition}}
|
||||||
|
\& Type: {{.Type}}
|
||||||
\& Created: {{.Created}}
|
\& Created: {{.Created}}
|
||||||
\& Expire: {{.Expire}}
|
\& Expire: {{.Expire}}
|
||||||
\&
|
\&
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ CONFIGURATION
|
|||||||
Id: {{.ID}}
|
Id: {{.ID}}
|
||||||
Category: {{.Category}}
|
Category: {{.Category}}
|
||||||
Condition: {{.Condition}}
|
Condition: {{.Condition}}
|
||||||
|
Type: {{.Type}}
|
||||||
Created: {{.Created}}
|
Created: {{.Created}}
|
||||||
|
|
||||||
{{.Text}}
|
{{.Text}}
|
||||||
@@ -111,6 +112,7 @@ TEMPLATES
|
|||||||
Id: {{.ID}}
|
Id: {{.ID}}
|
||||||
Category: {{.Category}}
|
Category: {{.Category}}
|
||||||
Condition: {{.Condition}}
|
Condition: {{.Condition}}
|
||||||
|
Type: {{.Type}}
|
||||||
Created: {{.Created}}
|
Created: {{.Created}}
|
||||||
Expire: {{.Expire}}
|
Expire: {{.Expire}}
|
||||||
|
|
||||||
|
|||||||
@@ -46,6 +46,7 @@ Format is pretty simple:
|
|||||||
Id: {{.ID}}
|
Id: {{.ID}}
|
||||||
Category: {{.Category}}
|
Category: {{.Category}}
|
||||||
Condition: {{.Condition}}
|
Condition: {{.Condition}}
|
||||||
|
Type: {{.Type}}
|
||||||
Created: {{.Created}}
|
Created: {{.Created}}
|
||||||
|
|
||||||
{{.Text}}
|
{{.Text}}
|
||||||
@@ -131,6 +132,7 @@ This is the default template:
|
|||||||
Id: {{.ID}}
|
Id: {{.ID}}
|
||||||
Category: {{.Category}}
|
Category: {{.Category}}
|
||||||
Condition: {{.Condition}}
|
Condition: {{.Condition}}
|
||||||
|
Type: {{.Type}}
|
||||||
Created: {{.Created}}
|
Created: {{.Created}}
|
||||||
Expire: {{.Expire}}
|
Expire: {{.Expire}}
|
||||||
|
|
||||||
|
|||||||
15
main_test.go
15
main_test.go
@@ -256,6 +256,7 @@ type AdConfig struct {
|
|||||||
Images []string // files in ./t/
|
Images []string // files in ./t/
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// used to generate ad listings returned by httpmock using templates
|
||||||
var adsrc = []AdConfig{
|
var adsrc = []AdConfig{
|
||||||
{
|
{
|
||||||
Title: "First Ad",
|
Title: "First Ad",
|
||||||
@@ -263,7 +264,7 @@ var adsrc = []AdConfig{
|
|||||||
Category: "Klimbim",
|
Category: "Klimbim",
|
||||||
Text: "Thing to sale",
|
Text: "Thing to sale",
|
||||||
Slug: "first-ad",
|
Slug: "first-ad",
|
||||||
Condition: "works",
|
Condition: "Sehr Gut",
|
||||||
Created: "Yesterday",
|
Created: "Yesterday",
|
||||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||||
},
|
},
|
||||||
@@ -273,7 +274,7 @@ var adsrc = []AdConfig{
|
|||||||
Category: "Kram",
|
Category: "Kram",
|
||||||
Text: "Thing to sale",
|
Text: "Thing to sale",
|
||||||
Slug: "second-ad",
|
Slug: "second-ad",
|
||||||
Condition: "works",
|
Condition: "Gut",
|
||||||
Created: "Yesterday",
|
Created: "Yesterday",
|
||||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||||
},
|
},
|
||||||
@@ -284,7 +285,7 @@ var adsrc = []AdConfig{
|
|||||||
Category: "Kuddelmuddel",
|
Category: "Kuddelmuddel",
|
||||||
Text: "Thing to sale",
|
Text: "Thing to sale",
|
||||||
Slug: "third-ad",
|
Slug: "third-ad",
|
||||||
Condition: "works",
|
Condition: "In Ordnung",
|
||||||
Created: "Yesterday",
|
Created: "Yesterday",
|
||||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||||
},
|
},
|
||||||
@@ -295,7 +296,7 @@ var adsrc = []AdConfig{
|
|||||||
Category: "Krempel",
|
Category: "Krempel",
|
||||||
Text: "Thing to sale",
|
Text: "Thing to sale",
|
||||||
Slug: "fourth-ad",
|
Slug: "fourth-ad",
|
||||||
Condition: "works",
|
Condition: "Neu",
|
||||||
Created: "Yesterday",
|
Created: "Yesterday",
|
||||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||||
},
|
},
|
||||||
@@ -306,7 +307,7 @@ var adsrc = []AdConfig{
|
|||||||
Category: "Kladderadatsch",
|
Category: "Kladderadatsch",
|
||||||
Text: "Thing to sale",
|
Text: "Thing to sale",
|
||||||
Slug: "fifth-ad",
|
Slug: "fifth-ad",
|
||||||
Condition: "works",
|
Condition: "Sehr Gut",
|
||||||
Created: "Yesterday",
|
Created: "Yesterday",
|
||||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||||
},
|
},
|
||||||
@@ -317,7 +318,7 @@ var adsrc = []AdConfig{
|
|||||||
Category: "Klunker",
|
Category: "Klunker",
|
||||||
Text: "Thing to sale",
|
Text: "Thing to sale",
|
||||||
Slug: "sixth-ad",
|
Slug: "sixth-ad",
|
||||||
Condition: "works",
|
Condition: "Sehr Gut",
|
||||||
Created: "Yesterday",
|
Created: "Yesterday",
|
||||||
Images: []string{"t/1.jpg", "t/2.jpg"},
|
Images: []string{"t/1.jpg", "t/2.jpg"},
|
||||||
},
|
},
|
||||||
@@ -328,7 +329,7 @@ var adsrc = []AdConfig{
|
|||||||
Category: "Klunker",
|
Category: "Klunker",
|
||||||
Text: "Thing to sale",
|
Text: "Thing to sale",
|
||||||
Slug: "seventh-ad",
|
Slug: "seventh-ad",
|
||||||
Condition: "works",
|
Condition: "Sehr Gut",
|
||||||
Created: "Yesterday",
|
Created: "Yesterday",
|
||||||
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
|
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
|
||||||
},
|
},
|
||||||
|
|||||||
10
scrape.go
10
scrape.go
@@ -22,6 +22,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -124,6 +125,15 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
|
|||||||
return fmt.Errorf("could not extract ad data from page, got empty struct")
|
return fmt.Errorf("could not extract ad data from page, got empty struct")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, detail := range advertisement.Details {
|
||||||
|
if slices.Contains(CONDITIONS, detail) {
|
||||||
|
advertisement.Condition = detail
|
||||||
|
} else {
|
||||||
|
advertisement.Type = detail
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
advertisement.CalculateExpire()
|
advertisement.CalculateExpire()
|
||||||
|
|
||||||
// prepare ad dir name
|
// prepare ad dir name
|
||||||
|
|||||||
Reference in New Issue
Block a user