Compare commits

..

5 Commits

Author SHA1 Message Date
T.v.Dein
6675c4d232 Fix/timeformat (#122)
* Fix #121: confused day with month thanks to time.Format
* Add outdir template variable example
2025-02-10 22:20:25 +01:00
T.v.Dein
46be48af38 Generic attributes (#120)
* fix #117: use a generic attribute parser, still support fixed attrs
2025-02-10 18:20:54 +01:00
T.v.Dein
09948a6b39 add color detail as well (#119)
Co-authored-by: Thomas von Dein <tom@vondein.org>
2025-02-06 20:13:08 +01:00
T.v.Dein
bc01391872 Fix ad condition parsing (#118)
* fix #117: use details slice and pre-set to properly extract condition
* also added the type part of the detail content (original de: "Art")

---------

Co-authored-by: Thomas von Dein <tom@vondein.org>
2025-02-06 13:48:20 +01:00
cd3d00adbe add changelog builder, update release builder 2025-02-05 17:54:47 +01:00
13 changed files with 210 additions and 35 deletions

View File

@@ -1,8 +1,8 @@
name: build-and-test name: build-release
on: on:
push: push:
tags: tags:
- "*" - "v*.*.*"
jobs: jobs:
release: release:
@@ -10,10 +10,10 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout code - name: Checkout code
uses: actions/checkout@v2 uses: actions/checkout@v4
- name: Set up Go - name: Set up Go
uses: actions/setup-go@v1 uses: actions/setup-go@v5
with: with:
go-version: 1.22.11 go-version: 1.22.11
@@ -30,3 +30,58 @@ jobs:
tag: ${{ github.ref_name }} tag: ${{ github.ref_name }}
file: ./releases/* file: ./releases/*
file_glob: true file_glob: true
- name: Build Changelog
id: github_release
uses: mikepenz/release-changelog-builder-action@v5
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
mode: "PR"
configurationJson: |
{
"template": "#{{CHANGELOG}}\n\n**Full Changelog**: #{{RELEASE_DIFF}}",
"pr_template": "- #{{TITLE}} (##{{NUMBER}}) by #{{AUTHOR}}\n#{{BODY}}",
"empty_template": "- no changes",
"categories": [
{
"title": "## New Features",
"labels": ["add", "feature"]
},
{
"title": "## Bug Fixes",
"labels": ["fix", "bug", "revert"]
},
{
"title": "## Documentation Enhancements",
"labels": ["doc"]
},
{
"title": "## Refactoring Efforts",
"labels": ["refactor"]
},
{
"title": "## Miscellaneous Changes",
"labels": []
}
],
"ignore_labels": [
"duplicate", "good first issue", "help wanted", "invalid", "question", "wontfix"
],
"label_extractor": [
{
"pattern": "(.) (.+)",
"target": "$1"
},
{
"pattern": "(.) (.+)",
"target": "$1",
"on_property": "title"
}
]
}
- name: Create Release
uses: softprops/action-gh-release@v2
with:
body: ${{steps.github_release.outputs.changelog}}

View File

@@ -204,6 +204,7 @@ Price: 99 € VB
Id: 1919191919 Id: 1919191919
Category: Sachbücher Category: Sachbücher
Condition: Sehr Gut Condition: Sehr Gut
Type: Buch
Created: 10.12.2023 Created: 10.12.2023
This is the description text. This is the description text.

70
ad.go
View File

@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package main package main
import ( import (
"bufio"
"log/slog" "log/slog"
"strings" "strings"
"time" "time"
@@ -31,7 +32,12 @@ type Ad struct {
Title string `goquery:"h1"` Title string `goquery:"h1"`
Slug string Slug string
ID string ID string
Condition string `goquery:".addetailslist--detail--value,text"` Details string `goquery:".addetailslist--detail,text"`
Attributes map[string]string // processed afterwards
Condition string // post processed from details for backward compatibility
Type string // post processed from details for backward compatibility
Color string // post processed from details for backward compatibility
Material string // post processed from details for backward compatibility
Category string Category string
CategoryTree []string `goquery:".breadcrump-link,text"` CategoryTree []string `goquery:".breadcrump-link,text"`
Price string `goquery:"h2#viewad-price"` Price string `goquery:"h2#viewad-price"`
@@ -50,7 +56,6 @@ func (ad *Ad) LogValue() slog.Value {
slog.Int("imagecount", len(ad.Images)), slog.Int("imagecount", len(ad.Images)),
slog.Int("bodysize", len(ad.Text)), slog.Int("bodysize", len(ad.Text)),
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")), slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
slog.String("condition", ad.Condition),
slog.String("created", ad.Created), slog.String("created", ad.Created),
slog.String("expire", ad.Expire), slog.String("expire", ad.Expire),
) )
@@ -80,3 +85,64 @@ func (ad *Ad) CalculateExpire() {
} }
} }
} }
/*
DecodeAttributes decodes ad attributes like color or condition. See
https://github.com/TLINDEN/kleingebaeck/issues/117
for more details. In short: the HTML delivered by
kleinanzeigen.de has no css attribute for the keys
so we cannot extract key=>value mappings of the
ad details but have to parse them manually.

The ad.Details member contains this after goq run:

	Art
	Weitere Kinderzimmermöbel
	Farbe
	Holz
	Zustand
	In Ordnung

Non-empty lines are consumed pairwise: the first line of a pair is
the attribute name, the second one its value. A trailing name without
a value is ignored.

We parse this into ad.Attributes and fill in some static members
(Condition, Color, Type, Material) for backward compatibility
reasons. Every static member is filled independently of the others,
so an ad carrying several of these attributes gets all of them set.
*/
func (ad *Ad) DecodeAttributes() {
	scanner := bufio.NewScanner(strings.NewReader(ad.Details))
	attrs := map[string]string{}
	key := ""
	expectKey := true

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}

		if expectKey {
			key = line
		} else {
			attrs[key] = line
		}

		expectKey = !expectKey
	}

	// The scanner stops silently on errors (e.g. an over-long line),
	// so report it; whatever has been parsed so far is still used.
	if err := scanner.Err(); err != nil {
		slog.Warn("could not completely parse ad details", "error", err)
	}

	ad.Attributes = attrs

	// Plain if statements instead of a switch: a Go switch only runs
	// the first matching case, which would leave all but one member
	// empty.
	if val, ok := attrs["Zustand"]; ok {
		ad.Condition = val
	}

	if val, ok := attrs["Farbe"]; ok {
		ad.Color = val
	}

	// The type of an ad is delivered under the german key "Art".
	if val, ok := attrs["Art"]; ok {
		ad.Type = val
	}

	if val, ok := attrs["Material"]; ok {
		ad.Material = val
	}
}

View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2023-2024 Thomas von Dein Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -34,17 +34,25 @@ import (
) )
const ( const (
VERSION string = "0.3.13" VERSION string = "0.3.17"
Baseuri string = "https://www.kleinanzeigen.de" Baseuri string = "https://www.kleinanzeigen.de"
Listuri string = "/s-bestandsliste.html" Listuri string = "/s-bestandsliste.html"
Defaultdir string = "." Defaultdir string = "."
/*
Also possible: loop through .Attributes:
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
*/
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\nCondition: {{.Condition}}\n" + "Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" + DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\n" + "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" +
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n" "Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +

View File

@@ -12,19 +12,36 @@ user = 00000000
loglevel = "verbose" loglevel = "verbose"
# directory where to store downloaded ads. kleingebaeck will try to # directory where to store downloaded ads. kleingebaeck will try to
# create it. must be a quoted string. # create it. must be a quoted string. You can also include a couple of
outdir = "test" # template variables, e.g:
# outdir = "test-{{.Year}}-{{.Month}}-{{.Day}}"
outdir = "test"
# template for stored adlistings. To enable it, remove the comment # template for stored adlistings.
# chars up until the last #""" template="""
#template=""" Title: {{.Title}}
#Title: {{.Title}} Price: {{.Price}}
#Price: {{.Price}} Id: {{.Id}}
#Id: {{.Id}} Category: {{.Category}}
#Category: {{.Category}} Condition: {{.Condition}}
#Condition: {{.Condition}} Type: {{.Type}}
#Created: {{.Created}} Created: {{.Created}}
#{{.Text}} {{.Text}}
# """ """
# Ads may contain more attributes than just the Condition. To print
# all attributes, loop over all of them. The template key may only be
# set once, so to use this variant remove the leading comment chars
# here and delete the template above:
#template="""
#Title: {{.Title}}
#Price: {{.Price}}
#Id: {{.ID}}
#Category: {{.Category}}
#{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}
#{{ end }}
#Type: {{.Type}}
#Created: {{.Created}}
#{{.Text}}
#"""

1
go.mod
View File

@@ -23,6 +23,7 @@ require (
require ( require (
github.com/PuerkitoBio/goquery v1.5.1 // indirect github.com/PuerkitoBio/goquery v1.5.1 // indirect
github.com/alecthomas/repr v0.4.0 // indirect
github.com/andybalholm/cascadia v1.1.0 // indirect github.com/andybalholm/cascadia v1.1.0 // indirect
github.com/fatih/color v1.16.0 // indirect github.com/fatih/color v1.16.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect

2
go.sum
View File

@@ -3,6 +3,8 @@ astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=

View File

@@ -133,7 +133,7 @@
.\" ======================================================================== .\" ========================================================================
.\" .\"
.IX Title "KLEINGEBAECK 1" .IX Title "KLEINGEBAECK 1"
.TH KLEINGEBAECK 1 "2024-02-10" "1" "User Commands" .TH KLEINGEBAECK 1 "2025-02-10" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents. .\" way too many mistakes in technical documents.
.if n .ad l .if n .ad l
@@ -267,12 +267,13 @@ variables as the ad name template above.
.PP .PP
This is the default template: This is the default template:
.PP .PP
.Vb 7 .Vb 8
\& Title: {{.Title}} \& Title: {{.Title}}
\& Price: {{.Price}} \& Price: {{.Price}}
\& Id: {{.ID}} \& Id: {{.ID}}
\& Category: {{.Category}} \& Category: {{.Category}}
\& Condition: {{.Condition}} \& Condition: {{.Condition}}
\& Type: {{.Type}}
\& Created: {{.Created}} \& Created: {{.Created}}
\& Expire: {{.Expire}} \& Expire: {{.Expire}}
\& \&

View File

@@ -111,6 +111,7 @@ TEMPLATES
Id: {{.ID}} Id: {{.ID}}
Category: {{.Category}} Category: {{.Category}}
Condition: {{.Condition}} Condition: {{.Condition}}
Type: {{.Type}}
Created: {{.Created}} Created: {{.Created}}
Expire: {{.Expire}} Expire: {{.Expire}}

View File

@@ -131,6 +131,7 @@ This is the default template:
Id: {{.ID}} Id: {{.ID}}
Category: {{.Category}} Category: {{.Category}}
Condition: {{.Condition}} Condition: {{.Condition}}
Type: {{.Type}}
Created: {{.Created}} Created: {{.Created}}
Expire: {{.Expire}} Expire: {{.Expire}}

View File

@@ -93,6 +93,10 @@ const ADTPL string = `DOCTYPE html>
<li class="addetailslist--detail"> <li class="addetailslist--detail">
Zustand<span class="addetailslist--detail--value" > Zustand<span class="addetailslist--detail--value" >
{{ .Condition }}</span> {{ .Condition }}</span>
Farbe<span class="addetailslist--detail--value" >
{{ .Color }}</span>
Art<span class="addetailslist--detail--value" >
{{ .Type }}</span>
</li> </li>
</ul> </ul>
</div> </div>
@@ -251,11 +255,14 @@ type AdConfig struct {
Price string Price string
Category string Category string
Condition string Condition string
Type string
Color string
Created string Created string
Text string Text string
Images []string // files in ./t/ Images []string // files in ./t/
} }
// used to generate ad listings returned by httpmock using templates
var adsrc = []AdConfig{ var adsrc = []AdConfig{
{ {
Title: "First Ad", Title: "First Ad",
@@ -263,7 +270,9 @@ var adsrc = []AdConfig{
Category: "Klimbim", Category: "Klimbim",
Text: "Thing to sale", Text: "Thing to sale",
Slug: "first-ad", Slug: "first-ad",
Condition: "works", Condition: "Sehr Gut",
Color: "Grün",
Type: "Ball",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -273,7 +282,9 @@ var adsrc = []AdConfig{
Category: "Kram", Category: "Kram",
Text: "Thing to sale", Text: "Thing to sale",
Slug: "second-ad", Slug: "second-ad",
Condition: "works", Condition: "Gut",
Color: "Lila",
Type: "Schoki",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -284,7 +295,9 @@ var adsrc = []AdConfig{
Category: "Kuddelmuddel", Category: "Kuddelmuddel",
Text: "Thing to sale", Text: "Thing to sale",
Slug: "third-ad", Slug: "third-ad",
Condition: "works", Condition: "In Ordnung",
Color: "Blau",
Type: "Auto",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -295,7 +308,9 @@ var adsrc = []AdConfig{
Category: "Krempel", Category: "Krempel",
Text: "Thing to sale", Text: "Thing to sale",
Slug: "fourth-ad", Slug: "fourth-ad",
Condition: "works", Condition: "Neu",
Color: "Rot",
Type: "Spielzeut",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -306,7 +321,9 @@ var adsrc = []AdConfig{
Category: "Kladderadatsch", Category: "Kladderadatsch",
Text: "Thing to sale", Text: "Thing to sale",
Slug: "fifth-ad", Slug: "fifth-ad",
Condition: "works", Condition: "Sehr Gut",
Color: "Braun",
Type: "Parteibuch",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -317,7 +334,9 @@ var adsrc = []AdConfig{
Category: "Klunker", Category: "Klunker",
Text: "Thing to sale", Text: "Thing to sale",
Slug: "sixth-ad", Slug: "sixth-ad",
Condition: "works", Condition: "Sehr Gut",
Color: "Silber",
Type: "Ring",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -328,7 +347,9 @@ var adsrc = []AdConfig{
Category: "Klunker", Category: "Klunker",
Text: "Thing to sale", Text: "Thing to sale",
Slug: "seventh-ad", Slug: "seventh-ad",
Condition: "works", Condition: "Sehr Gut",
Color: "Gelpb",
Type: "Schmuck",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"}, Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
}, },

View File

@@ -124,6 +124,7 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
return fmt.Errorf("could not extract ad data from page, got empty struct") return fmt.Errorf("could not extract ad data from page, got empty struct")
} }
advertisement.DecodeAttributes()
advertisement.CalculateExpire() advertisement.CalculateExpire()
// prepare ad dir name // prepare ad dir name

View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2023-2024 Thomas von Dein Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -44,8 +44,8 @@ func OutDirName(conf *Config) (string, error) {
now := time.Now() now := time.Now()
data := OutdirData{ data := OutdirData{
Year: now.Format("2006"), Year: now.Format("2006"),
Month: now.Format("02"), Month: now.Format("01"),
Day: now.Format("01"), Day: now.Format("02"),
} }
err = tmpl.Execute(&buf, data) err = tmpl.Execute(&buf, data)