Compare commits

..

7 Commits

Author SHA1 Message Date
T.v.Dein
4030d04b06 Add year,month,day support to Adnametemplate (#123)
* add year, month and day to adnametemplate as well
2025-02-27 17:58:05 +01:00
eff0af0b34 pie only on linux 2025-02-19 18:08:15 +01:00
34b1ad9d1e remove symbols and crap from released binaries 2025-02-19 18:01:05 +01:00
T.v.Dein
6675c4d232 Fix/timeformat (#122)
* Fix #121: confused day with month thanks to time.Format
* Add outdir template variable example
2025-02-10 22:20:25 +01:00
T.v.Dein
46be48af38 Generic attributes (#120)
* fix #117: use a generic attribute parser, still support fixed attrs
2025-02-10 18:20:54 +01:00
T.v.Dein
09948a6b39 add color detail as well (#119)
Co-authored-by: Thomas von Dein <tom@vondein.org>
2025-02-06 20:13:08 +01:00
T.v.Dein
bc01391872 Fix ad condition parsing (#118)
* fix #117: use details slice and pre-set to properly extract condition
* also added the type part of the detail content (original de: "Art")

---------

Co-authored-by: Thomas von Dein <tom@vondein.org>
2025-02-06 13:48:20 +01:00
11 changed files with 213 additions and 50 deletions

View File

@@ -204,6 +204,7 @@ Price: 99 € VB
Id: 1919191919 Id: 1919191919
Category: Sachbücher Category: Sachbücher
Condition: Sehr Gut Condition: Sehr Gut
Type: Buch
Created: 10.12.2023 Created: 10.12.2023
This is the description text. This is the description text.

80
ad.go
View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2023-2024 Thomas von Dein Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package main package main
import ( import (
"bufio"
"log/slog" "log/slog"
"strings" "strings"
"time" "time"
@@ -31,9 +32,12 @@ type Ad struct {
Title string `goquery:"h1"` Title string `goquery:"h1"`
Slug string Slug string
ID string ID string
Details []string `goquery:".addetailslist--detail--value,text"` Details string `goquery:".addetailslist--detail,text"`
Condition string // post processed from details Attributes map[string]string // processed afterwards
Type string // post processed from details Condition string // post processed from details for backward compatibility
Type string // post processed from details for backward compatibility
Color string // post processed from details for backward compatibility
Material string // post processed from details for backward compatibility
Category string Category string
CategoryTree []string `goquery:".breadcrump-link,text"` CategoryTree []string `goquery:".breadcrump-link,text"`
Price string `goquery:"h2#viewad-price"` Price string `goquery:"h2#viewad-price"`
@@ -41,6 +45,9 @@ type Ad struct {
Text string `goquery:"p#viewad-description-text,html"` Text string `goquery:"p#viewad-description-text,html"`
Images []string `goquery:".galleryimage-element img,[src]"` Images []string `goquery:".galleryimage-element img,[src]"`
Expire string Expire string
// runtime computed
Year, Day, Month string
} }
// Used by slog to pretty print an ad // Used by slog to pretty print an ad
@@ -52,15 +59,11 @@ func (ad *Ad) LogValue() slog.Value {
slog.Int("imagecount", len(ad.Images)), slog.Int("imagecount", len(ad.Images)),
slog.Int("bodysize", len(ad.Text)), slog.Int("bodysize", len(ad.Text)),
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")), slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
slog.String("condition", ad.Condition),
slog.String("created", ad.Created), slog.String("created", ad.Created),
slog.String("expire", ad.Expire), slog.String("expire", ad.Expire),
) )
} }
// static set of conditions available, used for post processing details
var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"}
// check for completeness. I erected these fields to be mandatory // check for completeness. I erected these fields to be mandatory
// (though I really don't know if they really are). I consider images // (though I really don't know if they really are). I consider images
// and meta optional. So, if either of the checked fields here is // and meta optional. So, if either of the checked fields here is
@@ -85,3 +88,64 @@ func (ad *Ad) CalculateExpire() {
} }
} }
} }
/*
DecodeAttributes parses the raw ad details in ad.Details into the
ad.Attributes map and fills in some static members for backward
compatibility reasons.

See https://github.com/TLINDEN/kleingebaeck/issues/117 for more
details. In short: the HTML delivered by kleinanzeigen.de has no css
attribute for the keys, so we cannot extract key=>value mappings of
the ad details but have to parse them manually.

The ad.Details member contains this after goq run:

	Art
	Weitere Kinderzimmermöbel
	Farbe
	Holz
	Zustand
	In Ordnung

Every non-empty line is trimmed; lines alternate between attribute
name and attribute value. A trailing name without a value is ignored.

The following attributes are copied into dedicated members when
present (each one independently of the others):

	Zustand  => ad.Condition
	Farbe    => ad.Color
	Art      => ad.Type
	Material => ad.Material
*/
func (ad *Ad) DecodeAttributes() {
	scanner := bufio.NewScanner(strings.NewReader(ad.Details))
	attrs := map[string]string{}
	key := ""
	expectKey := true

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}

		if expectKey {
			key = line
		} else {
			attrs[key] = line
		}

		expectKey = !expectKey
	}

	// A strings.Reader cannot fail, but the scanner gives up on overly
	// long lines. Keep whatever has been parsed so far and report it.
	if err := scanner.Err(); err != nil {
		slog.Debug("failed to scan ad details", "error", err)
	}

	ad.Attributes = attrs

	// Each attribute must be checked on its own: an ad usually carries
	// several of them at once, so a switch (first match wins) would
	// drop all but one.
	if val, ok := attrs["Zustand"]; ok {
		ad.Condition = val
	}

	if val, ok := attrs["Farbe"]; ok {
		ad.Color = val
	}

	// the type is delivered under its german name "Art"
	if val, ok := attrs["Art"]; ok {
		ad.Type = val
	}

	if val, ok := attrs["Material"]; ok {
		ad.Material = val
	}
}

View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2023-2024 Thomas von Dein Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -34,17 +34,25 @@ import (
) )
const ( const (
VERSION string = "0.3.14" VERSION string = "0.3.18"
Baseuri string = "https://www.kleinanzeigen.de" Baseuri string = "https://www.kleinanzeigen.de"
Listuri string = "/s-bestandsliste.html" Listuri string = "/s-bestandsliste.html"
Defaultdir string = "." Defaultdir string = "."
/*
Also possible: loop through .Attributes:
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\n" + "Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
*/
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" + DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\n" + "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" +
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n" "Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +

View File

@@ -12,20 +12,36 @@ user = 00000000
loglevel = "verbose" loglevel = "verbose"
# directory where to store downloaded ads. kleingebaeck will try to # directory where to store downloaded ads. kleingebaeck will try to
# create it. must be a quoted string. # create it. must be a quoted string. You can also include a couple of
# template variables, e.g:
# outdir = "test-{{.Year}}-{{.Month}}-{{.Day}}"
outdir = "test" outdir = "test"
# template for stored adlistings. To enable it, remove the comment # template for stored adlistings.
# chars up until the last #""" template="""
#template=""" Title: {{.Title}}
#Title: {{.Title}} Price: {{.Price}}
#Price: {{.Price}} Id: {{.Id}}
#Id: {{.Id}} Category: {{.Category}}
#Category: {{.Category}} Condition: {{.Condition}}
#Condition: {{.Condition}} Type: {{.Type}}
#Type: {{.Type}} Created: {{.Created}}
#Created: {{.Created}}
#{{.Text}} {{.Text}}
# """ """
# Ads may contain more attributes than just the Condition. To print
# all attributes, loop over all of them:
template="""
Title: {{.Title}}
Price: {{.Price}}
Id: {{.Id}}
Category: {{.Category}}
{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}
{{ end }}
Type: {{.Type}}
Created: {{.Created}}
{{.Text}}
"""

View File

@@ -133,7 +133,7 @@
.\" ======================================================================== .\" ========================================================================
.\" .\"
.IX Title "KLEINGEBAECK 1" .IX Title "KLEINGEBAECK 1"
.TH KLEINGEBAECK 1 "2025-02-06" "1" "User Commands" .TH KLEINGEBAECK 1 "2025-02-27" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents. .\" way too many mistakes in technical documents.
.if n .ad l .if n .ad l
@@ -174,7 +174,7 @@ well. We use \s-1TOML\s0 as our configuration language. See
.PP .PP
Format is pretty simple: Format is pretty simple:
.PP .PP
.Vb 12 .Vb 11
\& user = 1010101 \& user = 1010101
\& loglevel = verbose \& loglevel = verbose
\& outdir = "test" \& outdir = "test"
@@ -185,7 +185,6 @@ Format is pretty simple:
\& Id: {{.ID}} \& Id: {{.ID}}
\& Category: {{.Category}} \& Category: {{.Category}}
\& Condition: {{.Condition}} \& Condition: {{.Condition}}
\& Type: {{.Type}}
\& Created: {{.Created}} \& Created: {{.Created}}
\& \&
\& {{.Text}} \& {{.Text}}
@@ -261,6 +260,29 @@ The ad directory name can be modified using the following ad values:
.PP .PP
It can only be configured in the config file. By default only It can only be configured in the config file. By default only
\&\f(CW\*(C`{{.Slug}}\*(C'\fR is being used, this is the title of the ad in url format. \&\f(CW\*(C`{{.Slug}}\*(C'\fR is being used, this is the title of the ad in url format.
.SS "\s-1AD NAME TEMPLATE\s0"
.IX Subsection "AD NAME TEMPLATE"
The name of the directory per ad can be tuned as well:
.ie n .IP """{{.Year}}""" 4
.el .IP "\f(CW{{.Year}}\fR" 4
.IX Item "{{.Year}}"
.PD 0
.ie n .IP """{{.Month}}""" 4
.el .IP "\f(CW{{.Month}}\fR" 4
.IX Item "{{.Month}}"
.ie n .IP """{{.Day}}""" 4
.el .IP "\f(CW{{.Day}}\fR" 4
.IX Item "{{.Day}}"
.ie n .IP """{{.Slug}}""" 4
.el .IP "\f(CW{{.Slug}}\fR" 4
.IX Item "{{.Slug}}"
.ie n .IP """{{.Category}}""" 4
.el .IP "\f(CW{{.Category}}\fR" 4
.IX Item "{{.Category}}"
.ie n .IP """{{.ID}}""" 4
.el .IP "\f(CW{{.ID}}\fR" 4
.IX Item "{{.ID}}"
.PD
.SS "\s-1AD TEMPLATE\s0" .SS "\s-1AD TEMPLATE\s0"
.IX Subsection "AD TEMPLATE" .IX Subsection "AD TEMPLATE"
The ad listing itself can be modified as well, using the same The ad listing itself can be modified as well, using the same
@@ -345,7 +367,7 @@ Also there's currently no parallelization implemented. This will
change in the future. change in the future.
.SH "LICENSE" .SH "LICENSE"
.IX Header "LICENSE" .IX Header "LICENSE"
Copyright 2023\-2024 Thomas von Dein Copyright 2023\-2025 Thomas von Dein
.PP .PP
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the \s-1GNU\s0 General Public License as published by it under the terms of the \s-1GNU\s0 General Public License as published by

View File

@@ -46,7 +46,6 @@ CONFIGURATION
Id: {{.ID}} Id: {{.ID}}
Category: {{.Category}} Category: {{.Category}}
Condition: {{.Condition}} Condition: {{.Condition}}
Type: {{.Type}}
Created: {{.Created}} Created: {{.Created}}
{{.Text}} {{.Text}}
@@ -101,6 +100,16 @@ TEMPLATES
It can only be configured in the config file. By default only It can only be configured in the config file. By default only
"{{.Slug}}" is being used, this is the title of the ad in url format. "{{.Slug}}" is being used, this is the title of the ad in url format.
AD NAME TEMPLATE
The name of the directory per ad can be tuned as well:
"{{.Year}}"
"{{.Month}}"
"{{.Day}}"
"{{.Slug}}"
"{{.Category}}"
"{{.ID}}"
AD TEMPLATE AD TEMPLATE
The ad listing itself can be modified as well, using the same variables The ad listing itself can be modified as well, using the same variables
as the ad name template above. as the ad name template above.
@@ -175,7 +184,7 @@ LIMITATIONS
in the future. in the future.
LICENSE LICENSE
Copyright 2023-2024 Thomas von Dein Copyright 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify it This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the under the terms of the GNU General Public License as published by the

View File

@@ -46,7 +46,6 @@ Format is pretty simple:
Id: {{.ID}} Id: {{.ID}}
Category: {{.Category}} Category: {{.Category}}
Condition: {{.Condition}} Condition: {{.Condition}}
Type: {{.Type}}
Created: {{.Created}} Created: {{.Created}}
{{.Text}} {{.Text}}
@@ -120,6 +119,27 @@ The ad directory name can be modified using the following ad values:
It can only be configured in the config file. By default only It can only be configured in the config file. By default only
C<{{.Slug}}> is being used, this is the title of the ad in url format. C<{{.Slug}}> is being used, this is the title of the ad in url format.
=head2 AD NAME TEMPLATE
The name of the directory per ad can be tuned as well:
=over
=item C<{{.Year}}>
=item C<{{.Month}}>
=item C<{{.Day}}>
=item C<{{.Slug}}>
=item C<{{.Category}}>
=item C<{{.ID}}>
=back
=head2 AD TEMPLATE =head2 AD TEMPLATE
The ad listing itself can be modified as well, using the same The ad listing itself can be modified as well, using the same
@@ -203,7 +223,7 @@ change in the future.
=head1 LICENSE =head1 LICENSE
Copyright 2023-2024 Thomas von Dein Copyright 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by

View File

@@ -93,6 +93,10 @@ const ADTPL string = `DOCTYPE html>
<li class="addetailslist--detail"> <li class="addetailslist--detail">
Zustand<span class="addetailslist--detail--value" > Zustand<span class="addetailslist--detail--value" >
{{ .Condition }}</span> {{ .Condition }}</span>
Farbe<span class="addetailslist--detail--value" >
{{ .Color }}</span>
Art<span class="addetailslist--detail--value" >
{{ .Type }}</span>
</li> </li>
</ul> </ul>
</div> </div>
@@ -251,6 +255,8 @@ type AdConfig struct {
Price string Price string
Category string Category string
Condition string Condition string
Type string
Color string
Created string Created string
Text string Text string
Images []string // files in ./t/ Images []string // files in ./t/
@@ -265,6 +271,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "first-ad", Slug: "first-ad",
Condition: "Sehr Gut", Condition: "Sehr Gut",
Color: "Grün",
Type: "Ball",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -275,6 +283,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "second-ad", Slug: "second-ad",
Condition: "Gut", Condition: "Gut",
Color: "Lila",
Type: "Schoki",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -286,6 +296,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "third-ad", Slug: "third-ad",
Condition: "In Ordnung", Condition: "In Ordnung",
Color: "Blau",
Type: "Auto",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -297,6 +309,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "fourth-ad", Slug: "fourth-ad",
Condition: "Neu", Condition: "Neu",
Color: "Rot",
Type: "Spielzeut",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -308,6 +322,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "fifth-ad", Slug: "fifth-ad",
Condition: "Sehr Gut", Condition: "Sehr Gut",
Color: "Braun",
Type: "Parteibuch",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -319,6 +335,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "sixth-ad", Slug: "sixth-ad",
Condition: "Sehr Gut", Condition: "Sehr Gut",
Color: "Silber",
Type: "Ring",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"}, Images: []string{"t/1.jpg", "t/2.jpg"},
}, },
@@ -330,6 +348,8 @@ var adsrc = []AdConfig{
Text: "Thing to sale", Text: "Thing to sale",
Slug: "seventh-ad", Slug: "seventh-ad",
Condition: "Sehr Gut", Condition: "Sehr Gut",
Color: "Gelpb",
Type: "Schmuck",
Created: "Yesterday", Created: "Yesterday",
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"}, Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
}, },

View File

@@ -45,15 +45,21 @@ for D in $DIST; do
os=${D/\/*/} os=${D/\/*/}
arch=${D/*\//} arch=${D/*\//}
binfile="releases/${tool}-${os}-${arch}-${version}" binfile="releases/${tool}-${os}-${arch}-${version}"
pie=""
if test "$os" = "windows"; then if test "$os" = "windows"; then
binfile="${binfile}.exe" binfile="${binfile}.exe"
fi fi
if test "$D" = "linux/amd64"; then
pie="-buildmode=pie"
fi
tardir="${tool}-${os}-${arch}-${version}" tardir="${tool}-${os}-${arch}-${version}"
tarfile="releases/${tool}-${os}-${arch}-${version}.tar.gz" tarfile="releases/${tool}-${os}-${arch}-${version}.tar.gz"
set -x set -x
GOOS=${os} GOARCH=${arch} go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o ${binfile} GOOS=${os} GOARCH=${arch} go build -tags osusergo,netgo -ldflags "-extldflags=-static -w" --trimpath $pie -o ${binfile}
strip --strip-all ${binfile}
mkdir -p ${tardir} mkdir -p ${tardir}
cp ${binfile} README.md LICENSE ${tardir}/ cp ${binfile} README.md LICENSE ${tardir}/
echo 'tool = kleingebaeck echo 'tool = kleingebaeck

View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2023-2024 Thomas von Dein Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -22,7 +22,6 @@ import (
"fmt" "fmt"
"log/slog" "log/slog"
"path/filepath" "path/filepath"
"slices"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@@ -89,7 +88,13 @@ func ScrapeUser(fetch *Fetcher) error {
// scrape an ad. uri is the full uri of the ad, dir is the basedir // scrape an ad. uri is the full uri of the ad, dir is the basedir
func ScrapeAd(fetch *Fetcher, uri string) error { func ScrapeAd(fetch *Fetcher, uri string) error {
advertisement := &Ad{} now := time.Now()
advertisement := &Ad{
Year: now.Format("2006"),
Month: now.Format("01"),
Day: now.Format("02"),
}
// extract slug and id from uri // extract slug and id from uri
uriparts := strings.Split(uri, "/") uriparts := strings.Split(uri, "/")
@@ -125,15 +130,7 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
return fmt.Errorf("could not extract ad data from page, got empty struct") return fmt.Errorf("could not extract ad data from page, got empty struct")
} }
for _, detail := range advertisement.Details { advertisement.DecodeAttributes()
if slices.Contains(CONDITIONS, detail) {
advertisement.Condition = detail
} else {
advertisement.Type = detail
}
}
advertisement.CalculateExpire() advertisement.CalculateExpire()
// prepare ad dir name // prepare ad dir name

View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2023-2024 Thomas von Dein Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -44,8 +44,8 @@ func OutDirName(conf *Config) (string, error) {
now := time.Now() now := time.Now()
data := OutdirData{ data := OutdirData{
Year: now.Format("2006"), Year: now.Format("2006"),
Month: now.Format("02"), Month: now.Format("01"),
Day: now.Format("01"), Day: now.Format("02"),
} }
err = tmpl.Execute(&buf, data) err = tmpl.Execute(&buf, data)