Compare commits

...

13 Commits

Author SHA1 Message Date
7337464112 check return of ReadString() 2024-04-26 13:37:38 +02:00
dbb64dcae1 bump version 2024-04-26 12:03:35 +02:00
74db3f534e fix #88: respond accordingly when user double clicks kleingebaeck.exe 2024-04-26 11:36:27 +02:00
8cc5a9e3ed missed commits 2024-02-12 13:36:29 +01:00
d2bcd7b505 fix #80: using os.MkdirAll():
Recursively create ad dir including output dir. The output dir itself
is not being created separately anymore. That way, no directory will
be created if no ads could be downloaded.
2024-02-12 13:32:25 +01:00
c59c2e2931 fix #81: add arm64 build support 2024-02-12 13:32:25 +01:00
2288806105 fix #77: use processed ad dir for duplicate checking, not slug 2024-02-10 15:15:43 +01:00
5a2c277f0e fix #71 and #73: add support for outdir template and enhance docs 2024-02-10 14:44:09 +01:00
612ed2aa79 fix #74: warn if about to write to already visited ad, overwrite if -f 2024-02-10 14:44:09 +01:00
ed78731b3c check seek error 2024-01-27 17:34:44 +01:00
a84f0e1436 get rid of duplicate bytes.Buffer, use bytes.Reader instead, #39 2024-01-27 17:34:44 +01:00
d8d5be5c7d fix #58: add missing dashes to self issue template 2024-01-27 17:34:44 +01:00
bcf920c91e correct #39 add --ignoreerrors flag 2024-01-27 17:34:44 +01:00
16 changed files with 401 additions and 42 deletions

View File

@@ -5,3 +5,4 @@ title: "[bug-report]"
labels: bug
assignees: TLINDEN
---

2
ad.go
View File

@@ -1,5 +1,5 @@
/*
Copyright © 2023 Thomas von Dein
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by

View File

@@ -34,7 +34,7 @@ import (
)
const (
VERSION string = "0.3.2"
VERSION string = "0.3.6"
Baseuri string = "https://www.kleinanzeigen.de"
Listuri string = "/s-bestandsliste.html"
Defaultdir string = "."
@@ -52,6 +52,8 @@ const (
DefaultAdNameTemplate string = "{{.Slug}}"
DefaultOutdirTemplate string = "."
// for image download throttling
MinThrottle int = 2
MaxThrottle int = 20
@@ -65,6 +67,8 @@ const (
WIN string = "windows"
)
var DirsVisited map[string]int
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
Usage: kleingebaeck [-dvVhmoclu] [<ad-listing-url>,...]
@@ -77,7 +81,7 @@ Options:
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-f --force Overwrite images and ads even if the already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
@@ -126,7 +130,7 @@ func InitConfig(output io.Writer) (*Config, error) {
// Load default values using the confmap provider.
if err := kloader.Load(confmap.Provider(map[string]interface{}{
"template": template,
"outdir": ".",
"outdir": DefaultOutdirTemplate,
"loglevel": "notice",
"userid": 0,
"adnametemplate": DefaultAdNameTemplate,
@@ -153,6 +157,7 @@ func InitConfig(output io.Writer) (*Config, error) {
flagset.BoolP("help", "h", false, "show usage")
flagset.BoolP("manual", "m", false, "show manual")
flagset.BoolP("force", "f", false, "force")
flagset.BoolP("ignoreerrors", "", false, "ignore image download HTTP errors")
if err := flagset.Parse(os.Args[1:]); err != nil {
return nil, fmt.Errorf("failed to parse program arguments: %w", err)

5
go.mod
View File

@@ -14,7 +14,7 @@ require (
github.com/lmittmann/tint v1.0.4
github.com/mattn/go-isatty v0.0.20
github.com/spf13/pflag v1.0.5
github.com/tlinden/yadu v0.1.1
github.com/tlinden/yadu v0.1.2
golang.org/x/sync v0.5.0
)
@@ -24,6 +24,7 @@ require (
github.com/corona10/goimagehash v1.1.0 // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/knadh/koanf/maps v0.1.1 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
@@ -33,7 +34,7 @@ require (
github.com/pelletier/go-toml v1.9.5 // indirect
github.com/pkg/errors v0.9.1 // indirect
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
golang.org/x/sys v0.14.0 // indirect
golang.org/x/sys v0.17.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

6
go.sum
View File

@@ -15,6 +15,8 @@ github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww=
github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg=
github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs=
@@ -66,6 +68,8 @@ github.com/tlinden/yadu v0.1.0 h1:qtCi1jxg392qVRLFyrJ2LYu6/PiKSp1LT02EX+mNLME=
github.com/tlinden/yadu v0.1.0/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
github.com/tlinden/yadu v0.1.1 h1:116oEUy9b4PcMF5wLL2dCFA/sn/praYutOnao07MROw=
github.com/tlinden/yadu v0.1.1/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
github.com/tlinden/yadu v0.1.2 h1:TYYVnUJwziRJ9YPbIbRf9ikmDw0Q8Ifixm+J/kBQFh8=
github.com/tlinden/yadu v0.1.2/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -81,6 +85,8 @@ golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q=
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.17.0 h1:25cE3gD+tdBA7lp7QfhuV+rJiE9YXTcS3VG1SqssI/Y=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=

View File

@@ -33,7 +33,7 @@ const MaxDistance = 3
type Image struct {
Filename string
Hash *goimagehash.ImageHash
Data *bytes.Buffer
Data *bytes.Reader
URI string
}
@@ -49,7 +49,7 @@ func (img *Image) LogValue() slog.Value {
// holds all images of an ad
type Cache []*goimagehash.ImageHash
func NewImage(buf *bytes.Buffer, filename string, uri string) *Image {
func NewImage(buf *bytes.Reader, filename string, uri string) *Image {
img := &Image{
Filename: filename,
URI: uri,
@@ -131,7 +131,9 @@ func ReadImages(addir string, dont bool) (Cache, error) {
return nil, err
}
img := NewImage(data, filename, "")
reader := bytes.NewReader(data.Bytes())
img := NewImage(reader, filename, "")
if err = img.CalcHash(); err != nil {
return nil, err
}

View File

@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "KLEINGEBAECK 1"
.TH KLEINGEBAECK 1 "2024-01-25" "1" "User Commands"
.TH KLEINGEBAECK 1 "2024-02-10" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@@ -152,7 +152,7 @@ kleingebaeck \- kleinanzeigen.de backup tool
\& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all.
\& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck).
\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
\& \-f \-\-force Download images even if they already exist.
\& \-f \-\-force Overwrite images and ads even if they already exist.
\& \-m \-\-manual Show manual.
\& \-h \-\-help Show usage.
\& \-V \-\-version Show program version.
@@ -195,7 +195,7 @@ Be careful if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can leave out
certain fields and use any formatting you like. Refer to
<https://pkg.go.dev/text/template> for details how to write a
template.
template. Also read the \s-1TEMPLATES\s0 section below.
.PP
If you're on windows and want to customize the output directory, put
it into single quotes to avoid the backslashes interpreted as escape
@@ -204,6 +204,94 @@ chars like this:
.Vb 1
\& outdir = \*(AqC:\eData\eAds\*(Aq
.Ve
.SH "TEMPLATES"
.IX Header "TEMPLATES"
Various parts of the configuration can be modified using templates:
the output directory, the ad directory and the ad listing itself.
.SS "\s-1OUTPUT DIR TEMPLATE\s0"
.IX Subsection "OUTPUT DIR TEMPLATE"
The config variable \f(CW\*(C`outdir\*(C'\fR or the command line parameter \f(CW\*(C`\-o\*(C'\fR takes a
template which may contain:
.ie n .IP """{{.Year}}""" 4
.el .IP "\f(CW{{.Year}}\fR" 4
.IX Item "{{.Year}}"
.PD 0
.ie n .IP """{{.Month}}""" 4
.el .IP "\f(CW{{.Month}}\fR" 4
.IX Item "{{.Month}}"
.ie n .IP """{{.Day}}""" 4
.el .IP "\f(CW{{.Day}}\fR" 4
.IX Item "{{.Day}}"
.PD
.PP
That way you can create a new output directory for every backup
run. For example:
.PP
.Vb 1
\& outdir = "/home/backups/ads\-{{.Year}}\-{{.Month}}\-{{.Day}}"
.Ve
.PP
Or using the command line flag:
.PP
.Vb 1
\& \-o "/home/backups/ads\-{{.Year}}\-{{.Month}}\-{{.Day}}"
.Ve
.PP
The default value is \f(CW\*(C`.\*(C'\fR \- the current directory.
.SS "\s-1AD DIRECTORY TEMPLATE\s0"
.IX Subsection "AD DIRECTORY TEMPLATE"
The ad directory name can be modified using the following ad values:
.IP "{{.Price}}" 4
.IX Item "{{.Price}}"
.PD 0
.IP "{{.ID}}" 4
.IX Item "{{.ID}}"
.IP "{{.Category}}" 4
.IX Item "{{.Category}}"
.IP "{{.Condition}}" 4
.IX Item "{{.Condition}}"
.IP "{{.Created}}" 4
.IX Item "{{.Created}}"
.IP "{{.Slug}}" 4
.IX Item "{{.Slug}}"
.IP "{{.Text}}" 4
.IX Item "{{.Text}}"
.PD
.PP
It can only be configured in the config file. By default only
\&\f(CW\*(C`{{.Slug}}\*(C'\fR is being used, this is the title of the ad in url format.
.SS "\s-1AD TEMPLATE\s0"
.IX Subsection "AD TEMPLATE"
The ad listing itself can be modified as well, using the same
variables as the ad name template above.
.PP
This is the default template:
.PP
.Vb 7
\& Title: {{.Title}}
\& Price: {{.Price}}
\& Id: {{.ID}}
\& Category: {{.Category}}
\& Condition: {{.Condition}}
\& Created: {{.Created}}
\& Expire: {{.Expire}}
\&
\& {{.Text}}
.Ve
.PP
The config parameter to modify is \f(CW\*(C`template\*(C'\fR. See example.conf in the
source repository. Please take care, since this is a multiline
string. This is how it shall look if you modify it:
.PP
.Vb 2
\& template="""
\& Title: {{.Title}}
\&
\& {{.Text}}
\& """
.Ve
.PP
That is, the content between the two \f(CW"""\fR chars is the template.
.SH "SETUP"
.IX Header "SETUP"
To setup the tool, you need to lookup your userid on

View File

@@ -14,7 +14,7 @@ SYNOPSYS
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-f --force Overwrite images and ads even if they already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
@@ -55,6 +55,7 @@ CONFIGURATION
multiline string surrounded by three double quotes. You can leave out
certain fields and use any formatting you like. Refer to
<https://pkg.go.dev/text/template> for details how to write a template.
Also read the TEMPLATES section below.
If you're on windows and want to customize the output directory, put it
into single quotes to avoid the backslashes interpreted as escape chars
@@ -62,6 +63,71 @@ CONFIGURATION
outdir = 'C:\Data\Ads'
TEMPLATES
Various parts of the configuration can be modified using templates: the
output directory, the ad directory and the ad listing itself.
OUTPUT DIR TEMPLATE
The config variable "outdir" or the command line parameter "-o" takes a
template which may contain:
"{{.Year}}"
"{{.Month}}"
"{{.Day}}"
That way you can create a new output directory for every backup run. For
example:
outdir = "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
Or using the command line flag:
-o "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
The default value is "." - the current directory.
AD DIRECTORY TEMPLATE
The ad directory name can be modified using the following ad values:
{{.Price}}
{{.ID}}
{{.Category}}
{{.Condition}}
{{.Created}}
{{.Slug}}
{{.Text}}
It can only be configured in the config file. By default only
"{{.Slug}}" is being used, this is the title of the ad in url format.
AD TEMPLATE
The ad listing itself can be modified as well, using the same variables
as the ad name template above.
This is the default template:
Title: {{.Title}}
Price: {{.Price}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Created: {{.Created}}
Expire: {{.Expire}}
{{.Text}}
The config parameter to modify is "template". See example.conf in the
source repository. Please take care, since this is a multiline string.
This is how it shall look if you modify it:
template="""
Title: {{.Title}}
{{.Text}}
"""
That is, the content between the two """ chars is the template.
SETUP
To setup the tool, you need to lookup your userid on kleinanzeigen.de.
Go to your ad overview page while NOT being logged in:

View File

@@ -13,7 +13,7 @@ kleingebaeck - kleinanzeigen.de backup tool
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-f --force Overwrite images and ads even if they already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
@@ -55,7 +55,7 @@ Be careful if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can leave out
certain fields and use any formatting you like. Refer to
L<https://pkg.go.dev/text/template> for details how to write a
template.
template. Also read the TEMPLATES section below.
If you're on windows and want to customize the output directory, put
it into single quotes to avoid the backslashes interpreted as escape
@@ -63,6 +63,91 @@ chars like this:
outdir = 'C:\Data\Ads'
=head1 TEMPLATES
Various parts of the configuration can be modified using templates:
the output directory, the ad directory and the ad listing itself.
=head2 OUTPUT DIR TEMPLATE
The config variable C<outdir> or the command line parameter C<-o> takes a
template which may contain:
=over
=item C<{{.Year}}>
=item C<{{.Month}}>
=item C<{{.Day}}>
=back
That way you can create a new output directory for every backup
run. For example:
outdir = "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
Or using the command line flag:
-o "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
The default value is C<.> - the current directory.
=head2 AD DIRECTORY TEMPLATE
The ad directory name can be modified using the following ad values:
=over
=item {{.Price}}
=item {{.ID}}
=item {{.Category}}
=item {{.Condition}}
=item {{.Created}}
=item {{.Slug}}
=item {{.Text}}
=back
It can only be configured in the config file. By default only
C<{{.Slug}}> is being used, this is the title of the ad in url format.
=head2 AD TEMPLATE
The ad listing itself can be modified as well, using the same
variables as the ad name template above.
This is the default template:
Title: {{.Title}}
Price: {{.Price}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Created: {{.Created}}
Expire: {{.Expire}}
{{.Text}}
The config parameter to modify is C<template>. See example.conf in the
source repository. Please take care, since this is a multiline
string. This is how it shall look if you modify it:
template="""
Title: {{.Title}}
{{.Text}}
"""
That is, the content between the two C<"""> chars is the template.
=head1 SETUP
To setup the tool, you need to lookup your userid on

28
main.go
View File

@@ -18,13 +18,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"bufio"
"errors"
"fmt"
"io"
"log/slog"
"os"
"runtime"
"runtime/debug"
"github.com/inconshreveable/mousetrap"
"github.com/lmittmann/tint"
"github.com/tlinden/yadu"
)
@@ -35,6 +38,25 @@ func main() {
os.Exit(Main(os.Stdout))
}
// init guards against kleingebaeck.exe being started by a double click
// from the Windows explorer. In that case a hint is printed and the
// program blocks until the user confirms, because the cmd window
// disappears afterwards and the user would not have time to read the
// message otherwise. On every other platform, or when started from a
// shell, it does nothing.
func init() {
	// the short circuit makes sure mousetrap is only consulted on windows
	if runtime.GOOS != "windows" || !mousetrap.StartedByExplorer() {
		return
	}

	fmt.Println("Do not double click kleingebaeck.exe!")
	fmt.Println("Please open a command shell and run it from there.")
	fmt.Println()

	// ReadString('\n') only returns after a complete line has been
	// entered, so the prompt has to ask for the Enter key.
	fmt.Print("Press Enter to quit: ")

	_, err := bufio.NewReader(os.Stdin).ReadString('\n')
	if err != nil && !errors.Is(err, io.EOF) {
		// a closed stdin (EOF) just means there is nothing to wait
		// for, everything else is unexpected
		panic(err)
	}
}
func Main(output io.Writer) int {
logLevel := &slog.LevelVar{}
opts := &tint.Options{
@@ -112,10 +134,11 @@ func Main(output io.Writer) int {
slog.Debug("config", "conf", conf)
// prepare output dir
err = Mkdir(conf.Outdir)
outdir, err := OutDirName(conf)
if err != nil {
return Die(err)
}
conf.Outdir = outdir
// used for all HTTP requests
fetch, err := NewFetcher(conf)
@@ -123,6 +146,9 @@ func Main(output io.Writer) int {
return Die(err)
}
// setup ad dir registry, needed to check for duplicates
DirsVisited = make(map[string]int)
switch {
case len(conf.Adlinks) >= 1:
// directly backup ad listing[s]

View File

@@ -22,7 +22,12 @@ freebsd/amd64
linux/amd64
netbsd/amd64
openbsd/amd64
windows/amd64"
windows/amd64
freebsd/arm64
linux/arm64
netbsd/arm64
openbsd/arm64
windows/arm64"
tool="$1"
version="$2"

View File

@@ -126,16 +126,32 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
advertisement.CalculateExpire()
// write listing
addir, err := WriteAd(fetch.Config, advertisement)
// prepare ad dir name
addir, err := AdDirName(fetch.Config, advertisement)
if err != nil {
return err
}
proceed := CheckAdVisited(fetch.Config, addir)
if !proceed {
return nil
}
// write listing
err = WriteAd(fetch.Config, advertisement, addir)
if err != nil {
return err
}
// tell the user
slog.Debug("extracted ad listing", "ad", advertisement)
// stats
fetch.Config.IncrAds()
// register for later checks
DirsVisited[addir] = 1
return ScrapeImages(fetch, advertisement, addir)
}
@@ -168,14 +184,15 @@ func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error {
}
buf := new(bytes.Buffer)
_, err = buf.ReadFrom(body)
if err != nil {
return fmt.Errorf("failed to read from image buffer: %w", err)
}
buf2 := buf.Bytes() // needed for image writing
reader := bytes.NewReader(buf.Bytes())
image := NewImage(buf, file, imguri)
image := NewImage(reader, file, imguri)
err = image.CalcHash()
if err != nil {
return err
@@ -189,12 +206,17 @@ func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error {
}
}
err = WriteImage(file, buf2)
_, err = reader.Seek(0, 0)
if err != nil {
return fmt.Errorf("failed to seek(0) on image reader: %w", err)
}
err = WriteImage(file, reader)
if err != nil {
return err
}
slog.Debug("wrote image", "image", image, "size", len(buf2), "throttle", throttle)
slog.Debug("wrote image", "image", image, "size", buf.Len(), "throttle", throttle)
return nil
})

View File

@@ -26,8 +26,36 @@ import (
"runtime"
"strings"
tpl "text/template"
"time"
)
// OutdirData holds the date values which can be used inside the output
// directory template: {{.Year}} (four digits), {{.Month}} and {{.Day}}
// (two digits each, zero padded).
type OutdirData struct {
	Year, Day, Month string
}

// OutDirName renders the output directory template stored in
// conf.Outdir with the date of the current run and returns the
// resulting directory name. A value without any template actions is
// returned unchanged.
//
// It returns an error if the template cannot be parsed or executed.
func OutDirName(conf *Config) (string, error) {
	tmpl, err := tpl.New("outdir").Parse(conf.Outdir)
	if err != nil {
		return "", fmt.Errorf("failed to parse outdir template: %w", err)
	}

	now := time.Now()

	// Go's reference time is "2006-01-02": "01" is the month and "02"
	// the day of the month.
	data := OutdirData{
		Year:  now.Format("2006"),
		Month: now.Format("01"),
		Day:   now.Format("02"),
	}

	var buf bytes.Buffer

	if err := tmpl.Execute(&buf, data); err != nil {
		return "", fmt.Errorf("failed to execute outdir template: %w", err)
	}

	return buf.String(), nil
}
func AdDirName(conf *Config, advertisement *Ad) (string, error) {
tmpl, err := tpl.New("adname").Parse(conf.Adnametemplate)
if err != nil {
@@ -44,19 +72,13 @@ func AdDirName(conf *Config, advertisement *Ad) (string, error) {
return buf.String(), nil
}
func WriteAd(conf *Config, advertisement *Ad) (string, error) {
// prepare ad dir name
addir, err := AdDirName(conf, advertisement)
if err != nil {
return "", err
}
func WriteAd(conf *Config, advertisement *Ad, addir string) error {
// prepare output dir
dir := filepath.Join(conf.Outdir, addir)
err = Mkdir(dir)
err := Mkdir(dir)
if err != nil {
return "", err
return err
}
// write ad file
@@ -64,7 +86,7 @@ func WriteAd(conf *Config, advertisement *Ad) (string, error) {
listingfd, err := os.Create(listingfile)
if err != nil {
return "", fmt.Errorf("failed to create Adlisting.txt: %w", err)
return fmt.Errorf("failed to create Adlisting.txt: %w", err)
}
defer listingfd.Close()
@@ -76,27 +98,27 @@ func WriteAd(conf *Config, advertisement *Ad) (string, error) {
tmpl, err := tpl.New("adlisting").Parse(conf.Template)
if err != nil {
return "", fmt.Errorf("failed to parse adlisting template: %w", err)
return fmt.Errorf("failed to parse adlisting template: %w", err)
}
err = tmpl.Execute(listingfd, advertisement)
if err != nil {
return "", fmt.Errorf("failed to execute adlisting template: %w", err)
return fmt.Errorf("failed to execute adlisting template: %w", err)
}
slog.Info("wrote ad listing", "listingfile", listingfile)
return addir, nil
return nil
}
func WriteImage(filename string, buf []byte) error {
func WriteImage(filename string, reader *bytes.Reader) error {
file, err := os.Create(filename)
if err != nil {
return fmt.Errorf("failed to open image file: %w", err)
}
defer file.Close()
_, err = file.Write(buf)
_, err = reader.WriteTo(file)
if err != nil {
return fmt.Errorf("failed to write to image file: %w", err)
@@ -133,3 +155,21 @@ func fileExists(filename string) bool {
return !info.IsDir()
}
// CheckAdVisited decides whether the ad directory adname may be written
// during the current run. It looks adname up in DirsVisited:
//
//   - not registered yet: returns true
//   - registered and conf.ForceDownload is set: logs a warning and
//     returns true, the ad gets overwritten
//   - registered otherwise: logs a warning and returns false, the ad
//     gets skipped
func CheckAdVisited(conf *Config, adname string) bool {
	if _, seen := DirsVisited[adname]; !seen {
		// first ad with this dir name, go ahead
		return true
	}

	if !conf.ForceDownload {
		// don't overwrite
		slog.Warn("an ad with the same name has already been downloaded, skipping (use -f to overwrite)", "addir", adname)

		return false
	}

	slog.Warn("an ad with the same name has already been downloaded, overwriting", "addir", adname)

	return true
}

View File

@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"bytes"
"testing"
)
@@ -28,10 +29,10 @@ import (
func TestWriteImage(t *testing.T) {
t.Parallel()
buf := []byte{1, 2, 3, 4, 5, 6, 7, 8}
reader := bytes.NewReader([]byte{1, 2, 3, 4, 5, 6, 7, 8})
file := "t/out/t.jpg"
err := WriteImage(file, buf)
err := WriteImage(file, reader)
if err != nil {
t.Errorf("Could not write mock image to %s: %s", file, err.Error())
}

View File

@@ -1,5 +1,7 @@
#!/bin/sh -x
base="../kleinanzeigen"
rm -rf $base
mkdir -p $base
echo "Generating /s-bestandsliste.html"

13
util.go
View File

@@ -1,5 +1,5 @@
/*
Copyright © 2023 Thomas von Dein
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -32,7 +32,7 @@ import (
func Mkdir(dir string) error {
if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) {
err := os.Mkdir(dir, os.ModePerm)
err := os.MkdirAll(dir, os.ModePerm)
if err != nil {
return fmt.Errorf("failed to create directory %s: %w", dir, err)
}
@@ -74,3 +74,12 @@ func IsNoTty() bool {
func GetThrottleTime() time.Duration {
return time.Duration(rand.Intn(MaxThrottle-MinThrottle+1)+MinThrottle) * time.Millisecond
}
// Exists reports whether the key v is present in the map m, no matter
// which value is stored for it. Generic variant, usable with every map
// type.
func Exists[K comparable, V any](m map[K]V, v K) bool {
	_, found := m[v]

	return found
}