From b8977df9866e7be340f9426ad473c82357b62b20 Mon Sep 17 00:00:00 2001 From: "T.v.Dein" Date: Tue, 19 Dec 2023 18:23:41 +0100 Subject: [PATCH] Bugfixes (#13) * several fixes: - fix #9 + #10: switched to koanf module and dropped support for HCL - fix #11: disabling colors on windows - fix #12: fixed race condition in go routine call inside for loop, images had been downloaded multiple times - remove hcl support and use toml format (same thing, better parser) - update documentation and example config on TOML format of config file - use Config as arg instead of singular args - use x/errgroup instead of sync.Waitgroup inside image download loop --------- Co-authored-by: Thomas von Dein --- Makefile | 3 + README.md | 19 ++--- config.go | 156 +++++++++++++++++++++++++++++++----- example.hcl => example.conf | 21 +++-- go.mod | 31 ++++--- go.sum | 52 ++++++++---- kleingebaeck.1 | 58 +++++++++----- kleingebaeck.go | 49 ++++++----- kleingebaeck.pod | 53 +++++++----- main.go | 108 +++++++------------------ scrape.go | 53 ++++++------ util.go | 13 +++ 12 files changed, 394 insertions(+), 222 deletions(-) rename example.hcl => example.conf (50%) diff --git a/Makefile b/Makefile index bde204e..5812f8f 100644 --- a/Makefile +++ b/Makefile @@ -86,3 +86,6 @@ show-versions: buildlocal @echo @echo "### go version used for building:" @grep -m 1 go go.mod + +lint: + golangci-lint run -p bugs -p unused diff --git a/README.md b/README.md index 568ce4f..a9f20cf 100644 --- a/README.md +++ b/README.md @@ -66,12 +66,14 @@ To install after building either copy the binary or execute `sudo make install`. ``` Usage: kleingebaeck [-dvVhmoc] [,...] Options: ---user,-u Backup ads from user with uid . ---debug, -d Enable debug output. ---verbose,-v Enable verbose output. ---output-dir,-o Set output dir (default: current directory) ---manual,-m Show manual. ---config,-c Use config file (default: ~/.kleingebaeck). +--user -u Backup ads from user with uid . +--debug -d Enable debug output. 
+--verbose -v Enable verbose output. +--outdir -o Set output dir (default: current directory) +--limit -l Limit the ads to download to , default: load all. +--config -c Use config file (default: ~/.kleingebaeck). +--manual -m Show manual. +--help -h Show usage. If one or more 's are specified, only backup those, otherwise backup all ads of the given user. @@ -80,16 +82,15 @@ otherwise backup all ads of the given user. ## Configfile You can create a config file to save typing. By default -`~/.kleingebaeck.hcl` is being used but you can specify one with +`~/.kleingebaeck` is being used but you can specify one with `-c` as well. Format is simple: ``` user = 1010101 -verbose = true +loglevel = verbose outdir = "test" -template = "" ``` ## Usage diff --git a/config.go b/config.go index 405983b..6d05ef8 100644 --- a/config.go +++ b/config.go @@ -17,13 +17,22 @@ along with this program. If not, see . package main import ( + "errors" + "fmt" "os" + "path/filepath" + "runtime" - "github.com/hashicorp/hcl/v2/hclsimple" + "github.com/knadh/koanf/parsers/toml" + "github.com/knadh/koanf/providers/confmap" + "github.com/knadh/koanf/providers/file" + "github.com/knadh/koanf/providers/posflag" + "github.com/knadh/koanf/v2" + flag "github.com/spf13/pflag" ) const ( - VERSION string = "0.0.6" + VERSION string = "0.1.0" Baseuri string = "https://www.kleinanzeigen.de" Listuri string = "/s-bestandsliste.html" Defaultdir string = "." @@ -35,32 +44,135 @@ const ( "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" ) +const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool. + +Usage: kleingebaeck [-dvVhmoclu] [,...] + +Options: +--user -u Backup ads from user with uid . +--debug -d Enable debug output. +--verbose -v Enable verbose output. +--outdir -o Set output dir (default: current directory) +--limit -l Limit the ads to download to , default: load all. +--config -c Use config file (default: ~/.kleingebaeck). +--manual -m Show manual. 
+--help -h Show usage. +--version -V Show program version. + +If one or more ad listing url's are specified, only backup those, +otherwise backup all ads of the given user.` + type Config struct { - Verbose *bool `hcl:"verbose"` - User *int `hcl:"user"` - Outdir *string `hcl:"outdir"` - Template *string `hcl:"template"` + Verbose bool `koanf:"verbose"` // loglevel=info + Debug bool `koanf:"debug"` // loglevel=debug + Showversion bool `koanf:"version"` // -v + Showhelp bool `koanf:"help"` // -h + Showmanual bool `koanf:"manual"` // -m + User int `koanf:"user"` + Outdir string `koanf:"outdir"` + Template string `koanf:"template"` + Loglevel string `koanf:"loglevel"` + Limit int `koanf:"limit"` + Adlinks []string + StatsCountAds int + StatsCountImages int } -func ParseConfigfile(file string) (*Config, error) { - c := Config{} - if path, err := os.Stat(file); !os.IsNotExist(err) { - if !path.IsDir() { - configstring, err := os.ReadFile(file) - if err != nil { - return nil, err - } +func (c *Config) IncrAds() { + c.StatsCountAds++ +} - err = hclsimple.Decode( - path.Name(), configstring, - nil, &c, - ) +func (c *Config) IncrImgs(num int) { + c.StatsCountImages += num +} - if err != nil { - return nil, err - } +// load commandline flags and config file +func InitConfig() (*Config, error) { + var k = koanf.New(".") + + // determine template based on os + template := DefaultTemplate + if runtime.GOOS == "windows" { + template = DefaultTemplateWin + } + + // Load default values using the confmap provider. 
+ k.Load(confmap.Provider(map[string]interface{}{ + "template": template, + "outdir": ".", + "loglevel": "notice", + "userid": 0, + }, "."), nil) + + // setup custom usage + f := flag.NewFlagSet("config", flag.ContinueOnError) + f.Usage = func() { + fmt.Println(Usage) + os.Exit(0) + } + + // parse commandline flags + f.StringP("config", "c", "", "config file") + f.StringP("outdir", "o", "", "directory where to store ads") + f.IntP("user", "u", 0, "user id") + f.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)") + f.BoolP("verbose", "v", false, "be verbose") + f.BoolP("debug", "d", false, "enable debug log") + f.BoolP("version", "V", false, "show program version") + f.BoolP("help", "h", false, "show usage") + f.BoolP("manual", "m", false, "show manual") + + f.Parse(os.Args[1:]) + + // generate a list of config files to try to load, including the + // one provided via -c, if any + var configfiles []string + configfile, _ := f.GetString("config") + home, _ := os.UserHomeDir() + if configfile != "" { + configfiles = []string{configfile} + } else { + configfiles = []string{ + "/etc/kleingebaeck.conf", "/usr/local/etc/kleingebaeck.conf", // unix variants + filepath.Join(home, ".config", "kleingebaeck", "config"), + filepath.Join(home, ".kleingebaeck"), + "kleingebaeck.conf", } } - return &c, nil + // Load the config file[s] + for _, cfgfile := range configfiles { + if path, err := os.Stat(cfgfile); !os.IsNotExist(err) { + if !path.IsDir() { + if err := k.Load(file.Provider(cfgfile), toml.Parser()); err != nil { + return nil, errors.New("error loading config file: " + err.Error()) + } + } + } + // else: we ignore the file if it doesn't exists + } + + // command line overrides config file + if err := k.Load(posflag.Provider(f, ".", k), nil); err != nil { + return nil, errors.New("error loading flags: " + err.Error()) + } + + // fetch values + conf := &Config{} + if err := k.Unmarshal("", &conf); err != nil { + return nil, errors.New("error 
unmarshalling: " + err.Error()) + } + + // adjust loglevel + switch conf.Loglevel { + case "verbose": + conf.Verbose = true + case "debug": + conf.Debug = true + } + + // are there any args left on commandline? if so treat them as adlinks + conf.Adlinks = f.Args() + + return conf, nil } diff --git a/example.hcl b/example.conf similarity index 50% rename from example.hcl rename to example.conf index d0542ed..12ba22e 100644 --- a/example.hcl +++ b/example.conf @@ -1,6 +1,6 @@ # # kleingebaeck sample configuration file. -# put this to ~/.kleingebaeck.hcl. +# put this to ~/.kleingebaeck. # # Comments start with the '#' character. @@ -8,12 +8,23 @@ user = 00000000 # enable verbose output (same as -v), may be true or false. -verbose = true +# other values: notice or debug +loglevel = "verbose" # directory where to store downloaded ads. kleingebaeck will try to # create it. must be a quoted string. outdir = "test" -# template. leave empty to use the default one, which is: -# "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\nCategory: {{.Category}}\nCondition: {{.Condition}}\nCreated: {{.Created}}\n\n{{.Text}}\n" -template = "" +# template for stored adlistings.
To enable it, remove the comment +# chars up until the last #""" +#template=""" +#Title: {{.Title}} +#Price: {{.Price}} +#Id: {{.Id}} +#Category: {{.Category}} +#Condition: {{.Condition}} +#Created: {{.Created}} + +#{{.Text}} +# """ + diff --git a/go.mod b/go.mod index 53b3578..c395625 100644 --- a/go.mod +++ b/go.mod @@ -3,19 +3,28 @@ module kleingebaeck go 1.21 require ( - astuart.co/goq v1.0.0 // indirect + astuart.co/goq v1.0.0 + github.com/knadh/koanf/parsers/toml v0.1.0 + github.com/knadh/koanf/providers/confmap v0.1.0 + github.com/knadh/koanf/providers/file v0.1.0 + github.com/knadh/koanf/providers/posflag v0.1.0 + github.com/knadh/koanf/v2 v2.0.1 + github.com/lmittmann/tint v1.0.3 + github.com/mattn/go-isatty v0.0.20 + github.com/spf13/pflag v1.0.5 +) + +require ( github.com/PuerkitoBio/goquery v1.5.0 // indirect - github.com/agext/levenshtein v1.2.1 // indirect github.com/andybalholm/cascadia v1.0.0 // indirect - github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect - github.com/apparentlymart/go-textseg/v15 v15.0.0 // indirect - github.com/google/go-cmp v0.3.1 // indirect - github.com/hashicorp/hcl/v2 v2.19.1 // indirect - github.com/lmittmann/tint v1.0.3 // indirect - github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 // indirect - github.com/spf13/pflag v1.0.5 // indirect - github.com/zclconf/go-cty v1.13.0 // indirect + github.com/fsnotify/fsnotify v1.6.0 // indirect + github.com/knadh/koanf/maps v0.1.1 // indirect + github.com/mitchellh/copystructure v1.2.0 // indirect + github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/mitchellh/reflectwalk v1.0.2 // indirect + github.com/pelletier/go-toml v1.9.5 // indirect golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect - golang.org/x/text v0.11.0 // indirect + golang.org/x/sync v0.5.0 // indirect + golang.org/x/sys v0.6.0 // indirect ) diff --git a/go.sum b/go.sum index 924e1a8..5a71b9d 100644 --- a/go.sum +++ b/go.sum @@ -2,36 +2,56 @@ astuart.co/goq v1.0.0 
h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw= astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno= github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= -github.com/agext/levenshtein v1.2.1 h1:QmvMAjj2aEICytGiWzmxoE0x2KZvE0fvmqMOfy2tjT8= -github.com/agext/levenshtein v1.2.1/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558= github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= -github.com/apparentlymart/go-textseg/v13 v13.0.0 h1:Y+KvPE1NYz0xl601PVImeQfFyEy6iT90AvPUL1NNfNw= -github.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo= -github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew1u1fNQOlOtuGxQY= -github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/hashicorp/hcl/v2 v2.19.1 h1://i05Jqznmb2EXqa39Nsvyan2o5XyMowW5fnCKW5RPI= -github.com/hashicorp/hcl/v2 v2.19.1/go.mod h1:ThLC89FV4p9MPW804KVbe/cEXoQ8NZEh+JtMeeGErHE= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY= +github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw= +github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs= +github.com/knadh/koanf/maps v0.1.1/go.mod 
h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI= +github.com/knadh/koanf/parsers/toml v0.1.0 h1:S2hLqS4TgWZYj4/7mI5m1CQQcWurxUz6ODgOub/6LCI= +github.com/knadh/koanf/parsers/toml v0.1.0/go.mod h1:yUprhq6eo3GbyVXFFMdbfZSo928ksS+uo0FFqNMnO18= +github.com/knadh/koanf/providers/confmap v0.1.0 h1:gOkxhHkemwG4LezxxN8DMOFopOPghxRVp7JbIvdvqzU= +github.com/knadh/koanf/providers/confmap v0.1.0/go.mod h1:2uLhxQzJnyHKfxG927awZC7+fyHFdQkd697K4MdLnIU= +github.com/knadh/koanf/providers/file v0.1.0 h1:fs6U7nrV58d3CFAFh8VTde8TM262ObYf3ODrc//Lp+c= +github.com/knadh/koanf/providers/file v0.1.0/go.mod h1:rjJ/nHQl64iYCtAW2QQnF0eSmDEX/YZ/eNFj5yR6BvA= +github.com/knadh/koanf/providers/posflag v0.1.0 h1:mKJlLrKPcAP7Ootf4pBZWJ6J+4wHYujwipe7Ie3qW6U= +github.com/knadh/koanf/providers/posflag v0.1.0/go.mod h1:SYg03v/t8ISBNrMBRMlojH8OsKowbkXV7giIbBVgbz0= +github.com/knadh/koanf/v2 v2.0.1 h1:1dYGITt1I23x8cfx8ZnldtezdyaZtfAuRtIFOiRzK7g= +github.com/knadh/koanf/v2 v2.0.1/go.mod h1:ZeiIlIDXTE7w1lMT6UVcNiRAS2/rCeLn/GdLNvY1Dus= github.com/lmittmann/tint v1.0.3 h1:W5PHeA2D8bBJVvabNfQD/XW9HPLZK1XoPZH0cq8NouQ= github.com/lmittmann/tint v1.0.3/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE= -github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 h1:DpOJ2HYzCv8LZP15IdmG+YdwD2luVPHITV96TkirNBM= -github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= +github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= +github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= 
+github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= +github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= +github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= +github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/zclconf/go-cty v1.13.0 h1:It5dfKTTZHe9aeppbNOda3mN7Ag7sg6QkBNm6TkyFa0= -github.com/zclconf/go-cty v1.13.0/go.mod h1:YKQzy/7pZ7iq2jNFzy5go57xdxdWoLLpaEp4u238AE0= +github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190606173856-1492cefac77f h1:IWHgpgFqnL5AhBUBZSgBdjl2vkQUEzcY+JNKWfcgAU0= golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= +golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= -golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/kleingebaeck.1 b/kleingebaeck.1 index 10324f3..4baf2d4 100644 --- a/kleingebaeck.1 +++ b/kleingebaeck.1 @@ -133,7 +133,7 @@ .\" ======================================================================== .\" .IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2023-12-17" "1" "User Commands" +.TH KLEINGEBAECK 1 "2023-12-19" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -142,16 +142,17 @@ kleingebaeck \- kleinanzeigen.de backup tool .SH "SYNOPSYS" .IX Header "SYNOPSYS" -.Vb 9 -\& This is kleingebaeck, the kleinanzeigen.de backup tool. +.Vb 10 \& Usage: kleingebaeck [\-dvVhmoc] [,...] \& Options: -\& \-\-user,\-u Backup ads from user with uid . -\& \-\-debug, \-d Enable debug output. -\& \-\-verbose,\-v Enable verbose output. -\& \-\-output\-dir,\-o Set output dir (default: current directory) -\& \-\-manual,\-m Show manual. -\& \-\-config,\-c Use config file (default: ~/.kleingebaeck). +\& \-\-user \-u Backup ads from user with uid . +\& \-\-debug \-d Enable debug output. +\& \-\-verbose \-v Enable verbose output. +\& \-\-outdir \-o Set output dir (default: current directory) +\& \-\-limit \-l Limit the ads to download to , default: load all. 
+\& \-\-config \-c Use config file (default: ~/.kleingebaeck). +\& \-\-manual \-m Show manual. +\& \-\-help \-h Show usage. .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" @@ -163,27 +164,42 @@ a textfile \fBAdlisting.txt\fR which contains the ad contents such as title, body, price etc. All images will be downloaded as well. .SH "CONFIGURATION" .IX Header "CONFIGURATION" -You can create a config file to save typing. By default -\&\f(CW\*(C`~/.kleingebaeck.hcl\*(C'\fR is being used but you can specify one with -\&\f(CW\*(C`\-c\*(C'\fR as well. +You can create a config file to save typing. By default +\&\f(CW\*(C`~/.kleingebaeck\*(C'\fR is being used but you can specify one with \f(CW\*(C`\-c\*(C'\fR as +well. We use \s-1TOML\s0 as our configuration language. See +. .PP -Format is simple: +Format is pretty simple: .PP -.Vb 4 +.Vb 10 \& user = 1010101 -\& verbose = true +\& loglevel = verbose \& outdir = "test" -\& template = "" +\& template = """ +\& Title: {{.Title}} +\& Price: {{.Price}} +\& Id: {{.Id}} +\& Category: {{.Category}} +\& Condition: {{.Condition}} +\& Created: {{.Created}} +\& +\& {{.Text}} +\& """ .Ve .PP -Be carefull if you want to change the template. The default one looks like this: +Be carefull if you want to change the template. The variable is a +multiline string surrounded by three double quotes. You can left out +certain fields and use any formatting you like. Refer to + for details how to write a +template. +.PP +If you're on windows and want to customize the output directory, put +it into single quotes to avoid the backslashes interpreted as escape +chars like this: .PP .Vb 1 -\& Title: {{.Title}}\enPrice: {{.Price}}\enId: {{.Id}}\enCategory: {{.Category}}\enCondition: {{.Condition}}\enCreated: {{.Created}}\en\en{{.Text}}\en +\& outdir = \*(AqC:\eData\eAds\*(Aq .Ve -.PP -You can left out certain fields and use any formatting you like. Refer -to for details how to write a template. 
.SH "SETUP" .IX Header "SETUP" To setup the tool, you need to lookup your userid on diff --git a/kleingebaeck.go b/kleingebaeck.go index de55cc9..5b4ff21 100644 --- a/kleingebaeck.go +++ b/kleingebaeck.go @@ -5,15 +5,16 @@ NAME kleingebaeck - kleinanzeigen.de backup tool SYNOPSYS - This is kleingebaeck, the kleinanzeigen.de backup tool. Usage: kleingebaeck [-dvVhmoc] [,...] Options: - --user,-u Backup ads from user with uid . - --debug, -d Enable debug output. - --verbose,-v Enable verbose output. - --output-dir,-o Set output dir (default: current directory) - --manual,-m Show manual. - --config,-c Use config file (default: ~/.kleingebaeck). + --user -u Backup ads from user with uid . + --debug -d Enable debug output. + --verbose -v Enable verbose output. + --outdir -o Set output dir (default: current directory) + --limit -l Limit the ads to download to , default: load all. + --config -c Use config file (default: ~/.kleingebaeck). + --manual -m Show manual. + --help -h Show usage. DESCRIPTION This tool can be used to backup ads on the german ad page @@ -26,24 +27,36 @@ DESCRIPTION CONFIGURATION You can create a config file to save typing. By default - "~/.kleingebaeck.hcl" is being used but you can specify one with "-c" as - well. + "~/.kleingebaeck" is being used but you can specify one with "-c" as + well. We use TOML as our configuration language. See + . - Format is simple: + Format is pretty simple: user = 1010101 - verbose = true + loglevel = verbose outdir = "test" - template = "" + template = """ + Title: {{.Title}} + Price: {{.Price}} + Id: {{.Id}} + Category: {{.Category}} + Condition: {{.Condition}} + Created: {{.Created}} - Be carefull if you want to change the template. The default one looks + {{.Text}} + """ + + Be carefull if you want to change the template. The variable is a + multiline string surrounded by three double quotes. You can left out + certain fields and use any formatting you like. Refer to + for details how to write a template. 
+ + If you're on windows and want to customize the output directory, put it + into single quotes to avoid the backslashes interpreted as escape chars like this: - Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\nCategory: {{.Category}}\nCondition: {{.Condition}}\nCreated: {{.Created}}\n\n{{.Text}}\n - - You can left out certain fields and use any formatting you like. Refer - to for details how to write a - template. + outdir = 'C:\Data\Ads' SETUP To setup the tool, you need to lookup your userid on kleinanzeigen.de. diff --git a/kleingebaeck.pod b/kleingebaeck.pod index 92962f8..d8aaeac 100644 --- a/kleingebaeck.pod +++ b/kleingebaeck.pod @@ -4,16 +4,18 @@ kleingebaeck - kleinanzeigen.de backup tool =head1 SYNOPSYS - This is kleingebaeck, the kleinanzeigen.de backup tool. Usage: kleingebaeck [-dvVhmoc] [,...] Options: - --user,-u Backup ads from user with uid . - --debug, -d Enable debug output. - --verbose,-v Enable verbose output. - --output-dir,-o Set output dir (default: current directory) - --manual,-m Show manual. - --config,-c Use config file (default: ~/.kleingebaeck). - + --user -u Backup ads from user with uid . + --debug -d Enable debug output. + --verbose -v Enable verbose output. + --outdir -o Set output dir (default: current directory) + --limit -l Limit the ads to download to , default: load all. + --config -c Use config file (default: ~/.kleingebaeck). + --manual -m Show manual. + --help -h Show usage. + --version -V Show program version. + =head1 DESCRIPTION This tool can be used to backup ads on the german ad page L. @@ -25,23 +27,38 @@ title, body, price etc. All images will be downloaded as well. =head1 CONFIGURATION -You can create a config file to save typing. By default -C<~/.kleingebaeck.hcl> is being used but you can specify one with -C<-c> as well. +You can create a config file to save typing. By default +C<~/.kleingebaeck> is being used but you can specify one with C<-c> as +well. We use TOML as our configuration language. See +L. 
-Format is simple: +Format is pretty simple: user = 1010101 - verbose = true + loglevel = verbose outdir = "test" - template = "" + template = """ + Title: {{.Title}} + Price: {{.Price}} + Id: {{.Id}} + Category: {{.Category}} + Condition: {{.Condition}} + Created: {{.Created}} -Be carefull if you want to change the template. The default one looks like this: + {{.Text}} + """ - Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\nCategory: {{.Category}}\nCondition: {{.Condition}}\nCreated: {{.Created}}\n\n{{.Text}}\n +Be carefull if you want to change the template. The variable is a +multiline string surrounded by three double quotes. You can left out +certain fields and use any formatting you like. Refer to +L for details how to write a +template. -You can left out certain fields and use any formatting you like. Refer -to L for details how to write a template. +If you're on windows and want to customize the output directory, put +it into single quotes to avoid the backslashes interpreted as escape +chars like this: + + outdir = 'C:\Data\Ads' =head1 SETUP diff --git a/main.go b/main.go index 18c7d15..4ad9b26 100644 --- a/main.go +++ b/main.go @@ -26,22 +26,8 @@ import ( "runtime/debug" "github.com/lmittmann/tint" - flag "github.com/spf13/pflag" ) -const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool. -Usage: kleingebaeck [-dvVhmoc] [,...] -Options: ---user,-u Backup ads from user with uid . ---debug, -d Enable debug output. ---verbose,-v Enable verbose output. ---output-dir,-o Set output dir (default: current directory) ---manual,-m Show manual. ---config,-c Use config file (default: ~/.kleingebaeck). 
- -If one or more 's are specified, only backup those, -otherwise backup all ads of the given user.` - const LevelNotice = slog.Level(2) func main() { @@ -60,6 +46,7 @@ func Main() int { } return a }, + NoColor: IsNoTty(), } logLevel.Set(LevelNotice) @@ -67,37 +54,22 @@ func Main() int { logger := slog.New(handler) slog.SetDefault(logger) - showversion := false - showhelp := false - showmanual := false - enabledebug := false - enableverbose := false - uid := 0 - configfile := os.Getenv("HOME") + "/.kleingebaeck.hcl" - dir := "" + conf, err := InitConfig() + if err != nil { + return Die(err) + } - flag.BoolVarP(&enabledebug, "debug", "d", false, "debug mode") - flag.BoolVarP(&enableverbose, "verbose", "v", false, "be verbose") - flag.BoolVarP(&showversion, "version", "V", false, "show version") - flag.BoolVarP(&showhelp, "help", "h", false, "show usage") - flag.BoolVarP(&showmanual, "manual", "m", false, "show manual") - flag.IntVarP(&uid, "user", "u", uid, "user id") - flag.StringVarP(&dir, "output-dir", "o", dir, "where to store ads") - flag.StringVarP(&configfile, "config", "c", configfile, "config file") - - flag.Parse() - - if showversion { + if conf.Showversion { fmt.Printf("This is kleingebaeck version %s\n", VERSION) return 0 } - if showhelp { + if conf.Showhelp { fmt.Println(Usage) return 0 } - if showmanual { + if conf.Showmanual { err := man() if err != nil { return Die(err) @@ -105,21 +77,17 @@ func Main() int { return 0 } - conf, err := ParseConfigfile(configfile) - if err != nil { - return Die(err) - } - - if enableverbose || *conf.Verbose { + if conf.Verbose { logLevel.Set(slog.LevelInfo) } - if enabledebug { + if conf.Debug { // we're using a more verbose logger in debug mode buildInfo, _ := debug.ReadBuildInfo() opts := &tint.Options{ Level: logLevel, AddSource: true, + NoColor: IsNoTty(), } logLevel.Set(slog.LevelDebug) @@ -135,53 +103,39 @@ func Main() int { slog.Debug("config", "conf", conf) - if len(dir) == 0 { - if len(*conf.Outdir) > 0 { - dir 
= *conf.Outdir - } else { - dir = Defaultdir - } - } - // prepare output dir - err = Mkdir(dir) + err = Mkdir(conf.Outdir) if err != nil { return Die(err) } - // which template to use - template := DefaultTemplate - if runtime.GOOS == "windows" { - template = DefaultTemplateWin - } - if len(*conf.Template) > 0 { - template = *conf.Template - } - - // directly backup ad listing[s] - if len(flag.Args()) >= 1 { - for _, uri := range flag.Args() { - err := Scrape(uri, dir, template) + if len(conf.Adlinks) >= 1 { + // directly backup ad listing[s] + for _, uri := range conf.Adlinks { + err := Scrape(conf, uri) if err != nil { return Die(err) } } - - return 0 - } - - // backup all ads of the given user (via config or cmdline) - if uid == 0 && *conf.User > 0 { - uid = *conf.User - } - - if uid > 0 { - err := Start(fmt.Sprintf("%d", uid), dir, template) + } else if conf.User > 0 { + // backup all ads of the given user (via config or cmdline) + err := Start(conf) if err != nil { return Die(err) } } else { - return Die(errors.New("invalid or no user id specified")) + return Die(errors.New("invalid or no user id or no ad link specified")) + } + + if conf.StatsCountAds > 0 { + adstr := "ads" + if conf.StatsCountAds == 1 { + adstr = "ad" + } + fmt.Printf("Successfully downloaded %d %s with %d images to %s.\n", + conf.StatsCountAds, adstr, conf.StatsCountImages, conf.Outdir) + } else { + fmt.Printf("No ads found.") } return 0 diff --git a/scrape.go b/scrape.go index 5facc46..25127c2 100644 --- a/scrape.go +++ b/scrape.go @@ -25,9 +25,9 @@ import ( "net/http" "path/filepath" "strings" - "sync" "astuart.co/goq" + "golang.org/x/sync/errgroup" ) type Index struct { @@ -79,15 +79,15 @@ func Get(uri string, client *http.Client) (io.ReadCloser, error) { // extract links from all ad listing pages (that is: use pagination) // and scrape every page -func Start(uid string, dir string, template string) error { +func Start(conf *Config) error { client := &http.Client{} adlinks := []string{} 
- baseuri := Baseuri + Listuri + "?userId=" + uid + baseuri := fmt.Sprintf("%s%s?userId=%d", Baseuri, Listuri, conf.User) page := 1 uri := baseuri - slog.Info("fetching ad pages", "user", uid) + slog.Info("fetching ad pages", "user", conf.User) for { var index Index @@ -118,18 +118,22 @@ func Start(uid string, dir string, template string) error { uri = baseuri + "&pageNum=" + fmt.Sprintf("%d", page) } - for _, adlink := range adlinks { - err := Scrape(Baseuri+adlink, dir, template) + for i, adlink := range adlinks { + err := Scrape(conf, Baseuri+adlink) if err != nil { return err } + + if conf.Limit > 0 && i == conf.Limit-1 { + break + } } return nil } // scrape an ad. uri is the full uri of the ad, dir is the basedir -func Scrape(uri string, dir string, template string) error { +func Scrape(c *Config, uri string) error { client := &http.Client{} ad := &Ad{} @@ -161,43 +165,42 @@ func Scrape(uri string, dir string, template string) error { slog.Debug("extracted ad listing", "ad", ad) // write listing - err = WriteAd(dir, ad, template) + err = WriteAd(c.Outdir, ad, c.Template) if err != nil { return err } - return ScrapeImages(dir, ad) + c.IncrAds() + + return ScrapeImages(c, ad) } -func ScrapeImages(dir string, ad *Ad) error { +func ScrapeImages(c *Config, ad *Ad) error { // fetch images img := 1 - var wg sync.WaitGroup - wg.Add(len(ad.Images)) - failure := make(chan string) + g := new(errgroup.Group) for _, imguri := range ad.Images { - file := filepath.Join(dir, ad.Slug, fmt.Sprintf("%d.jpg", img)) - go func() { - defer wg.Done() + imguri := imguri + file := filepath.Join(c.Outdir, ad.Slug, fmt.Sprintf("%d.jpg", img)) + g.Go(func() error { err := Getimage(imguri, file) if err != nil { - failure <- err.Error() - return + return err } slog.Info("wrote ad image", "image", file) - }() + + return nil + }) img++ } - close(failure) - wg.Wait() - goterr := <-failure - - if goterr != "" { - return errors.New(goterr) + if err := g.Wait(); err != nil { + return err } + 
c.IncrImgs(len(ad.Images)) + return nil } diff --git a/util.go b/util.go index e8ac262..bf06ae5 100644 --- a/util.go +++ b/util.go @@ -22,6 +22,9 @@ import ( "errors" "os" "os/exec" + "runtime" + + "github.com/mattn/go-isatty" ) func Mkdir(dir string) error { @@ -53,3 +56,13 @@ func man() error { return nil } + +// IsNoTty returns true if stdout is not a tty or if we are running on windows +func IsNoTty() bool { + if runtime.GOOS == "windows" || !isatty.IsTerminal(os.Stdout.Fd()) { + return true + } + + // stdout is a tty and we are not on windows + return false +}