diff --git a/README.md b/README.md index 66e8953..8622db9 100644 --- a/README.md +++ b/README.md @@ -3,22 +3,87 @@ [![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://github.com/tlinden/kleingebaeck/blob/master/LICENSE) [![Go Report Card](https://goreportcard.com/badge/github.com/tlinden/kleingebaeck)](https://goreportcard.com/report/github.com/tlinden/kleingebaeck) -Mit diesem kleinen aber feinen Tool kann man seine -Anzeigen bei https://kleinanzeigen.de sichern. Das -Problem ist ja bekanntlich, dass Kleinanzeigen nach einer Weile (2 -Monate?) automatisch gelöscht werden. Wenn man keine Sicherung hat, -wird es schwierig, die erneut einzustellen. Mit dem Tool braucht man -sich keine Texte zu merken. Man kann auch einfach Änderungen -(z.B. Preis runter) durchführen oder den Text anpassen und dann ein -neues Backup anfertigen. +This tool can be used to back up ads on the German ad page https://kleinanzeigen.de -Es wird pro Anzeige ein Verzeichnis erstellt. In der Datei -`Anzeige.txt` wird der Titel, die Beschreibung sowie der Preis -eingetragen. Ausserdem werden alle Bilder heruntergeladen. +It downloads all ads (or only the specified ones) of one user into a +directory, each ad into its own subdirectory. The backup will contain +a text file `Adlisting.txt` which contains the ad contents such as +title, body, price etc. All images will be downloaded as well. -## Copyright und Lizenz +The tool doesn't need authentication and doesn't have any +dependencies. Just download the binary for your platform from the +releases page and you're good to go. -Lizensiert unter der GNU GENERAL PUBLIC LICENSE version 3. +The releases also include a handy tarball which you can use to install +the tool system-wide including the manual page. Just extract it and +type: `make install`. + +## Commandline options: + +``` +Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...] +Options: +--user,-u <uid> Backup ads from user with uid <uid>. +--debug, -d Enable debug output. +--verbose,-v Enable verbose output. 
+--output-dir,-o Set output dir (default: current directory) +--manual,-m Show manual. +--config,-c Use config file (default: ~/.kleingebaeck.hcl). + +If one or more <ad-listing-url>'s are specified, only backup those, +otherwise backup all ads of the given user. +``` + +## Configfile + +You can create a config file to save typing. By default +`~/.kleingebaeck.hcl` is being used but you can specify one with +`-c` as well. + +Format is simple: + +``` +user = 1010101 +verbose = true +outdir = "test" +``` + +## Usage + +To set up the tool, you need to look up your userid on +kleinanzeigen.de. Go to your ad overview page while NOT being logged +in: + +https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX + +The `XXXXXX` part is your userid. + +Put it into the configfile as outlined above. Also specify an output +directory. Then just execute `kleingebaeck`. + + +## Kleingebäck? + +The name is derived from "kleinanzeigen backup": "klein" (German for +small) and "back". In German "bäck" is pronounced the same as the English +"back" so "kleinbäck" was short enough, but it's not a valid German +word. "Kleingebäck" however is: it means "Cookies" in English :) + +## Getting help + +Although I'm happy to hear from kleingebaeck users in private email, +that's the best way for me to forget to do something. + +In order to report a bug, unexpected behavior, feature requests or to +submit a patch, please open an issue on GitHub: +https://github.com/TLINDEN/kleingebaeck/issues. + +Please repeat the failing command with debugging enabled (`-d`) and +include the output in the issue. + +## Copyright and License + +Licensed under the GNU GENERAL PUBLIC LICENSE version 3. 
## Author diff --git a/config.go b/config.go new file mode 100644 index 0000000..140c1d7 --- /dev/null +++ b/config.go @@ -0,0 +1,51 @@ +/* +Copyright © 2023 Thomas von Dein + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ +package main + +import ( + "github.com/hashicorp/hcl/v2/hclsimple" + "os" +) + +type Config struct { + Verbose bool `hcl:"verbose"` + User int `hcl:"user"` + Outdir string `hcl:"outdir"` +} + +func ParseConfigfile(file string) (*Config, error) { + c := Config{} + if path, err := os.Stat(file); !os.IsNotExist(err) { + if !path.IsDir() { + configstring, err := os.ReadFile(file) + if err != nil { + return nil, err + } + + err = hclsimple.Decode( + path.Name(), configstring, + nil, &c, + ) + + if err != nil { + return nil, err + } + } + } + + return &c, nil +} diff --git a/example.hcl b/example.hcl new file mode 100644 index 0000000..4e94bd0 --- /dev/null +++ b/example.hcl @@ -0,0 +1,3 @@ +user = 89056200 +verbose = true +outdir = "test" diff --git a/go.mod b/go.mod index b7ee371..8e408e3 100644 --- a/go.mod +++ b/go.mod @@ -1,12 +1,23 @@ module kleingebaeck -go 1.20 +go 1.21 + +toolchain go1.21.1 require ( astuart.co/goq v1.0.0 // indirect github.com/PuerkitoBio/goquery v1.5.0 // indirect + github.com/agext/levenshtein v1.2.1 // indirect github.com/andybalholm/cascadia v1.0.0 // indirect + github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect + github.com/apparentlymart/go-textseg/v15 v15.0.0 // 
indirect + github.com/google/go-cmp v0.3.1 // indirect + github.com/hashicorp/hcl/v2 v2.19.1 // indirect + github.com/lmittmann/tint v1.0.3 // indirect + github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/zclconf/go-cty v1.13.0 // indirect golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect + golang.org/x/text v0.11.0 // indirect ) diff --git a/go.sum b/go.sum index 8571d31..924e1a8 100644 --- a/go.sum +++ b/go.sum @@ -2,14 +2,30 @@ astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw= astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno= github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk= github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= +github.com/agext/levenshtein v1.2.1 h1:QmvMAjj2aEICytGiWzmxoE0x2KZvE0fvmqMOfy2tjT8= +github.com/agext/levenshtein v1.2.1/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558= github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o= github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= +github.com/apparentlymart/go-textseg/v13 v13.0.0 h1:Y+KvPE1NYz0xl601PVImeQfFyEy6iT90AvPUL1NNfNw= +github.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo= +github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew1u1fNQOlOtuGxQY= +github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.3.1 h1:Xye71clBPdm5HgqGwUkwhbynsUJZhDbS20FvLhQ2izg= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/hashicorp/hcl/v2 v2.19.1 h1://i05Jqznmb2EXqa39Nsvyan2o5XyMowW5fnCKW5RPI= +github.com/hashicorp/hcl/v2 
v2.19.1/go.mod h1:ThLC89FV4p9MPW804KVbe/cEXoQ8NZEh+JtMeeGErHE= +github.com/lmittmann/tint v1.0.3 h1:W5PHeA2D8bBJVvabNfQD/XW9HPLZK1XoPZH0cq8NouQ= +github.com/lmittmann/tint v1.0.3/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE= +github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 h1:DpOJ2HYzCv8LZP15IdmG+YdwD2luVPHITV96TkirNBM= +github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/zclconf/go-cty v1.13.0 h1:It5dfKTTZHe9aeppbNOda3mN7Ag7sg6QkBNm6TkyFa0= +github.com/zclconf/go-cty v1.13.0/go.mod h1:YKQzy/7pZ7iq2jNFzy5go57xdxdWoLLpaEp4u238AE0= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -17,3 +33,5 @@ golang.org/x/net v0.0.0-20190606173856-1492cefac77f h1:IWHgpgFqnL5AhBUBZSgBdjl2v golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4= +golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= diff --git a/kleingebaeck.1 b/kleingebaeck.1 
index 2f5056a..8847c93 100644 --- a/kleingebaeck.1 +++ b/kleingebaeck.1 @@ -133,11 +133,83 @@ .\" ======================================================================== .\" .IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2023-12-14" "1" "User Commands" +.TH KLEINGEBAECK 1 "2023-12-15" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l .nh -.SS "kleingebaeck" -.IX Subsection "kleingebaeck" -Backup of kleinanzeigen.de +.SH "NAME" +kleingebaeck \- kleinanzeigen.de backup tool +.SH "SYNOPSYS" +.IX Header "SYNOPSYS" +.Vb 9 +\& This is kleingebaeck, the kleinanzeigen.de backup tool. +\& Usage: kleingebaeck [\-dvVhmoc] [,...] +\& Options: +\& \-\-user,\-u Backup ads from user with uid . +\& \-\-debug, \-d Enable debug output. +\& \-\-verbose,\-v Enable verbose output. +\& \-\-output\-dir,\-o Set output dir (default: current directory) +\& \-\-manual,\-m Show manual. +\& \-\-config,\-c Use config file (default: ~/.kleingebaeck). +.Ve +.SH "DESCRIPTION" +.IX Header "DESCRIPTION" +This tool can be used to backup ads on the german ad page . +.PP +It downloads all (or only the specified ones) ads of one user into a +directory, each ad into its own subdirectory. The backup will contain +a textfile \fBAdlisting.txt\fR which contains the ad contents such as +title, body, price etc. All images will be downloaded as well. +.SH "CONFIGURATION" +.IX Header "CONFIGURATION" +You can create a config file to save typing. By default +\&\f(CW\*(C`~/.kleingebaeck.hcl\*(C'\fR is being used but you can specify one with +\&\f(CW\*(C`\-c\*(C'\fR as well. +.PP +Format is simple: +.PP +.Vb 3 +\& user = 1010101 +\& verbose = true +\& outdir = "test" +.Ve +.SH "SETUP" +.IX Header "SETUP" +To setup the tool, you need to lookup your userid on +kleinanzeigen.de. 
Go to your ad overview page while \s-1NOT\s0 being logged +in: +.PP +.Vb 1 +\& https://www.kleinanzeigen.de/s\-bestandsliste.html?userId=XXXXXX +.Ve +.PP +The \fB\s-1XXXXX\s0\fR part is your userid. +.PP +Put it into the configfile as outlined above. Also specify an output +directory. Then just execute \f(CW\*(C`kleingebaeck\*(C'\fR. +.PP +You can use the \fB\-v\fR option to get verbose output or \fB\-d\fR to enable +debugging. +.SH "BUGS" +.IX Header "BUGS" +In order to report a bug, unexpected behavior, feature requests +or to submit a patch, please open an issue on github: +. +.PP +Please repeat the failing command with debugging enabled \f(CW\*(C`\-d\*(C'\fR and +include the output in the issue. +.SH "LIMITATIONS" +.IX Header "LIMITATIONS" +The \f(CW\*(C`kleingebaeck\*(C'\fR doesn't currently check if it has downloaded a +file already, so it downloads everything again every time you execute +it. Be aware of it. This will change in the future. +.PP +Also there's currently no parallelization implemented. This will +change in the future. +.SH "LICENSE" +.IX Header "LICENSE" +Licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3. +.SH "Author" +.IX Header "Author" +T.v.Dein diff --git a/kleingebaeck.go b/kleingebaeck.go index ea296d1..a79a0c1 100644 --- a/kleingebaeck.go +++ b/kleingebaeck.go @@ -1,7 +1,74 @@ package main var manpage = ` - kleingebaeck - Backup of kleinanzeigen.de +NAME + kleingebaeck - kleinanzeigen.de backup tool + +SYNOPSYS + This is kleingebaeck, the kleinanzeigen.de backup tool. + Usage: kleingebaeck [-dvVhmoc] [,...] + Options: + --user,-u Backup ads from user with uid . + --debug, -d Enable debug output. + --verbose,-v Enable verbose output. + --output-dir,-o Set output dir (default: current directory) + --manual,-m Show manual. + --config,-c Use config file (default: ~/.kleingebaeck). + +DESCRIPTION + This tool can be used to backup ads on the german ad page + . 
+ + It downloads all (or only the specified ones) ads of one user into a + directory, each ad into its own subdirectory. The backup will contain a + textfile Adlisting.txt which contains the ad contents such as title, + body, price etc. All images will be downloaded as well. + +CONFIGURATION + You can create a config file to save typing. By default + "~/.kleingebaeck.hcl" is being used but you can specify one with "-c" as + well. + + Format is simple: + + user = 1010101 + verbose = true + outdir = "test" + +SETUP + To setup the tool, you need to lookup your userid on kleinanzeigen.de. + Go to your ad overview page while NOT being logged in: + + https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX + + The XXXXX part is your userid. + + Put it into the configfile as outlined above. Also specify an output + directory. Then just execute "kleingebaeck". + + You can use the -v option to get verbose output or -d to enable + debugging. + +BUGS + In order to report a bug, unexpected behavior, feature requests or to + submit a patch, please open an issue on github: + . + + Please repeat the failing command with debugging enabled "-d" and + include the output in the issue. + +LIMITATIONS + The "kleingebaeck" doesn't currently check if it has downloaded a file + already, so it downloads everything again every time you execute it. Be + aware of it. This will change in the future. + + Also there's currently no parallelization implemented. This will change + in the future. + +LICENSE + Licensed under the GNU GENERAL PUBLIC LICENSE version 3. + +Author + T.v.Dein ` diff --git a/kleingebaeck.pod b/kleingebaeck.pod index 8c66148..5357682 100644 --- a/kleingebaeck.pod +++ b/kleingebaeck.pod @@ -1,5 +1,82 @@ -=head2 kleingebaeck +=head1 NAME + +kleingebaeck - kleinanzeigen.de backup tool + +=head1 SYNOPSYS + + This is kleingebaeck, the kleinanzeigen.de backup tool. + Usage: kleingebaeck [-dvVhmoc] [,...] + Options: + --user,-u Backup ads from user with uid . 
+ --debug, -d Enable debug output. + --verbose,-v Enable verbose output. + --output-dir,-o Set output dir (default: current directory) + --manual,-m Show manual. + --config,-c Use config file (default: ~/.kleingebaeck). + +=head1 DESCRIPTION + +This tool can be used to backup ads on the german ad page L. + +It downloads all (or only the specified ones) ads of one user into a +directory, each ad into its own subdirectory. The backup will contain +a textfile B which contains the ad contents such as +title, body, price etc. All images will be downloaded as well. + +=head1 CONFIGURATION + +You can create a config file to save typing. By default +C<~/.kleingebaeck.hcl> is being used but you can specify one with +C<-c> as well. + +Format is simple: + + user = 1010101 + verbose = true + outdir = "test" + +=head1 SETUP + +To setup the tool, you need to lookup your userid on +kleinanzeigen.de. Go to your ad overview page while NOT being logged +in: + + https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX + +The B part is your userid. + +Put it into the configfile as outlined above. Also specify an output +directory. Then just execute C. + +You can use the B<-v> option to get verbose output or B<-d> to enable +debugging. + +=head1 BUGS + +In order to report a bug, unexpected behavior, feature requests +or to submit a patch, please open an issue on github: +L. + +Please repeat the failing command with debugging enabled C<-d> and +include the output in the issue. + +=head1 LIMITATIONS + +The C doesn't currently check if it has downloaded a +file already, so it downloads everything again every time you execute +it. Be aware of it. This will change in the future. + +Also there's currently no parallelization implemented. This will +change in the future. + +=head1 LICENSE + +Licensed under the GNU GENERAL PUBLIC LICENSE version 3. 
+ +=head1 Author + +T.v.Dein + -Backup of kleinanzeigen.de =cut diff --git a/main.go b/main.go index d6dfed4..d4baedf 100644 --- a/main.go +++ b/main.go @@ -20,36 +20,75 @@ package main import ( "errors" "fmt" - "os" - + "github.com/lmittmann/tint" flag "github.com/spf13/pflag" + "log/slog" + "os" + "runtime/debug" ) -const VERSION string = "0.0.1" -const Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" +const VERSION string = "0.0.2" +const Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" const Baseuri string = "https://www.kleinanzeigen.de" const Listuri string = "/s-bestandsliste.html" const Defaultdir string = "." +const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool. +Usage: kleingebaeck [-dvVhmoc] [,...] +Options: +--user,-u Backup ads from user with uid . +--debug, -d Enable debug output. +--verbose,-v Enable verbose output. +--output-dir,-o Set output dir (default: current directory) +--manual,-m Show manual. +--config,-c Use config file (default: ~/.kleingebaeck). 
+ +If one or more 's are specified, only backup those, +otherwise backup all ads of the given user.` + +const LevelNotice = slog.Level(2) + func main() { os.Exit(Main()) } func Main() int { + logLevel := &slog.LevelVar{} + opts := &tint.Options{ + Level: logLevel, + AddSource: false, + ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr { + // Remove time from the output + if a.Key == slog.TimeKey { + return slog.Attr{} + } + return a + }, + } + + logLevel.Set(LevelNotice) + var handler slog.Handler = tint.NewHandler(os.Stdout, opts) + logger := slog.New(handler) + slog.SetDefault(logger) + showversion := false showhelp := false showmanual := false enabledebug := false - configfile := "" - dir := Defaultdir + enableverbose := false + uid := 0 + configfile := os.Getenv("HOME") + "/.kleingebaeck.hcl" + dir := "" flag.BoolVarP(&enabledebug, "debug", "d", false, "debug mode") - flag.BoolVarP(&showversion, "version", "v", false, "show version") + flag.BoolVarP(&enableverbose, "verbose", "v", false, "be verbose") + flag.BoolVarP(&showversion, "version", "V", false, "show version") flag.BoolVarP(&showhelp, "help", "h", false, "show usage") flag.BoolVarP(&showmanual, "manual", "m", false, "show manual") + flag.IntVarP(&uid, "user", "u", uid, "user id") flag.StringVarP(&dir, "output-dir", "o", dir, "where to store ads") - flag.StringVarP(&configfile, "config", "c", - os.Getenv("HOME")+"/.kleingebaeck", "config file") + flag.StringVarP(&configfile, "config", "c", configfile, "config file") flag.Parse() @@ -58,39 +97,93 @@ func Main() int { return 0 } - /* + if showhelp { + fmt.Println(Usage) + return 0 + } - if showhelp { - fmt.Println(Usage) - return 0 + conf, err := ParseConfigfile(configfile) + if err != nil { + return Die(err) + } + + if enableverbose || conf.Verbose { + logLevel.Set(slog.LevelInfo) + } + + if enabledebug { + // we're using a more verbose logger in debug mode + buildInfo, _ := debug.ReadBuildInfo() + opts := &tint.Options{ + Level: logLevel, + 
AddSource: true, } - if enabledebug { - calc.ToggleDebug() - } + logLevel.Set(slog.LevelDebug) + var handler slog.Handler = tint.NewHandler(os.Stdout, opts) + debuglogger := slog.New(handler).With( + slog.Group("program_info", + slog.Int("pid", os.Getpid()), + slog.String("go_version", buildInfo.GoVersion), + ), + ) + slog.SetDefault(debuglogger) + } - if showmanual { - man() - return 0 - } + slog.Debug("config", "conf", conf) - */ - - if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) { - err := os.Mkdir(dir, os.ModePerm) + if showmanual { + err := man() if err != nil { return Die(err) } + return 0 } - if len(flag.Args()) == 1 { - Start(flag.Args()[0], dir) + if len(dir) == 0 { + if len(conf.Outdir) > 0 { + dir = conf.Outdir + } else { + dir = Defaultdir + } + } + + // prepare output dir + err = Mkdir(dir) + if err != nil { + return Die(err) + } + + // directly backup ad listing[s] + if len(flag.Args()) >= 1 { + for _, uri := range flag.Args() { + err := Scrape(uri, dir) + if err != nil { + return Die(err) + } + } + + return 0 + } + + // backup all ads of the given user (via config or cmdline) + if uid == 0 && conf.User > 0 { + uid = conf.User + } + + if uid > 0 { + err := Start(fmt.Sprintf("%d", uid), dir) + if err != nil { + return Die(err) + } + } else { + return Die(errors.New("invalid or no user id specified")) } return 0 } func Die(err error) int { - fmt.Println(err) + slog.Error("Failure", "error", err.Error()) return 1 } diff --git a/mkrel.sh b/mkrel.sh index 54cdf63..d246fce 100755 --- a/mkrel.sh +++ b/mkrel.sh @@ -46,7 +46,7 @@ for D in $DIST; do GOOS=${os} GOARCH=${arch} go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o ${binfile} mkdir -p ${tardir} cp ${binfile} README.md LICENSE ${tardir}/ - echo 'tool = rpn + echo 'tool = kleingebaeck PREFIX = /usr/local UID = root GID = 0 diff --git a/scrape.go b/scrape.go index aa4059b..43e9655 100644 --- a/scrape.go +++ b/scrape.go @@ -21,6 +21,7 @@ import ( "errors" "fmt" "io" + "log/slog" 
"os" "strings" @@ -42,13 +43,14 @@ func Get(uri string, client *http.Client) (io.ReadCloser, error) { req.Header.Set("User-Agent", Useragent) - // fmt.Println(uri) - res, err := client.Do(req) if err != nil { return nil, err } + slog.Debug("response", "code", res.StatusCode, "status", + res.Status, "size", res.ContentLength) + return res.Body, nil } @@ -56,14 +58,17 @@ func Get(uri string, client *http.Client) (io.ReadCloser, error) { // and scrape every page func Start(uid string, dir string) error { client := &http.Client{} - ads := []string{} + adlinks := []string{} baseuri := Baseuri + Listuri + "?userId=" + uid page := 1 uri := baseuri + slog.Info("fetching ad pages", "user", uid) + for { var index Index + slog.Debug("fetching page", "uri", uri) body, err := Get(uri, client) if err != nil { return err @@ -79,17 +84,19 @@ func Start(uid string, dir string) error { break } + slog.Debug("extracted ad links", "count", len(index.Links)) + for _, href := range index.Links { - ads = append(ads, href) - fmt.Println(href) + adlinks = append(adlinks, href) + slog.Debug("ad link", "href", href) } page++ uri = baseuri + "&pageNum=" + fmt.Sprintf("%d", page) } - for _, ad := range ads { - err := Scrape(ad, dir) + for _, adlink := range adlinks { + err := Scrape(Baseuri+adlink, dir) if err != nil { return err } @@ -99,40 +106,75 @@ func Start(uid string, dir string) error { } type Ad struct { - Title string `goquery:"h1"` + Title string `goquery:"h1"` + Slug string + Id string Text string `goquery:"p#viewad-description-text,html"` Images []string `goquery:".galleryimage-element img,[src]"` Price string `goquery:"h2#viewad-price"` } -func Scrape(link string, dir string) error { - client := &http.Client{} - uri := Baseuri + link - slurp := strings.Split(uri, "/")[1] +func (ad *Ad) LogValue() slog.Value { + return slog.GroupValue( + slog.String("title", ad.Title), + slog.String("price", ad.Price), + slog.String("id", ad.Id), + slog.Int("imagecount", len(ad.Images)), + 
slog.Int("bodysize", len(ad.Text)), + ) +} - var ad Ad +// scrape an ad. uri is the full uri of the ad, dir is the basedir +func Scrape(uri string, dir string) error { + client := &http.Client{} + ad := &Ad{} + + // extract slug and id from uri + uriparts := strings.Split(uri, "/") + if len(uriparts) < 6 { + return errors.New("invalid uri") + } + ad.Slug = uriparts[4] + ad.Id = uriparts[5] + + // get the ad + slog.Debug("fetching ad page", "uri", uri) body, err := Get(uri, client) if err != nil { return err } defer body.Close() + // extract ad contents with goquery/goq err = goq.NewDecoder(body).Decode(&ad) if err != nil { return err } + slog.Debug("extracted ad listing", "ad", ad) - f, err := os.Create(strings.Join([]string{dir, slurp, "Anzeige.txt"}, "/")) + // prepare output dir + dir = dir + "/" + ad.Slug + err = Mkdir(dir) + if err != nil { + return err + } + + // write ad file + listingfile := strings.Join([]string{dir, "Adlisting.txt"}, "/") + f, err := os.Create(listingfile) if err != nil { return err } ad.Text = strings.ReplaceAll(ad.Text, "
", "\n") - _, err = fmt.Fprintf(f, "Title: %s\nPrice: %s\n\n%s", ad.Title, ad.Price, ad.Text) + _, err = fmt.Fprintf(f, "Title: %s\nPrice: %s\nId: %s\nBody:\n\n%s\n", + ad.Title, ad.Price, ad.Id, ad.Text) if err != nil { return err } + slog.Info("wrote ad listing", "listingfile", listingfile) + // fetch images img := 1 for _, imguri := range ad.Images { file := fmt.Sprintf("%s/%d.jpg", dir, img) @@ -140,6 +182,7 @@ func Scrape(link string, dir string) error { if err != nil { return err } + slog.Info("wrote ad image", "image", file) img++ } @@ -149,6 +192,7 @@ func Scrape(link string, dir string) error { // fetch an image func Getimage(uri, fileName string) error { + slog.Debug("fetching ad image", "uri", uri) response, err := http.Get(uri) if err != nil { return err diff --git a/util.go b/util.go new file mode 100644 index 0000000..e8ac262 --- /dev/null +++ b/util.go @@ -0,0 +1,55 @@ +/* +Copyright © 2023 Thomas von Dein + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+*/ + +package main + +import ( + "bytes" + "errors" + "os" + "os/exec" +) + +func Mkdir(dir string) error { + if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) { + err := os.Mkdir(dir, os.ModePerm) + if err != nil { + return err + } + } + + return nil +} + +func man() error { + man := exec.Command("less", "-") + + var b bytes.Buffer + b.Write([]byte(manpage)) + + man.Stdout = os.Stdout + man.Stdin = &b + man.Stderr = os.Stderr + + err := man.Run() + + if err != nil { + return err + } + + return nil +}