From f932d7be83cc043d2d911d25852f1564e81ab0b7 Mon Sep 17 00:00:00 2001 From: Thomas von Dein Date: Sun, 17 Dec 2023 17:32:05 +0100 Subject: [PATCH] re-orgainzied code a little, using go templates instead format string --- README.md | 21 ++++++++++++++ config.go | 11 ++++++++ example.hcl | 3 +- kleingebaeck.1 | 7 +++-- kleingebaeck.go | 6 ++-- kleingebaeck.pod | 5 ++-- main.go | 8 ------ scrape.go | 29 ++----------------- store.go | 72 ++++++++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 119 insertions(+), 43 deletions(-) create mode 100644 store.go diff --git a/README.md b/README.md index c1d990e..a148ee5 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,27 @@ The `XXXXX` part is your userid. Put it into the configfile as outlined above. Also specify an output directory. Then just execute `kleingebaeck`. +Inside the output directory you'll find a new subdirectory for each +ad. Every directory contains a file `Adlisting.txt`, which will look +somewhat like this: + +```default +Title: A book I sell +Price: 99 € VB +Id: 1919191919 +Category: Sachbücher +Condition: Sehr Gut +Created: 10.12.2023 + +This is the description text. + +Pay with paypal. +``` + +You can change the formatting using the `template` config +variable. The supplied sample config contains the default template. + +All images will be stored in the same directory. ## Kleingebäck? diff --git a/config.go b/config.go index 1f0641d..7796561 100644 --- a/config.go +++ b/config.go @@ -22,6 +22,17 @@ import ( "github.com/hashicorp/hcl/v2/hclsimple" ) +const ( + VERSION string = "0.0.4" + Baseuri string = "https://www.kleinanzeigen.de" + Listuri string = "/s-bestandsliste.html" + Defaultdir string = "." 
+ DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\n" + + "Category: {{.Category}}\nCondition: {{.Condition}}\nCreated: {{.Created}}\n\n{{.Text}}\n" + Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" +) + type Config struct { Verbose bool `hcl:"verbose"` User int `hcl:"user"` diff --git a/example.hcl b/example.hcl index 7c65e73..d0542ed 100644 --- a/example.hcl +++ b/example.hcl @@ -15,6 +15,5 @@ verbose = true outdir = "test" # template. leave empty to use the default one, which is: -# Title: %s\nPrice: %s\nId: %s\nCategory: %s\nCondition: %s\nCreated: %s\nBody:\n\n%s\n -# take care to include exactly 7 times '%s'! +# "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\nCategory: {{.Category}}\nCondition: {{.Condition}}\nCreated: {{.Created}}\n\n{{.Text}}\n" template = "" diff --git a/kleingebaeck.1 b/kleingebaeck.1 index 9e46bce..10324f3 100644 --- a/kleingebaeck.1 +++ b/kleingebaeck.1 @@ -133,7 +133,7 @@ .\" ======================================================================== .\" .IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2023-12-16" "1" "User Commands" +.TH KLEINGEBAECK 1 "2023-12-17" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -179,10 +179,11 @@ Format is simple: Be carefull if you want to change the template. The default one looks like this: .PP .Vb 1 -\& Title: %s\enPrice: %s\enId: %s\enCategory: %s\enCondition: %s\enCreated: %s\enBody:\en\en%s\en +\& Title: {{.Title}}\enPrice: {{.Price}}\enId: {{.Id}}\enCategory: {{.Category}}\enCondition: {{.Condition}}\enCreated: {{.Created}}\en\en{{.Text}}\en .Ve .PP -If you change it, include 7 times the '%s' format tag. +You can leave out certain fields and use any formatting you like. Refer +to <https://pkg.go.dev/text/template> for details on how to write a template.
.SH "SETUP" .IX Header "SETUP" To setup the tool, you need to lookup your userid on diff --git a/kleingebaeck.go b/kleingebaeck.go index 27691e2..de55cc9 100644 --- a/kleingebaeck.go +++ b/kleingebaeck.go @@ -39,9 +39,11 @@ CONFIGURATION Be carefull if you want to change the template. The default one looks like this: - Title: %s\nPrice: %s\nId: %s\nCategory: %s\nCondition: %s\nCreated: %s\nBody:\n\n%s\n + Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\nCategory: {{.Category}}\nCondition: {{.Condition}}\nCreated: {{.Created}}\n\n{{.Text}}\n - If you change it, include 7 times the '%s' format tag. + You can leave out certain fields and use any formatting you like. Refer + to <https://pkg.go.dev/text/template> for details on how to write a + template. SETUP To setup the tool, you need to lookup your userid on kleinanzeigen.de. diff --git a/kleingebaeck.pod b/kleingebaeck.pod index b104706..92962f8 100644 --- a/kleingebaeck.pod +++ b/kleingebaeck.pod @@ -38,9 +38,10 @@ Format is simple: Be carefull if you want to change the template. The default one looks like this: - Title: %s\nPrice: %s\nId: %s\nCategory: %s\nCondition: %s\nCreated: %s\nBody:\n\n%s\n + Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\nCategory: {{.Category}}\nCondition: {{.Condition}}\nCreated: {{.Created}}\n\n{{.Text}}\n -If you change it, include 7 times the '%s' format tag. +You can leave out certain fields and use any formatting you like. Refer +to L<https://pkg.go.dev/text/template> for details on how to write a template. =head1 SETUP diff --git a/main.go b/main.go index 3d98bca..e3f11ca 100644 --- a/main.go +++ b/main.go @@ -28,14 +28,6 @@ import ( flag "github.com/spf13/pflag" ) -const VERSION string = "0.0.3" -const Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + - "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" -const Baseuri string = "https://www.kleinanzeigen.de" -const Listuri string = "/s-bestandsliste.html" -const Defaultdir string = "."
-const DefaultTemplate string = "Title: %s\nPrice: %s\nId: %s\nCategory: %s\nCondition: %s\nCreated: %s\nBody:\n\n%s\n" - const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool. Usage: kleingebaeck [-dvVhmoc] [,...] Options: diff --git a/scrape.go b/scrape.go index cca98c6..fb83b80 100644 --- a/scrape.go +++ b/scrape.go @@ -23,7 +23,6 @@ import ( "io" "log/slog" "net/http" - "os" "strings" "sync" @@ -160,28 +159,12 @@ func Scrape(uri string, dir string, template string) error { } slog.Debug("extracted ad listing", "ad", ad) - // prepare output dir - dir = dir + "/" + ad.Slug - err = Mkdir(dir) + // write listing + err = WriteAd(dir, ad, template) if err != nil { return err } - // write ad file - listingfile := strings.Join([]string{dir, "Adlisting.txt"}, "/") - f, err := os.Create(listingfile) - if err != nil { - return err - } - - ad.Text = strings.ReplaceAll(ad.Text, "
", "\n") - _, err = fmt.Fprintf(f, template, - ad.Title, ad.Price, ad.Id, ad.Category, ad.Condition, ad.Created, ad.Text) - if err != nil { - return err - } - slog.Info("wrote ad listing", "listingfile", listingfile) - return ScrapeImages(dir, ad) } @@ -230,13 +213,7 @@ func Getimage(uri, fileName string) error { return errors.New("received non 200 response code") } - file, err := os.Create(fileName) - if err != nil { - return err - } - defer file.Close() - - _, err = io.Copy(file, response.Body) + err = WriteImage(fileName, response.Body) if err != nil { return err } diff --git a/store.go b/store.go new file mode 100644 index 0000000..17c25c9 --- /dev/null +++ b/store.go @@ -0,0 +1,72 @@ +/* +Copyright © 2023 Thomas von Dein + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +package main + +import ( + "io" + "log/slog" + "os" + "strings" + tpl "text/template" +) + +func WriteAd(dir string, ad *Ad, template string) error { + // prepare output dir + dir = dir + "/" + ad.Slug + err := Mkdir(dir) + if err != nil { + return err + } + + // write ad file + listingfile := strings.Join([]string{dir, "Adlisting.txt"}, "/") + f, err := os.Create(listingfile) + if err != nil { + return err + } + + ad.Text = strings.ReplaceAll(ad.Text, "
", "\n") + + tmpl, err := tpl.New("adlisting").Parse(template) + if err != nil { + return err + } + err = tmpl.Execute(f, ad) + if err != nil { + return err + } + + slog.Info("wrote ad listing", "listingfile", listingfile) + + return nil +} + +func WriteImage(filename string, reader io.ReadCloser) error { + file, err := os.Create(filename) + if err != nil { + return err + } + defer file.Close() + + _, err = io.Copy(file, reader) + if err != nil { + return err + } + + return nil +}