diff --git a/ad.go b/ad.go index a9de619..c0889ae 100644 --- a/ad.go +++ b/ad.go @@ -19,6 +19,7 @@ package main import ( "log/slog" + "strings" ) type Index struct { @@ -26,16 +27,16 @@ type Index struct { } type Ad struct { - Title string `goquery:"h1"` - Slug string - Id string - Condition string - Category string - Price string `goquery:"h2#viewad-price"` - Created string `goquery:"#viewad-extra-info,text"` - Text string `goquery:"p#viewad-description-text,html"` - Images []string `goquery:".galleryimage-element img,[src]"` - Meta []string `goquery:".addetailslist--detail--value,text"` + Title string `goquery:"h1"` + Slug string + Id string + Condition string `goquery:".addetailslist--detail--value,text"` + Category string + CategoryTree []string `goquery:".breadcrump-link,text"` + Price string `goquery:"h2#viewad-price"` + Created string `goquery:"#viewad-extra-info,text"` + Text string `goquery:"p#viewad-description-text,html"` + Images []string `goquery:".galleryimage-element img,[src]"` } // Used by slog to pretty print an ad @@ -46,6 +47,8 @@ func (ad *Ad) LogValue() slog.Value { slog.String("id", ad.Id), slog.Int("imagecount", len(ad.Images)), slog.Int("bodysize", len(ad.Text)), + slog.String("categorytree", strings.Join(ad.CategoryTree, "+")), + slog.String("condition", ad.Condition), ) } @@ -58,7 +61,7 @@ func (ad *Ad) LogValue() slog.Value { // // Note: we return true for "ad is incomplete" and false for "ad is complete"! 
func (ad *Ad) Incomplete() bool { - if ad.Category == "" || ad.Condition == "" || ad.Created == "" || ad.Text == "" { + if ad.Category == "" || ad.Created == "" || ad.Text == "" { return true } diff --git a/config.go b/config.go index be53e30..c929576 100644 --- a/config.go +++ b/config.go @@ -1,5 +1,5 @@ /* -Copyright © 2023 Thomas von Dein +Copyright © 2023-2024 Thomas von Dein This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ import ( ) const ( - VERSION string = "0.1.0" + VERSION string = "0.1.1" Baseuri string = "https://www.kleinanzeigen.de" Listuri string = "/s-bestandsliste.html" Defaultdir string = "." @@ -43,6 +43,7 @@ const ( "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nCreated: {{.Created}}\r\n\r\n{{.Text}}\r\n" Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + DefaultAdNameTemplate string = "{{.Slug}}-{{.Id}}" ) const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool. @@ -72,6 +73,7 @@ type Config struct { User int `koanf:"user"` Outdir string `koanf:"outdir"` Template string `koanf:"template"` + Adnametemplate string `koanf:"adnametemplate"` Loglevel string `koanf:"loglevel"` Limit int `koanf:"limit"` Adlinks []string @@ -99,10 +101,11 @@ func InitConfig(w io.Writer) (*Config, error) { // Load default values using the confmap provider. 
if err := k.Load(confmap.Provider(map[string]interface{}{ - "template": template, - "outdir": ".", - "loglevel": "notice", - "userid": 0, + "template": template, + "outdir": ".", + "loglevel": "notice", + "userid": 0, + "adnametemplate": DefaultAdNameTemplate, }, "."), nil); err != nil { return nil, err } diff --git a/kleingebaeck.1 b/kleingebaeck.1 index 4baf2d4..bdc3961 100644 --- a/kleingebaeck.1 +++ b/kleingebaeck.1 @@ -133,7 +133,7 @@ .\" ======================================================================== .\" .IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2023-12-19" "1" "User Commands" +.TH KLEINGEBAECK 1 "2024-01-12" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -142,7 +142,7 @@ kleingebaeck \- kleinanzeigen.de backup tool .SH "SYNOPSYS" .IX Header "SYNOPSYS" -.Vb 10 +.Vb 11 \& Usage: kleingebaeck [\-dvVhmoc] [,...] \& Options: \& \-\-user \-u Backup ads from user with uid . @@ -153,6 +153,7 @@ kleingebaeck \- kleinanzeigen.de backup tool \& \-\-config \-c Use config file (default: ~/.kleingebaeck). \& \-\-manual \-m Show manual. \& \-\-help \-h Show usage. +\& \-\-version \-V Show program version. .Ve .SH "DESCRIPTION" .IX Header "DESCRIPTION" @@ -235,7 +236,20 @@ Also there's currently no parallelization implemented. This will change in the future. .SH "LICENSE" .IX Header "LICENSE" -Licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3. +Copyright 2023\-2024 Thomas von Dein +.PP +This program is free software: you can redistribute it and/or modify +it under the terms of the \s-1GNU\s0 General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+.PP +This program is distributed in the hope that it will be useful, +but \s-1WITHOUT ANY WARRANTY\s0; without even the implied warranty of +\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS FOR A PARTICULAR PURPOSE.\s0 See the +\&\s-1GNU\s0 General Public License for more details. +.PP +You should have received a copy of the \s-1GNU\s0 General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. .SH "Author" .IX Header "Author" T.v.Dein diff --git a/kleingebaeck.go b/kleingebaeck.go index 5b4ff21..af850dd 100644 --- a/kleingebaeck.go +++ b/kleingebaeck.go @@ -15,6 +15,7 @@ SYNOPSYS --config -c Use config file (default: ~/.kleingebaeck). --manual -m Show manual. --help -h Show usage. + --version -V Show program version. DESCRIPTION This tool can be used to backup ads on the german ad page @@ -89,7 +90,20 @@ LIMITATIONS in the future. LICENSE - Licensed under the GNU GENERAL PUBLIC LICENSE version 3. + Copyright 2023-2024 Thomas von Dein + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation, either version 3 of the License, or (at your + option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General + Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program. If not, see <http://www.gnu.org/licenses/>. Author T.v.Dein diff --git a/kleingebaeck.pod b/kleingebaeck.pod index d8aaeac..b07e2fe 100644 --- a/kleingebaeck.pod +++ b/kleingebaeck.pod @@ -96,7 +96,20 @@ change in the future. =head1 LICENSE -Licensed under the GNU GENERAL PUBLIC LICENSE version 3. 
+Copyright 2023-2024 Thomas von Dein + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see L<http://www.gnu.org/licenses/>. =head1 Author diff --git a/main_test.go b/main_test.go index bceb8af..4244175 100644 --- a/main_test.go +++ b/main_test.go @@ -1,5 +1,5 @@ /* -Copyright © 2023 Thomas von Dein +Copyright © 2023-2024 Thomas von Dein This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -60,6 +60,16 @@ const ADTPL string = `DOCTYPE html> + + {{ range $image := .Images }}
@@ -79,10 +89,6 @@ const ADTPL string = `DOCTYPE html>
    -
  • - Art - {{ .Category }} -
  • Zustand {{ .Condition }} @@ -438,9 +444,17 @@ func SetIntercept(ads []Adsource) { } func VerifyAd(ad AdConfig) error { - body := ad.Title + ad.Price + ad.Id + ad.Category + ad.Condition + ad.Created + body := ad.Title + ad.Price + ad.Id + "Kleinanzeigen => " + ad.Category + ad.Condition + ad.Created - file := fmt.Sprintf("t/out/%s/Adlisting.txt", ad.Slug) + // prepare ad dir name using DefaultAdNameTemplate + c := Config{Adnametemplate: DefaultAdNameTemplate} + adstruct := Ad{Slug: ad.Slug, Id: ad.Id} + addir, err := AdDirName(&c, &adstruct) + if err != nil { + return err + } + + file := fmt.Sprintf("t/out/%s/Adlisting.txt", addir) content, err := os.ReadFile(file) if err != nil { return err diff --git a/scrape.go b/scrape.go index 25e7d73..6545b70 100644 --- a/scrape.go +++ b/scrape.go @@ -1,5 +1,5 @@ /* -Copyright © 2023 Thomas von Dein +Copyright © 2023-2024 Thomas von Dein This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -135,36 +135,37 @@ func Scrape(c *Config, uri string) error { if err != nil { return err } - if len(ad.Meta) == 2 { - ad.Category = ad.Meta[0] - ad.Condition = ad.Meta[1] + + if len(ad.CategoryTree) > 0 { + ad.Category = strings.Join(ad.CategoryTree, " => ") } if ad.Incomplete() { + slog.Debug("got ad", "ad", ad) return errors.New("could not extract ad data from page, got empty struct") } slog.Debug("extracted ad listing", "ad", ad) // write listing - err = WriteAd(c.Outdir, ad, c.Template) + addir, err := WriteAd(c, ad) if err != nil { return err } c.IncrAds() - return ScrapeImages(c, ad) + return ScrapeImages(c, ad, addir) } -func ScrapeImages(c *Config, ad *Ad) error { +func ScrapeImages(c *Config, ad *Ad, addir string) error { // fetch images img := 1 g := new(errgroup.Group) for _, imguri := range ad.Images { imguri := imguri - file := filepath.Join(c.Outdir, ad.Slug, fmt.Sprintf("%d.jpg", img)) + file := filepath.Join(c.Outdir, addir, 
fmt.Sprintf("%d.jpg", img)) g.Go(func() error { err := Getimage(imguri, file) if err != nil { diff --git a/store.go b/store.go index e159f95..ad71c3e 100644 --- a/store.go +++ b/store.go @@ -1,5 +1,5 @@ /* -Copyright © 2023 Thomas von Dein +Copyright © 2023-2024 Thomas von Dein This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,6 +18,7 @@ along with this program. If not, see . package main import ( + "bytes" "io" "log/slog" "os" @@ -27,19 +28,40 @@ import ( tpl "text/template" ) -func WriteAd(dir string, ad *Ad, template string) error { - // prepare output dir - dir = filepath.Join(dir, ad.Slug) - err := Mkdir(dir) +func AdDirName(c *Config, ad *Ad) (string, error) { + tmpl, err := tpl.New("adname").Parse(c.Adnametemplate) if err != nil { - return err + return "", err + } + + buf := bytes.Buffer{} + err = tmpl.Execute(&buf, ad) + if err != nil { + return "", err + } + + return buf.String(), nil +} + +func WriteAd(c *Config, ad *Ad) (string, error) { + // prepare ad dir name + addir, err := AdDirName(c, ad) + if err != nil { + return "", err + } + + // prepare output dir + dir := filepath.Join(c.Outdir, addir) + err = Mkdir(dir) + if err != nil { + return "", err } // write ad file listingfile := filepath.Join(dir, "Adlisting.txt") f, err := os.Create(listingfile) if err != nil { - return err + return "", err } defer f.Close() @@ -49,19 +71,19 @@ func WriteAd(dir string, ad *Ad, template string) error { ad.Text = strings.ReplaceAll(ad.Text, "
    ", "\n") } - tmpl, err := tpl.New("adlisting").Parse(template) + tmpl, err := tpl.New("adlisting").Parse(c.Template) if err != nil { - return err + return "", err } err = tmpl.Execute(f, ad) if err != nil { - return err + return "", err } slog.Info("wrote ad listing", "listingfile", listingfile) - return nil + return addir, nil } func WriteImage(filename string, reader io.ReadCloser) error {