mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 20:41:01 +01:00
Compare commits
43 Commits
intern/moc
...
v0.2.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
634d9a4140 | ||
|
|
96fb142423 | ||
|
|
39b064cc20 | ||
|
|
3fd75fa53d | ||
|
|
78e5de61d2 | ||
|
|
f4a9a9895c | ||
|
|
ac5b0608d8 | ||
|
|
239e253057 | ||
| cdf58efd45 | |||
| 110ee17091 | |||
| 8321d3c343 | |||
|
|
56f53bb777 | ||
|
|
9e7f9a2821 | ||
| 577f9d983e | |||
| 114f6b16d9 | |||
| a06c730fe4 | |||
| d8e968ed6d | |||
| 5f450e54ea | |||
| 3e6349cf36 | |||
|
|
bf8e074034 | ||
| 6dea8d78ed | |||
| ea76b98445 | |||
| 9f688b7692 | |||
|
|
d8baa34c54 | ||
|
|
c1cbce32e1 | ||
|
|
dc4d3d7f9c | ||
|
|
b28f544416 | ||
|
|
8cdefe457b | ||
|
|
1e4b406aa4 | ||
|
|
eaf4db6cef | ||
|
|
825649bb3b | ||
|
|
6aa9c658b6 | ||
|
|
2c62f9eb17 | ||
|
|
bff0ae553e | ||
|
|
450d44d129 | ||
|
|
18f7e0fe49 | ||
|
|
def063afe9 | ||
| f1908f02cb | |||
| 4a528ad9d1 | |||
| 5c1161f227 | |||
| bd9d8fdb2c | |||
|
|
1ee886c504 | ||
|
|
d7b13e8a9a |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -3,6 +3,3 @@ kleingebaeck
|
|||||||
releases
|
releases
|
||||||
t/out
|
t/out
|
||||||
.bak
|
.bak
|
||||||
t/httproot/out
|
|
||||||
t/httproot/kleinanzeigen
|
|
||||||
t/httproot/favicon.ico
|
|
||||||
|
|||||||
@@ -62,7 +62,6 @@ Options:
|
|||||||
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
-f --force Download images even if they already exist.
|
|
||||||
-m --manual Show manual.
|
-m --manual Show manual.
|
||||||
-h --help Show usage.
|
-h --help Show usage.
|
||||||
-V --version Show program version.
|
-V --version Show program version.
|
||||||
@@ -83,7 +82,6 @@ type Config struct {
|
|||||||
Loglevel string `koanf:"loglevel"`
|
Loglevel string `koanf:"loglevel"`
|
||||||
Limit int `koanf:"limit"`
|
Limit int `koanf:"limit"`
|
||||||
IgnoreErrors bool `koanf:"ignoreerrors"`
|
IgnoreErrors bool `koanf:"ignoreerrors"`
|
||||||
ForceDownload bool `koanf:"force"`
|
|
||||||
Adlinks []string
|
Adlinks []string
|
||||||
StatsCountAds int
|
StatsCountAds int
|
||||||
StatsCountImages int
|
StatsCountImages int
|
||||||
@@ -135,7 +133,6 @@ func InitConfig(w io.Writer) (*Config, error) {
|
|||||||
f.BoolP("version", "V", false, "show program version")
|
f.BoolP("version", "V", false, "show program version")
|
||||||
f.BoolP("help", "h", false, "show usage")
|
f.BoolP("help", "h", false, "show usage")
|
||||||
f.BoolP("manual", "m", false, "show manual")
|
f.BoolP("manual", "m", false, "show manual")
|
||||||
f.BoolP("force", "f", false, "force")
|
|
||||||
|
|
||||||
if err := f.Parse(os.Args[1:]); err != nil {
|
if err := f.Parse(os.Args[1:]); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
|||||||
2
go.mod
2
go.mod
@@ -21,7 +21,6 @@ require (
|
|||||||
require (
|
require (
|
||||||
github.com/PuerkitoBio/goquery v1.5.1 // indirect
|
github.com/PuerkitoBio/goquery v1.5.1 // indirect
|
||||||
github.com/andybalholm/cascadia v1.1.0 // indirect
|
github.com/andybalholm/cascadia v1.1.0 // indirect
|
||||||
github.com/corona10/goimagehash v1.1.0 // indirect
|
|
||||||
github.com/fatih/color v1.16.0 // indirect
|
github.com/fatih/color v1.16.0 // indirect
|
||||||
github.com/fsnotify/fsnotify v1.6.0 // indirect
|
github.com/fsnotify/fsnotify v1.6.0 // indirect
|
||||||
github.com/knadh/koanf/maps v0.1.1 // indirect
|
github.com/knadh/koanf/maps v0.1.1 // indirect
|
||||||
@@ -29,7 +28,6 @@ require (
|
|||||||
github.com/mitchellh/copystructure v1.2.0 // indirect
|
github.com/mitchellh/copystructure v1.2.0 // indirect
|
||||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||||
github.com/mitchellh/reflectwalk v1.0.2 // indirect
|
github.com/mitchellh/reflectwalk v1.0.2 // indirect
|
||||||
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect
|
|
||||||
github.com/pelletier/go-toml v1.9.5 // indirect
|
github.com/pelletier/go-toml v1.9.5 // indirect
|
||||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
|
||||||
golang.org/x/sys v0.14.0 // indirect
|
golang.org/x/sys v0.14.0 // indirect
|
||||||
|
|||||||
4
go.sum
4
go.sum
@@ -6,8 +6,6 @@ github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBK
|
|||||||
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||||
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
||||||
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||||
github.com/corona10/goimagehash v1.1.0 h1:teNMX/1e+Wn/AYSbLHX8mj+mF9r60R1kBeqE9MkoYwI=
|
|
||||||
github.com/corona10/goimagehash v1.1.0/go.mod h1:VkvE0mLn84L4aF8vCb6mafVajEb6QYMHl2ZJLn0mOGI=
|
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
@@ -46,8 +44,6 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua
|
|||||||
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
|
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
|
||||||
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
|
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
|
||||||
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||||
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ=
|
|
||||||
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
|
|
||||||
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
|
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
|
||||||
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
|
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
|
|||||||
142
image.go
142
image.go
@@ -1,142 +0,0 @@
|
|||||||
/*
|
|
||||||
Copyright © 2023-2024 Thomas von Dein
|
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
|
||||||
it under the terms of the GNU General Public License as published by
|
|
||||||
the Free Software Foundation, either version 3 of the License, or
|
|
||||||
(at your option) any later version.
|
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
GNU General Public License for more details.
|
|
||||||
|
|
||||||
You should have received a copy of the GNU General Public License
|
|
||||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"image/jpeg"
|
|
||||||
"log/slog"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
|
|
||||||
"github.com/corona10/goimagehash"
|
|
||||||
)
|
|
||||||
|
|
||||||
const MaxDistance = 3
|
|
||||||
|
|
||||||
type Image struct {
|
|
||||||
Filename string
|
|
||||||
Hash *goimagehash.ImageHash
|
|
||||||
Data *bytes.Buffer
|
|
||||||
Uri string
|
|
||||||
}
|
|
||||||
|
|
||||||
// used for logging to avoid printing Data
|
|
||||||
func (img *Image) LogValue() slog.Value {
|
|
||||||
return slog.GroupValue(
|
|
||||||
slog.String("filename", img.Filename),
|
|
||||||
slog.String("uri", img.Uri),
|
|
||||||
slog.String("hash", img.Hash.ToString()),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// holds all images of an ad
|
|
||||||
type Cache []*goimagehash.ImageHash
|
|
||||||
|
|
||||||
func NewImage(buf *bytes.Buffer, filename string, uri string) *Image {
|
|
||||||
img := &Image{
|
|
||||||
Filename: filename,
|
|
||||||
Uri: uri,
|
|
||||||
Data: buf,
|
|
||||||
}
|
|
||||||
|
|
||||||
return img
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate diff hash of the image
|
|
||||||
func (img *Image) CalcHash() error {
|
|
||||||
jpgdata, err := jpeg.Decode(img.Data)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
hash1, err := goimagehash.DifferenceHash(jpgdata)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
img.Hash = hash1
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// checks if 2 images are similar enough to be considered the same
|
|
||||||
func (img *Image) Similar(hash *goimagehash.ImageHash) bool {
|
|
||||||
distance, err := img.Hash.Distance(hash)
|
|
||||||
if err != nil {
|
|
||||||
slog.Debug("failed to compute diff hash distance", "error", err)
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if distance < MaxDistance {
|
|
||||||
slog.Debug("distance computation", "image-A", img.Hash.ToString(),
|
|
||||||
"image-B", hash.ToString(), "distance", distance)
|
|
||||||
return true
|
|
||||||
} else {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// check current image against all known hashes.
|
|
||||||
func (img *Image) SimilarExists(cache Cache) bool {
|
|
||||||
for _, otherimg := range cache {
|
|
||||||
if img.Similar(otherimg) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// read all JPG images in a ad directory, compute diff hashes and
|
|
||||||
// store the results in the slice Images
|
|
||||||
func ReadImages(addir string, dont bool) (Cache, error) {
|
|
||||||
files, err := os.ReadDir(addir)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
cache := Cache{}
|
|
||||||
|
|
||||||
if dont {
|
|
||||||
// forced download, -f given
|
|
||||||
return cache, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, file := range files {
|
|
||||||
ext := filepath.Ext(file.Name())
|
|
||||||
if !file.IsDir() && (ext == ".jpg" || ext == ".jpeg" || ext == ".JPG" || ext == ".JPEG") {
|
|
||||||
filename := filepath.Join(addir, file.Name())
|
|
||||||
data, err := ReadImage(filename)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
img := NewImage(data, filename, "")
|
|
||||||
if err = img.CalcHash(); err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString())
|
|
||||||
cache = append(cache, img.Hash)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//return nil, errors.New("ende")
|
|
||||||
return cache, nil
|
|
||||||
}
|
|
||||||
@@ -133,7 +133,7 @@
|
|||||||
.\" ========================================================================
|
.\" ========================================================================
|
||||||
.\"
|
.\"
|
||||||
.IX Title "KLEINGEBAECK 1"
|
.IX Title "KLEINGEBAECK 1"
|
||||||
.TH KLEINGEBAECK 1 "2024-01-22" "1" "User Commands"
|
.TH KLEINGEBAECK 1 "2024-01-17" "1" "User Commands"
|
||||||
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
||||||
.\" way too many mistakes in technical documents.
|
.\" way too many mistakes in technical documents.
|
||||||
.if n .ad l
|
.if n .ad l
|
||||||
@@ -142,7 +142,7 @@
|
|||||||
kleingebaeck \- kleinanzeigen.de backup tool
|
kleingebaeck \- kleinanzeigen.de backup tool
|
||||||
.SH "SYNOPSYS"
|
.SH "SYNOPSYS"
|
||||||
.IX Header "SYNOPSYS"
|
.IX Header "SYNOPSYS"
|
||||||
.Vb 10
|
.Vb 12
|
||||||
\& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...]
|
\& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...]
|
||||||
\& Options:
|
\& Options:
|
||||||
\& \-u \-\-user <uid> Backup ads from user with uid <uid>.
|
\& \-u \-\-user <uid> Backup ads from user with uid <uid>.
|
||||||
@@ -152,7 +152,6 @@ kleingebaeck \- kleinanzeigen.de backup tool
|
|||||||
\& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all.
|
\& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
\& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck).
|
\& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
\& \-f \-\-force Download images even if they already exist.
|
|
||||||
\& \-m \-\-manual Show manual.
|
\& \-m \-\-manual Show manual.
|
||||||
\& \-h \-\-help Show usage.
|
\& \-h \-\-help Show usage.
|
||||||
\& \-V \-\-version Show program version.
|
\& \-V \-\-version Show program version.
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ SYNOPSYS
|
|||||||
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
-f --force Download images even if they already exist.
|
|
||||||
-m --manual Show manual.
|
-m --manual Show manual.
|
||||||
-h --help Show usage.
|
-h --help Show usage.
|
||||||
-V --version Show program version.
|
-V --version Show program version.
|
||||||
|
|||||||
@@ -13,7 +13,6 @@ kleingebaeck - kleinanzeigen.de backup tool
|
|||||||
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
-f --force Download images even if they already exist.
|
|
||||||
-m --manual Show manual.
|
-m --manual Show manual.
|
||||||
-h --help Show usage.
|
-h --help Show usage.
|
||||||
-V --version Show program version.
|
-V --version Show program version.
|
||||||
|
|||||||
39
scrape.go
39
scrape.go
@@ -18,7 +18,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
@@ -120,14 +119,14 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
|
|||||||
|
|
||||||
ad.CalculateExpire()
|
ad.CalculateExpire()
|
||||||
|
|
||||||
|
slog.Debug("extracted ad listing", "ad", ad)
|
||||||
|
|
||||||
// write listing
|
// write listing
|
||||||
addir, err := WriteAd(fetch.Config, ad)
|
addir, err := WriteAd(fetch.Config, ad)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Debug("extracted ad listing", "ad", ad)
|
|
||||||
|
|
||||||
fetch.Config.IncrAds()
|
fetch.Config.IncrAds()
|
||||||
|
|
||||||
return ScrapeImages(fetch, ad, addir)
|
return ScrapeImages(fetch, ad, addir)
|
||||||
@@ -136,52 +135,22 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
|
|||||||
func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
||||||
// fetch images
|
// fetch images
|
||||||
img := 1
|
img := 1
|
||||||
adpath := filepath.Join(fetch.Config.Outdir, addir)
|
|
||||||
|
|
||||||
// scan existing images, if any
|
|
||||||
cache, err := ReadImages(adpath, fetch.Config.ForceDownload)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
g := new(errgroup.Group)
|
g := new(errgroup.Group)
|
||||||
|
|
||||||
for _, imguri := range ad.Images {
|
for _, imguri := range ad.Images {
|
||||||
imguri := imguri
|
imguri := imguri
|
||||||
file := filepath.Join(adpath, fmt.Sprintf("%d.jpg", img))
|
file := filepath.Join(fetch.Config.Outdir, addir, fmt.Sprintf("%d.jpg", img))
|
||||||
g.Go(func() error {
|
g.Go(func() error {
|
||||||
body, err := fetch.Getimage(imguri)
|
body, err := fetch.Getimage(imguri)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
buf := new(bytes.Buffer)
|
err = WriteImage(file, body)
|
||||||
_, err = buf.ReadFrom(body)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
buf2 := buf.Bytes() // needed for image writing
|
|
||||||
|
|
||||||
image := NewImage(buf, "", imguri)
|
|
||||||
err = image.CalcHash()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if !fetch.Config.ForceDownload {
|
|
||||||
if image.SimilarExists(cache) {
|
|
||||||
slog.Debug("similar image exists, not written", "uri", image.Uri)
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
err = WriteImage(file, buf2)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
slog.Debug("wrote image", "image", image, "size", len(buf2))
|
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
img++
|
img++
|
||||||
|
|||||||
35
store.go
35
store.go
@@ -19,7 +19,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -86,46 +86,17 @@ func WriteAd(c *Config, ad *Ad) (string, error) {
|
|||||||
return addir, nil
|
return addir, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteImage(filename string, buf []byte) error {
|
func WriteImage(filename string, reader io.ReadCloser) error {
|
||||||
file, err := os.Create(filename)
|
file, err := os.Create(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
|
|
||||||
_, err = file.Write(buf)
|
_, err = io.Copy(file, reader)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ReadImage(filename string) (*bytes.Buffer, error) {
|
|
||||||
var buf bytes.Buffer
|
|
||||||
|
|
||||||
if !fileExists(filename) {
|
|
||||||
return nil, fmt.Errorf("image %s does not exist", filename)
|
|
||||||
}
|
|
||||||
|
|
||||||
data, err := os.ReadFile(filename)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
_, err = buf.Write(data)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
return &buf, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func fileExists(filename string) bool {
|
|
||||||
info, err := os.Stat(filename)
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return !info.IsDir()
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,13 +0,0 @@
|
|||||||
# Mock http server
|
|
||||||
|
|
||||||
Install ehfs from https://github.com/mjpclab/extra-http-file-server/.
|
|
||||||
|
|
||||||
Install p2cli from https://github.com/wrouesnel/p2cli.
|
|
||||||
|
|
||||||
Run `templates/render.sh` to build the file structure.
|
|
||||||
|
|
||||||
Run `server.sh` to start the http server.
|
|
||||||
|
|
||||||
To scrape an ad from it, use such a URL:
|
|
||||||
|
|
||||||
http://localhost:8080/s-anzeige/first-ad/111-11-111
|
|
||||||
Binary file not shown.
|
Before Width: | Height: | Size: 37 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 28 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 25 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 28 KiB |
@@ -1,4 +0,0 @@
|
|||||||
#!/bin/sh
|
|
||||||
ehfs -a :/s-anzeige:./kleinanzeigen \
|
|
||||||
-a :/api/v1/prod-ads/images/fc:./img \
|
|
||||||
-l localhost:8080 -I index.html
|
|
||||||
@@ -1,50 +0,0 @@
|
|||||||
<!DOCTYPE html>
|
|
||||||
<html lang="de">
|
|
||||||
<head>
|
|
||||||
<title>Ad Listing</title>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
|
|
||||||
<div class="l-container-row">
|
|
||||||
<div id="vap-brdcrmb" class="breadcrump">
|
|
||||||
<a class="breadcrump-link" itemprop="url" href="/" title="Kleinanzeigen ">
|
|
||||||
<span itemprop="title">Kleinanzeigen </span>
|
|
||||||
</a>
|
|
||||||
<a class="breadcrump-link" itemprop="url" href="/egal">
|
|
||||||
<span itemprop="title">{{ category }}</span></a>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
{% for image in images %}
|
|
||||||
<div class="galleryimage-element" data-ix="3">
|
|
||||||
<img src="http://localhost:8080/api/v1/prod-ads/images/fc/{{ image.id }}?rule=$_59.JPG"/>
|
|
||||||
</div>
|
|
||||||
{% endfor %}
|
|
||||||
|
|
||||||
<h1 id="viewad-title" class="boxedarticle--title" itemprop="name" data-soldlabel="Verkauft">
|
|
||||||
{{ title }}</h1>
|
|
||||||
<div class="boxedarticle--flex--container">
|
|
||||||
<h2 class="boxedarticle--price" id="viewad-price">
|
|
||||||
{{ price }}</h2>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div id="viewad-extra-info" class="boxedarticle--details--full">
|
|
||||||
<div><i class="icon icon-small icon-calendar-gray-simple"></i><span>{{ created }}</span></div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="splitlinebox l-container-row" id="viewad-details">
|
|
||||||
<ul class="addetailslist">
|
|
||||||
<li class="addetailslist--detail">
|
|
||||||
Zustand<span class="addetailslist--detail--value" >
|
|
||||||
{{ condition }}</span>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="l-container last-paragraph-no-margin-bottom">
|
|
||||||
<p id="viewad-description-text" class="text-force-linebreak " itemprop="description">
|
|
||||||
{{ text }}
|
|
||||||
</p>
|
|
||||||
</div>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
@@ -1,15 +0,0 @@
|
|||||||
<!DOCTYPE html>
|
|
||||||
<html lang="de" >
|
|
||||||
<head>
|
|
||||||
<title>Ads</title>
|
|
||||||
</head>
|
|
||||||
<body>
|
|
||||||
{% for ad in ads %}
|
|
||||||
<h2 class="text-module-begin">
|
|
||||||
<a class="ellipsis"
|
|
||||||
href="/s-anzeige/{{ ad.slug }}/{{ ad.id }}">{{ ad.title }}</a>
|
|
||||||
</h2>
|
|
||||||
{% endfor %}
|
|
||||||
</body>
|
|
||||||
</html>
|
|
||||||
|
|
||||||
@@ -1,13 +0,0 @@
|
|||||||
#!/bin/sh -x
|
|
||||||
base="../kleinanzeigen"
|
|
||||||
mkdir -p $base
|
|
||||||
|
|
||||||
echo "Generating /s-bestandsliste.html"
|
|
||||||
p2cli -t index.tpl -i vars.yaml > $base/s-bestandsliste.html
|
|
||||||
|
|
||||||
for idx in 0 1; do
|
|
||||||
slug=$(cat vars.yaml | yq ".ads[$idx].slug")
|
|
||||||
id=$(cat vars.yaml | yq ".ads[$idx].id")
|
|
||||||
mkdir -p $base/$slug/$id
|
|
||||||
cat vars.yaml | yq ".ads[$idx]" | p2cli -t ad.tpl -f yaml > $base/$slug/$id/index.html
|
|
||||||
done
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
ads:
|
|
||||||
- slug: first-ad
|
|
||||||
id: 111-11-111
|
|
||||||
title: First Ad
|
|
||||||
price: "19 €"
|
|
||||||
condition: "Sehr gut"
|
|
||||||
category: "Weitere Elektronik"
|
|
||||||
created: 21.12.2023
|
|
||||||
images:
|
|
||||||
- id: fcf6d664-5258-42c2-bf58-d1b8e9221574
|
|
||||||
- id: fcf6d664-5258-42c2-bf58-as43as5d43as
|
|
||||||
text: |
|
|
||||||
Zu Verkaufen.
|
|
||||||
Zahlung nur Paypal.
|
|
||||||
- slug: second-ad
|
|
||||||
id: 222-22-222
|
|
||||||
title: Second Ad
|
|
||||||
price: "200 €"
|
|
||||||
condition: "Sehr gut"
|
|
||||||
category: "Elektronik"
|
|
||||||
created: 21.12.2023
|
|
||||||
images:
|
|
||||||
- id: cdas4sd5-5258-42c2-bf58-d1b8e9221574
|
|
||||||
- id: cdas4sd5-5258-42c2-bf58-as43as5d43as
|
|
||||||
text: |
|
|
||||||
Zu Verkaufen.
|
|
||||||
Zahlung nur Überweisung.
|
|
||||||
Reference in New Issue
Block a user