fixed linter errors

This commit is contained in:
2024-01-25 15:02:49 +01:00
parent 20e6299ebd
commit bebcd15ada
10 changed files with 115 additions and 99 deletions

View File

@@ -17,7 +17,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"errors"
"fmt"
"io"
"os"
@@ -56,6 +55,9 @@ const (
// for image download throttling
MinThrottle int = 2
MaxThrottle int = 20
// minimum number of "/"-separated parts an ad uri must have so that slug and id can be extracted
SlugUriPartNum int = 6
)
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
@@ -107,8 +109,8 @@ func (c *Config) IncrImgs(num int) {
}
// load commandline flags and config file
func InitConfig(w io.Writer) (*Config, error) {
var k = koanf.New(".")
func InitConfig(output io.Writer) (*Config, error) {
var kloader = koanf.New(".")
// determine template based on os
template := DefaultTemplate
@@ -117,7 +119,7 @@ func InitConfig(w io.Writer) (*Config, error) {
}
// Load default values using the confmap provider.
if err := k.Load(confmap.Provider(map[string]interface{}{
if err := kloader.Load(confmap.Provider(map[string]interface{}{
"template": template,
"outdir": ".",
"loglevel": "notice",
@@ -125,37 +127,39 @@ func InitConfig(w io.Writer) (*Config, error) {
"adnametemplate": DefaultAdNameTemplate,
"useragent": DefaultUserAgent,
}, "."), nil); err != nil {
return nil, err
return nil, fmt.Errorf("failed to load default values into koanf: %w", err)
}
// setup custom usage
f := flag.NewFlagSet("config", flag.ContinueOnError)
f.Usage = func() {
fmt.Fprintln(w, Usage)
flagset := flag.NewFlagSet("config", flag.ContinueOnError)
flagset.Usage = func() {
fmt.Fprintln(output, Usage)
os.Exit(0)
}
// parse commandline flags
f.StringP("config", "c", "", "config file")
f.StringP("outdir", "o", "", "directory where to store ads")
f.IntP("user", "u", 0, "user id")
f.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)")
f.BoolP("verbose", "v", false, "be verbose")
f.BoolP("debug", "d", false, "enable debug log")
f.BoolP("version", "V", false, "show program version")
f.BoolP("help", "h", false, "show usage")
f.BoolP("manual", "m", false, "show manual")
f.BoolP("force", "f", false, "force")
flagset.StringP("config", "c", "", "config file")
flagset.StringP("outdir", "o", "", "directory where to store ads")
flagset.IntP("user", "u", 0, "user id")
flagset.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)")
flagset.BoolP("verbose", "v", false, "be verbose")
flagset.BoolP("debug", "d", false, "enable debug log")
flagset.BoolP("version", "V", false, "show program version")
flagset.BoolP("help", "h", false, "show usage")
flagset.BoolP("manual", "m", false, "show manual")
flagset.BoolP("force", "f", false, "force")
if err := f.Parse(os.Args[1:]); err != nil {
return nil, err
if err := flagset.Parse(os.Args[1:]); err != nil {
return nil, fmt.Errorf("failed to parse program arguments: %w", err)
}
// generate a list of config files to try to load, including the
// one provided via -c, if any
var configfiles []string
configfile, _ := f.GetString("config")
configfile, _ := flagset.GetString("config")
home, _ := os.UserHomeDir()
if configfile != "" {
configfiles = []string{configfile}
} else {
@@ -171,31 +175,30 @@ func InitConfig(w io.Writer) (*Config, error) {
for _, cfgfile := range configfiles {
if path, err := os.Stat(cfgfile); !os.IsNotExist(err) {
if !path.IsDir() {
if err := k.Load(file.Provider(cfgfile), toml.Parser()); err != nil {
return nil, errors.New("error loading config file: " + err.Error())
if err := kloader.Load(file.Provider(cfgfile), toml.Parser()); err != nil {
return nil, fmt.Errorf("error loading config file: %w", err)
}
}
}
// else: we ignore the file if it doesn't exist
} // else: we ignore the file if it doesn't exist
}
// env overrides config file
if err := k.Load(env.Provider("KLEINGEBAECK_", ".", func(s string) string {
return strings.Replace(strings.ToLower(
strings.TrimPrefix(s, "KLEINGEBAECK_")), "_", ".", -1)
if err := kloader.Load(env.Provider("KLEINGEBAECK_", ".", func(s string) string {
return strings.ReplaceAll(strings.ToLower(
strings.TrimPrefix(s, "KLEINGEBAECK_")), "_", ".")
}), nil); err != nil {
return nil, errors.New("error loading environment: " + err.Error())
return nil, fmt.Errorf("error loading environment: %w", err)
}
// command line overrides env
if err := k.Load(posflag.Provider(f, ".", k), nil); err != nil {
return nil, errors.New("error loading flags: " + err.Error())
if err := kloader.Load(posflag.Provider(flagset, ".", kloader), nil); err != nil {
return nil, fmt.Errorf("error loading flags: %w", err)
}
// fetch values
conf := &Config{}
if err := k.Unmarshal("", &conf); err != nil {
return nil, errors.New("error unmarshalling: " + err.Error())
if err := kloader.Unmarshal("", &conf); err != nil {
return nil, fmt.Errorf("error unmarshalling: %w", err)
}
// adjust loglevel
@@ -207,7 +210,7 @@ func InitConfig(w io.Writer) (*Config, error) {
}
// are there any args left on commandline? if so treat them as adlinks
conf.Adlinks = f.Args()
conf.Adlinks = flagset.Args()
return conf, nil
}

View File

@@ -19,6 +19,7 @@ package main
import (
"errors"
"fmt"
"io"
"log/slog"
"net/http"
@@ -33,10 +34,10 @@ type Fetcher struct {
Cookies []*http.Cookie
}
func NewFetcher(c *Config) (*Fetcher, error) {
func NewFetcher(conf *Config) (*Fetcher, error) {
jar, err := cookiejar.New(nil)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to create a cookie jar obj: %w", err)
}
return &Fetcher{
@@ -44,7 +45,7 @@ func NewFetcher(c *Config) (*Fetcher, error) {
Transport: &loggingTransport{}, // implemented in http.go
Jar: jar,
},
Config: c,
Config: conf,
Cookies: []*http.Cookie{},
},
nil
@@ -53,7 +54,7 @@ func NewFetcher(c *Config) (*Fetcher, error) {
func (f *Fetcher) Get(uri string) (io.ReadCloser, error) {
req, err := http.NewRequest("GET", uri, nil)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to create a new HTTP request obj: %w", err)
}
req.Header.Set("User-Agent", f.Config.UserAgent)
@@ -69,7 +70,7 @@ func (f *Fetcher) Get(uri string) (io.ReadCloser, error) {
res, err := f.Client.Do(req)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to initiate HTTP request to %s: %w", uri, err)
}
if res.StatusCode != 200 {

1
go.mod
View File

@@ -31,6 +31,7 @@ require (
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect
github.com/pkg/errors v0.9.1 // indirect
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
golang.org/x/sys v0.14.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect

2
go.sum
View File

@@ -50,6 +50,8 @@ github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=

View File

@@ -19,6 +19,7 @@ package main
import (
"bytes"
"fmt"
"io"
"log/slog"
"math"
@@ -125,5 +126,9 @@ func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error)
retries++
}
return resp, err
if err != nil {
return resp, fmt.Errorf("failed to get HTTP response for %s: %w", req.URL, err)
}
return resp, nil
}

View File

@@ -19,6 +19,7 @@ package main
import (
"bytes"
"fmt"
"image/jpeg"
"log/slog"
"os"
@@ -33,14 +34,14 @@ type Image struct {
Filename string
Hash *goimagehash.ImageHash
Data *bytes.Buffer
Uri string
URI string
}
// LogValue implements slog.LogValuer: when an *Image is passed to a slog
// call it is rendered as a small group of attributes (filename, uri, hash)
// instead of the struct itself, which avoids printing the raw Data buffer.
//
// NOTE(review): Hash.ToString() is called unconditionally; presumably Hash
// must already be set (e.g. via CalcHash) before an image is logged — confirm.
func (img *Image) LogValue() slog.Value {
return slog.GroupValue(
slog.String("filename", img.Filename),
slog.String("uri", img.Uri),
slog.String("uri", img.URI),
slog.String("hash", img.Hash.ToString()),
)
}
@@ -51,7 +52,7 @@ type Cache []*goimagehash.ImageHash
func NewImage(buf *bytes.Buffer, filename string, uri string) *Image {
img := &Image{
Filename: filename,
Uri: uri,
URI: uri,
Data: buf,
}
@@ -62,12 +63,12 @@ func NewImage(buf *bytes.Buffer, filename string, uri string) *Image {
func (img *Image) CalcHash() error {
jpgdata, err := jpeg.Decode(img.Data)
if err != nil {
return err
return fmt.Errorf("failed to decode JPEG image: %w", err)
}
hash1, err := goimagehash.DifferenceHash(jpgdata)
if err != nil {
return err
return fmt.Errorf("failed to calculate diff hash of image: %w", err)
}
img.Hash = hash1
@@ -80,16 +81,18 @@ func (img *Image) Similar(hash *goimagehash.ImageHash) bool {
distance, err := img.Hash.Distance(hash)
if err != nil {
slog.Debug("failed to compute diff hash distance", "error", err)
return false
}
if distance < MaxDistance {
slog.Debug("distance computation", "image-A", img.Hash.ToString(),
"image-B", hash.ToString(), "distance", distance)
return true
} else {
return false
}
return false
}
// check current image against all known hashes.
@@ -108,7 +111,7 @@ func (img *Image) SimilarExists(cache Cache) bool {
func ReadImages(addir string, dont bool) (Cache, error) {
files, err := os.ReadDir(addir)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to read ad directory contents: %w", err)
}
cache := Cache{}
@@ -122,6 +125,7 @@ func ReadImages(addir string, dont bool) (Cache, error) {
ext := filepath.Ext(file.Name())
if !file.IsDir() && (ext == ".jpg" || ext == ".jpeg" || ext == ".JPG" || ext == ".JPEG") {
filename := filepath.Join(addir, file.Name())
data, err := ReadImage(filename)
if err != nil {
return nil, err
@@ -137,6 +141,5 @@ func ReadImages(addir string, dont bool) (Cache, error) {
}
}
//return nil, errors.New("ende")
return cache, nil
}

View File

@@ -22,10 +22,8 @@ import (
"fmt"
"io"
"log/slog"
"math/rand"
"os"
"runtime/debug"
"time"
"github.com/lmittmann/tint"
"github.com/tlinden/yadu"
@@ -118,9 +116,6 @@ func Main(w io.Writer) int {
return Die(err)
}
// randomization needed here and there
rand.Seed(time.Now().UnixNano())
if len(conf.Adlinks) >= 1 {
// directly backup ad listing[s]
for _, uri := range conf.Adlinks {

View File

@@ -19,10 +19,10 @@ package main
import (
"bytes"
"errors"
"fmt"
"log/slog"
"path/filepath"
"strconv"
"strings"
"time"
@@ -43,7 +43,9 @@ func ScrapeUser(fetch *Fetcher) error {
for {
var index Index
slog.Debug("fetching page", "uri", uri)
body, err := fetch.Get(uri)
if err != nil {
return err
@@ -67,16 +69,16 @@ func ScrapeUser(fetch *Fetcher) error {
}
page++
uri = baseuri + "&pageNum=" + fmt.Sprintf("%d", page)
uri = baseuri + "&pageNum=" + strconv.Itoa(page)
}
for i, adlink := range adlinks {
for index, adlink := range adlinks {
err := ScrapeAd(fetch, Baseuri+adlink)
if err != nil {
return err
}
if fetch.Config.Limit > 0 && i == fetch.Config.Limit-1 {
if fetch.Config.Limit > 0 && index == fetch.Config.Limit-1 {
break
}
}
@@ -86,18 +88,20 @@ func ScrapeUser(fetch *Fetcher) error {
// scrape an ad. uri is the full uri of the ad (the output directory comes from fetch.Config)
func ScrapeAd(fetch *Fetcher, uri string) error {
ad := &Ad{}
advertisement := &Ad{}
// extract slug and id from uri
uriparts := strings.Split(uri, "/")
if len(uriparts) < 6 {
return errors.New("invalid uri: " + uri)
if len(uriparts) < SlugUriPartNum {
return fmt.Errorf("invalid uri: %s", uri)
}
ad.Slug = uriparts[4]
ad.Id = uriparts[5]
advertisement.Slug = uriparts[4]
advertisement.Id = uriparts[5]
// get the ad
slog.Debug("fetching ad page", "uri", uri)
body, err := fetch.Get(uri)
if err != nil {
return err
@@ -105,36 +109,36 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
defer body.Close()
// extract ad contents with goquery/goq
err = goq.NewDecoder(body).Decode(&ad)
err = goq.NewDecoder(body).Decode(&advertisement)
if err != nil {
return err
return fmt.Errorf("failed to goquery decode HTML body: %w", err)
}
if len(ad.CategoryTree) > 0 {
ad.Category = strings.Join(ad.CategoryTree, " => ")
if len(advertisement.CategoryTree) > 0 {
advertisement.Category = strings.Join(advertisement.CategoryTree, " => ")
}
if ad.Incomplete() {
slog.Debug("got ad", "ad", ad)
return errors.New("could not extract ad data from page, got empty struct")
if advertisement.Incomplete() {
slog.Debug("got ad", "ad", advertisement)
return fmt.Errorf("could not extract ad data from page, got empty struct")
}
ad.CalculateExpire()
advertisement.CalculateExpire()
// write listing
addir, err := WriteAd(fetch.Config, ad)
addir, err := WriteAd(fetch.Config, advertisement)
if err != nil {
return err
}
slog.Debug("extracted ad listing", "ad", ad)
slog.Debug("extracted ad listing", "ad", advertisement)
fetch.Config.IncrAds()
return ScrapeImages(fetch, ad, addir)
return ScrapeImages(fetch, advertisement, addir)
}
func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error {
// fetch images
img := 1
adpath := filepath.Join(fetch.Config.Outdir, addir)
@@ -145,16 +149,17 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
return err
}
g := new(errgroup.Group)
egroup := new(errgroup.Group)
for _, imguri := range ad.Images {
for _, imguri := range advertisement.Images {
imguri := imguri
file := filepath.Join(adpath, fmt.Sprintf("%d.jpg", img))
g.Go(func() error {
egroup.Go(func() error {
// wait a little
t := GetThrottleTime()
time.Sleep(t)
throttle := GetThrottleTime()
time.Sleep(throttle)
body, err := fetch.Getimage(imguri)
if err != nil {
@@ -164,7 +169,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
buf := new(bytes.Buffer)
_, err = buf.ReadFrom(body)
if err != nil {
return err
return fmt.Errorf("failed to read from image buffer: %w", err)
}
buf2 := buf.Bytes() // needed for image writing
@@ -177,7 +182,7 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
if !fetch.Config.ForceDownload {
if image.SimilarExists(cache) {
slog.Debug("similar image exists, not written", "uri", image.Uri)
slog.Debug("similar image exists, not written", "uri", image.URI)
return nil
}
}
@@ -187,17 +192,17 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
return err
}
slog.Debug("wrote image", "image", image, "size", len(buf2), "throttle", t)
slog.Debug("wrote image", "image", image, "size", len(buf2), "throttle", throttle)
return nil
})
img++
}
if err := g.Wait(); err != nil {
if err := egroup.Wait(); err != nil {
return err
}
fetch.Config.IncrImgs(len(ad.Images))
fetch.Config.IncrImgs(len(advertisement.Images))
return nil
}

View File

@@ -31,13 +31,13 @@ import (
func AdDirName(c *Config, ad *Ad) (string, error) {
tmpl, err := tpl.New("adname").Parse(c.Adnametemplate)
if err != nil {
return "", err
return "", fmt.Errorf("failed to parse adname template: %w", err)
}
buf := bytes.Buffer{}
err = tmpl.Execute(&buf, ad)
if err != nil {
return "", err
return "", fmt.Errorf("failed to execute adname template: %w", err)
}
return buf.String(), nil
@@ -59,11 +59,11 @@ func WriteAd(c *Config, ad *Ad) (string, error) {
// write ad file
listingfile := filepath.Join(dir, "Adlisting.txt")
f, err := os.Create(listingfile)
listingfd, err := os.Create(listingfile)
if err != nil {
return "", err
return "", fmt.Errorf("failed to create Adlisting.txt: %w", err)
}
defer f.Close()
defer listingfd.Close()
if runtime.GOOS == "windows" {
ad.Text = strings.ReplaceAll(ad.Text, "<br/>", "\r\n")
@@ -73,12 +73,12 @@ func WriteAd(c *Config, ad *Ad) (string, error) {
tmpl, err := tpl.New("adlisting").Parse(c.Template)
if err != nil {
return "", err
return "", fmt.Errorf("failed to parse adlisting template: %w", err)
}
err = tmpl.Execute(f, ad)
err = tmpl.Execute(listingfd, ad)
if err != nil {
return "", err
return "", fmt.Errorf("failed to execute adlisting template: %w", err)
}
slog.Info("wrote ad listing", "listingfile", listingfile)
@@ -89,14 +89,14 @@ func WriteAd(c *Config, ad *Ad) (string, error) {
func WriteImage(filename string, buf []byte) error {
file, err := os.Create(filename)
if err != nil {
return err
return fmt.Errorf("failed to open image file: %w", err)
}
defer file.Close()
_, err = file.Write(buf)
if err != nil {
return err
return fmt.Errorf("failed to write to image file: %w", err)
}
return nil
@@ -111,12 +111,12 @@ func ReadImage(filename string) (*bytes.Buffer, error) {
data, err := os.ReadFile(filename)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to read image file: %w", err)
}
_, err = buf.Write(data)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to write image into buffer: %w", err)
}
return &buf, nil

View File

@@ -20,6 +20,7 @@ package main
import (
"bytes"
"errors"
"fmt"
"math/rand"
"os"
"os/exec"
@@ -33,7 +34,7 @@ func Mkdir(dir string) error {
if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) {
err := os.Mkdir(dir, os.ModePerm)
if err != nil {
return err
return fmt.Errorf("failed to create directory %s: %w", dir, err)
}
}
@@ -53,7 +54,7 @@ func man() error {
err := man.Run()
if err != nil {
return err
return fmt.Errorf("failed to execute 'less': %w", err)
}
return nil