mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 12:31:03 +01:00
Compare commits
40 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0387d55624 | |||
| a94ef63a90 | |||
| 90f5e86fdb | |||
| 587529e314 | |||
|
|
8771ec1108 | ||
|
|
1896209b96 | ||
|
|
3c93c9fce0 | ||
|
|
42a958fc4c | ||
|
|
5fa46ff106 | ||
|
|
cca3211023 | ||
|
|
dce7604afb | ||
| 0fd9b519d1 | |||
| 6b7f727449 | |||
| 5abbab9527 | |||
|
|
e03c7debb6 | ||
| 1d2483d18f | |||
| b17f4f0f3e | |||
| 4a91167871 | |||
| 0baaf6f38b | |||
| 42182bb6c9 | |||
| 8455c193eb | |||
| d1faa10a52 | |||
| e28137bf9b | |||
| 1ff5c240c8 | |||
|
|
f893f9c3d7 | ||
|
|
c4e88d98f2 | ||
|
|
0cca387982 | ||
|
|
9e619fb3c5 | ||
|
|
0fdfed2929 | ||
|
|
73c09ec38b | ||
|
|
f901af4f0c | ||
|
|
2a8f53ca98 | ||
|
|
4a95cb1f5e | ||
|
|
482612f889 | ||
|
|
b8977df986 | ||
|
|
ae5e3daea3 | ||
|
|
1c6d832b20 | ||
| 52b39d91a3 | |||
| 3748cd35e5 | |||
| 4d4577c9f8 |
28
.github/workflows/pushimage.yaml
vendored
Normal file
28
.github/workflows/pushimage.yaml
vendored
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Builds the container image and pushes it to the GitHub Container
# Registry whenever a version tag (v*) is pushed.
name: build-push-image

on:
  push:
    tags:
      - 'v*'

jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    # Declaring a permissions map sets every scope that is not listed to
    # "none", so read access to the repository contents has to be granted
    # explicitly next to the package write access.
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Log in to the Container registry
        uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
        with:
          registry: https://ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push Docker image
        uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
        with:
          push: true
          # the image tag is the name of the pushed git tag
          tags: ghcr.io/tlinden/kleingebaeck:${{ github.ref_name }}
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -2,3 +2,4 @@ test
|
|||||||
kleingebaeck
|
kleingebaeck
|
||||||
releases
|
releases
|
||||||
t/out
|
t/out
|
||||||
|
.bak
|
||||||
|
|||||||
27
Dockerfile
Normal file
27
Dockerfile
Normal file
@@ -0,0 +1,27 @@
|
|||||||
|
# Stage 1: compile the binary.
FROM golang:1.21-alpine AS builder

# git and make are required by the build; a single layer keeps the
# package index out of the image.
RUN apk upgrade --no-cache && apk add --no-cache git make

RUN git --version

WORKDIR /work

# Copy only the module files first so that the dependency download is
# cached and not repeated every time a source file changes.
COPY go.mod go.sum ./
RUN go mod download

COPY . .
RUN make

# Stage 2: minimal runtime image containing just the binary.
FROM alpine:latest
LABEL maintainer="Thomas von Dein <git@daemon.de>"

WORKDIR /app
COPY --from=builder /work/kleingebaeck /app/kleingebaeck

# Default output directory, picked up via the KLEINGEBAECK_ environment
# prefix.
ENV KLEINGEBAECK_OUTDIR=/backup
ENV LANG=C.UTF-8

# Run unprivileged.
USER 1001:1001

ENTRYPOINT ["/app/kleingebaeck"]
CMD ["-h"]
|
||||||
52
README.md
52
README.md
@@ -94,19 +94,48 @@ installed - `make`.
|
|||||||
|
|
||||||
To install after building either copy the binary or execute `sudo make install`.
|
To install after building either copy the binary or execute `sudo make install`.
|
||||||
|
|
||||||
|
### Using the docker image
|
||||||
|
|
||||||
|
A pre-built docker image is available, which you can use to test the
|
||||||
|
app without installing it. You need `docker-compose`. Copy the file
|
||||||
|
`docker-compose.yaml` to somewhere, cd to that directory and execute:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
mkdir kleinanzeigen-backup
|
||||||
|
USER_ID=$(id -u) GROUP_ID=$(id -g) OUTDIR=./kleinanzeigen-backup docker-compose run kleingebaeck -u XXX -v
|
||||||
|
```
|
||||||
|
|
||||||
|
`USER_ID` and `GROUP_ID` need to be specified so that you are the
|
||||||
|
owner of the created backups. The backup directory `OUTDIR` must exist
|
||||||
|
prior to the execution, otherwise docker will create it as root, then
|
||||||
|
kleingebaeck will fail. You may also use a `.env` file in the same
|
||||||
|
directory containing the variables, such as:
|
||||||
|
|
||||||
|
```
|
||||||
|
USER_ID=1000
|
||||||
|
GROUP_ID=1000
|
||||||
|
OUTDIR=./kleinanzeigen-backup
|
||||||
|
```
|
||||||
|
|
||||||
|
You may of course also modify the `docker-compose.yaml` to suit your needs.
|
||||||
|
|
||||||
|
If you want to build the image yourself, use the supplied Dockerfile.
|
||||||
|
|
||||||
## Commandline options:
|
## Commandline options:
|
||||||
|
|
||||||
```
|
```
|
||||||
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
|
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
|
||||||
Options:
|
Options:
|
||||||
--user -u <uid> Backup ads from user with uid <uid>.
|
-u --user <uid> Backup ads from user with uid <uid>.
|
||||||
--debug -d Enable debug output.
|
-d --debug Enable debug output.
|
||||||
--verbose -v Enable verbose output.
|
-v --verbose Enable verbose output.
|
||||||
--outdir -o <dir> Set output dir (default: current directory)
|
-o --outdir <dir> Set output dir (default: current directory)
|
||||||
--limit -l <num> Limit the ads to download to <num>, default: load all.
|
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
--config -c <file> Use config file <file> (default: ~/.kleingebaeck).
|
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
--manual -m Show manual.
|
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
--help -h Show usage.
|
-m --manual Show manual.
|
||||||
|
-h --help Show usage.
|
||||||
|
-V --version Show program version.
|
||||||
|
|
||||||
If one or more <ad-listing-url>'s are specified, only backup those,
|
If one or more <ad-listing-url>'s are specified, only backup those,
|
||||||
otherwise backup all ads of the given user.
|
otherwise backup all ads of the given user.
|
||||||
@@ -126,6 +155,13 @@ loglevel = verbose
|
|||||||
outdir = "test"
|
outdir = "test"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
Kleingebaeck can also be configured using environment variables. Just prefix the config variables with `KLEINGEBAECK_` and convert them to upper case, e.g.:
|
||||||
|
```shell
|
||||||
|
% KLEINGEBAECK_OUTDIR=/backup kleingebaeck -v
|
||||||
|
```
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
To setup the tool, you need to lookup your userid on
|
To setup the tool, you need to lookup your userid on
|
||||||
|
|||||||
13
ad.go
13
ad.go
@@ -20,6 +20,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"strings"
|
"strings"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Index struct {
|
type Index struct {
|
||||||
@@ -37,6 +38,7 @@ type Ad struct {
|
|||||||
Created string `goquery:"#viewad-extra-info,text"`
|
Created string `goquery:"#viewad-extra-info,text"`
|
||||||
Text string `goquery:"p#viewad-description-text,html"`
|
Text string `goquery:"p#viewad-description-text,html"`
|
||||||
Images []string `goquery:".galleryimage-element img,[src]"`
|
Images []string `goquery:".galleryimage-element img,[src]"`
|
||||||
|
Expire string
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used by slog to pretty print an ad
|
// Used by slog to pretty print an ad
|
||||||
@@ -49,6 +51,8 @@ func (ad *Ad) LogValue() slog.Value {
|
|||||||
slog.Int("bodysize", len(ad.Text)),
|
slog.Int("bodysize", len(ad.Text)),
|
||||||
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
|
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
|
||||||
slog.String("condition", ad.Condition),
|
slog.String("condition", ad.Condition),
|
||||||
|
slog.String("created", ad.Created),
|
||||||
|
slog.String("expire", ad.Expire),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -67,3 +71,12 @@ func (ad *Ad) Incomplete() bool {
|
|||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (ad *Ad) CalculateExpire() {
|
||||||
|
if len(ad.Created) > 0 {
|
||||||
|
ts, err := time.Parse("02.01.2006", ad.Created)
|
||||||
|
if err == nil {
|
||||||
|
ad.Expire = ts.AddDate(0, 2, 1).Format("02.01.2006")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
45
config.go
45
config.go
@@ -23,9 +23,11 @@ import (
|
|||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/knadh/koanf/parsers/toml"
|
"github.com/knadh/koanf/parsers/toml"
|
||||||
"github.com/knadh/koanf/providers/confmap"
|
"github.com/knadh/koanf/providers/confmap"
|
||||||
|
"github.com/knadh/koanf/providers/env"
|
||||||
"github.com/knadh/koanf/providers/file"
|
"github.com/knadh/koanf/providers/file"
|
||||||
"github.com/knadh/koanf/providers/posflag"
|
"github.com/knadh/koanf/providers/posflag"
|
||||||
"github.com/knadh/koanf/v2"
|
"github.com/knadh/koanf/v2"
|
||||||
@@ -33,17 +35,19 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
VERSION string = "0.1.1"
|
VERSION string = "0.3.0"
|
||||||
Baseuri string = "https://www.kleinanzeigen.de"
|
Baseuri string = "https://www.kleinanzeigen.de"
|
||||||
Listuri string = "/s-bestandsliste.html"
|
Listuri string = "/s-bestandsliste.html"
|
||||||
Defaultdir string = "."
|
Defaultdir string = "."
|
||||||
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\n" +
|
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\n" +
|
||||||
"Category: {{.Category}}\nCondition: {{.Condition}}\nCreated: {{.Created}}\n\n{{.Text}}\n"
|
"Category: {{.Category}}\nCondition: {{.Condition}}\n" +
|
||||||
|
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
|
||||||
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.Id}}\r\n" +
|
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.Id}}\r\n" +
|
||||||
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nCreated: {{.Created}}\r\n\r\n{{.Text}}\r\n"
|
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\n" +
|
||||||
|
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
|
||||||
Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
||||||
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
|
||||||
DefaultAdNameTemplate string = "{{.Slug}}-{{.Id}}"
|
DefaultAdNameTemplate string = "{{.Slug}}"
|
||||||
)
|
)
|
||||||
|
|
||||||
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
|
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
|
||||||
@@ -51,15 +55,17 @@ const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
|
|||||||
Usage: kleingebaeck [-dvVhmoclu] [<ad-listing-url>,...]
|
Usage: kleingebaeck [-dvVhmoclu] [<ad-listing-url>,...]
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
--user -u <uid> Backup ads from user with uid <uid>.
|
-u --user <uid> Backup ads from user with uid <uid>.
|
||||||
--debug -d Enable debug output.
|
-d --debug Enable debug output.
|
||||||
--verbose -v Enable verbose output.
|
-v --verbose Enable verbose output.
|
||||||
--outdir -o <dir> Set output dir (default: current directory)
|
-o --outdir <dir> Set output dir (default: current directory)
|
||||||
--limit -l <num> Limit the ads to download to <num>, default: load all.
|
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
--config -c <file> Use config file <file> (default: ~/.kleingebaeck).
|
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
--manual -m Show manual.
|
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
--help -h Show usage.
|
-f --force Download images even if they already exist.
|
||||||
--version -V Show program version.
|
-m --manual Show manual.
|
||||||
|
-h --help Show usage.
|
||||||
|
-V --version Show program version.
|
||||||
|
|
||||||
If one or more ad listing url's are specified, only backup those,
|
If one or more ad listing url's are specified, only backup those,
|
||||||
otherwise backup all ads of the given user.`
|
otherwise backup all ads of the given user.`
|
||||||
@@ -76,6 +82,8 @@ type Config struct {
|
|||||||
Adnametemplate string `koanf:"adnametemplate"`
|
Adnametemplate string `koanf:"adnametemplate"`
|
||||||
Loglevel string `koanf:"loglevel"`
|
Loglevel string `koanf:"loglevel"`
|
||||||
Limit int `koanf:"limit"`
|
Limit int `koanf:"limit"`
|
||||||
|
IgnoreErrors bool `koanf:"ignoreerrors"`
|
||||||
|
ForceDownload bool `koanf:"force"`
|
||||||
Adlinks []string
|
Adlinks []string
|
||||||
StatsCountAds int
|
StatsCountAds int
|
||||||
StatsCountImages int
|
StatsCountImages int
|
||||||
@@ -127,6 +135,7 @@ func InitConfig(w io.Writer) (*Config, error) {
|
|||||||
f.BoolP("version", "V", false, "show program version")
|
f.BoolP("version", "V", false, "show program version")
|
||||||
f.BoolP("help", "h", false, "show usage")
|
f.BoolP("help", "h", false, "show usage")
|
||||||
f.BoolP("manual", "m", false, "show manual")
|
f.BoolP("manual", "m", false, "show manual")
|
||||||
|
f.BoolP("force", "f", false, "force")
|
||||||
|
|
||||||
if err := f.Parse(os.Args[1:]); err != nil {
|
if err := f.Parse(os.Args[1:]); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -160,7 +169,15 @@ func InitConfig(w io.Writer) (*Config, error) {
|
|||||||
// else: we ignore the file if it doesn't exists
|
// else: we ignore the file if it doesn't exists
|
||||||
}
|
}
|
||||||
|
|
||||||
// command line overrides config file
|
// env overrides config file
|
||||||
|
if err := k.Load(env.Provider("KLEINGEBAECK_", ".", func(s string) string {
|
||||||
|
return strings.Replace(strings.ToLower(
|
||||||
|
strings.TrimPrefix(s, "KLEINGEBAECK_")), "_", ".", -1)
|
||||||
|
}), nil); err != nil {
|
||||||
|
return nil, errors.New("error loading environment: " + err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
// command line overrides env
|
||||||
if err := k.Load(posflag.Provider(f, ".", k), nil); err != nil {
|
if err := k.Load(posflag.Provider(f, ".", k), nil); err != nil {
|
||||||
return nil, errors.New("error loading flags: " + err.Error())
|
return nil, errors.New("error loading flags: " + err.Error())
|
||||||
}
|
}
|
||||||
|
|||||||
22
docker-compose.yaml
Normal file
22
docker-compose.yaml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
version: "3.9"
services:
  # One-shot helper: runs as root and hands the backup directory over to
  # the invoking user before the actual tool starts.
  init:
    image: alpine:latest
    user: "root"
    group_add:
      - '${GROUP_ID}'
    volumes:
      - ${OUTDIR}:/backup
    # owner is USER_ID, group is GROUP_ID (not USER_ID twice)
    command: chown -R ${USER_ID}:${GROUP_ID} /backup

  kleingebaeck:
    container_name: kleingebaeck
    # run with the caller's uid AND gid so created backups belong to them
    user: "${USER_ID}:${GROUP_ID}"
    volumes:
      - ${OUTDIR}:/backup
    working_dir: /backup
    build: .
    image: kleingebaeck:latest
    depends_on:
      init:
        condition: service_completed_successfully
|
||||||
75
fetch.go
Normal file
75
fetch.go
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2023-2024 Thomas von Dein
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"log/slog"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Fetcher is a convenience wrapper to fetch some web content. It bundles
// the program configuration, the HTTP client used for requests, and the
// User-Agent string sent with each request.
|
||||||
|
type Fetcher struct {
|
||||||
|
Config *Config
|
||||||
|
Client *http.Client
|
||||||
|
Useragent string // FIXME: make configurable
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFetcher(c *Config) *Fetcher {
|
||||||
|
return &Fetcher{
|
||||||
|
Client: &http.Client{Transport: &loggingTransport{}}, // implemented in http.go
|
||||||
|
Useragent: Useragent, // default in config.go
|
||||||
|
Config: c,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *Fetcher) Get(uri string) (io.ReadCloser, error) {
|
||||||
|
req, err := http.NewRequest("GET", uri, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
req.Header.Set("User-Agent", f.Useragent)
|
||||||
|
|
||||||
|
res, err := f.Client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if res.StatusCode != 200 {
|
||||||
|
return nil, errors.New("could not get page via HTTP")
|
||||||
|
}
|
||||||
|
|
||||||
|
return res.Body, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// fetch an image
|
||||||
|
func (f *Fetcher) Getimage(uri string) (io.ReadCloser, error) {
|
||||||
|
slog.Debug("fetching ad image", "uri", uri)
|
||||||
|
body, err := f.Get(uri)
|
||||||
|
if err != nil {
|
||||||
|
if f.Config.IgnoreErrors {
|
||||||
|
slog.Info("Failed to download image, error ignored", "error", err.Error())
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return body, nil
|
||||||
|
}
|
||||||
15
go.mod
15
go.mod
@@ -7,25 +7,32 @@ require (
|
|||||||
github.com/jarcoal/httpmock v1.3.1
|
github.com/jarcoal/httpmock v1.3.1
|
||||||
github.com/knadh/koanf/parsers/toml v0.1.0
|
github.com/knadh/koanf/parsers/toml v0.1.0
|
||||||
github.com/knadh/koanf/providers/confmap v0.1.0
|
github.com/knadh/koanf/providers/confmap v0.1.0
|
||||||
|
github.com/knadh/koanf/providers/env v0.1.0
|
||||||
github.com/knadh/koanf/providers/file v0.1.0
|
github.com/knadh/koanf/providers/file v0.1.0
|
||||||
github.com/knadh/koanf/providers/posflag v0.1.0
|
github.com/knadh/koanf/providers/posflag v0.1.0
|
||||||
github.com/knadh/koanf/v2 v2.0.1
|
github.com/knadh/koanf/v2 v2.0.1
|
||||||
github.com/lmittmann/tint v1.0.3
|
github.com/lmittmann/tint v1.0.4
|
||||||
github.com/mattn/go-isatty v0.0.20
|
github.com/mattn/go-isatty v0.0.20
|
||||||
github.com/spf13/pflag v1.0.5
|
github.com/spf13/pflag v1.0.5
|
||||||
|
github.com/tlinden/yadu v0.1.0
|
||||||
golang.org/x/sync v0.5.0
|
golang.org/x/sync v0.5.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/PuerkitoBio/goquery v1.5.0 // indirect
|
github.com/PuerkitoBio/goquery v1.5.1 // indirect
|
||||||
github.com/andybalholm/cascadia v1.0.0 // indirect
|
github.com/andybalholm/cascadia v1.1.0 // indirect
|
||||||
|
github.com/corona10/goimagehash v1.1.0 // indirect
|
||||||
|
github.com/fatih/color v1.16.0 // indirect
|
||||||
github.com/fsnotify/fsnotify v1.6.0 // indirect
|
github.com/fsnotify/fsnotify v1.6.0 // indirect
|
||||||
github.com/knadh/koanf/maps v0.1.1 // indirect
|
github.com/knadh/koanf/maps v0.1.1 // indirect
|
||||||
|
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||||
github.com/mitchellh/copystructure v1.2.0 // indirect
|
github.com/mitchellh/copystructure v1.2.0 // indirect
|
||||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||||
github.com/mitchellh/reflectwalk v1.0.2 // indirect
|
github.com/mitchellh/reflectwalk v1.0.2 // indirect
|
||||||
|
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect
|
||||||
github.com/pelletier/go-toml v1.9.5 // indirect
|
github.com/pelletier/go-toml v1.9.5 // indirect
|
||||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
|
||||||
golang.org/x/sys v0.6.0 // indirect
|
golang.org/x/sys v0.14.0 // indirect
|
||||||
|
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|||||||
32
go.sum
32
go.sum
@@ -1,12 +1,18 @@
|
|||||||
astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw=
|
astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw=
|
||||||
astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
|
astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
|
||||||
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
|
|
||||||
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
|
||||||
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
|
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
|
||||||
|
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
|
||||||
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||||
|
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
|
||||||
|
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
|
||||||
|
github.com/corona10/goimagehash v1.1.0 h1:teNMX/1e+Wn/AYSbLHX8mj+mF9r60R1kBeqE9MkoYwI=
|
||||||
|
github.com/corona10/goimagehash v1.1.0/go.mod h1:VkvE0mLn84L4aF8vCb6mafVajEb6QYMHl2ZJLn0mOGI=
|
||||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
|
||||||
|
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
|
||||||
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
|
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
|
||||||
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
|
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
|
||||||
github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww=
|
github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww=
|
||||||
@@ -17,14 +23,19 @@ github.com/knadh/koanf/parsers/toml v0.1.0 h1:S2hLqS4TgWZYj4/7mI5m1CQQcWurxUz6OD
|
|||||||
github.com/knadh/koanf/parsers/toml v0.1.0/go.mod h1:yUprhq6eo3GbyVXFFMdbfZSo928ksS+uo0FFqNMnO18=
|
github.com/knadh/koanf/parsers/toml v0.1.0/go.mod h1:yUprhq6eo3GbyVXFFMdbfZSo928ksS+uo0FFqNMnO18=
|
||||||
github.com/knadh/koanf/providers/confmap v0.1.0 h1:gOkxhHkemwG4LezxxN8DMOFopOPghxRVp7JbIvdvqzU=
|
github.com/knadh/koanf/providers/confmap v0.1.0 h1:gOkxhHkemwG4LezxxN8DMOFopOPghxRVp7JbIvdvqzU=
|
||||||
github.com/knadh/koanf/providers/confmap v0.1.0/go.mod h1:2uLhxQzJnyHKfxG927awZC7+fyHFdQkd697K4MdLnIU=
|
github.com/knadh/koanf/providers/confmap v0.1.0/go.mod h1:2uLhxQzJnyHKfxG927awZC7+fyHFdQkd697K4MdLnIU=
|
||||||
|
github.com/knadh/koanf/providers/env v0.1.0 h1:LqKteXqfOWyx5Ab9VfGHmjY9BvRXi+clwyZozgVRiKg=
|
||||||
|
github.com/knadh/koanf/providers/env v0.1.0/go.mod h1:RE8K9GbACJkeEnkl8L/Qcj8p4ZyPXZIQ191HJi44ZaQ=
|
||||||
github.com/knadh/koanf/providers/file v0.1.0 h1:fs6U7nrV58d3CFAFh8VTde8TM262ObYf3ODrc//Lp+c=
|
github.com/knadh/koanf/providers/file v0.1.0 h1:fs6U7nrV58d3CFAFh8VTde8TM262ObYf3ODrc//Lp+c=
|
||||||
github.com/knadh/koanf/providers/file v0.1.0/go.mod h1:rjJ/nHQl64iYCtAW2QQnF0eSmDEX/YZ/eNFj5yR6BvA=
|
github.com/knadh/koanf/providers/file v0.1.0/go.mod h1:rjJ/nHQl64iYCtAW2QQnF0eSmDEX/YZ/eNFj5yR6BvA=
|
||||||
github.com/knadh/koanf/providers/posflag v0.1.0 h1:mKJlLrKPcAP7Ootf4pBZWJ6J+4wHYujwipe7Ie3qW6U=
|
github.com/knadh/koanf/providers/posflag v0.1.0 h1:mKJlLrKPcAP7Ootf4pBZWJ6J+4wHYujwipe7Ie3qW6U=
|
||||||
github.com/knadh/koanf/providers/posflag v0.1.0/go.mod h1:SYg03v/t8ISBNrMBRMlojH8OsKowbkXV7giIbBVgbz0=
|
github.com/knadh/koanf/providers/posflag v0.1.0/go.mod h1:SYg03v/t8ISBNrMBRMlojH8OsKowbkXV7giIbBVgbz0=
|
||||||
github.com/knadh/koanf/v2 v2.0.1 h1:1dYGITt1I23x8cfx8ZnldtezdyaZtfAuRtIFOiRzK7g=
|
github.com/knadh/koanf/v2 v2.0.1 h1:1dYGITt1I23x8cfx8ZnldtezdyaZtfAuRtIFOiRzK7g=
|
||||||
github.com/knadh/koanf/v2 v2.0.1/go.mod h1:ZeiIlIDXTE7w1lMT6UVcNiRAS2/rCeLn/GdLNvY1Dus=
|
github.com/knadh/koanf/v2 v2.0.1/go.mod h1:ZeiIlIDXTE7w1lMT6UVcNiRAS2/rCeLn/GdLNvY1Dus=
|
||||||
github.com/lmittmann/tint v1.0.3 h1:W5PHeA2D8bBJVvabNfQD/XW9HPLZK1XoPZH0cq8NouQ=
|
github.com/lmittmann/tint v1.0.4 h1:LeYihpJ9hyGvE0w+K2okPTGUdVLfng1+nDNVR4vWISc=
|
||||||
github.com/lmittmann/tint v1.0.3/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE=
|
github.com/lmittmann/tint v1.0.4/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE=
|
||||||
|
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
|
||||||
|
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
|
||||||
|
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
|
||||||
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
|
||||||
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
|
||||||
github.com/maxatome/go-testdeep v1.12.0 h1:Ql7Go8Tg0C1D/uMMX59LAoYK7LffeJQ6X2T04nTH68g=
|
github.com/maxatome/go-testdeep v1.12.0 h1:Ql7Go8Tg0C1D/uMMX59LAoYK7LffeJQ6X2T04nTH68g=
|
||||||
@@ -35,6 +46,8 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua
|
|||||||
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
|
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
|
||||||
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
|
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
|
||||||
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
|
||||||
|
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ=
|
||||||
|
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
|
||||||
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
|
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
|
||||||
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
|
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
@@ -45,18 +58,27 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+
|
|||||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||||
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
|
||||||
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
|
||||||
|
github.com/tlinden/yadu v0.0.0-20240118202225-ec3f0b7fc355 h1:EmgK+IGUz2m42bFKteLY5SYJLn/CyBrz6nkgS22K8Bk=
|
||||||
|
github.com/tlinden/yadu v0.0.0-20240118202225-ec3f0b7fc355/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
|
||||||
|
github.com/tlinden/yadu v0.1.0 h1:qtCi1jxg392qVRLFyrJ2LYu6/PiKSp1LT02EX+mNLME=
|
||||||
|
github.com/tlinden/yadu v0.1.0/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
|
||||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||||
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||||
|
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=
|
||||||
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||||
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
|
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
|
||||||
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
|
||||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
|
|
||||||
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q=
|
||||||
|
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
|||||||
129
http.go
Normal file
129
http.go
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2023-2024 Thomas von Dein
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"io"
|
||||||
|
"log/slog"
|
||||||
|
"math"
|
||||||
|
"math/rand"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Every HTTP request and its corresponding response are tagged with
// the same artificial "ID" for debugging purposes, so that the pair
// of them can be associated more easily in debug output.
var letters = []rune("ABCDEF0123456789")

// getid returns a random ID made of 8 runes picked from letters.
func getid() string {
	const idlen = 8

	id := make([]rune, 0, idlen)
	for len(id) < idlen {
		id = append(id, letters[rand.Intn(len(letters))])
	}

	return string(id)
}
|
||||||
|
|
||||||
|
// RetryCount is the maximum number of retries after a failed request, that is after an error or an HTTP 502, 503 or 504 response
|
||||||
|
const RetryCount = 3
|
||||||
|
|
||||||
|
// loggingTransport is an http.RoundTripper used to inject debug logging and to implement retries
|
||||||
|
type loggingTransport struct{}
|
||||||
|
|
||||||
|
// backoff returns the escalating delay to wait before the given
// retry: 2^retries seconds, i.e. 1s, 2s, 4s and so on.
func backoff(retries int) time.Duration {
	seconds := math.Pow(2, float64(retries))

	return time.Second * time.Duration(seconds)
}
|
||||||
|
|
||||||
|
// shouldRetry reports whether a request should be repeated. That is
// the case if it failed with an error or if the server answered with
// HTTP 502, 503 or 504. Any other response is considered final.
func shouldRetry(err error, resp *http.Response) bool {
	if err != nil {
		return true
	}

	switch resp.StatusCode {
	case http.StatusBadGateway,
		http.StatusServiceUnavailable,
		http.StatusGatewayTimeout:
		return true
	default:
		return false
	}
}
|
||||||
|
|
||||||
|
// drainBody consumes and closes the body of resp, if there is one, so
// that the underlying connection can be reused for the next request.
// A nil response or a response without a body is ignored.
func drainBody(resp *http.Response) {
	if resp == nil || resp.Body == nil {
		return
	}

	// close the body in any case, even if draining it fails
	defer resp.Body.Close()

	if _, err := io.Copy(io.Discard, resp.Body); err != nil {
		// A failed drain only means that the connection can't be
		// reused, which is no reason to take the whole program down.
		slog.Debug("failed to drain response body", "error", err)
	}
}
|
||||||
|
|
||||||
|
// the actual logging transport with retries
|
||||||
|
func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) {
|
||||||
|
// just requred for debugging
|
||||||
|
id := getid()
|
||||||
|
|
||||||
|
// clone the request body, put into request on retry
|
||||||
|
var bodyBytes []byte
|
||||||
|
if req.Body != nil {
|
||||||
|
bodyBytes, _ = io.ReadAll(req.Body)
|
||||||
|
req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Debug("REQUEST", "id", id, "uri", req.URL, "host", req.Host)
|
||||||
|
|
||||||
|
// first try
|
||||||
|
resp, err := http.DefaultTransport.RoundTrip(req)
|
||||||
|
if err == nil {
|
||||||
|
slog.Debug("RESPONSE", "id", id, "status", resp.StatusCode,
|
||||||
|
"contentlength", resp.ContentLength)
|
||||||
|
}
|
||||||
|
|
||||||
|
// enter retry check and loop, if first req were successfull, leave loop immediately
|
||||||
|
retries := 0
|
||||||
|
for shouldRetry(err, resp) && retries < RetryCount {
|
||||||
|
time.Sleep(backoff(retries))
|
||||||
|
|
||||||
|
// consume any response to reuse the connection.
|
||||||
|
drainBody(resp)
|
||||||
|
|
||||||
|
// clone the request body again
|
||||||
|
if req.Body != nil {
|
||||||
|
req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
|
||||||
|
}
|
||||||
|
|
||||||
|
// actual retry
|
||||||
|
resp, err = http.DefaultTransport.RoundTrip(req)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
slog.Debug("RESPONSE", "id", id, "status", resp.StatusCode,
|
||||||
|
"contentlength", resp.ContentLength, "retry", retries)
|
||||||
|
}
|
||||||
|
|
||||||
|
retries++
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp, err
|
||||||
|
}
|
||||||
142
image.go
Normal file
142
image.go
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
/*
|
||||||
|
Copyright © 2023-2024 Thomas von Dein
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"image/jpeg"
|
||||||
|
"log/slog"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/corona10/goimagehash"
|
||||||
|
)
|
||||||
|
|
||||||
|
const MaxDistance = 3
|
||||||
|
|
||||||
|
type Image struct {
|
||||||
|
Filename string
|
||||||
|
Hash *goimagehash.ImageHash
|
||||||
|
Data *bytes.Buffer
|
||||||
|
Uri string
|
||||||
|
}
|
||||||
|
|
||||||
|
// used for logging to avoid printing Data
|
||||||
|
func (img *Image) LogValue() slog.Value {
|
||||||
|
return slog.GroupValue(
|
||||||
|
slog.String("filename", img.Filename),
|
||||||
|
slog.String("uri", img.Uri),
|
||||||
|
slog.String("hash", img.Hash.ToString()),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Cache holds the diff hashes of all images of an ad
|
||||||
|
type Cache []*goimagehash.ImageHash
|
||||||
|
|
||||||
|
func NewImage(buf *bytes.Buffer, filename string, uri string) *Image {
|
||||||
|
img := &Image{
|
||||||
|
Filename: filename,
|
||||||
|
Uri: uri,
|
||||||
|
Data: buf,
|
||||||
|
}
|
||||||
|
|
||||||
|
return img
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate diff hash of the image
|
||||||
|
func (img *Image) CalcHash() error {
|
||||||
|
jpgdata, err := jpeg.Decode(img.Data)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
hash1, err := goimagehash.DifferenceHash(jpgdata)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
img.Hash = hash1
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// checks if 2 images are similar enough to be considered the same
|
||||||
|
func (img *Image) Similar(hash *goimagehash.ImageHash) bool {
|
||||||
|
distance, err := img.Hash.Distance(hash)
|
||||||
|
if err != nil {
|
||||||
|
slog.Debug("failed to compute diff hash distance", "error", err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if distance < MaxDistance {
|
||||||
|
slog.Debug("distance computation", "image-A", img.Hash.ToString(),
|
||||||
|
"image-B", hash.ToString(), "distance", distance)
|
||||||
|
return true
|
||||||
|
} else {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// check current image against all known hashes.
|
||||||
|
func (img *Image) SimilarExists(cache Cache) bool {
|
||||||
|
for _, otherimg := range cache {
|
||||||
|
if img.Similar(otherimg) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// read all JPG images in a ad directory, compute diff hashes and
|
||||||
|
// store the results in the slice Images
|
||||||
|
func ReadImages(addir string, dont bool) (Cache, error) {
|
||||||
|
files, err := os.ReadDir(addir)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
cache := Cache{}
|
||||||
|
|
||||||
|
if dont {
|
||||||
|
// forced download, -f given
|
||||||
|
return cache, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, file := range files {
|
||||||
|
ext := filepath.Ext(file.Name())
|
||||||
|
if !file.IsDir() && (ext == ".jpg" || ext == ".jpeg" || ext == ".JPG" || ext == ".JPEG") {
|
||||||
|
filename := filepath.Join(addir, file.Name())
|
||||||
|
data, err := ReadImage(filename)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
img := NewImage(data, filename, "")
|
||||||
|
if err = img.CalcHash(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString())
|
||||||
|
cache = append(cache, img.Hash)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//return nil, errors.New("ende")
|
||||||
|
return cache, nil
|
||||||
|
}
|
||||||
@@ -133,7 +133,7 @@
|
|||||||
.\" ========================================================================
|
.\" ========================================================================
|
||||||
.\"
|
.\"
|
||||||
.IX Title "KLEINGEBAECK 1"
|
.IX Title "KLEINGEBAECK 1"
|
||||||
.TH KLEINGEBAECK 1 "2024-01-12" "1" "User Commands"
|
.TH KLEINGEBAECK 1 "2024-01-22" "1" "User Commands"
|
||||||
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
||||||
.\" way too many mistakes in technical documents.
|
.\" way too many mistakes in technical documents.
|
||||||
.if n .ad l
|
.if n .ad l
|
||||||
@@ -142,18 +142,20 @@
|
|||||||
kleingebaeck \- kleinanzeigen.de backup tool
|
kleingebaeck \- kleinanzeigen.de backup tool
|
||||||
.SH "SYNOPSYS"
|
.SH "SYNOPSYS"
|
||||||
.IX Header "SYNOPSYS"
|
.IX Header "SYNOPSYS"
|
||||||
.Vb 11
|
.Vb 10
|
||||||
\& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...]
|
\& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...]
|
||||||
\& Options:
|
\& Options:
|
||||||
\& \-\-user \-u <uid> Backup ads from user with uid <uid>.
|
\& \-u \-\-user <uid> Backup ads from user with uid <uid>.
|
||||||
\& \-\-debug \-d Enable debug output.
|
\& \-d \-\-debug Enable debug output.
|
||||||
\& \-\-verbose \-v Enable verbose output.
|
\& \-v \-\-verbose Enable verbose output.
|
||||||
\& \-\-outdir \-o <dir> Set output dir (default: current directory)
|
\& \-o \-\-outdir <dir> Set output dir (default: current directory)
|
||||||
\& \-\-limit \-l <num> Limit the ads to download to <num>, default: load all.
|
\& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
\& \-\-config \-c <file> Use config file <file> (default: ~/.kleingebaeck).
|
\& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
\& \-\-manual \-m Show manual.
|
\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
\& \-\-help \-h Show usage.
|
\& \-f \-\-force Download images even if they already exist.
|
||||||
\& \-\-version \-V Show program version.
|
\& \-m \-\-manual Show manual.
|
||||||
|
\& \-h \-\-help Show usage.
|
||||||
|
\& \-V \-\-version Show program version.
|
||||||
.Ve
|
.Ve
|
||||||
.SH "DESCRIPTION"
|
.SH "DESCRIPTION"
|
||||||
.IX Header "DESCRIPTION"
|
.IX Header "DESCRIPTION"
|
||||||
@@ -218,6 +220,22 @@ directory. Then just execute \f(CW\*(C`kleingebaeck\*(C'\fR.
|
|||||||
.PP
|
.PP
|
||||||
You can use the \fB\-v\fR option to get verbose output or \fB\-d\fR to enable
|
You can use the \fB\-v\fR option to get verbose output or \fB\-d\fR to enable
|
||||||
debugging.
|
debugging.
|
||||||
|
.SH "ENVIRONMENT VARIABLES"
|
||||||
|
.IX Header "ENVIRONMENT VARIABLES"
|
||||||
|
The following environment variables are considered:
|
||||||
|
.PP
|
||||||
|
.Vb 7
|
||||||
|
\& KLEINGEBAECK_USER
|
||||||
|
\& KLEINGEBAECK_DEBUG
|
||||||
|
\& KLEINGEBAECK_VERBOSE
|
||||||
|
\& KLEINGEBAECK_OUTDIR
|
||||||
|
\& KLEINGEBAECK_LIMIT
|
||||||
|
\& KLEINGEBAECK_CONFIG
|
||||||
|
\& KLEINGEBAECK_IGNOREERRORS
|
||||||
|
.Ve
|
||||||
|
.PP
|
||||||
|
Please note, that they take precedence over config file, but
|
||||||
|
commandline flags take precedence over env!
|
||||||
.SH "BUGS"
|
.SH "BUGS"
|
||||||
.IX Header "BUGS"
|
.IX Header "BUGS"
|
||||||
In order to report a bug, unexpected behavior, feature requests
|
In order to report a bug, unexpected behavior, feature requests
|
||||||
|
|||||||
@@ -7,15 +7,17 @@ NAME
|
|||||||
SYNOPSYS
|
SYNOPSYS
|
||||||
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
|
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
|
||||||
Options:
|
Options:
|
||||||
--user -u <uid> Backup ads from user with uid <uid>.
|
-u --user <uid> Backup ads from user with uid <uid>.
|
||||||
--debug -d Enable debug output.
|
-d --debug Enable debug output.
|
||||||
--verbose -v Enable verbose output.
|
-v --verbose Enable verbose output.
|
||||||
--outdir -o <dir> Set output dir (default: current directory)
|
-o --outdir <dir> Set output dir (default: current directory)
|
||||||
--limit -l <num> Limit the ads to download to <num>, default: load all.
|
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
--config -c <file> Use config file <file> (default: ~/.kleingebaeck).
|
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
--manual -m Show manual.
|
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
--help -h Show usage.
|
-f --force Download images even if they already exist.
|
||||||
--version -V Show program version.
|
-m --manual Show manual.
|
||||||
|
-h --help Show usage.
|
||||||
|
-V --version Show program version.
|
||||||
|
|
||||||
DESCRIPTION
|
DESCRIPTION
|
||||||
This tool can be used to backup ads on the german ad page
|
This tool can be used to backup ads on the german ad page
|
||||||
@@ -73,6 +75,20 @@ SETUP
|
|||||||
You can use the -v option to get verbose output or -d to enable
|
You can use the -v option to get verbose output or -d to enable
|
||||||
debugging.
|
debugging.
|
||||||
|
|
||||||
|
ENVIRONMENT VARIABLES
|
||||||
|
The following environment variables are considered:
|
||||||
|
|
||||||
|
KLEINGEBAECK_USER
|
||||||
|
KLEINGEBAECK_DEBUG
|
||||||
|
KLEINGEBAECK_VERBOSE
|
||||||
|
KLEINGEBAECK_OUTDIR
|
||||||
|
KLEINGEBAECK_LIMIT
|
||||||
|
KLEINGEBAECK_CONFIG
|
||||||
|
KLEINGEBAECK_IGNOREERRORS
|
||||||
|
|
||||||
|
Please note, that they take precedence over config file, but commandline
|
||||||
|
flags take precedence over env!
|
||||||
|
|
||||||
BUGS
|
BUGS
|
||||||
In order to report a bug, unexpected behavior, feature requests or to
|
In order to report a bug, unexpected behavior, feature requests or to
|
||||||
submit a patch, please open an issue on github:
|
submit a patch, please open an issue on github:
|
||||||
|
|||||||
@@ -6,15 +6,17 @@ kleingebaeck - kleinanzeigen.de backup tool
|
|||||||
|
|
||||||
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
|
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
|
||||||
Options:
|
Options:
|
||||||
--user -u <uid> Backup ads from user with uid <uid>.
|
-u --user <uid> Backup ads from user with uid <uid>.
|
||||||
--debug -d Enable debug output.
|
-d --debug Enable debug output.
|
||||||
--verbose -v Enable verbose output.
|
-v --verbose Enable verbose output.
|
||||||
--outdir -o <dir> Set output dir (default: current directory)
|
-o --outdir <dir> Set output dir (default: current directory)
|
||||||
--limit -l <num> Limit the ads to download to <num>, default: load all.
|
-l --limit <num> Limit the ads to download to <num>, default: load all.
|
||||||
--config -c <file> Use config file <file> (default: ~/.kleingebaeck).
|
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
|
||||||
--manual -m Show manual.
|
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
|
||||||
--help -h Show usage.
|
-f --force Download images even if they already exist.
|
||||||
--version -V Show program version.
|
-m --manual Show manual.
|
||||||
|
-h --help Show usage.
|
||||||
|
-V --version Show program version.
|
||||||
|
|
||||||
=head1 DESCRIPTION
|
=head1 DESCRIPTION
|
||||||
|
|
||||||
@@ -76,6 +78,23 @@ directory. Then just execute C<kleingebaeck>.
|
|||||||
You can use the B<-v> option to get verbose output or B<-d> to enable
|
You can use the B<-v> option to get verbose output or B<-d> to enable
|
||||||
debugging.
|
debugging.
|
||||||
|
|
||||||
|
=head1 ENVIRONMENT VARIABLES
|
||||||
|
|
||||||
|
The following environment variables are considered:
|
||||||
|
|
||||||
|
KLEINGEBAECK_USER
|
||||||
|
KLEINGEBAECK_DEBUG
|
||||||
|
KLEINGEBAECK_VERBOSE
|
||||||
|
KLEINGEBAECK_OUTDIR
|
||||||
|
KLEINGEBAECK_LIMIT
|
||||||
|
KLEINGEBAECK_CONFIG
|
||||||
|
KLEINGEBAECK_IGNOREERRORS
|
||||||
|
|
||||||
|
Please note, that they take precedence over config file, but
|
||||||
|
commandline flags take precedence over env!
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
=head1 BUGS
|
=head1 BUGS
|
||||||
|
|
||||||
In order to report a bug, unexpected behavior, feature requests
|
In order to report a bug, unexpected behavior, feature requests
|
||||||
|
|||||||
18
main.go
18
main.go
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
Copyright © 2023 Thomas von Dein
|
Copyright © 2023-2024 Thomas von Dein
|
||||||
|
|
||||||
This program is free software: you can redistribute it and/or modify
|
This program is free software: you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
@@ -26,6 +26,7 @@ import (
|
|||||||
"runtime/debug"
|
"runtime/debug"
|
||||||
|
|
||||||
"github.com/lmittmann/tint"
|
"github.com/lmittmann/tint"
|
||||||
|
"github.com/tlinden/yadu"
|
||||||
)
|
)
|
||||||
|
|
||||||
const LevelNotice = slog.Level(2)
|
const LevelNotice = slog.Level(2)
|
||||||
@@ -84,14 +85,14 @@ func Main(w io.Writer) int {
|
|||||||
if conf.Debug {
|
if conf.Debug {
|
||||||
// we're using a more verbose logger in debug mode
|
// we're using a more verbose logger in debug mode
|
||||||
buildInfo, _ := debug.ReadBuildInfo()
|
buildInfo, _ := debug.ReadBuildInfo()
|
||||||
opts := &tint.Options{
|
opts := &yadu.Options{
|
||||||
Level: logLevel,
|
Level: logLevel,
|
||||||
AddSource: true,
|
AddSource: true,
|
||||||
NoColor: IsNoTty(),
|
//NoColor: IsNoTty(),
|
||||||
}
|
}
|
||||||
|
|
||||||
logLevel.Set(slog.LevelDebug)
|
logLevel.Set(slog.LevelDebug)
|
||||||
handler := tint.NewHandler(w, opts)
|
handler := yadu.NewHandler(w, opts)
|
||||||
debuglogger := slog.New(handler).With(
|
debuglogger := slog.New(handler).With(
|
||||||
slog.Group("program_info",
|
slog.Group("program_info",
|
||||||
slog.Int("pid", os.Getpid()),
|
slog.Int("pid", os.Getpid()),
|
||||||
@@ -101,8 +102,6 @@ func Main(w io.Writer) int {
|
|||||||
slog.SetDefault(debuglogger)
|
slog.SetDefault(debuglogger)
|
||||||
}
|
}
|
||||||
|
|
||||||
// defaultlogger := log.Default()
|
|
||||||
// defaultlogger.SetOutput(w)
|
|
||||||
slog.Debug("config", "conf", conf)
|
slog.Debug("config", "conf", conf)
|
||||||
|
|
||||||
// prepare output dir
|
// prepare output dir
|
||||||
@@ -111,17 +110,20 @@ func Main(w io.Writer) int {
|
|||||||
return Die(err)
|
return Die(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// used for all HTTP requests
|
||||||
|
fetch := NewFetcher(conf)
|
||||||
|
|
||||||
if len(conf.Adlinks) >= 1 {
|
if len(conf.Adlinks) >= 1 {
|
||||||
// directly backup ad listing[s]
|
// directly backup ad listing[s]
|
||||||
for _, uri := range conf.Adlinks {
|
for _, uri := range conf.Adlinks {
|
||||||
err := Scrape(conf, uri)
|
err := ScrapeAd(fetch, uri)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Die(err)
|
return Die(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if conf.User > 0 {
|
} else if conf.User > 0 {
|
||||||
// backup all ads of the given user (via config or cmdline)
|
// backup all ads of the given user (via config or cmdline)
|
||||||
err := Start(conf)
|
err := ScrapeUser(fetch)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return Die(err)
|
return Die(err)
|
||||||
}
|
}
|
||||||
|
|||||||
31
main_test.go
31
main_test.go
@@ -114,6 +114,7 @@ const EMPTYPAGE string = `DOCTYPE html>
|
|||||||
|
|
||||||
const (
|
const (
|
||||||
EMPTYURI string = `https://www.kleinanzeigen.de/s-anzeige/empty/1`
|
EMPTYURI string = `https://www.kleinanzeigen.de/s-anzeige/empty/1`
|
||||||
|
INVALID503URI string = `https://www.kleinanzeigen.de/s-anzeige/503/1`
|
||||||
INVALIDPATHURI string = `https://www.kleinanzeigen.de/anzeige/name/1`
|
INVALIDPATHURI string = `https://www.kleinanzeigen.de/anzeige/name/1`
|
||||||
INVALID404URI string = `https://www.kleinanzeigen.de/anzeige/name/1/foo/bar`
|
INVALID404URI string = `https://www.kleinanzeigen.de/anzeige/name/1/foo/bar`
|
||||||
INVALIDURI string = `https://foo.bar/weird/things`
|
INVALIDURI string = `https://foo.bar/weird/things`
|
||||||
@@ -144,7 +145,13 @@ var tests = []Tests{
|
|||||||
{
|
{
|
||||||
name: "debug",
|
name: "debug",
|
||||||
args: base + " -d",
|
args: base + " -d",
|
||||||
expect: "program_info",
|
expect: "error: invalid or no user id or no ad link specified",
|
||||||
|
exitcode: 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "debug-check-programinfo",
|
||||||
|
args: base + " -d",
|
||||||
|
expect: "pid:",
|
||||||
exitcode: 1,
|
exitcode: 1,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -168,7 +175,7 @@ var tests = []Tests{
|
|||||||
{
|
{
|
||||||
name: "download-single-ad-debug",
|
name: "download-single-ad-debug",
|
||||||
args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -d",
|
args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -d",
|
||||||
expect: "extracted ad listing program_info.pid=",
|
expect: "DEBUG: extracted ad listing",
|
||||||
exitcode: 0,
|
exitcode: 0,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -228,6 +235,12 @@ var invalidtests = []Tests{
|
|||||||
expect: "error loading config file",
|
expect: "error loading config file",
|
||||||
exitcode: 1,
|
exitcode: 1,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "503",
|
||||||
|
args: base + " " + INVALID503URI,
|
||||||
|
expect: "could not get page via HTTP",
|
||||||
|
exitcode: 1,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
type AdConfig struct {
|
type AdConfig struct {
|
||||||
@@ -410,6 +423,12 @@ func InitInvalidSources() []Adsource {
|
|||||||
content: GetTemplate(nil, empty, "<html>HTTP 404: /eine-anzeige/ does not exist!</html>"),
|
content: GetTemplate(nil, empty, "<html>HTTP 404: /eine-anzeige/ does not exist!</html>"),
|
||||||
status: 404,
|
status: 404,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
// valid ad page but 503
|
||||||
|
uri: fmt.Sprintf("%s/s-anzeige/503/1", Baseuri),
|
||||||
|
content: GetTemplate(nil, empty, "<html>HTTP 503: service unavailable</html>"),
|
||||||
|
status: 503,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
return ads
|
return ads
|
||||||
@@ -438,16 +457,18 @@ func SetIntercept(ads []Adsource) {
|
|||||||
|
|
||||||
// we just use 2 images, put this here
|
// we just use 2 images, put this here
|
||||||
for _, image := range []string{"t/1.jpg", "t/2.jpg"} {
|
for _, image := range []string{"t/1.jpg", "t/2.jpg"} {
|
||||||
httpmock.RegisterResponder("GET", image, httpmock.NewBytesResponder(200, GetImage(image)))
|
httpmock.RegisterResponder("GET", image,
|
||||||
|
httpmock.NewBytesResponder(200, GetImage(image)))
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func VerifyAd(ad AdConfig) error {
|
func VerifyAd(ad AdConfig) error {
|
||||||
body := ad.Title + ad.Price + ad.Id + "Kleinanzeigen => " + ad.Category + ad.Condition + ad.Created
|
body := ad.Title + ad.Price + ad.Id + "Kleinanzeigen => " +
|
||||||
|
ad.Category + ad.Condition + ad.Created
|
||||||
|
|
||||||
// prepare ad dir name using DefaultAdNameTemplate
|
// prepare ad dir name using DefaultAdNameTemplate
|
||||||
c := Config{Adnametemplate: DefaultAdNameTemplate}
|
c := Config{Adnametemplate: "{{ .Slug }}"}
|
||||||
adstruct := Ad{Slug: ad.Slug, Id: ad.Id}
|
adstruct := Ad{Slug: ad.Slug, Id: ad.Id}
|
||||||
addir, err := AdDirName(&c, &adstruct)
|
addir, err := AdDirName(&c, &adstruct)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
120
scrape.go
120
scrape.go
@@ -18,11 +18,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"net/http"
|
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@@ -30,46 +29,21 @@ import (
|
|||||||
"golang.org/x/sync/errgroup"
|
"golang.org/x/sync/errgroup"
|
||||||
)
|
)
|
||||||
|
|
||||||
// fetch some web page content
|
|
||||||
func Get(uri string, client *http.Client) (io.ReadCloser, error) {
|
|
||||||
req, err := http.NewRequest("GET", uri, nil)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
req.Header.Set("User-Agent", Useragent)
|
|
||||||
|
|
||||||
res, err := client.Do(req)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
slog.Debug("response", "code", res.StatusCode, "status",
|
|
||||||
res.Status, "size", res.ContentLength)
|
|
||||||
|
|
||||||
if res.StatusCode != 200 {
|
|
||||||
return nil, errors.New("could not get page via HTTP")
|
|
||||||
}
|
|
||||||
|
|
||||||
return res.Body, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// extract links from all ad listing pages (that is: use pagination)
|
// extract links from all ad listing pages (that is: use pagination)
|
||||||
// and scrape every page
|
// and scrape every page
|
||||||
func Start(conf *Config) error {
|
func ScrapeUser(fetch *Fetcher) error {
|
||||||
client := &http.Client{}
|
|
||||||
adlinks := []string{}
|
adlinks := []string{}
|
||||||
|
|
||||||
baseuri := fmt.Sprintf("%s%s?userId=%d", Baseuri, Listuri, conf.User)
|
baseuri := fmt.Sprintf("%s%s?userId=%d", Baseuri, Listuri, fetch.Config.User)
|
||||||
page := 1
|
page := 1
|
||||||
uri := baseuri
|
uri := baseuri
|
||||||
|
|
||||||
slog.Info("fetching ad pages", "user", conf.User)
|
slog.Info("fetching ad pages", "user", fetch.Config.User)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
var index Index
|
var index Index
|
||||||
slog.Debug("fetching page", "uri", uri)
|
slog.Debug("fetching page", "uri", uri)
|
||||||
body, err := Get(uri, client)
|
body, err := fetch.Get(uri)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -96,12 +70,12 @@ func Start(conf *Config) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i, adlink := range adlinks {
|
for i, adlink := range adlinks {
|
||||||
err := Scrape(conf, Baseuri+adlink)
|
err := ScrapeAd(fetch, Baseuri+adlink)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if conf.Limit > 0 && i == conf.Limit-1 {
|
if fetch.Config.Limit > 0 && i == fetch.Config.Limit-1 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -110,8 +84,7 @@ func Start(conf *Config) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// scrape an ad. uri is the full uri of the ad, dir is the basedir
|
// scrape an ad. uri is the full uri of the ad, dir is the basedir
|
||||||
func Scrape(c *Config, uri string) error {
|
func ScrapeAd(fetch *Fetcher, uri string) error {
|
||||||
client := &http.Client{}
|
|
||||||
ad := &Ad{}
|
ad := &Ad{}
|
||||||
|
|
||||||
// extract slug and id from uri
|
// extract slug and id from uri
|
||||||
@@ -124,7 +97,7 @@ func Scrape(c *Config, uri string) error {
|
|||||||
|
|
||||||
// get the ad
|
// get the ad
|
||||||
slog.Debug("fetching ad page", "uri", uri)
|
slog.Debug("fetching ad page", "uri", uri)
|
||||||
body, err := Get(uri, client)
|
body, err := fetch.Get(uri)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -145,34 +118,70 @@ func Scrape(c *Config, uri string) error {
|
|||||||
return errors.New("could not extract ad data from page, got empty struct")
|
return errors.New("could not extract ad data from page, got empty struct")
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Debug("extracted ad listing", "ad", ad)
|
ad.CalculateExpire()
|
||||||
|
|
||||||
// write listing
|
// write listing
|
||||||
addir, err := WriteAd(c, ad)
|
addir, err := WriteAd(fetch.Config, ad)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
c.IncrAds()
|
slog.Debug("extracted ad listing", "ad", ad)
|
||||||
|
|
||||||
return ScrapeImages(c, ad, addir)
|
fetch.Config.IncrAds()
|
||||||
|
|
||||||
|
return ScrapeImages(fetch, ad, addir)
|
||||||
}
|
}
|
||||||
|
|
||||||
func ScrapeImages(c *Config, ad *Ad, addir string) error {
|
func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
|
||||||
// fetch images
|
// fetch images
|
||||||
img := 1
|
img := 1
|
||||||
|
adpath := filepath.Join(fetch.Config.Outdir, addir)
|
||||||
|
|
||||||
|
// scan existing images, if any
|
||||||
|
cache, err := ReadImages(adpath, fetch.Config.ForceDownload)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
g := new(errgroup.Group)
|
g := new(errgroup.Group)
|
||||||
|
|
||||||
for _, imguri := range ad.Images {
|
for _, imguri := range ad.Images {
|
||||||
imguri := imguri
|
imguri := imguri
|
||||||
file := filepath.Join(c.Outdir, addir, fmt.Sprintf("%d.jpg", img))
|
file := filepath.Join(adpath, fmt.Sprintf("%d.jpg", img))
|
||||||
g.Go(func() error {
|
g.Go(func() error {
|
||||||
err := Getimage(imguri, file)
|
body, err := fetch.Getimage(imguri)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
slog.Info("wrote ad image", "image", file)
|
|
||||||
|
|
||||||
|
buf := new(bytes.Buffer)
|
||||||
|
_, err = buf.ReadFrom(body)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
buf2 := buf.Bytes() // needed for image writing
|
||||||
|
|
||||||
|
image := NewImage(buf, "", imguri)
|
||||||
|
err = image.CalcHash()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if !fetch.Config.ForceDownload {
|
||||||
|
if image.SimilarExists(cache) {
|
||||||
|
slog.Debug("similar image exists, not written", "uri", image.Uri)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
err = WriteImage(file, buf2)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
slog.Debug("wrote image", "image", image, "size", len(buf2))
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
img++
|
img++
|
||||||
@@ -182,28 +191,7 @@ func ScrapeImages(c *Config, ad *Ad, addir string) error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
c.IncrImgs(len(ad.Images))
|
fetch.Config.IncrImgs(len(ad.Images))
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// fetch an image
|
|
||||||
func Getimage(uri, fileName string) error {
|
|
||||||
slog.Debug("fetching ad image", "uri", uri)
|
|
||||||
response, err := http.Get(uri)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer response.Body.Close()
|
|
||||||
|
|
||||||
if response.StatusCode != 200 {
|
|
||||||
return errors.New("could not get image via HTTP")
|
|
||||||
}
|
|
||||||
|
|
||||||
err = WriteImage(fileName, response.Body)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
35
store.go
35
store.go
@@ -19,7 +19,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"io"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -86,17 +86,46 @@ func WriteAd(c *Config, ad *Ad) (string, error) {
|
|||||||
return addir, nil
|
return addir, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WriteImage(filename string, reader io.ReadCloser) error {
|
func WriteImage(filename string, buf []byte) error {
|
||||||
file, err := os.Create(filename)
|
file, err := os.Create(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer file.Close()
|
defer file.Close()
|
||||||
|
|
||||||
_, err = io.Copy(file, reader)
|
_, err = file.Write(buf)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ReadImage reads the file filename and returns its content as a
// buffer.
//
// If the file does not exist, an "image ... does not exist" error is
// returned, any other read error is returned as is. The buffer is
// nil in case of an error.
func ReadImage(filename string) (*bytes.Buffer, error) {
	// Read directly instead of checking for existence first: the
	// file could vanish between check and read anyway.
	data, err := os.ReadFile(filename)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, fmt.Errorf("image %s does not exist", filename)
		}

		return nil, err
	}

	// the buffer takes ownership of data, no additional copy required
	return bytes.NewBuffer(data), nil
}
|
||||||
|
|
||||||
|
// fileExists reports whether filename exists and is not a directory.
// A file which can't be examined for whatever reason is reported as
// non-existent.
func fileExists(filename string) bool {
	info, err := os.Stat(filename)
	if err != nil {
		// covers "does not exist" as well as any other error, e.g.
		// permission denied. info is nil in all those cases and
		// must not be used.
		return false
	}

	return !info.IsDir()
}
|
||||||
|
|||||||
Reference in New Issue
Block a user