Compare commits

..

72 Commits

Author SHA1 Message Date
T.v.Dein
6675c4d232 Fix/timeformat (#122)
* Fix #121: confused day with month thanks to time.Format
* Add outdir template variable example
2025-02-10 22:20:25 +01:00
T.v.Dein
46be48af38 Generic attributes (#120)
* fix #117: use a generic attribute parser, still support fixed attrs
2025-02-10 18:20:54 +01:00
T.v.Dein
09948a6b39 add color detail as well (#119)
Co-authored-by: Thomas von Dein <tom@vondein.org>
2025-02-06 20:13:08 +01:00
T.v.Dein
bc01391872 Fix ad condition parsing (#118)
* fix #117: use details slice and pre-set to properly extract condition
* also added the type part of the detail content (original de: "Art")

---------

Co-authored-by: Thomas von Dein <tom@vondein.org>
2025-02-06 13:48:20 +01:00
cd3d00adbe add changelog builder, update release builder 2025-02-05 17:54:47 +01:00
528ecdd43d updated deps 2025-02-01 18:04:25 +01:00
dependabot[bot]
5cb928518d Bump docker/build-push-action from 6.10.0 to 6.13.0
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 6.10.0 to 6.13.0.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](48aba3b46d...ca877d9245)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-02-01 18:03:43 +01:00
dependabot[bot]
d8c7409c7a Bump github.com/lmittmann/tint from 1.0.6 to 1.0.7
Bumps [github.com/lmittmann/tint](https://github.com/lmittmann/tint) from 1.0.6 to 1.0.7.
- [Release notes](https://github.com/lmittmann/tint/releases)
- [Commits](https://github.com/lmittmann/tint/compare/v1.0.6...v1.0.7)

---
updated-dependencies:
- dependency-name: github.com/lmittmann/tint
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-02-01 18:03:33 +01:00
dependabot[bot]
1cd6eb5134 Bump github.com/spf13/pflag from 1.0.5 to 1.0.6
Bumps [github.com/spf13/pflag](https://github.com/spf13/pflag) from 1.0.5 to 1.0.6.
- [Release notes](https://github.com/spf13/pflag/releases)
- [Commits](https://github.com/spf13/pflag/compare/v1.0.5...v1.0.6)

---
updated-dependencies:
- dependency-name: github.com/spf13/pflag
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-02-01 18:03:11 +01:00
9e2983a85c build release binaries using ci workflow 2025-01-18 10:53:41 +01:00
6eddd08e4a added warning about security update 2025-01-17 12:22:20 +01:00
34dfc25e87 bump version 2025-01-17 12:00:21 +01:00
dependabot[bot]
0bc6a0ae59 Bump github.com/lmittmann/tint from 1.0.5 to 1.0.6
Bumps [github.com/lmittmann/tint](https://github.com/lmittmann/tint) from 1.0.5 to 1.0.6.
- [Release notes](https://github.com/lmittmann/tint/releases)
- [Commits](https://github.com/lmittmann/tint/compare/v1.0.5...v1.0.6)

---
updated-dependencies:
- dependency-name: github.com/lmittmann/tint
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-06 18:44:09 +01:00
dependabot[bot]
14c554563a Bump golang.org/x/sync from 0.9.0 to 0.10.0
Bumps [golang.org/x/sync](https://github.com/golang/sync) from 0.9.0 to 0.10.0.
- [Commits](https://github.com/golang/sync/compare/v0.9.0...v0.10.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sync
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-06 18:43:57 +01:00
475a9a2fd3 add test files in other formats 2025-01-06 18:42:19 +01:00
fbd9a5a621 make clear that this is not the complete filename 2025-01-06 18:42:19 +01:00
7014c97dee added image formats test 2025-01-06 18:42:19 +01:00
91edfeb19a support hashing with all formats 2025-01-06 18:42:19 +01:00
3b3435515c check seek err 2025-01-06 18:42:19 +01:00
2239a83f76 properly check image format for storing and distance hashing 2025-01-06 18:42:19 +01:00
31b27beee5 fix error handling when trying to open files 2024-12-13 11:23:24 +01:00
dependabot[bot]
a4be51f498 Bump docker/build-push-action from 6.9.0 to 6.10.0
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 6.9.0 to 6.10.0.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](4f58ea7922...48aba3b46d)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-12-13 11:11:31 +01:00
dependabot[bot]
6b5af984cc Bump golang.org/x/sync from 0.8.0 to 0.9.0
Bumps [golang.org/x/sync](https://github.com/golang/sync) from 0.8.0 to 0.9.0.
- [Commits](https://github.com/golang/sync/compare/v0.8.0...v0.9.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sync
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-12-13 11:09:28 +01:00
dependabot[bot]
f5d3853388 Bump github.com/knadh/koanf/v2 from 2.1.1 to 2.1.2
Bumps [github.com/knadh/koanf/v2](https://github.com/knadh/koanf) from 2.1.1 to 2.1.2.
- [Release notes](https://github.com/knadh/koanf/releases)
- [Commits](https://github.com/knadh/koanf/compare/v2.1.1...v2.1.2)

---
updated-dependencies:
- dependency-name: github.com/knadh/koanf/v2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-12-13 11:08:36 +01:00
07ebd7afad bump version 2024-11-01 12:21:04 +01:00
dependabot[bot]
5e82881b69 Bump github.com/knadh/koanf/providers/file from 1.1.0 to 1.1.2
Bumps [github.com/knadh/koanf/providers/file](https://github.com/knadh/koanf) from 1.1.0 to 1.1.2.
- [Release notes](https://github.com/knadh/koanf/releases)
- [Commits](https://github.com/knadh/koanf/compare/v1.1.0...providers/file/v1.1.2)

---
updated-dependencies:
- dependency-name: github.com/knadh/koanf/providers/file
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-11-01 12:20:45 +01:00
c9a75e3f91 bump version 2024-10-02 10:52:24 +02:00
dependabot[bot]
2fd2028cbe Bump github.com/knadh/koanf/providers/env from 0.1.0 to 1.0.0
Bumps [github.com/knadh/koanf/providers/env](https://github.com/knadh/koanf) from 0.1.0 to 1.0.0.
- [Release notes](https://github.com/knadh/koanf/releases)
- [Commits](https://github.com/knadh/koanf/compare/v0.1.0...v1.0.0)

---
updated-dependencies:
- dependency-name: github.com/knadh/koanf/providers/env
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-10-02 10:49:44 +02:00
dependabot[bot]
766f35d1d5 Bump docker/build-push-action from 6.7.0 to 6.9.0
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 6.7.0 to 6.9.0.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](5cd11c3a4c...4f58ea7922)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-10-02 10:49:17 +02:00
e4edde082b bump version 2024-09-25 15:43:15 +02:00
dependabot[bot]
6b95a01591 Bump github.com/lmittmann/tint from 1.0.4 to 1.0.5
Bumps [github.com/lmittmann/tint](https://github.com/lmittmann/tint) from 1.0.4 to 1.0.5.
- [Release notes](https://github.com/lmittmann/tint/releases)
- [Commits](https://github.com/lmittmann/tint/compare/v1.0.4...v1.0.5)

---
updated-dependencies:
- dependency-name: github.com/lmittmann/tint
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-25 15:41:03 +02:00
dependabot[bot]
2f66758c9f Bump github.com/tlinden/yadu from 0.1.2 to 0.1.3
Bumps [github.com/tlinden/yadu](https://github.com/tlinden/yadu) from 0.1.2 to 0.1.3.
- [Release notes](https://github.com/tlinden/yadu/releases)
- [Commits](https://github.com/tlinden/yadu/compare/v0.1.2...v0.1.3)

---
updated-dependencies:
- dependency-name: github.com/tlinden/yadu
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-25 15:40:20 +02:00
dependabot[bot]
f21b47e14f Bump golang.org/x/sync from 0.5.0 to 0.8.0
Bumps [golang.org/x/sync](https://github.com/golang/sync) from 0.5.0 to 0.8.0.
- [Commits](https://github.com/golang/sync/compare/v0.5.0...v0.8.0)

---
updated-dependencies:
- dependency-name: golang.org/x/sync
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-25 15:37:23 +02:00
dependabot[bot]
14f1c89f05 Bump github.com/knadh/koanf/v2 from 2.0.1 to 2.1.1
Bumps [github.com/knadh/koanf/v2](https://github.com/knadh/koanf) from 2.0.1 to 2.1.1.
- [Release notes](https://github.com/knadh/koanf/releases)
- [Commits](https://github.com/knadh/koanf/compare/v2.0.1...v2.1.1)

---
updated-dependencies:
- dependency-name: github.com/knadh/koanf/v2
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-25 15:36:55 +02:00
dependabot[bot]
b7530843c5 Bump github.com/knadh/koanf/providers/file from 0.1.0 to 1.1.0
Bumps [github.com/knadh/koanf/providers/file](https://github.com/knadh/koanf) from 0.1.0 to 1.1.0.
- [Release notes](https://github.com/knadh/koanf/releases)
- [Commits](https://github.com/knadh/koanf/compare/v0.1.0...v1.1.0)

---
updated-dependencies:
- dependency-name: github.com/knadh/koanf/providers/file
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-25 15:23:50 +02:00
dependabot[bot]
44c5e40466 Bump docker/login-action from 1.10.0 to 3.3.0
Bumps [docker/login-action](https://github.com/docker/login-action) from 1.10.0 to 3.3.0.
- [Release notes](https://github.com/docker/login-action/releases)
- [Commits](f054a8b539...9780b0c442)

---
updated-dependencies:
- dependency-name: docker/login-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-25 15:22:39 +02:00
dependabot[bot]
7b4dd50ebf Bump docker/build-push-action from 2.5.0 to 6.7.0
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 2.5.0 to 6.7.0.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](ad44023a93...5cd11c3a4c)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-25 15:21:43 +02:00
dependabot[bot]
5a6fe0f2fe Bump golang.org/x/net from 0.0.0-20220722155237-a158d28d115b to 0.23.0
Bumps [golang.org/x/net](https://github.com/golang/net) from 0.0.0-20220722155237-a158d28d115b to 0.23.0.
- [Commits](https://github.com/golang/net/commits/v0.23.0)

---
updated-dependencies:
- dependency-name: golang.org/x/net
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-09-25 15:18:58 +02:00
d7f5fbbe58 update other actions as well 2024-09-25 15:13:41 +02:00
ee550b1ed7 use normal Error() func 2024-09-25 15:08:34 +02:00
e35815f27f Update golang-ci linter 2024-09-25 15:05:44 +02:00
9138770d6e added dependabot config 2024-09-25 14:56:47 +02:00
834fbcd9c5 check return of ReadString() 2024-04-26 13:42:28 +02:00
4c10ae89f8 bump version 2024-04-26 13:42:28 +02:00
df6fc47ca3 fix #88: respond accordingly when user double clicks kleingebaeck.exe 2024-04-26 13:42:28 +02:00
5956a68e72 minor critics corrected 2024-04-26 13:42:28 +02:00
eee0167574 added 2024-04-26 13:42:28 +02:00
a3b2748479 fix #87: document image distance hash 2024-04-26 13:42:28 +02:00
8cc5a9e3ed missed commits 2024-02-12 13:36:29 +01:00
d2bcd7b505 fix #80: using os.MkdirAll():
Recursively create ad dir including output dir. The output dir itself
is not being created separately anymore. That way, no directory will
be created if no ads could be downloaded.
2024-02-12 13:32:25 +01:00
c59c2e2931 fix #81: add arm64 build support 2024-02-12 13:32:25 +01:00
2288806105 fix #77: use processed ad dir for duplicate checking, not slug 2024-02-10 15:15:43 +01:00
5a2c277f0e fix #71 and #73: add support for outdir template and enhance docs 2024-02-10 14:44:09 +01:00
612ed2aa79 fix #74: warn if about to write to already visited ad, overwrite if -f 2024-02-10 14:44:09 +01:00
ed78731b3c check seek error 2024-01-27 17:34:44 +01:00
a84f0e1436 get rid of duplicate bytes.Buffer, use bytes.Reader instead, #39 2024-01-27 17:34:44 +01:00
d8d5be5c7d fix #58: add missing dashes to self issue template 2024-01-27 17:34:44 +01:00
bcf920c91e correct #39 add --ignoreerrors flag 2024-01-27 17:34:44 +01:00
T.v.Dein
14f8c3fd43 Fix/linter (#66)
* added lint targets
* fix linter errors
* enhance error handling
* !!BREAKING!! rename Id to ID in tpls
2024-01-25 19:04:15 +01:00
9cd1fc0596 behavior changes: UserAgent configurable, test cookies, check errors 2024-01-24 19:22:31 +01:00
8df3ebfa6d add throttling to image download 2024-01-24 19:22:31 +01:00
de82127223 first step in fixing #49:
fetch cookies from 1st response and use them in subsequent requests.
2024-01-24 19:22:31 +01:00
a79a28f4a1 add contribution guidelines and non-code-of-conduct 2024-01-23 18:01:14 +01:00
95b1172b7f fix typo 2024-01-23 17:26:06 +01:00
6f3954f1c0 added issue templates and fix make show-versions 2024-01-23 14:20:33 +01:00
a465bbfaa8 Added german README, update docker docs, add latest image tag 2024-01-23 14:20:33 +01:00
ddae6ed8be bump version (feature update) 2024-01-22 18:43:44 +01:00
8cced1f9e1 Fix #47: add mock http server for testing w/o hitting prod site 2024-01-22 18:43:44 +01:00
3245438564 update yadu to v0.1.1, fix #46 2024-01-22 18:43:44 +01:00
71c528114f fix linter error 2024-01-22 18:43:44 +01:00
e2afc1350b added -f to override d-hash, better debug and error handling 2024-01-22 18:43:44 +01:00
e971070f9f added image diff hash distance caching to not overwrite similar images 2024-01-22 18:43:44 +01:00
53 changed files with 2049 additions and 358 deletions

31
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@@ -0,0 +1,31 @@
---
name: Bug report
about: Create a report to help us improve
title: "[bug-report]"
labels: bug
assignees: TLINDEN
---
**Description**
<!-- Please provide a clear and concise description of the issue: -->
**Steps To Reproduce**
<!-- Please detail the steps to reproduce the behavior, execute kleingebaeck with the -d option: -->
**Expected behavior**
<!-- What do you expected to happen instead? -->
**Version information**
<!--
Please provide as much version information as possible:
- if you have just installed a binary, provide the output of: kleingebaeck --version
- if you installed from source, provide the output of: make show-version
- provide additional details: operating system and version and shell environment
-->
**Additional informations**

32
.github/ISSUE_TEMPLATE/bug_report_de.md vendored Normal file
View File

@@ -0,0 +1,32 @@
---
name: Bugreport Deutsch
about: Erzeuge einen Bugreport
title: "[bug-report-de]"
labels: bug
assignees: TLINDEN
---
**Beschreibung**
<!-- Bitte beschreibe den Fehler klar und möglichst präzise: -->
**Schritte um den Fehler zu reproduzieren**
<!-- Bitte gib detailiert an, welche konkreten Schritte zum Fehler
geführt haben, führe kleingebaeck mit der Option -d option aus: -->
**Erwartetes Verhalten**
<!-- Welches Verhalten hast Du ursprünglich erwartet? -->
**Versionsinformation**
<!--
Bitte gib uns so viel Versionsinfos wie möglich:
- wenn Du nur das Programm installiert hast: kleingebaeck --version
- wenn Du von Source installiert hast: make show-version
- bitte gib zusätzliche Details an: Betriebssystem + Version, Shellumgebung etc.
-->
**Zusätzliche Informationen**

View File

@@ -0,0 +1,23 @@
---
name: Feature request
about: Suggest a feature
title: "[feature-request]"
labels: feature-request
assignees: TLINDEN
---
**Description**
<!-- Please provide a clear and concise description of the feature you desire: -->
**Version information**
<!--
Just in case the feature is already present, please provide as
much version information as possible:
- if you have just installed a binary, provide the output of: tablizer --version
- if you installed from source, provide the output of: make show-version
- provide additional details: operating system and version and shell environment
-->

View File

@@ -0,0 +1,20 @@
---
name: Featurerequest Deutsch
about: Empfehle ein neues Feature
title: "[feature-request-de]"
labels: feature-request
assignees: TLINDEN
---
**Beschreibung**
<!-- Bitte beschreibe das gewünschte Feature klar und möglichst präzise: -->
**Versionsinformation**
<!--
Bitte gib uns so viel Versionsinfos wie möglich:
- wenn Du nur das Programm installiert hast: kleingebaeck --version
- wenn Du von Source installiert hast: make show-version
- bitte gib zusätzliche Details an: Betriebssystem + Version, Shellumgebung etc.
-->

View File

@@ -0,0 +1,8 @@
---
name: Note to self
about: Internal bugs and wishes
title: "[bug-report]"
labels: bug
assignees: TLINDEN
---

BIN
.github/assets/english.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.5 KiB

BIN
.github/assets/english.xcf vendored Normal file

Binary file not shown.

BIN
.github/assets/german.png vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

BIN
.github/assets/german.xcf vendored Normal file

Binary file not shown.

10
.github/dependabot.yml vendored Normal file
View File

@@ -0,0 +1,10 @@
version: 2
updates:
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "monthly"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"

View File

@@ -10,13 +10,13 @@ jobs:
runs-on: ${{ matrix.os }}
steps:
- name: Set up Go
uses: actions/setup-go@v3
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.version }}
id: go
- name: checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: build
run: go build
@@ -39,9 +39,9 @@ jobs:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/setup-go@v3
- uses: actions/setup-go@v5
with:
go-version: 1.21
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: golangci-lint
uses: golangci/golangci-lint-action@v3
uses: golangci/golangci-lint-action@v6

View File

@@ -15,14 +15,20 @@ jobs:
uses: actions/checkout@v3
- name: Log in to the Container registry
uses: docker/login-action@f054a8b539a109f9f41c372932f1ae047eff08c9
uses: docker/login-action@9780b0c442fbb1117ed29e0efdff1e18412f7567
with:
registry: https://ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push Docker image
uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc
uses: docker/build-push-action@ca877d9245402d1537745e0e356eab47c3520991
with:
push: true
tags: ghcr.io/tlinden/kleingebaeck:${{ github.ref_name}}
- name: Build and push latest Docker image
uses: docker/build-push-action@ca877d9245402d1537745e0e356eab47c3520991
with:
push: true
tags: ghcr.io/tlinden/kleingebaeck:latest

87
.github/workflows/release.yaml vendored Normal file
View File

@@ -0,0 +1,87 @@
name: build-release
on:
push:
tags:
- "v*.*.*"
jobs:
release:
name: Build Release Assets
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Go
uses: actions/setup-go@v5
with:
go-version: 1.22.11
- name: Build the executables
run: ./mkrel.sh kleingebaeck ${{ github.ref_name}}
- name: List the executables
run: ls -l ./releases
- name: Upload the binaries
uses: svenstaro/upload-release-action@v2
with:
repo_token: ${{ secrets.GITHUB_TOKEN }}
tag: ${{ github.ref_name }}
file: ./releases/*
file_glob: true
- name: Build Changelog
id: github_release
uses: mikepenz/release-changelog-builder-action@v5
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
mode: "PR"
configurationJson: |
{
"template": "#{{CHANGELOG}}\n\n**Full Changelog**: #{{RELEASE_DIFF}}",
"pr_template": "- #{{TITLE}} (##{{NUMBER}}) by #{{AUTHOR}}\n#{{BODY}}",
"empty_template": "- no changes",
"categories": [
{
"title": "## New Features",
"labels": ["add", "feature"]
},
{
"title": "## Bug Fixes",
"labels": ["fix", "bug", "revert"]
},
{
"title": "## Documentation Enhancements",
"labels": ["doc"]
},
{
"title": "## Refactoring Efforts",
"labels": ["refactor"]
},
{
"title": "## Miscellaneus Changes",
"labels": []
}
],
"ignore_labels": [
"duplicate", "good first issue", "help wanted", "invalid", "question", "wontfix"
],
"label_extractor": [
{
"pattern": "(.) (.+)",
"target": "$1"
},
{
"pattern": "(.) (.+)",
"target": "$1",
"on_property": "title"
}
]
}
- name: Create Release
uses: softprops/action-gh-release@v2
with:
body: ${{steps.github_release.outputs.changelog}}

3
.gitignore vendored
View File

@@ -3,3 +3,6 @@ kleingebaeck
releases
t/out
.bak
t/httproot/out
t/httproot/kleinanzeigen
t/httproot/favicon.ico

114
CODE_OF_CONDUCT.md Normal file
View File

@@ -0,0 +1,114 @@
# No Code of Conduct
*TL;DR:* This project does **NOT** have a so called Code of Conduct,
nor will it ever have one.
## The Rant
The reasons are somewhat complicated and I'll try my best to document
them here.
Ethical codes or rules come along like laws. But how is ethical or
moral behavior defined? And who defines which behavior is ethical and
which is not? Certainly not me.
Unless you live in a dictatorship (and more than half of the
population on planet earth do as of this writing), laws come into
existence by democratic procedures. Laws cover almost every aspect of
live in a society. Laws allow and forbid behavior and laws sanction
infringements.
A software project like this one on the other hand is not a society.
There are not enough people involved to form democratic
structures. And there will always be a minority of users who have the
right to commit or reject code. How could any maintainer of a software
project dare to decree rules upon others? Actually, am I, the current
maintainer of this very project authorized to do so?
I think the anser to this question clearly is NO.
The issue is being complicated by the fact, that open source
development these days happens on a planetary scale. And this planet
houses hundreds if not thousands of different cultures, philosophies,
ideologies and worldviews. The answer to many ethical questions will
in most cases be vague and nebulous.
Ones joke will always be another ones insult.
Then there is the problem of language. I myself am not an english
native, but I publish everyting using the english language. I am able
to communicate with most people in the open source community because
of that. But I am certainly not able to understand everything and
everyone. There might be nuances to a sentence I don't sense, there
might be sarcastic connotations I don't understand or references to
historical figures, events or traditions I don't know and never have
heard of.
Judging over other peoples online behavior looks like a titanic task
to me. It is just not my job to judge others, I am not legitimized or
authorized to do so and I am not interested in this kind of business.
Another huge problem with ethical rules is that you need to outline
and enforce sanctions on those who violate the rules. But since I am
not an elected authority how would I be able to do this? I don't
know. And what happens if someone complains about myself? Shall I
remove myself from my own project? Come on!
Last but not least there's the law. So, let's say someone in india
writes something insulting to some other developer in an issue. Of
course german law does not apply to indian people. Moreover, the
insult might actually not be an insult in india. In the end, nothing
would happen. Under normal circumstances, maintainers would
eventually delete the posting, ban the user or remove push privileges
etc.
But then, is there a way for the offending user to defend himself? Of
course not, since neither indian or german law alone applies. I cannot
go to a german court and sue the guy and he cannot do the same in
india. Or - we possibly could but the judges in both countries would
just laugh and close the case.
That being said, I don't have the power nor the tools, nor the
authority to enforce serious sanctions of any meaningful kind against
others. Therefore I cannot outline any rules whatsoever.
And let's not even start talking about these undemocratic "comitees"
many projects are forming to circumvent this problem. Some projects
even include external entities like a lawyer or some bureaucrat
somewhere just to have the ability to complain against a comitee
member. What a mess!
## So, what are the ethical rules within this project then?
Well, there are none.
This project is about code, not society. It doesn't matter where you
come from, how you look, how you think, what you believe, who your
friends are, whay you said or did sometime in the past. I don't even
care if you are a human being. You are an alien so bored that you need
to submit code on github? Fine with me. You're a convicted criminal? I
don't give a shit!
**The only thing I am interested here is Code and only Code.**
So if anyhing happens here I don't like or I am obliged by (german!)
law to act on, I will decide on a case to case basis what to do. And
unfortunately, since this is the nature of a github project, you
cannot complain, object or protest. I am very sorry!
If you will, let's at least outline these:
- Please - just please - behave towards others as you'd expect others
to behave towards yourself.
- Don't judge others for any reason.
- Only judge the code.
But these are not rules, only a friendly appeal to you as a developer
and user.
Thanks a lot!

93
CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,93 @@
## Project Goals
The goal of this project is to build a small tool which helps in
maintaining backups of the german ad site kleinanzeigen.de. It should
be small, fast and easy to understand.
There will be no GUI, no web interface, no public API of some sort, no
builtin interpreter.
The programming language used for this project will always be
[GOLANG](https://go.dev/) with the exception of the documentation
([Perl POD](https://perldoc.perl.org/perlpod)) and the Makefile.
# Contributing
You can contribute to this project in various ways:
## Open an issue
If you encounter a problem or don't understand how the program works
or if you think the documentation is unclear, please don't hesitate to
open an issue.
Please add as much information about the case as possible, such as:
- Your environment (operating system etc)
- kleingebaeck version (`kleingebaeck --version`)
- Commandline used. Please replace sensitive information with mock data!
- Repeat the command with debugging enabled (`-d` flag)
- Actual program output, Please replace sensitive information with mock data!
- Expected program output.
- Error message - if any.
Be aware that I am working on this (and some others) project in my
spare time which is scarce. Therefore please don't expect me to
respond to your query within hours or even days. Be patient, but I
WILL respond.
## Pull Requests
Code and documentation help is always much appreciated! Please follow
thes guidelines to successfully contribute:
- Every pull request shall be based on latest `development`
branch. `main` is only used for releases.
- Execute the unit tests before committing: `make test`. There shall
be no errors.
- Strive to be backwards compatible so that users who are already
using the program don't have to change their habits - unless it is
really neccessary.
- Try to add a unit test for your fix, addition or modification.
- Don't ever change existing unit tests!
- Add a meaningful and comprehensive rationale about your contribution:
- Why do you think it might be useful for others?
- What did you actually change or add?
- Is there an open issue which this PR fixes and if so, please link
to that issue.
- [Re-]format your code with `gofmt -s`.
- Avoid unneccesary dependencies, especially for very small functions.
- **If** a new dependency is being added, it must be compatible with
our [license agreement](LICENSE).
- You need to accept that the code or documentation you contribute
will be redistributed under the terms of said license agreement. If
your contribution is considerably large or if you contribute
regularly, then feel free to add your name (and if you want your
email address) to the *AUTHORS* section of the
[manpage](kleingebaeck.pod).
- Adhere to the above mentioned project goals.
- If you are unsure if your addition or change will be accepted,
better ask before starting coding. Open an issue about your proposal
and let's discuss it! That way we avoid doing unnessesary work on
both sides.
Each pull request will be carefully reviewed and if it is a useful
addition it will be accepted. However, please be prepared that
sometimes a PR will be rejected. The reasons may vary and will be
documented. Perhaps the above guidelines are not matched, or the
addition seems to be not so useful from my perspective, maybe there
are too much changes or there might be changes I don't even
understand.
But whatever happens: your contribution is always welcome!

View File

@@ -56,6 +56,15 @@ test: clean
mkdir -p t/out
go test ./... $(ARGS)
testlint: test lint
lint:
golangci-lint run
lint-full:
golangci-lint run --enable-all --exclude-use-default --disable exhaustivestruct,exhaustruct,depguard,interfacer,deadcode,golint,structcheck,scopelint,varcheck,ifshort,maligned,nosnakecase,godot,funlen,gofumpt,cyclop,noctx,gochecknoglobals,paralleltest
gocritic check -enableAll *.go
testfuzzy: clean
go test -fuzz ./... $(ARGS)
@@ -73,12 +82,12 @@ goupdate:
buildall:
./mkrel.sh $(tool) $(VERSION)
release: buildall
gh release create v$(VERSION) --generate-notes releases/*
release:
gh release create v$(VERSION) --generate-notes
show-versions: buildlocal
@echo "### kleingebaeck version:"
@./kleingebaeck -v
@./kleingebaeck -V
@echo
@echo "### go module versions:"
@@ -88,5 +97,5 @@ show-versions: buildlocal
@echo "### go version used for building:"
@grep -m 1 go go.mod
lint:
golangci-lint run -p bugs -p unused
# lint:
# golangci-lint run -p bugs -p unused

355
README-de.md Normal file
View File

@@ -0,0 +1,355 @@
## Kleingebäck - kleinanzeigen.de Backup
![Kleingebaeck Logo](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/kleingebaecklogo-small.png)
[![Go Report Card](https://goreportcard.com/badge/github.com/tlinden/kleingebaeck)](https://goreportcard.com/report/github.com/tlinden/kleingebaeck)
[![Actions](https://github.com/tlinden/kleingebaeck/actions/workflows/ci.yaml/badge.svg)](https://github.com/tlinden/kleingebaeck/actions)
[![Go Coverage](https://github.com/tlinden/kleingebaeck/wiki/coverage.svg)](https://raw.githack.com/wiki/tlinden/kleingebaeck/coverage.html)
![GitHub License](https://img.shields.io/github/license/tlinden/kleingebaeck)
[![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://github.com/TLINDEN/kleingebaeck/releases/latest)
[![English](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/english.png)](https://github.com/tlinden/kleingebaeck/blob/main/README.md)
Mit diesem Tool kann man seine Anzeigen bei https://kleinanzeigen.de sichern.
Es kann alle Anzeigen eines Users (oder nur eine Ausgewählte)
inklusive der Bilder herunterladen, die in einem Verzeichnis pro
Anzeige gespeichert werden. In dem Verzeichnis wird eine Datei
`Adlisting.txt` erstellt, in der sich die Inhalte der Anzeige wie
Titel, Preis, Text etc befinden. Bilder werden natürlich auch heruntergeladen.
## ACHTUNG - SICHERHEITS-UPDATE
Fertige vorcompilierte Programme älter als Version `v0.3.12` sind von
Schwachstellen in der Behandlung von HTTP und Zertifikaten
betroffen. Falls Du eine ältere Kleingebäck-Version im Einsatz hast,
bitte update auf Version `v0.3.12` oder höher. Bitte lies auch die [Release Notes für
v0.3.12](https://github.com/TLINDEN/kleingebaeck/releases/tag/v0.3.12)
für mehr Details.
## Screenshots
Das ist die Hauptseite meines kleinanzeigen.de Accounts:
![Index](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/kleinanzeigen-index.png)
Sichern ich meine Anzeigen:
![Download](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/kleinanzeigen-download.png)
Backupverzeichnis nach dem Download:
![Download](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/kleinanzeigen-backup.png)
Verzeichnis einer Anzeige:
![Download](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/kleinanzeigen-ad.png)
**Das gleiche unter Windows:**
Anzeigen Sichern:
![Download](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/cmd-windows.jpg)
Backupverzeichnis nach dem Download
![Download](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/liste-windows.jpg)
Und eine Anzeige:
![Download](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/adlisting-windows.jpg)
## Installation
Das Tool hat keine weiteren Abhängigkeiten und erfordert auch keine
Anmeldung oder ähnliches. Man kädt sich einfach die ausführbare Datei
für seine Plattform herunter und kann direkt loslegen.
### Installation des vorcompilierten Programms
Auf der Seite [des letzten Releases](https://github.com/TLINDEN/kleingebaeck/releases/latest) findet man das Program für sein Betriebssystem und die Plattform (z.b. Windows + Intel)
Es gibt 2 Varianten:
1. Direkt das fertige Program für seine Plattform+OS herunterladen,
z.B. `kleingebaeck-linux-amd64-0.0.5`, nach `kleingebaeck`
umbenennen und in ein Verzeichnis kopieren, das im `PATH` ist,
(z.B. nach `$HOME/bin` oder als root nach `/usr/local/bin`).
Um sicher zu gehen, dass an dem Program nicht verändert wurde, kann
man die Signatur vergleichen. Für jeden Download gibt es eine dazu
passende Signatur, in unserem Beispiel wäre das
`kleingebaeck-linux-amd64-0.0.5.sha256`.
Zum Verifizieren ausführen:
```shell
cat kleingebaeck-linux-amd64-0.0.5.sha25 && sha256sum kleingebaeck-linux-amd64-0.0.5
```
Man sollte zweimal den gleichen SHA256 Hash sehen.
2. Man kann auch einen Tarball (tgz Dateiendung) herunterladen,
auspacken und mit GNU Make installieren:
```shell
tar xvfz kleingebaeck-linux-amd64-0.0.5.tar.gz
cd kleingebaeck-linux-amd64-0.0.5
sudo make install
```
### Installation aus dem Sourcecode
Man muss eine funktionierende Go Buildumgebung in der Version 1.21
installiert haben, um das Programm selber zu compilieren. GNU Make ist
hilfreich, aber nicht unbedingt erforderlich.
Um das Programm zu compilieren, muss man folgende Schritte ausführen:
```shell
git clone https://github.com/TLINDEN/kleingebaeck.git
cd kleingebaeck
go mod tidy
make # (oder make)
sudo make install
```
### Docker image benutzen
Ein fertiges Dockerimage mit der aktuellen Programmversion ist immer
verfügbar. Man kann damit z.B. das Tool testen, bevor man es dauerhaft
benutzen möchte.
Um das Image herunterzuladen:
```
docker pull ghcr.io/tlinden/kleingebaeck:latest
```
Um kleingebäck im Image auszuführen und Daten ins lokale Filesystem zu
sichern, kann man so vorgehen:
```shell
mkdir anzeigen
docker run -u `id -u $USER` -v ./anzeigen:/backup ghcr.io/tlinden/kleingebaeck:latest -u XXX -v
ls -l anzeigen/ein-buch-mit-leeren-seiten
total 792
drwxr-xr-x 2 scip root 4096 Jan 23 12:58 ./
drwxr-xr-x 3 scip scip 4096 Jan 23 12:58 ../
-rw-r--r-- 1 scip root 131650 Jan 23 12:58 1.jpg
-rw-r--r-- 1 scip root 81832 Jan 23 12:58 2.jpg
-rw-r--r-- 1 scip root 134050 Jan 23 12:58 3.jpg
-rw-r--r-- 1 scip root 1166 Jan 23 12:58 Adlisting.txt
```
Hier wird der aktuelle User auf den User im Image gemappt und das
lokale Verzeichnis `anzeigen` nach `/backup` innerhalb des Images
gemountet.
Die Optionen `-u XXX -v` sind kleingebäck Optionen. Ersetze `XXX`
durch Deine tatsächliche kleinanzeigen.de Userid.
Eine Liste verfügbarer Images findet man [hier](https://github.com/tlinden/kleingebaeck/pkgs/container/kleingebaeck/versions?filters%5Bversion_type%5D=tagged)
## Kommandozeilen Optionen:
```
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
Options:
-u --user <uid> Backup ads from user with uid <uid>.
-d --debug Enable debug output.
-v --verbose Enable verbose output.
-o --outdir <dir> Set output dir (default: current directory)
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
If one or more <ad-listing-url>'s are specified, only backup those,
otherwise backup all ads of the given user.
```
## Konfiguration
Man kann anstelle von Kommandlineoptionen auch eine
Konfigurationsdatei verwenden. Sie befindet sich standardmäßig in
`~/.kleingebaeck` aber man kann mit dem Parameter `-c` auch eine
andere Datei angeben.
Das Format (TOML) ist einfach:
```
user = 1010101
loglevel = verbose
outdir = "test"
```
Im Source gibt es eine Beispieldatei `example.conf` mit Kommentaren.
## Umgebungsvariablen
Man kann darüber hinaus auch Umgebungsvariablen verwenden. Sie
entsprechen den Konfigurationsoptionen, aber gross geschrieben mit dem
Präfix `KLEINGEBAECK_`, z.B.
```shell
% KLEINGEBAECK_OUTDIR=/backup kleingebaeck -v
```
## Benutzung
Um das Tool einsetzen zu können, muss man zunächst seine Userid bei
kleinanzeigen.de herausfinden. Dazu ruft man am besten die Liste
seiner Anzeigen auf, während man NICHT eingeloggt ist:
https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
Der `XXXXX` Teil der URL ist die Userid.
Trage diese Userid in der Konfigurationsdatei ein wie oben
beschrieben. Gib ausserdem das Ausgabeverzeichnis an. Dann einfach nur
`kleingebaeck` ausführen.
Innerhalb des Ausgabeverzeichnisses wird sich dann pro Anzeige ein
Unterverzeichnis befinden. Pro Anzeige gibt es eine Datei
`Adlisting.txt`, die etwa so aussieht:
```default
Title: A book I sell
Price: 99 € VB
Id: 1919191919
Category: Sachbücher
Condition: Sehr Gut
Created: 10.12.2023
This is the description text.
Pay with paypal.
```
Sowie alle Bilder.
Das Format kann man mit der Variable `template` in der Konfiguration
ändern. Die `example.conf` enthält ein Beispiel für das Standard Template.
## Verhalten des Tools
Es gibt einige Dinge über das Verhalten von kleingebäck, über die Du
Bescheid wissen solltest:
- alle HTML Seiten und Bilder werden immer heruntergeladen
- es wird ein (konfigurierbarer) Useragent verwendet
- HTTP Cookies werden beachtet
- bei Fehlern wird dreimal mit unterschiedlichem Abstand erneut
versucht
- Bilder Downloads laufen parallelisiert mit leicht unterschiedlichen
zeitlichen Abständen ab
- Gleich aussehende Bilder werden nicht überschrieben
Der letzte Punkt muss genauer erläutert werden:
Wenn man bei Kleinanzeigen.de eine Anzeige einstellt und Bilder
postet, werden diese dort in ihrer Grösse reduziert (durch Kompression
und Verkleinerung der Bilder usw.). Diese reduzierten Bilder werden
dann von kleingebäck heruntergeladen. Falls Du Deine original Bilder
behalten hast, kannst Du diese danach in das Backupverzeichnis
kopieren. Bei einem erneuten kleingebäck-Lauf werden diese Bilder dann
nicht überschrieben.
Wir verwenden dafür einen Algorythmus namens [distance
hashing](https://github.com/corona10/goimagehash). Dieser Algorithmus
prüft die Ähnlichkeit von Bildern. Diese können in ihrer Auflösung,
Kompression, Farbtiefe und vielem mehr manipuliert worden sein und
trotzdem als das "gleiche Bild" erkannt werden (wohlgemerkt nicht "das
selbe": die Dateien sind durchaus unterschiedlich!). Bis zu einer
Distance von 5 überschreiben wir keine Bilder, weil wir dann davon
ausgehen, dass das lokal Vorhandene das Original ist.
Bitte beachte aber, dass dies KEIN Cachingmechanismus ist: die Bilder
werden trotzdem immer alle heruntergeladen. Das muss so sein, da wir
uns nicht die Dateinamen anschauen können, da kleinanzeigen.de diese
nämlich zu Zahlen umbenennt. Und die Dateinamen können sich auch
ändern, wenn der User in der Anzeige die Bilder umarrangiert hat.
Du kannst dieses Verhalten mit der Option **--force** ausschalten. Du
kannst ausserdem mit der Option **--ignoreerrors** auch alle Fehler
ignorieren, die beim Bilderdownload auftreten könnten.
## Documentation
Die Dokumentation kann man
[online](https://github.com/TLINDEN/kleingebaeck/blob/main/kleingebaeck.pod)
oder lokal lesen mit: `kleingebaeck --manual`. Hat man das Tool mit
dem Tarball installiert, funktioniert auch `man kleingebaeck`.
## Kleingebäck?
Der Name kommt von "kleinanzeigen backup", verkürzt "klein back", das
englisch ausgesprochene "back" (deutsch bäck) führt dann zu "Kleingebäck".
## Wo bekommt man Hilfe
Obwohl ich gerne von kleingebäck Benutzern in privaten Mails höre, ist
das doch der beste Weg, die Anfrage zu übersehen und zu vergessen.
Um einen Fehler, ein unerwartetes Verhalten, eine Feature Request oder
einen Patch zu übermitteln, eröffne daher bitte einen Issue unter:
https://github.com/TLINDEN/kleingebaeck/issues. Danke!
Bitte gebe den fehlgeschlagenen Befehl an, rufe es auch mit Debugging
`-d` auf.
## Ähnliche Projekte
Ich konnte kein Projekt finden, das speziell dafür geeignet ist,
Anzeigen bei kleinanzeigen.de zu sichern.
Aber es gibt ein Projekt, mit dem man ebenfalls Backups erstellen
kann: [kleinanzeigen-bot](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/).
Aber Vorsicht: kleinanzeigen.de bekämpft Bots aktiv, mit diesem hier
gibt es regelmäßige Probleme, z.B.:
[issue](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/219).
Das Hauptproblem ist, dass diese Art von Bot sich mit Deinem Account
aktiv einloggt und mit der Seite interagiert. Damit kann die Firma die
Aktivitäten recht einfach Deinem User zuordnen und diesen **sperren**!
Also sei bitte vorsichtig!
**Kleingebäck** erfordert keinen Login, es verwendet lediglich die
öffentlich verfügbare Webseite und ruft diese auf, wie ein normaler
Browser. Tatsächlich gibt es meiner Meinung nach keinen Unterschied zu
einem Browserclient: beide laufen auf Anwenderseite auf Initiative
eines Benutzers. Und mit welchen Browser ich eine Webseite aufrufe,
bleibt immer noch mir überlassen und muss mir nicht von irgendwem
vorgeschrieben werden. Das schliesst die Verwendung von Kleingebäck
mit ein.
Hinzu kommt, dass dieses Tool nicht dazu gedacht ist, rund um die Uhr
zu laufen. Man ruft es ab und zu mal auf, wenn man halt neue Anzeigen
eingestellt hat, vielleicht einmal die Woche oder so. Man weiss ja
selber, wann man was geändert hat. Man benötigt trotzdem den Zugriff
mit dem Browser oder der mobilen App um Kleinanzeigen.de verwalten zu
können.
Meiner Ansicht nach ist das Risiko also sehr minimal, es handelt sich
meiner Meinung nach auch nicht um eine Verletzung der AGBs dort. Aber
das ist nur meine persönliche Meinung, bitte beachtet das. Am Ende
müsst Ihr selbst einschätzen und beurteilen wie hoch Ihr das Risiko
seht und ob Ohr es eingehen möchtet. Für eventuell auftretende
Konsequenzen bin ich nicht verantwortlich. Siehe auch [GPL Lizenz](LICENSE).
Es gibt noch ein weiteres Tool namens
[kleinanzeigen-enhanded](https://kleinanzeigen-enhanced.de/). Das ist
eine kostenpflichtige vollständige Anzeigenverwaltung für
Profinutzer. Man muss eine monatliche Abogebühr bezahlen. Das Tool
ist als Browsererweiterung für Google Chrome implementiert, was
erklärt, warum sie Anzeigen erstellen, ändern und löschen können,
obwohl es gar keine öffentliche API gibt. Sieht nach einer netten
ausgereiften Lösung aus. Mit Backups.
## Copyright und License
Lizensiert unter der GNU GENERAL PUBLIC LICENSE Version 3.
## Autor
T.v.Dein <tom AT vondein DOT org>

View File

@@ -7,7 +7,9 @@
[![Go Coverage](https://github.com/tlinden/kleingebaeck/wiki/coverage.svg)](https://raw.githack.com/wiki/tlinden/kleingebaeck/coverage.html)
![GitHub License](https://img.shields.io/github/license/tlinden/kleingebaeck)
[![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://github.com/TLINDEN/kleingebaeck/releases/latest)
[![German](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/german.png)](https://github.com/tlinden/kleingebaeck/blob/main/README-de.md)
[Die deutsche Version des READMEs findet Ihr hier](README-de.md).
This tool can be used to backup ads on the german ad page https://kleinanzeigen.de
@@ -16,6 +18,15 @@ directory, each ad into its own subdirectory. The backup will contain
a textfile `Adlisting.txt` which contains the ad contents as the
title, body, price etc. All images will be downloaded as well.
## CAUTION - SECURITY UPDATE
Binary releases prior to version `v0.3.11` are affected by
vulnerabilities in HTTP and certificate handling. If you are using
such a binary, please update to `v0.3.12` or higher. Please also refer
to the [Release Notes of
v0.3.12](https://github.com/TLINDEN/kleingebaeck/releases/tag/v0.3.12)
for more details.
## Screenshots
This is the index of my kleinanzeigen.de Account:
@@ -60,7 +71,7 @@ Go to the [latest release
page](https://github.com/TLINDEN/kleingebaeck/releases/latest) and
look for your OS and platform. There are two options to install the binary:
1. Directly download the binary for your platoform,
1. Directly download the binary for your platform,
e.g. `kleingebaeck-linux-amd64-0.0.5`, rename it to `kleingebaeck`
(or whatever you like more!) and put it into your bin dir
(e.g. `$HOME/bin` or as root to `/usr/local/bin`).
@@ -97,29 +108,37 @@ To install after building either copy the binary or execute `sudo make install`.
### Using the docker image
A pre-built docker image is available, which you can use to test the
app without installing it. You need `docker-compose`. Copy the file
`docker-compose.yaml` to somewhere, cd to that directory and execute:
app without installing it. To download:
```shell
mkdir kleinanzeigen-backup
USER_ID=$(id -u) GROUP_ID=$(id -g) OUTDIR=./kleinanzeigen-backup docker-compose run kleingebaeck -u XXX -v
docker pull ghcr.io/tlinden/kleingebaeck:latest
```
`USER_ID` and `GROUP_ID` needs to be specified so that you are the
owner of the created backups. The backup directory `OUTDIR` must exist
prior to the execution, otherwise docker will create it as root, then
kleingebaeck will fail. You may also use a `.env` file in the same
directory containing the variables, such as:
To execute kleingebaeck inside the image and download ads to a local
directory, do something like this:
```
USER_ID=1000
GROUP_ID=1000
OUTDIR=./kleinanzeigen-backup
```shell
mkdir myads
docker run -u `id -u $USER` -v ./myads:/backup ghcr.io/tlinden/kleingebaeck:latest -u XXX -v
ls -l myads/ein-buch-mit-leeren-seiten
total 792
drwxr-xr-x 2 scip root 4096 Jan 23 12:58 ./
drwxr-xr-x 3 scip scip 4096 Jan 23 12:58 ../
-rw-r--r-- 1 scip root 131650 Jan 23 12:58 1.jpg
-rw-r--r-- 1 scip root 81832 Jan 23 12:58 2.jpg
-rw-r--r-- 1 scip root 134050 Jan 23 12:58 3.jpg
-rw-r--r-- 1 scip root 1166 Jan 23 12:58 Adlisting.txt
```
You may of course also modify the `docker-compose.yaml` to suit your needs.
We map the local user to the one inside the image so the permission
will match. You'll need to create the directory first before executing
docker run. And the local directory `myads` will be mapped to
`/backup` inside the container.
If you want to build the image yourself, use the supplied Dockerfile.
The options `-u XXX -v` are kleingebaeck options, replace `XXX` with
your actual kleinanzeigen.de user id.
A list of available images is [here](https://github.com/tlinden/kleingebaeck/pkgs/container/kleingebaeck/versions?filters%5Bversion_type%5D=tagged)
## Commandline options:
@@ -185,6 +204,7 @@ Price: 99 € VB
Id: 1919191919
Category: Sachbücher
Condition: Sehr Gut
Type: Buch
Created: 10.12.2023
This is the description text.
@@ -197,6 +217,48 @@ variable. The supplied sample config contains the default template.
All images will be stored in the same directory.
## Tool Behavior
There are a bunch of things you might want to know about the behavior
of the kleingebäck tool:
- all HTML pages and IMAGEs are always being downloaded
- we use a (customizable) user agent
- we respect HTTP cookies
- in the case of an error, the tool does 3 retries, the time it waits
between tries is longer for each retry
- image download is parallized using small time differences to look
more natural
- same images are not being overwritten on subsequent download
The latter needs to be elaborated a bit more:
If you publish an ad on kleinanzeigen.de and post images, those images
will be reduced in size by the site (by compressing and down sizing
them). This reduced images will be downloaded by kleingebäck. However,
you may still own the original images and may want to put them into
that backup directory so that you have all things for one ad together.
You can easily do that, because kleingebäck won't overwrite those
original images. It uses something called a distance hash using
[goimagehash](https://github.com/corona10/goimagehash). This
algorithmus checks the similarity of images. If an image has been
resized it is still very similar to the original one. We accept a
maximum of a distance of 5, everything above leads to overwrite.
This works with resizes, cropped and otherwise manipulated images as
long as the image still shows the original contents good enough.
Also note, that this is NOT a caching mechanism: the images will be
downloaded anyway during each run. We also can't look at the file
names because kleinanzeigen.de renames all images to numbers. And
those might even change if the user re-arranges the images.
You can override this behavior using the **--force** option. Another
option, **--ignoreerrors**, can be used to ignore all kinds of image
errors.
## Documentation
You can read the documentation [online](https://github.com/TLINDEN/kleingebaeck/blob/main/kleingebaeck.pod) or locally once you have installed kleingebaeck with: `kleingebaeck --manual`.
@@ -246,9 +308,9 @@ daily basis. You cannot use it to view regular ads or maintain your
own ads. You'll need to use the mobile app or the browser page with a
login. So, in my point of view, the risk is very minimal.
There is another Tool available named [kleinanzeigen-enhanded](https://kleinanzeigen-enhanced.de/). It is a complete Ad management system targeting primarily commercial users. You have to pay a monthly fee, perhaps there's also a free version available, but I haven't checked. The tool is implemented as a Chrome browser extension, which explains why it was possible to implement it without an API. It seems to be a nice solution for power users by the looks of it. And it includes backups.
There is another Tool available named [kleinanzeigen-enhanced](https://kleinanzeigen-enhanced.de/). It is a complete Ad management system targeting primarily commercial users. You have to pay a monthly fee, perhaps there's also a free version available, but I haven't checked. The tool is implemented as a Chrome browser extension, which explains why it was possible to implement it without an API. It seems to be a nice solution for power users by the looks of it. And it includes backups.
## Copyright und License
## Copyright and License
Licensed under the GNU GENERAL PUBLIC LICENSE version 3.

17
SECURITY.md Normal file
View File

@@ -0,0 +1,17 @@
# Security Policy
## Supported Versions
Only the latest release is supported. If you find an issue (any
issue!), please check with the latest release first.
## Reporting a Vulnerability
I don't agree with the "responsible disclosure" process most projects
(and companies) work these days.
So, if you find a vulnerability of any kind, please just open an
[issue](https://github.com/TLINDEN/kleingebaeck/issues). Please add
all details required to reproduce the vulnerability. You won't be chased.
That's just all about it.

80
ad.go
View File

@@ -1,5 +1,5 @@
/*
Copyright © 2023 Thomas von Dein
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"bufio"
"log/slog"
"strings"
"time"
@@ -30,8 +31,13 @@ type Index struct {
type Ad struct {
Title string `goquery:"h1"`
Slug string
Id string
Condition string `goquery:".addetailslist--detail--value,text"`
ID string
Details string `goquery:".addetailslist--detail,text"`
Attributes map[string]string // processed afterwards
Condition string // post processed from details for backward compatibility
Type string // post processed from details for backward compatibility
Color string // post processed from details for backward compatibility
Material string // post processed from details for backward compatibility
Category string
CategoryTree []string `goquery:".breadcrump-link,text"`
Price string `goquery:"h2#viewad-price"`
@@ -46,11 +52,10 @@ func (ad *Ad) LogValue() slog.Value {
return slog.GroupValue(
slog.String("title", ad.Title),
slog.String("price", ad.Price),
slog.String("id", ad.Id),
slog.String("id", ad.ID),
slog.Int("imagecount", len(ad.Images)),
slog.Int("bodysize", len(ad.Text)),
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
slog.String("condition", ad.Condition),
slog.String("created", ad.Created),
slog.String("expire", ad.Expire),
)
@@ -73,10 +78,71 @@ func (ad *Ad) Incomplete() bool {
}
func (ad *Ad) CalculateExpire() {
if len(ad.Created) > 0 {
if ad.Created != "" {
ts, err := time.Parse("02.01.2006", ad.Created)
if err == nil {
ad.Expire = ts.AddDate(0, 2, 1).Format("02.01.2006")
ad.Expire = ts.AddDate(0, ExpireMonths, ExpireDays).Format("02.01.2006")
}
}
}
/*
Decode attributes like color or condition. See
https://github.com/TLINDEN/kleingebaeck/issues/117
for more details. In short: the HTML delivered by
kleinanzeigen.de has no css attribute for the keys
so we cannot extract key=>value mappings of the
ad details but have to parse them manually.
The ad.Details member contains this after goq run:
Art
Weitere Kinderzimmermöbel
Farbe
Holz
Zustand
In Ordnung
We parse this into ad.Attributes and fill in some
static members for backward compatibility reasons.
*/
func (ad *Ad) DecodeAttributes() {
rd := strings.NewReader(ad.Details)
scanner := bufio.NewScanner(rd)
isattr := true
attr := ""
attrmap := map[string]string{}
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if line == "" {
continue
}
if isattr {
attr = line
} else {
attrmap[attr] = line
}
isattr = !isattr
}
ad.Attributes = attrmap
switch {
case Exists(ad.Attributes, "Zustand"):
ad.Condition = ad.Attributes["Zustand"]
case Exists(ad.Attributes, "Farbe"):
ad.Color = ad.Attributes["Farbe"]
case Exists(ad.Attributes, "Art"):
ad.Type = ad.Attributes["Type"]
case Exists(ad.Attributes, "Material"):
ad.Material = ad.Attributes["Material"]
}
}

138
config.go
View File

@@ -1,5 +1,5 @@
/*
Copyright © 2023-2024 Thomas von Dein
Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -17,7 +17,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"errors"
"fmt"
"io"
"os"
@@ -35,21 +34,49 @@ import (
)
const (
VERSION string = "0.2.0"
Baseuri string = "https://www.kleinanzeigen.de"
Listuri string = "/s-bestandsliste.html"
Defaultdir string = "."
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.Id}}\n" +
"Category: {{.Category}}\nCondition: {{.Condition}}\n" +
VERSION string = "0.3.17"
Baseuri string = "https://www.kleinanzeigen.de"
Listuri string = "/s-bestandsliste.html"
Defaultdir string = "."
/*
Also possible: loop through .Attributes:
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
*/
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.Id}}\r\n" +
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\n" +
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" +
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
DefaultAdNameTemplate string = "{{.Slug}}"
DefaultOutdirTemplate string = "."
// for image download throttling
MinThrottle int = 2
MaxThrottle int = 20
// we extract the slug from the uri
SlugURIPartNum int = 6
ExpireMonths int = 2
ExpireDays int = 1
WIN string = "windows"
)
var DirsVisited map[string]int
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
Usage: kleingebaeck [-dvVhmoclu] [<ad-listing-url>,...]
@@ -62,7 +89,7 @@ Options:
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-f --force Overwrite images and ads even if the already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
@@ -84,6 +111,7 @@ type Config struct {
Limit int `koanf:"limit"`
IgnoreErrors bool `koanf:"ignoreerrors"`
ForceDownload bool `koanf:"force"`
UserAgent string `koanf:"useragent"` // conf only
Adlinks []string
StatsCountAds int
StatsCountImages int
@@ -98,54 +126,58 @@ func (c *Config) IncrImgs(num int) {
}
// load commandline flags and config file
func InitConfig(w io.Writer) (*Config, error) {
var k = koanf.New(".")
func InitConfig(output io.Writer) (*Config, error) {
var kloader = koanf.New(".")
// determine template based on os
template := DefaultTemplate
if runtime.GOOS == "windows" {
if runtime.GOOS == WIN {
template = DefaultTemplateWin
}
// Load default values using the confmap provider.
if err := k.Load(confmap.Provider(map[string]interface{}{
if err := kloader.Load(confmap.Provider(map[string]interface{}{
"template": template,
"outdir": ".",
"outdir": DefaultOutdirTemplate,
"loglevel": "notice",
"userid": 0,
"adnametemplate": DefaultAdNameTemplate,
"useragent": DefaultUserAgent,
}, "."), nil); err != nil {
return nil, err
return nil, fmt.Errorf("failed to load default values into koanf: %w", err)
}
// setup custom usage
f := flag.NewFlagSet("config", flag.ContinueOnError)
f.Usage = func() {
fmt.Fprintln(w, Usage)
flagset := flag.NewFlagSet("config", flag.ContinueOnError)
flagset.Usage = func() {
fmt.Fprintln(output, Usage)
os.Exit(0)
}
// parse commandline flags
f.StringP("config", "c", "", "config file")
f.StringP("outdir", "o", "", "directory where to store ads")
f.IntP("user", "u", 0, "user id")
f.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)")
f.BoolP("verbose", "v", false, "be verbose")
f.BoolP("debug", "d", false, "enable debug log")
f.BoolP("version", "V", false, "show program version")
f.BoolP("help", "h", false, "show usage")
f.BoolP("manual", "m", false, "show manual")
f.BoolP("force", "f", false, "force")
flagset.StringP("config", "c", "", "config file")
flagset.StringP("outdir", "o", "", "directory where to store ads")
flagset.IntP("user", "u", 0, "user id")
flagset.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)")
flagset.BoolP("verbose", "v", false, "be verbose")
flagset.BoolP("debug", "d", false, "enable debug log")
flagset.BoolP("version", "V", false, "show program version")
flagset.BoolP("help", "h", false, "show usage")
flagset.BoolP("manual", "m", false, "show manual")
flagset.BoolP("force", "f", false, "force")
flagset.BoolP("ignoreerrors", "", false, "ignore image download HTTP errors")
if err := f.Parse(os.Args[1:]); err != nil {
return nil, err
if err := flagset.Parse(os.Args[1:]); err != nil {
return nil, fmt.Errorf("failed to parse program arguments: %w", err)
}
// generate a list of config files to try to load, including the
// one provided via -c, if any
var configfiles []string
configfile, _ := f.GetString("config")
configfile, _ := flagset.GetString("config")
home, _ := os.UserHomeDir()
if configfile != "" {
configfiles = []string{configfile}
} else {
@@ -159,33 +191,41 @@ func InitConfig(w io.Writer) (*Config, error) {
// Load the config file[s]
for _, cfgfile := range configfiles {
if path, err := os.Stat(cfgfile); !os.IsNotExist(err) {
if !path.IsDir() {
if err := k.Load(file.Provider(cfgfile), toml.Parser()); err != nil {
return nil, errors.New("error loading config file: " + err.Error())
}
path, err := os.Stat(cfgfile)
if err != nil {
// ignore non-existent files, but bail out on any other errors
if !os.IsNotExist(err) {
return nil, fmt.Errorf("failed to stat config file: %w", err)
}
continue
}
if !path.IsDir() {
if err := kloader.Load(file.Provider(cfgfile), toml.Parser()); err != nil {
return nil, fmt.Errorf("error loading config file: %w", err)
}
}
// else: we ignore the file if it doesn't exists
}
// env overrides config file
if err := k.Load(env.Provider("KLEINGEBAECK_", ".", func(s string) string {
return strings.Replace(strings.ToLower(
strings.TrimPrefix(s, "KLEINGEBAECK_")), "_", ".", -1)
if err := kloader.Load(env.Provider("KLEINGEBAECK_", ".", func(s string) string {
return strings.ReplaceAll(strings.ToLower(
strings.TrimPrefix(s, "KLEINGEBAECK_")), "_", ".")
}), nil); err != nil {
return nil, errors.New("error loading environment: " + err.Error())
return nil, fmt.Errorf("error loading environment: %w", err)
}
// command line overrides env
if err := k.Load(posflag.Provider(f, ".", k), nil); err != nil {
return nil, errors.New("error loading flags: " + err.Error())
if err := kloader.Load(posflag.Provider(flagset, ".", kloader), nil); err != nil {
return nil, fmt.Errorf("error loading flags: %w", err)
}
// fetch values
conf := &Config{}
if err := k.Unmarshal("", &conf); err != nil {
return nil, errors.New("error unmarshalling: " + err.Error())
if err := kloader.Unmarshal("", &conf); err != nil {
return nil, fmt.Errorf("error unmarshalling: %w", err)
}
// adjust loglevel
@@ -197,7 +237,7 @@ func InitConfig(w io.Writer) (*Config, error) {
}
// are there any args left on commandline? if so threat them as adlinks
conf.Adlinks = f.Args()
conf.Adlinks = flagset.Args()
return conf, nil
}

View File

@@ -12,19 +12,36 @@ user = 00000000
loglevel = "verbose"
# directory where to store downloaded ads. kleingebaeck will try to
# create it. must be a quoted string.
# create it. must be a quoted string. You can also include a couple of
# template variables, e.g:
# outdir = "test-{{.Year}}-{{.Month}}-{{.Day}}"
outdir = "test"
# template for stored adlistings. To enable it, remove the comment
# chars up until the last #"""
#template="""
#Title: {{.Title}}
#Price: {{.Price}}
#Id: {{.Id}}
#Category: {{.Category}}
#Condition: {{.Condition}}
#Created: {{.Created}}
# template for stored adlistings.
template="""
Title: {{.Title}}
Price: {{.Price}}
Id: {{.Id}}
Category: {{.Category}}
Condition: {{.Condition}}
Type: {{.Type}}
Created: {{.Created}}
#{{.Text}}
# """
{{.Text}}
"""
# Ads may contain more attributes than just the Condition. To print
# all attributes, loop over all of them:
template="""
Title: {{.Title}}
Price: {{.Price}}
Id: {{.Id}}
Category: {{.Category}}
{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}
{{ end }}
Type: {{.Type}}
Created: {{.Created}}
{{.Text}}
"""

View File

@@ -19,55 +19,84 @@ package main
import (
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"net/http/cookiejar"
"net/url"
)
// convenient wrapper to fetch some web content
type Fetcher struct {
Config *Config
Client *http.Client
Useragent string // FIXME: make configurable
Config *Config
Client *http.Client
Cookies []*http.Cookie
}
func NewFetcher(c *Config) *Fetcher {
return &Fetcher{
Client: &http.Client{Transport: &loggingTransport{}}, // implemented in http.go
Useragent: Useragent, // default in config.go
Config: c,
func NewFetcher(conf *Config) (*Fetcher, error) {
jar, err := cookiejar.New(nil)
if err != nil {
return nil, fmt.Errorf("failed to create a cookie jar obj: %w", err)
}
return &Fetcher{
Client: &http.Client{
Transport: &loggingTransport{}, // implemented in http.go
Jar: jar,
},
Config: conf,
Cookies: []*http.Cookie{},
},
nil
}
func (f *Fetcher) Get(uri string) (io.ReadCloser, error) {
req, err := http.NewRequest("GET", uri, nil)
req, err := http.NewRequest(http.MethodGet, uri, http.NoBody)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to create a new HTTP request obj: %w", err)
}
req.Header.Set("User-Agent", f.Useragent)
req.Header.Set("User-Agent", f.Config.UserAgent)
if len(f.Cookies) > 0 {
uriobj, _ := url.Parse(Baseuri)
slog.Debug("have cookies, sending them",
"sample-cookie-name", f.Cookies[0].Name,
"sample-cookie-expire", f.Cookies[0].Expires,
)
f.Client.Jar.SetCookies(uriobj, f.Cookies)
}
res, err := f.Client.Do(req)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to initiate HTTP request to %s: %w", uri, err)
}
if res.StatusCode != 200 {
if res.StatusCode != http.StatusOK {
return nil, errors.New("could not get page via HTTP")
}
slog.Debug("got cookies?", "cookies", res.Cookies())
f.Cookies = res.Cookies()
return res.Body, nil
}
// fetch an image
func (f *Fetcher) Getimage(uri string) (io.ReadCloser, error) {
slog.Debug("fetching ad image", "uri", uri)
body, err := f.Get(uri)
if err != nil {
if f.Config.IgnoreErrors {
slog.Info("Failed to download image, error ignored", "error", err.Error())
return nil, nil
}
return nil, err
}

29
go.mod
View File

@@ -4,35 +4,38 @@ go 1.21
require (
astuart.co/goq v1.0.0
github.com/corona10/goimagehash v1.1.0
github.com/inconshreveable/mousetrap v1.1.0
github.com/jarcoal/httpmock v1.3.1
github.com/knadh/koanf/parsers/toml v0.1.0
github.com/knadh/koanf/providers/confmap v0.1.0
github.com/knadh/koanf/providers/env v0.1.0
github.com/knadh/koanf/providers/file v0.1.0
github.com/knadh/koanf/providers/env v1.0.0
github.com/knadh/koanf/providers/file v1.1.2
github.com/knadh/koanf/providers/posflag v0.1.0
github.com/knadh/koanf/v2 v2.0.1
github.com/lmittmann/tint v1.0.4
github.com/knadh/koanf/v2 v2.1.2
github.com/lmittmann/tint v1.0.7
github.com/mattn/go-isatty v0.0.20
github.com/spf13/pflag v1.0.5
github.com/tlinden/yadu v0.1.1
golang.org/x/sync v0.5.0
github.com/spf13/pflag v1.0.6
github.com/tlinden/yadu v0.1.3
golang.org/x/image v0.23.0
golang.org/x/sync v0.10.0
)
require (
github.com/PuerkitoBio/goquery v1.5.1 // indirect
github.com/alecthomas/repr v0.4.0 // indirect
github.com/andybalholm/cascadia v1.1.0 // indirect
github.com/corona10/goimagehash v1.1.0 // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-viper/mapstructure/v2 v2.2.1 // indirect
github.com/knadh/koanf/maps v0.1.1 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-colorable v0.1.14 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
golang.org/x/sys v0.14.0 // indirect
golang.org/x/net v0.23.0 // indirect
golang.org/x/sys v0.29.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

70
go.sum
View File

@@ -3,6 +3,8 @@ astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc=
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
@@ -13,8 +15,12 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/go-viper/mapstructure/v2 v2.2.1 h1:ZAaOCxANMuZx5RCeg0mBdEZk7DZasvvZIxtHqx8aGss=
github.com/go-viper/mapstructure/v2 v2.2.1/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jarcoal/httpmock v1.3.1 h1:iUx3whfZWVf3jT01hQTO/Eo5sAYtB2/rqaUuOtpInww=
github.com/jarcoal/httpmock v1.3.1/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg=
github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs=
@@ -23,18 +29,24 @@ github.com/knadh/koanf/parsers/toml v0.1.0 h1:S2hLqS4TgWZYj4/7mI5m1CQQcWurxUz6OD
github.com/knadh/koanf/parsers/toml v0.1.0/go.mod h1:yUprhq6eo3GbyVXFFMdbfZSo928ksS+uo0FFqNMnO18=
github.com/knadh/koanf/providers/confmap v0.1.0 h1:gOkxhHkemwG4LezxxN8DMOFopOPghxRVp7JbIvdvqzU=
github.com/knadh/koanf/providers/confmap v0.1.0/go.mod h1:2uLhxQzJnyHKfxG927awZC7+fyHFdQkd697K4MdLnIU=
github.com/knadh/koanf/providers/env v0.1.0 h1:LqKteXqfOWyx5Ab9VfGHmjY9BvRXi+clwyZozgVRiKg=
github.com/knadh/koanf/providers/env v0.1.0/go.mod h1:RE8K9GbACJkeEnkl8L/Qcj8p4ZyPXZIQ191HJi44ZaQ=
github.com/knadh/koanf/providers/file v0.1.0 h1:fs6U7nrV58d3CFAFh8VTde8TM262ObYf3ODrc//Lp+c=
github.com/knadh/koanf/providers/file v0.1.0/go.mod h1:rjJ/nHQl64iYCtAW2QQnF0eSmDEX/YZ/eNFj5yR6BvA=
github.com/knadh/koanf/providers/env v1.0.0 h1:ufePaI9BnWH+ajuxGGiJ8pdTG0uLEUWC7/HDDPGLah0=
github.com/knadh/koanf/providers/env v1.0.0/go.mod h1:mzFyRZueYhb37oPmC1HAv/oGEEuyvJDA98r3XAa8Gak=
github.com/knadh/koanf/providers/file v1.1.2 h1:aCC36YGOgV5lTtAFz2qkgtWdeQsgfxUkxDOe+2nQY3w=
github.com/knadh/koanf/providers/file v1.1.2/go.mod h1:/faSBcv2mxPVjFrXck95qeoyoZ5myJ6uxN8OOVNJJCI=
github.com/knadh/koanf/providers/posflag v0.1.0 h1:mKJlLrKPcAP7Ootf4pBZWJ6J+4wHYujwipe7Ie3qW6U=
github.com/knadh/koanf/providers/posflag v0.1.0/go.mod h1:SYg03v/t8ISBNrMBRMlojH8OsKowbkXV7giIbBVgbz0=
github.com/knadh/koanf/v2 v2.0.1 h1:1dYGITt1I23x8cfx8ZnldtezdyaZtfAuRtIFOiRzK7g=
github.com/knadh/koanf/v2 v2.0.1/go.mod h1:ZeiIlIDXTE7w1lMT6UVcNiRAS2/rCeLn/GdLNvY1Dus=
github.com/lmittmann/tint v1.0.4 h1:LeYihpJ9hyGvE0w+K2okPTGUdVLfng1+nDNVR4vWISc=
github.com/lmittmann/tint v1.0.4/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE=
github.com/knadh/koanf/v2 v2.1.2 h1:I2rtLRqXRy1p01m/utEtpZSSA6dcJbgGVuE27kW2PzQ=
github.com/knadh/koanf/v2 v2.1.2/go.mod h1:Gphfaen0q1Fc1HTgJgSTC4oRX9R2R5ErYMZJy8fLJBo=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lmittmann/tint v1.0.7 h1:D/0OqWZ0YOGZ6AyC+5Y2kD8PBEzBk6rFHVSfOqCkF9Y=
github.com/lmittmann/tint v1.0.7/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
@@ -42,8 +54,6 @@ github.com/maxatome/go-testdeep v1.12.0 h1:Ql7Go8Tg0C1D/uMMX59LAoYK7LffeJQ6X2T04
github.com/maxatome/go-testdeep v1.12.0/go.mod h1:lPZc/HAcJMP92l7yI6TRz1aZN5URwUBUAfUNvrclaNM=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ=
@@ -52,35 +62,35 @@ github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3v
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/pflag v1.0.6 h1:jFzHGLGAlb3ruxLB8MhbI6A8+AQX/2eW4qeyNZXNp2o=
github.com/spf13/pflag v1.0.6/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/tlinden/yadu v0.0.0-20240118202225-ec3f0b7fc355 h1:EmgK+IGUz2m42bFKteLY5SYJLn/CyBrz6nkgS22K8Bk=
github.com/tlinden/yadu v0.0.0-20240118202225-ec3f0b7fc355/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
github.com/tlinden/yadu v0.1.0 h1:qtCi1jxg392qVRLFyrJ2LYu6/PiKSp1LT02EX+mNLME=
github.com/tlinden/yadu v0.1.0/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
github.com/tlinden/yadu v0.1.1 h1:116oEUy9b4PcMF5wLL2dCFA/sn/praYutOnao07MROw=
github.com/tlinden/yadu v0.1.1/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tlinden/yadu v0.1.3 h1:5cRCUmj+l5yvlM2irtpFBIJwVV2DPEgYSaWvF19FtcY=
github.com/tlinden/yadu v0.1.3/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/image v0.23.0 h1:HseQ7c2OpPKTPVzNjG5fwJsOTCiiwS4QdsYi5XU6H68=
golang.org/x/image v0.23.0/go.mod h1:wJJBTdLfCCf3tiHa1fNxpZmUI4mmoZvwMCPP0ddoNKY=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b h1:PxfKdU9lEEDYjdIzOtC4qFWgkU2rGHdKlKowJSMN9h0=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs=
golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q=
golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws=
golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU=
golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

37
http.go
View File

@@ -19,6 +19,7 @@ package main
import (
"bytes"
"fmt"
"io"
"log/slog"
"math"
@@ -32,17 +33,20 @@ import (
// easier associated in debug output
var letters = []rune("ABCDEF0123456789")
func getid() string {
b := make([]rune, 8)
for i := range b {
b[i] = letters[rand.Intn(len(letters))]
}
return string(b)
}
const IDLEN int = 8
// retry after HTTP 50x errors or err!=nil
const RetryCount = 3
func getid() string {
b := make([]rune, IDLEN)
for i := range b {
b[i] = letters[rand.Intn(len(letters))]
}
return string(b)
}
// used to inject debug log and implement retries
type loggingTransport struct{}
@@ -75,6 +79,7 @@ func drainBody(resp *http.Response) {
// unable to copy data? uff!
panic(err)
}
resp.Body.Close()
}
}
@@ -82,8 +87,8 @@ func drainBody(resp *http.Response) {
// the actual logging transport with retries
func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) {
// just requred for debugging
id := getid()
// just required for debugging
requestid := getid()
// clone the request body, put into request on retry
var bodyBytes []byte
@@ -92,16 +97,16 @@ func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error)
req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
}
slog.Debug("REQUEST", "id", id, "uri", req.URL, "host", req.Host)
slog.Debug("REQUEST", "id", requestid, "uri", req.URL, "host", req.Host)
// first try
resp, err := http.DefaultTransport.RoundTrip(req)
if err == nil {
slog.Debug("RESPONSE", "id", id, "status", resp.StatusCode,
slog.Debug("RESPONSE", "id", requestid, "status", resp.StatusCode,
"contentlength", resp.ContentLength)
}
// enter retry check and loop, if first req were successfull, leave loop immediately
// enter retry check and loop, if first req were successful, leave loop immediately
retries := 0
for shouldRetry(err, resp) && retries < RetryCount {
time.Sleep(backoff(retries))
@@ -118,12 +123,16 @@ func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error)
resp, err = http.DefaultTransport.RoundTrip(req)
if err == nil {
slog.Debug("RESPONSE", "id", id, "status", resp.StatusCode,
slog.Debug("RESPONSE", "id", requestid, "status", resp.StatusCode,
"contentlength", resp.ContentLength, "retry", retries)
}
retries++
}
return resp, err
if err != nil {
return resp, fmt.Errorf("failed to get HTTP response for %s: %w", req.URL, err)
}
return resp, nil
}

View File

@@ -19,11 +19,17 @@ package main
import (
"bytes"
"image/jpeg"
"fmt"
"image"
_ "image/gif"
_ "image/jpeg"
_ "image/png"
"log/slog"
"os"
"path/filepath"
_ "golang.org/x/image/webp"
"github.com/corona10/goimagehash"
)
@@ -32,15 +38,16 @@ const MaxDistance = 3
type Image struct {
Filename string
Hash *goimagehash.ImageHash
Data *bytes.Buffer
Uri string
Data *bytes.Reader
URI string
Mime string
}
// used for logging to avoid printing Data
func (img *Image) LogValue() slog.Value {
return slog.GroupValue(
slog.String("filename", img.Filename),
slog.String("uri", img.Uri),
slog.String("uri", img.URI),
slog.String("hash", img.Hash.ToString()),
)
}
@@ -48,26 +55,57 @@ func (img *Image) LogValue() slog.Value {
// holds all images of an ad
type Cache []*goimagehash.ImageHash
func NewImage(buf *bytes.Buffer, filename string, uri string) *Image {
img := &Image{
Filename: filename,
Uri: uri,
Data: buf,
// filename comes from the scraper, it contains directory/base w/o suffix
func NewImage(buf *bytes.Reader, filename, uri string) (*Image, error) {
_, imgconfig, err := image.DecodeConfig(buf)
if err != nil {
return nil, fmt.Errorf("failed to decode image: %w", err)
}
return img
_, err = buf.Seek(0, 0)
if err != nil {
return nil, fmt.Errorf("failed to seek(0) on image buffer: %w", err)
}
if imgconfig == "jpeg" {
// we're using the format as file extension, but have used
// "jpg" in the past, so to be backwards compatible, stay with
// it.
imgconfig = "jpg"
}
if imgconfig == "" {
return nil, fmt.Errorf("failed to process image: unknown or unsupported image format (supported: jpg,png,gif,webp)")
}
filename += "." + imgconfig
img := &Image{
Filename: filename,
URI: uri,
Data: buf,
Mime: imgconfig,
}
slog.Debug("image MIME", "mime", img.Mime)
return img, nil
}
// Calculate diff hash of the image
func (img *Image) CalcHash() error {
jpgdata, err := jpeg.Decode(img.Data)
jpgdata, format, err := image.Decode(img.Data)
if err != nil {
return err
return fmt.Errorf("failed to decode image: %w", err)
}
if format == "" {
return fmt.Errorf("failed to decode image: unknown or unsupported image format (supported: jpg,png,gif,webp)")
}
hash1, err := goimagehash.DifferenceHash(jpgdata)
if err != nil {
return err
return fmt.Errorf("failed to calculate diff hash of image: %w", err)
}
img.Hash = hash1
@@ -80,16 +118,18 @@ func (img *Image) Similar(hash *goimagehash.ImageHash) bool {
distance, err := img.Hash.Distance(hash)
if err != nil {
slog.Debug("failed to compute diff hash distance", "error", err)
return false
}
if distance < MaxDistance {
slog.Debug("distance computation", "image-A", img.Hash.ToString(),
"image-B", hash.ToString(), "distance", distance)
return true
} else {
return false
}
return false
}
// check current image against all known hashes.
@@ -108,7 +148,7 @@ func (img *Image) SimilarExists(cache Cache) bool {
func ReadImages(addir string, dont bool) (Cache, error) {
files, err := os.ReadDir(addir)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to read ad directory contents: %w", err)
}
cache := Cache{}
@@ -122,21 +162,30 @@ func ReadImages(addir string, dont bool) (Cache, error) {
ext := filepath.Ext(file.Name())
if !file.IsDir() && (ext == ".jpg" || ext == ".jpeg" || ext == ".JPG" || ext == ".JPEG") {
filename := filepath.Join(addir, file.Name())
data, err := ReadImage(filename)
if err != nil {
return nil, err
}
img := NewImage(data, filename, "")
if err = img.CalcHash(); err != nil {
reader := bytes.NewReader(data.Bytes())
img, err := NewImage(reader, filename, "")
if err != nil {
return nil, err
}
slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString())
if err := img.CalcHash(); err != nil {
return nil, err
}
if img.Hash != nil {
slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString())
}
cache = append(cache, img.Hash)
}
}
//return nil, errors.New("ende")
return cache, nil
}

View File

@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "KLEINGEBAECK 1"
.TH KLEINGEBAECK 1 "2024-01-22" "1" "User Commands"
.TH KLEINGEBAECK 1 "2025-02-10" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@@ -152,7 +152,7 @@ kleingebaeck \- kleinanzeigen.de backup tool
\& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all.
\& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck).
\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
\& \-f \-\-force Download images even if they already exist.
\& \-f \-\-force Overwrite images and ads even if the already exist.
\& \-m \-\-manual Show manual.
\& \-h \-\-help Show usage.
\& \-V \-\-version Show program version.
@@ -174,14 +174,15 @@ well. We use \s-1TOML\s0 as our configuration language. See
.PP
Format is pretty simple:
.PP
.Vb 10
.Vb 11
\& user = 1010101
\& loglevel = verbose
\& outdir = "test"
\& useragent = "Mozilla/5.0"
\& template = """
\& Title: {{.Title}}
\& Price: {{.Price}}
\& Id: {{.Id}}
\& Id: {{.ID}}
\& Category: {{.Category}}
\& Condition: {{.Condition}}
\& Created: {{.Created}}
@@ -190,11 +191,11 @@ Format is pretty simple:
\& """
.Ve
.PP
Be carefull if you want to change the template. The variable is a
Be careful if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can left out
certain fields and use any formatting you like. Refer to
<https://pkg.go.dev/text/template> for details how to write a
template.
template. Also read the \s-1TEMPLATES\s0 section below.
.PP
If you're on windows and want to customize the output directory, put
it into single quotes to avoid the backslashes interpreted as escape
@@ -203,6 +204,95 @@ chars like this:
.Vb 1
\& outdir = \*(AqC:\eData\eAds\*(Aq
.Ve
.SH "TEMPLATES"
.IX Header "TEMPLATES"
Various parts of the configuration can be modified using templates:
the output directory, the ad directory and the ad listing itself.
.SS "\s-1OUTPUT DIR TEMPLATE\s0"
.IX Subsection "OUTPUT DIR TEMPLATE"
The config varialbe \f(CW\*(C`outdir\*(C'\fR or the command line parameter \f(CW\*(C`\-o\*(C'\fR take a
template which may contain:
.ie n .IP """{{.Year}}""" 4
.el .IP "\f(CW{{.Year}}\fR" 4
.IX Item "{{.Year}}"
.PD 0
.ie n .IP """{{.Month}}""" 4
.el .IP "\f(CW{{.Month}}\fR" 4
.IX Item "{{.Month}}"
.ie n .IP """{{.Day}}""" 4
.el .IP "\f(CW{{.Day}}\fR" 4
.IX Item "{{.Day}}"
.PD
.PP
That way you can create a new output directory for every backup
run. For example:
.PP
.Vb 1
\& outdir = "/home/backups/ads\-{{.Year}}\-{{.Month}}\-{{.Day}}"
.Ve
.PP
Or using the command line flag:
.PP
.Vb 1
\& \-o "/home/backups/ads\-{{.Year}}\-{{.Month}}\-{{.Day}}"
.Ve
.PP
The default value is \f(CW\*(C`.\*(C'\fR \- the current directory.
.SS "\s-1AD DIRECTORY TEMPLATE\s0"
.IX Subsection "AD DIRECTORY TEMPLATE"
The ad directory name can be modified using the following ad values:
.IP "{{.Price}}" 4
.IX Item "{{.Price}}"
.PD 0
.IP "{{.ID}}" 4
.IX Item "{{.ID}}"
.IP "{{.Category}}" 4
.IX Item "{{.Category}}"
.IP "{{.Condition}}" 4
.IX Item "{{.Condition}}"
.IP "{{.Created}}" 4
.IX Item "{{.Created}}"
.IP "{{.Slug}}" 4
.IX Item "{{.Slug}}"
.IP "{{.Text}}" 4
.IX Item "{{.Text}}"
.PD
.PP
It can only be configured in the config file. By default only
\&\f(CW\*(C`{{.Slug}}\*(C'\fR is being used, this is the title of the ad in url format.
.SS "\s-1AD TEMPLATE\s0"
.IX Subsection "AD TEMPLATE"
The ad listing itself can be modified as well, using the same
variables as the ad name template above.
.PP
This is the default template:
.PP
.Vb 8
\& Title: {{.Title}}
\& Price: {{.Price}}
\& Id: {{.ID}}
\& Category: {{.Category}}
\& Condition: {{.Condition}}
\& Type: {{.Type}}
\& Created: {{.Created}}
\& Expire: {{.Expire}}
\&
\& {{.Text}}
.Ve
.PP
The config parameter to modify is \f(CW\*(C`template\*(C'\fR. See example.conf in the
source repository. Please take care, since this is a multiline
string. This is how it shall look if you modify it:
.PP
.Vb 2
\& template="""
\& Title: {{.Title}}
\&
\& {{.Text}}
\& """
.Ve
.PP
That is, the content between the two \f(CW"""\fR chars is the template.
.SH "SETUP"
.IX Header "SETUP"
To setup the tool, you need to lookup your userid on

View File

@@ -14,7 +14,7 @@ SYNOPSYS
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-f --force Overwrite images and ads even if the already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
@@ -39,10 +39,11 @@ CONFIGURATION
user = 1010101
loglevel = verbose
outdir = "test"
useragent = "Mozilla/5.0"
template = """
Title: {{.Title}}
Price: {{.Price}}
Id: {{.Id}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Created: {{.Created}}
@@ -50,10 +51,11 @@ CONFIGURATION
{{.Text}}
"""
Be carefull if you want to change the template. The variable is a
Be careful if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can left out
certain fields and use any formatting you like. Refer to
<https://pkg.go.dev/text/template> for details how to write a template.
Also read the TEMPLATES section below.
If you're on windows and want to customize the output directory, put it
into single quotes to avoid the backslashes interpreted as escape chars
@@ -61,6 +63,72 @@ CONFIGURATION
outdir = 'C:\Data\Ads'
TEMPLATES
Various parts of the configuration can be modified using templates: the
output directory, the ad directory and the ad listing itself.
OUTPUT DIR TEMPLATE
The config varialbe "outdir" or the command line parameter "-o" take a
template which may contain:
"{{.Year}}"
"{{.Month}}"
"{{.Day}}"
That way you can create a new output directory for every backup run. For
example:
outdir = "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
Or using the command line flag:
-o "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
The default value is "." - the current directory.
AD DIRECTORY TEMPLATE
The ad directory name can be modified using the following ad values:
{{.Price}}
{{.ID}}
{{.Category}}
{{.Condition}}
{{.Created}}
{{.Slug}}
{{.Text}}
It can only be configured in the config file. By default only
"{{.Slug}}" is being used, this is the title of the ad in url format.
AD TEMPLATE
The ad listing itself can be modified as well, using the same variables
as the ad name template above.
This is the default template:
Title: {{.Title}}
Price: {{.Price}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Type: {{.Type}}
Created: {{.Created}}
Expire: {{.Expire}}
{{.Text}}
The config parameter to modify is "template". See example.conf in the
source repository. Please take care, since this is a multiline string.
This is how it shall look if you modify it:
template="""
Title: {{.Title}}
{{.Text}}
"""
That is, the content between the two """ chars is the template.
SETUP
To setup the tool, you need to lookup your userid on kleinanzeigen.de.
Go to your ad overview page while NOT being logged in:

View File

@@ -13,7 +13,7 @@ kleingebaeck - kleinanzeigen.de backup tool
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Download images even if they already exist.
-f --force Overwrite images and ads even if the already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
@@ -39,10 +39,11 @@ Format is pretty simple:
user = 1010101
loglevel = verbose
outdir = "test"
useragent = "Mozilla/5.0"
template = """
Title: {{.Title}}
Price: {{.Price}}
Id: {{.Id}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Created: {{.Created}}
@@ -50,11 +51,11 @@ Format is pretty simple:
{{.Text}}
"""
Be carefull if you want to change the template. The variable is a
Be careful if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can left out
certain fields and use any formatting you like. Refer to
L<https://pkg.go.dev/text/template> for details how to write a
template.
template. Also read the TEMPLATES section below.
If you're on windows and want to customize the output directory, put
it into single quotes to avoid the backslashes interpreted as escape
@@ -62,6 +63,92 @@ chars like this:
outdir = 'C:\Data\Ads'
=head1 TEMPLATES
Various parts of the configuration can be modified using templates:
the output directory, the ad directory and the ad listing itself.
=head2 OUTPUT DIR TEMPLATE
The config varialbe C<outdir> or the command line parameter C<-o> take a
template which may contain:
=over
=item C<{{.Year}}>
=item C<{{.Month}}>
=item C<{{.Day}}>
=back
That way you can create a new output directory for every backup
run. For example:
outdir = "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
Or using the command line flag:
-o "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
The default value is C<.> - the current directory.
=head2 AD DIRECTORY TEMPLATE
The ad directory name can be modified using the following ad values:
=over
=item {{.Price}}
=item {{.ID}}
=item {{.Category}}
=item {{.Condition}}
=item {{.Created}}
=item {{.Slug}}
=item {{.Text}}
=back
It can only be configured in the config file. By default only
C<{{.Slug}}> is being used, this is the title of the ad in url format.
=head2 AD TEMPLATE
The ad listing itself can be modified as well, using the same
variables as the ad name template above.
This is the default template:
Title: {{.Title}}
Price: {{.Price}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Type: {{.Type}}
Created: {{.Created}}
Expire: {{.Expire}}
{{.Text}}
The config parameter to modify is C<template>. See example.conf in the
source repository. Please take care, since this is a multiline
string. This is how it shall look if you modify it:
template="""
Title: {{.Title}}
{{.Text}}
"""
That is, the content between the two C<"""> chars is the template.
=head1 SETUP
To setup the tool, you need to lookup your userid on

74
main.go
View File

@@ -18,13 +18,16 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package main
import (
"bufio"
"errors"
"fmt"
"io"
"log/slog"
"os"
"runtime"
"runtime/debug"
"github.com/inconshreveable/mousetrap"
"github.com/lmittmann/tint"
"github.com/tlinden/yadu"
)
@@ -35,38 +38,62 @@ func main() {
os.Exit(Main(os.Stdout))
}
func Main(w io.Writer) int {
func init() {
// if we're running on Windows AND if the user double clicked the
// exe file from explorer, we tell them and then wait until any
// key has been hit, which will make the cmd window disappear and
// thus give the user time to read it.
if runtime.GOOS == "windows" {
if mousetrap.StartedByExplorer() {
fmt.Println("Do no double click kleingebaeck.exe!")
fmt.Println("Please open a command shell and run it from there.")
fmt.Println()
fmt.Print("Press any key to quit: ")
_, err := bufio.NewReader(os.Stdin).ReadString('\n')
if err != nil {
panic(err)
}
}
}
}
func Main(output io.Writer) int {
logLevel := &slog.LevelVar{}
opts := &tint.Options{
Level: logLevel,
AddSource: false,
ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr {
ReplaceAttr: func(groups []string, attr slog.Attr) slog.Attr {
// Remove time from the output
if a.Key == slog.TimeKey {
if attr.Key == slog.TimeKey {
return slog.Attr{}
}
return a
return attr
},
NoColor: IsNoTty(),
}
logLevel.Set(LevelNotice)
handler := tint.NewHandler(w, opts)
handler := tint.NewHandler(output, opts)
logger := slog.New(handler)
slog.SetDefault(logger)
conf, err := InitConfig(w)
conf, err := InitConfig(output)
if err != nil {
return Die(err)
}
if conf.Showversion {
fmt.Fprintf(w, "This is kleingebaeck version %s\n", VERSION)
fmt.Fprintf(output, "This is kleingebaeck version %s\n", VERSION)
return 0
}
if conf.Showhelp {
fmt.Fprintln(w, Usage)
fmt.Fprintln(output, Usage)
return 0
}
@@ -75,6 +102,7 @@ func Main(w io.Writer) int {
if err != nil {
return Die(err)
}
return 0
}
@@ -92,7 +120,8 @@ func Main(w io.Writer) int {
}
logLevel.Set(slog.LevelDebug)
handler := yadu.NewHandler(w, opts)
handler := yadu.NewHandler(output, opts)
debuglogger := slog.New(handler).With(
slog.Group("program_info",
slog.Int("pid", os.Getpid()),
@@ -105,15 +134,23 @@ func Main(w io.Writer) int {
slog.Debug("config", "conf", conf)
// prepare output dir
err = Mkdir(conf.Outdir)
outdir, err := OutDirName(conf)
if err != nil {
return Die(err)
}
conf.Outdir = outdir
// used for all HTTP requests
fetch, err := NewFetcher(conf)
if err != nil {
return Die(err)
}
// used for all HTTP requests
fetch := NewFetcher(conf)
// setup ad dir registry, needed to check for duplicates
DirsVisited = make(map[string]int)
if len(conf.Adlinks) >= 1 {
switch {
case len(conf.Adlinks) >= 1:
// directly backup ad listing[s]
for _, uri := range conf.Adlinks {
err := ScrapeAd(fetch, uri)
@@ -121,25 +158,27 @@ func Main(w io.Writer) int {
return Die(err)
}
}
} else if conf.User > 0 {
case conf.User > 0:
// backup all ads of the given user (via config or cmdline)
err := ScrapeUser(fetch)
if err != nil {
return Die(err)
}
} else {
default:
return Die(errors.New("invalid or no user id or no ad link specified"))
}
if conf.StatsCountAds > 0 {
adstr := "ads"
if conf.StatsCountAds == 1 {
adstr = "ad"
}
fmt.Fprintf(w, "Successfully downloaded %d %s with %d images to %s.\n",
fmt.Fprintf(output, "Successfully downloaded %d %s with %d images to %s.\n",
conf.StatsCountAds, adstr, conf.StatsCountImages, conf.Outdir)
} else {
fmt.Fprintf(w, "No ads found.")
fmt.Fprintf(output, "No ads found.")
}
return 0
@@ -147,5 +186,6 @@ func Main(w io.Writer) int {
func Die(err error) int {
slog.Error("Failure", "error", err.Error())
return 1
}

View File

@@ -21,6 +21,7 @@ import (
"bytes"
"errors"
"fmt"
"net/http"
"os"
"strings"
"testing"
@@ -42,7 +43,7 @@ const LISTTPL string = `<!DOCTYPE html>
{{ range . }}
<h2 class="text-module-begin">
<a class="ellipsis"
href="/s-anzeige/{{ .Slug }}/{{ .Id }}">{{ .Title }}</a>
href="/s-anzeige/{{ .Slug }}/{{ .ID }}">{{ .Title }}</a>
</h2>
{{ end }}
</body>
@@ -92,6 +93,10 @@ const ADTPL string = `DOCTYPE html>
<li class="addetailslist--detail">
Zustand<span class="addetailslist--detail--value" >
{{ .Condition }}</span>
Farbe<span class="addetailslist--detail--value" >
{{ .Color }}</span>
Art<span class="addetailslist--detail--value" >
{{ .Type }}</span>
</li>
</ul>
</div>
@@ -181,13 +186,13 @@ var tests = []Tests{
{
name: "download-all-ads",
args: base + " -o t/out -u 1",
expect: "Successfully downloaded 6 ads with 12 images to t/out",
expect: "Successfully downloaded 7 ads with 16 images to t/out",
exitcode: 0,
},
{
name: "download-all-ads-using-config",
args: "kleingebaeck -c t/fullconfig.conf",
expect: "Successfully downloaded 6 ads with 12 images to t/out",
expect: "Successfully downloaded 7 ads with 16 images to t/out",
exitcode: 0,
},
}
@@ -246,80 +251,108 @@ var invalidtests = []Tests{
type AdConfig struct {
Title string
Slug string
Id string
ID string
Price string
Category string
Condition string
Type string
Color string
Created string
Text string
Images []string // files in ./t/
}
// used to generate ad listings returned by httpmock using templates
var adsrc = []AdConfig{
{
Title: "First Ad",
Id: "1", Price: "5€",
ID: "1", Price: "5€",
Category: "Klimbim",
Text: "Thing to sale",
Slug: "first-ad",
Condition: "works",
Condition: "Sehr Gut",
Color: "Grün",
Type: "Ball",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Secnd Ad",
Id: "2", Price: "5€",
ID: "2", Price: "5€",
Category: "Kram",
Text: "Thing to sale",
Slug: "second-ad",
Condition: "works",
Condition: "Gut",
Color: "Lila",
Type: "Schoki",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Third Ad",
Id: "3",
ID: "3",
Price: "5€",
Category: "Kuddelmuddel",
Text: "Thing to sale",
Slug: "third-ad",
Condition: "works",
Condition: "In Ordnung",
Color: "Blau",
Type: "Auto",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Forth Ad",
Id: "4",
ID: "4",
Price: "5€",
Category: "Krempel",
Text: "Thing to sale",
Slug: "fourth-ad",
Condition: "works",
Condition: "Neu",
Color: "Rot",
Type: "Spielzeut",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Fifth Ad",
Id: "5",
ID: "5",
Price: "5€",
Category: "Kladderadatsch",
Text: "Thing to sale",
Slug: "fifth-ad",
Condition: "works",
Condition: "Sehr Gut",
Color: "Braun",
Type: "Parteibuch",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Sixth Ad",
Id: "6",
ID: "6",
Price: "5€",
Category: "Klunker",
Text: "Thing to sale",
Slug: "sixth-ad",
Condition: "works",
Condition: "Sehr Gut",
Color: "Silber",
Type: "Ring",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Ad with multiple img formats",
ID: "7",
Price: "5€",
Category: "Klunker",
Text: "Thing to sale",
Slug: "seventh-ad",
Condition: "Sehr Gut",
Color: "Gelpb",
Type: "Schmuck",
Created: "Yesterday",
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
},
}
// An Adsource is used to construct a httpmock responder for a
@@ -333,17 +366,17 @@ type Adsource struct {
}
// Render a HTML template for an adlisting or an ad
func GetTemplate(l []AdConfig, a AdConfig, htmltemplate string) string {
func GetTemplate(adconfigs []AdConfig, adconfig *AdConfig, htmltemplate string) string {
tmpl, err := tpl.New("template").Parse(htmltemplate)
if err != nil {
panic(err)
}
var out bytes.Buffer
if len(a.Id) == 0 {
err = tmpl.Execute(&out, l)
if adconfig.ID == "" {
err = tmpl.Execute(&out, adconfigs)
} else {
err = tmpl.Execute(&out, a)
err = tmpl.Execute(&out, adconfig)
}
if err != nil {
@@ -362,7 +395,7 @@ func InitValidSources() []Adsource {
// valid ad listing page 2
list2 := []AdConfig{
adsrc[3], adsrc[4], adsrc[5],
adsrc[3], adsrc[4], adsrc[5], adsrc[6],
}
// valid ad listing page 3, which is empty
@@ -375,25 +408,24 @@ func InitValidSources() []Adsource {
ads := []Adsource{
{
uri: fmt.Sprintf("%s%s?userId=1", Baseuri, Listuri),
content: GetTemplate(list1, empty, LISTTPL),
content: GetTemplate(list1, &empty, LISTTPL),
},
{
uri: fmt.Sprintf("%s%s?userId=1&pageNum=2", Baseuri, Listuri),
content: GetTemplate(list2, empty, LISTTPL),
content: GetTemplate(list2, &empty, LISTTPL),
},
{
uri: fmt.Sprintf("%s%s?userId=1&pageNum=3", Baseuri, Listuri),
content: GetTemplate(list3, empty, LISTTPL),
content: GetTemplate(list3, &empty, LISTTPL),
},
}
// prepare urls for the ads
for _, ad := range adsrc {
ads = append(ads, Adsource{
uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.Id),
content: GetTemplate(nil, ad, ADTPL),
uri: fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.ID),
content: GetTemplate(nil, &ad, ADTPL),
})
//panic(GetTemplate(nil, ad, ADTPL))
}
return ads
@@ -405,28 +437,28 @@ func InitInvalidSources() []Adsource {
{
// valid ad page but without content
uri: fmt.Sprintf("%s/s-anzeige/empty/1", Baseuri),
content: GetTemplate(nil, empty, EMPTYPAGE),
content: GetTemplate(nil, &empty, EMPTYPAGE),
},
{
// some random foreign webpage
uri: INVALIDURI,
content: GetTemplate(nil, empty, "<html>foo</html>"),
content: GetTemplate(nil, &empty, "<html>foo</html>"),
},
{
// some invalid page path
uri: fmt.Sprintf("%s/anzeige/name/1", Baseuri),
content: GetTemplate(nil, empty, "<html></html>"),
content: GetTemplate(nil, &empty, "<html></html>"),
},
{
// some none-ad page
uri: fmt.Sprintf("%s/anzeige/name/1/foo/bar", Baseuri),
content: GetTemplate(nil, empty, "<html>HTTP 404: /eine-anzeige/ does not exist!</html>"),
content: GetTemplate(nil, &empty, "<html>HTTP 404: /eine-anzeige/ does not exist!</html>"),
status: 404,
},
{
// valid ad page but 503
uri: fmt.Sprintf("%s/s-anzeige/503/1", Baseuri),
content: GetTemplate(nil, empty, "<html>HTTP 503: service unavailable</html>"),
content: GetTemplate(nil, &empty, "<html>HTTP 503: service unavailable</html>"),
status: 503,
},
}
@@ -446,43 +478,48 @@ func GetImage(path string) []byte {
// setup httpmock
func SetIntercept(ads []Adsource) {
for _, ad := range ads {
if ad.status == 0 {
ad.status = 200
headers := http.Header{}
headers.Add("Set-Cookie", "session=permanent")
for _, advertisement := range ads {
if advertisement.status == 0 {
advertisement.status = 200
}
httpmock.RegisterResponder("GET", ad.uri,
httpmock.NewStringResponder(ad.status, ad.content))
httpmock.RegisterResponder("GET", advertisement.uri,
httpmock.NewStringResponder(advertisement.status, advertisement.content).HeaderAdd(headers))
}
// we just use 2 images, put this here
for _, image := range []string{"t/1.jpg", "t/2.jpg"} {
for _, image := range []string{"t/1.jpg", "t/2.jpg", "t/1.png", "t/1.gif", "t/1.webp"} {
httpmock.RegisterResponder("GET", image,
httpmock.NewBytesResponder(200, GetImage(image)))
httpmock.NewBytesResponder(200, GetImage(image)).HeaderAdd(headers))
}
}
func VerifyAd(ad AdConfig) error {
body := ad.Title + ad.Price + ad.Id + "Kleinanzeigen => " +
ad.Category + ad.Condition + ad.Created
func VerifyAd(advertisement *AdConfig) error {
body := advertisement.Title + advertisement.Price + advertisement.ID + "Kleinanzeigen => " +
advertisement.Category + advertisement.Condition + advertisement.Created
// prepare ad dir name using DefaultAdNameTemplate
c := Config{Adnametemplate: "{{ .Slug }}"}
adstruct := Ad{Slug: ad.Slug, Id: ad.Id}
adstruct := Ad{Slug: advertisement.Slug, ID: advertisement.ID}
addir, err := AdDirName(&c, &adstruct)
if err != nil {
return err
}
file := fmt.Sprintf("t/out/%s/Adlisting.txt", addir)
content, err := os.ReadFile(file)
if err != nil {
return err
return fmt.Errorf("unable to read adlisting file: %w", err)
}
if body != strings.TrimSpace(string(content)) {
msg := fmt.Sprintf("ad content doesn't match.\nExpect: %s\n Got: %s\n", body, content)
return errors.New(msg)
}
@@ -500,27 +537,28 @@ func TestMain(t *testing.T) {
SetIntercept(InitValidSources())
// run commandline tests
for _, tt := range tests {
for _, test := range tests {
var buf bytes.Buffer
os.Args = strings.Split(tt.args, " ")
os.Args = strings.Split(test.args, " ")
ret := Main(&buf)
if ret != tt.exitcode {
if ret != test.exitcode {
t.Errorf("%s with cmd <%s> did not exit with %d but %d",
tt.name, tt.args, tt.exitcode, ret)
test.name, test.args, test.exitcode, ret)
}
if !strings.Contains(buf.String(), tt.expect) {
if !strings.Contains(buf.String(), test.expect) {
t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n",
tt.name, tt.args, tt.expect, buf.String())
test.name, test.args, test.expect, buf.String())
}
}
// verify if downloaded ads match
for _, ad := range adsrc {
if err := VerifyAd(ad); err != nil {
t.Errorf(err.Error())
if err := VerifyAd(&ad); err != nil {
t.Error(err.Error())
}
}
}
@@ -536,20 +574,21 @@ func TestMainInvalids(t *testing.T) {
SetIntercept(InitInvalidSources())
// run commandline tests
for _, tt := range invalidtests {
for _, test := range invalidtests {
var buf bytes.Buffer
os.Args = strings.Split(tt.args, " ")
os.Args = strings.Split(test.args, " ")
ret := Main(&buf)
if ret != tt.exitcode {
if ret != test.exitcode {
t.Errorf("%s with cmd <%s> did not exit with %d but %d",
tt.name, tt.args, tt.exitcode, ret)
test.name, test.args, test.exitcode, ret)
}
if !strings.Contains(buf.String(), tt.expect) {
if !strings.Contains(buf.String(), test.expect) {
t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n",
tt.name, tt.args, tt.expect, buf.String())
test.name, test.args, test.expect, buf.String())
}
}
}

View File

@@ -22,7 +22,12 @@ freebsd/amd64
linux/amd64
netbsd/amd64
openbsd/amd64
windows/amd64"
windows/amd64
freebsd/arm64
linux/arm64
netbsd/arm64
openbsd/arm64
windows/arm64"
tool="$1"
version="$2"

117
scrape.go
View File

@@ -19,11 +19,12 @@ package main
import (
"bytes"
"errors"
"fmt"
"log/slog"
"path/filepath"
"strconv"
"strings"
"time"
"astuart.co/goq"
"golang.org/x/sync/errgroup"
@@ -42,7 +43,9 @@ func ScrapeUser(fetch *Fetcher) error {
for {
var index Index
slog.Debug("fetching page", "uri", uri)
body, err := fetch.Get(uri)
if err != nil {
return err
@@ -51,7 +54,7 @@ func ScrapeUser(fetch *Fetcher) error {
err = goq.NewDecoder(body).Decode(&index)
if err != nil {
return err
return fmt.Errorf("failed to goquery decode HTML index body: %w", err)
}
if len(index.Links) == 0 {
@@ -66,16 +69,16 @@ func ScrapeUser(fetch *Fetcher) error {
}
page++
uri = baseuri + "&pageNum=" + fmt.Sprintf("%d", page)
uri = baseuri + "&pageNum=" + strconv.Itoa(page)
}
for i, adlink := range adlinks {
for index, adlink := range adlinks {
err := ScrapeAd(fetch, Baseuri+adlink)
if err != nil {
return err
}
if fetch.Config.Limit > 0 && i == fetch.Config.Limit-1 {
if fetch.Config.Limit > 0 && index == fetch.Config.Limit-1 {
break
}
}
@@ -85,18 +88,20 @@ func ScrapeUser(fetch *Fetcher) error {
// scrape an ad. uri is the full uri of the ad, dir is the basedir
func ScrapeAd(fetch *Fetcher, uri string) error {
ad := &Ad{}
advertisement := &Ad{}
// extract slug and id from uri
uriparts := strings.Split(uri, "/")
if len(uriparts) < 6 {
return errors.New("invalid uri: " + uri)
if len(uriparts) < SlugURIPartNum {
return fmt.Errorf("invalid uri: %s", uri)
}
ad.Slug = uriparts[4]
ad.Id = uriparts[5]
advertisement.Slug = uriparts[4]
advertisement.ID = uriparts[5]
// get the ad
slog.Debug("fetching ad page", "uri", uri)
body, err := fetch.Get(uri)
if err != nil {
return err
@@ -104,36 +109,54 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
defer body.Close()
// extract ad contents with goquery/goq
err = goq.NewDecoder(body).Decode(&ad)
err = goq.NewDecoder(body).Decode(&advertisement)
if err != nil {
return fmt.Errorf("failed to goquery decode HTML ad body: %w", err)
}
if len(advertisement.CategoryTree) > 0 {
advertisement.Category = strings.Join(advertisement.CategoryTree, " => ")
}
if advertisement.Incomplete() {
slog.Debug("got ad", "ad", advertisement)
return fmt.Errorf("could not extract ad data from page, got empty struct")
}
advertisement.DecodeAttributes()
advertisement.CalculateExpire()
// prepare ad dir name
addir, err := AdDirName(fetch.Config, advertisement)
if err != nil {
return err
}
if len(ad.CategoryTree) > 0 {
ad.Category = strings.Join(ad.CategoryTree, " => ")
proceed := CheckAdVisited(fetch.Config, addir)
if !proceed {
return nil
}
if ad.Incomplete() {
slog.Debug("got ad", "ad", ad)
return errors.New("could not extract ad data from page, got empty struct")
}
ad.CalculateExpire()
// write listing
addir, err := WriteAd(fetch.Config, ad)
err = WriteAd(fetch.Config, advertisement, addir)
if err != nil {
return err
}
slog.Debug("extracted ad listing", "ad", ad)
// tell the user
slog.Debug("extracted ad listing", "ad", advertisement)
// stats
fetch.Config.IncrAds()
return ScrapeImages(fetch, ad, addir)
// register for later checks
DirsVisited[addir] = 1
return ScrapeImages(fetch, advertisement, addir)
}
func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error {
// fetch images
img := 1
adpath := filepath.Join(fetch.Config.Outdir, addir)
@@ -144,26 +167,39 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
return err
}
g := new(errgroup.Group)
egroup := new(errgroup.Group)
for _, imguri := range ad.Images {
for _, imguri := range advertisement.Images {
imguri := imguri
file := filepath.Join(adpath, fmt.Sprintf("%d.jpg", img))
g.Go(func() error {
// we append the suffix later in NewImage() based on image format
basefilename := filepath.Join(adpath, fmt.Sprintf("%d", img))
egroup.Go(func() error {
// wait a little
throttle := GetThrottleTime()
time.Sleep(throttle)
body, err := fetch.Getimage(imguri)
if err != nil {
return err
}
buf := new(bytes.Buffer)
_, err = buf.ReadFrom(body)
if err != nil {
return fmt.Errorf("failed to read from image buffer: %w", err)
}
reader := bytes.NewReader(buf.Bytes())
image, err := NewImage(reader, basefilename, imguri)
if err != nil {
return err
}
buf2 := buf.Bytes() // needed for image writing
image := NewImage(buf, "", imguri)
err = image.CalcHash()
if err != nil {
return err
@@ -171,27 +207,34 @@ func ScrapeImages(fetch *Fetcher, ad *Ad, addir string) error {
if !fetch.Config.ForceDownload {
if image.SimilarExists(cache) {
slog.Debug("similar image exists, not written", "uri", image.Uri)
slog.Debug("similar image exists, not written", "uri", image.URI)
return nil
}
}
err = WriteImage(file, buf2)
_, err = reader.Seek(0, 0)
if err != nil {
return fmt.Errorf("failed to seek(0) on image reader: %w", err)
}
err = WriteImage(image.Filename, reader)
if err != nil {
return err
}
slog.Debug("wrote image", "image", image, "size", len(buf2))
slog.Debug("wrote image", "image", image, "size", buf.Len(), "throttle", throttle)
return nil
})
img++
}
if err := g.Wait(); err != nil {
return err
if err := egroup.Wait(); err != nil {
return fmt.Errorf("failed to finalize error waitgroup: %w", err)
}
fetch.Config.IncrImgs(len(ad.Images))
fetch.Config.IncrImgs(len(advertisement.Images))
return nil
}

114
store.go
View File

@@ -1,5 +1,5 @@
/*
Copyright © 2023-2024 Thomas von Dein
Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -26,77 +26,102 @@ import (
"runtime"
"strings"
tpl "text/template"
"time"
)
func AdDirName(c *Config, ad *Ad) (string, error) {
tmpl, err := tpl.New("adname").Parse(c.Adnametemplate)
type OutdirData struct {
Year, Day, Month string
}
func OutDirName(conf *Config) (string, error) {
tmpl, err := tpl.New("outdir").Parse(conf.Outdir)
if err != nil {
return "", err
return "", fmt.Errorf("failed to parse outdir template: %w", err)
}
buf := bytes.Buffer{}
err = tmpl.Execute(&buf, ad)
now := time.Now()
data := OutdirData{
Year: now.Format("2006"),
Month: now.Format("01"),
Day: now.Format("02"),
}
err = tmpl.Execute(&buf, data)
if err != nil {
return "", err
return "", fmt.Errorf("failed to execute outdir template: %w", err)
}
return buf.String(), nil
}
func WriteAd(c *Config, ad *Ad) (string, error) {
// prepare ad dir name
addir, err := AdDirName(c, ad)
func AdDirName(conf *Config, advertisement *Ad) (string, error) {
tmpl, err := tpl.New("adname").Parse(conf.Adnametemplate)
if err != nil {
return "", err
return "", fmt.Errorf("failed to parse adname template: %w", err)
}
// prepare output dir
dir := filepath.Join(c.Outdir, addir)
err = Mkdir(dir)
buf := bytes.Buffer{}
err = tmpl.Execute(&buf, advertisement)
if err != nil {
return "", err
return "", fmt.Errorf("failed to execute adname template: %w", err)
}
return buf.String(), nil
}
func WriteAd(conf *Config, advertisement *Ad, addir string) error {
// prepare output dir
dir := filepath.Join(conf.Outdir, addir)
err := Mkdir(dir)
if err != nil {
return err
}
// write ad file
listingfile := filepath.Join(dir, "Adlisting.txt")
f, err := os.Create(listingfile)
if err != nil {
return "", err
}
defer f.Close()
if runtime.GOOS == "windows" {
ad.Text = strings.ReplaceAll(ad.Text, "<br/>", "\r\n")
listingfd, err := os.Create(listingfile)
if err != nil {
return fmt.Errorf("failed to create Adlisting.txt: %w", err)
}
defer listingfd.Close()
if runtime.GOOS == WIN {
advertisement.Text = strings.ReplaceAll(advertisement.Text, "<br/>", "\r\n")
} else {
ad.Text = strings.ReplaceAll(ad.Text, "<br/>", "\n")
advertisement.Text = strings.ReplaceAll(advertisement.Text, "<br/>", "\n")
}
tmpl, err := tpl.New("adlisting").Parse(c.Template)
tmpl, err := tpl.New("adlisting").Parse(conf.Template)
if err != nil {
return "", err
return fmt.Errorf("failed to parse adlisting template: %w", err)
}
err = tmpl.Execute(f, ad)
err = tmpl.Execute(listingfd, advertisement)
if err != nil {
return "", err
return fmt.Errorf("failed to execute adlisting template: %w", err)
}
slog.Info("wrote ad listing", "listingfile", listingfile)
return addir, nil
return nil
}
func WriteImage(filename string, buf []byte) error {
func WriteImage(filename string, reader *bytes.Reader) error {
file, err := os.Create(filename)
if err != nil {
return err
return fmt.Errorf("failed to open image file: %w", err)
}
defer file.Close()
_, err = file.Write(buf)
_, err = reader.WriteTo(file)
if err != nil {
return err
return fmt.Errorf("failed to write to image file: %w", err)
}
return nil
@@ -111,12 +136,12 @@ func ReadImage(filename string) (*bytes.Buffer, error) {
data, err := os.ReadFile(filename)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to read image file: %w", err)
}
_, err = buf.Write(data)
if err != nil {
return nil, err
return nil, fmt.Errorf("failed to write image into buffer: %w", err)
}
return &buf, nil
@@ -124,8 +149,29 @@ func ReadImage(filename string) (*bytes.Buffer, error) {
func fileExists(filename string) bool {
info, err := os.Stat(filename)
if os.IsNotExist(err) {
if err != nil {
// return false on any error
return false
}
return !info.IsDir()
}
// check if an addir has already been processed by current run and
// decide what to do
func CheckAdVisited(conf *Config, adname string) bool {
if Exists(DirsVisited, adname) {
if conf.ForceDownload {
slog.Warn("an ad with the same name has already been downloaded, overwriting", "addir", adname)
return true
}
// don't overwrite
slog.Warn("an ad with the same name has already been downloaded, skipping (use -f to overwrite)", "addir", adname)
return false
}
// overwrite
return true
}

39
store_test.go Normal file
View File

@@ -0,0 +1,39 @@
/*
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bytes"
"testing"
)
// this is a weird thing. WriteImage() is being called in scrape.go
// which is being tested by TestMain() in main_test.go. However, it
// doesn't show up in the coverage report for unknown reasons, so
// here's a single test for it
func TestWriteImage(t *testing.T) {
t.Parallel()
reader := bytes.NewReader([]byte{1, 2, 3, 4, 5, 6, 7, 8})
file := "t/out/t.jpg"
err := WriteImage(file, reader)
if err != nil {
t.Errorf("Could not write mock image to %s: %s", file, err.Error())
}
}

BIN
t/1.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 B

BIN
t/1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.2 KiB

BIN
t/1.webp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.3 KiB

View File

@@ -1,6 +1,6 @@
# empty config for Main() unit tests to force unit tests NOT to use an
# eventually existing ~/.kleingebaeck!
template="""
{{.Title}}{{.Price}}{{.Id}}{{.Category}}{{.Condition}}{{.Created}}
{{.Title}}{{.Price}}{{.ID}}{{.Category}}{{.Condition}}{{.Created}}
"""

View File

@@ -2,5 +2,5 @@ user = 1
loglevel = "verbose"
outdir = "t/out"
template="""
{{.Title}}{{.Price}}{{.Id}}{{.Category}}{{.Condition}}{{.Created}}
{{.Title}}{{.Price}}{{.ID}}{{.Category}}{{.Condition}}{{.Created}}
"""

13
t/httproot/README.md Normal file
View File

@@ -0,0 +1,13 @@
# Mock http server
Install ehfs from https://github.com/mjpclab/extra-http-file-server/.
Install p2cli from https://github.com/wrouesnel/p2cli.
Run `templates/render.sh` to build the file structure.
Run `server.sh` to start the http server.
To scrape an ad from it, use such a URL:
http://localhost:8080/s-anzeige/first-ad/111-11-111

Binary file not shown.

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

4
t/httproot/serve.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/bin/sh
ehfs -a :/s-anzeige:./kleinanzeigen \
-a :/api/v1/prod-ads/images/fc:./img \
-l localhost:8080 -I index.html

View File

@@ -0,0 +1,50 @@
<!DOCTYPE html>
<html lang="de">
<head>
<title>Ad Listing</title>
</head>
<body>
<div class="l-container-row">
<div id="vap-brdcrmb" class="breadcrump">
<a class="breadcrump-link" itemprop="url" href="/" title="Kleinanzeigen ">
<span itemprop="title">Kleinanzeigen </span>
</a>
<a class="breadcrump-link" itemprop="url" href="/egal">
<span itemprop="title">{{ category }}</span></a>
</div>
</div>
{% for image in images %}
<div class="galleryimage-element" data-ix="3">
<img src="http://localhost:8080/api/v1/prod-ads/images/fc/{{ image.id }}?rule=$_59.JPG"/>
</div>
{% endfor %}
<h1 id="viewad-title" class="boxedarticle--title" itemprop="name" data-soldlabel="Verkauft">
{{ title }}</h1>
<div class="boxedarticle--flex--container">
<h2 class="boxedarticle--price" id="viewad-price">
{{ price }}</h2>
</div>
<div id="viewad-extra-info" class="boxedarticle--details--full">
<div><i class="icon icon-small icon-calendar-gray-simple"></i><span>{{ created }}</span></div>
</div>
<div class="splitlinebox l-container-row" id="viewad-details">
<ul class="addetailslist">
<li class="addetailslist--detail">
Zustand<span class="addetailslist--detail--value" >
{{ condition }}</span>
</li>
</ul>
</div>
<div class="l-container last-paragraph-no-margin-bottom">
<p id="viewad-description-text" class="text-force-linebreak " itemprop="description">
{{ text }}
</p>
</div>
</body>
</html>

View File

@@ -0,0 +1,15 @@
<!DOCTYPE html>
<html lang="de" >
<head>
<title>Ads</title>
</head>
<body>
{% for ad in ads %}
<h2 class="text-module-begin">
<a class="ellipsis"
href="/s-anzeige/{{ ad.slug }}/{{ ad.id }}">{{ ad.title }}</a>
</h2>
{% endfor %}
</body>
</html>

15
t/httproot/templates/render.sh Executable file
View File

@@ -0,0 +1,15 @@
#!/bin/sh -x
base="../kleinanzeigen"
rm -rf $base
mkdir -p $base
echo "Generating /s-bestandsliste.html"
p2cli -t index.tpl -i vars.yaml > $base/s-bestandsliste.html
for idx in 0 1; do
slug=$(cat vars.yaml | yq ".ads[$idx].slug")
id=$(cat vars.yaml | yq ".ads[$idx].id")
mkdir -p $base/$slug/$id
cat vars.yaml | yq ".ads[$idx]" | p2cli -t ad.tpl -f yaml > $base/$slug/$id/index.html
done

View File

@@ -0,0 +1,27 @@
ads:
- slug: first-ad
id: 111-11-111
title: First Ad
price: "19 €"
condition: "Sehr gut"
category: "Weitere Elektronik"
created: 21.12.2023
images:
- id: fcf6d664-5258-42c2-bf58-d1b8e9221574
- id: fcf6d664-5258-42c2-bf58-as43as5d43as
text: |
Zu Verkaufen.
Zahlung nur Paypal.
- slug: second-ad
id: 222-22-222
title: Second Ad
price: "200 €"
condition: "Sehr gut"
category: "Elektronik"
created: 21.12.2023
images:
- id: cdas4sd5-5258-42c2-bf58-d1b8e9221574
- id: cdas4sd5-5258-42c2-bf58-as43as5d43as
text: |
Zu Verkaufen.
Zahlung nur Überweisung.

29
util.go
View File

@@ -1,5 +1,5 @@
/*
Copyright © 2023 Thomas von Dein
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,18 +20,21 @@ package main
import (
"bytes"
"errors"
"fmt"
"math/rand"
"os"
"os/exec"
"runtime"
"time"
"github.com/mattn/go-isatty"
)
func Mkdir(dir string) error {
if _, err := os.Stat(dir); errors.Is(err, os.ErrNotExist) {
err := os.Mkdir(dir, os.ModePerm)
err := os.MkdirAll(dir, os.ModePerm)
if err != nil {
return err
return fmt.Errorf("failed to create directory %s: %w", dir, err)
}
}
@@ -42,7 +45,8 @@ func man() error {
man := exec.Command("less", "-")
var b bytes.Buffer
b.Write([]byte(manpage))
b.WriteString(manpage)
man.Stdout = os.Stdout
man.Stdin = &b
@@ -51,7 +55,7 @@ func man() error {
err := man.Run()
if err != nil {
return err
return fmt.Errorf("failed to execute 'less': %w", err)
}
return nil
@@ -59,10 +63,23 @@ func man() error {
// returns TRUE if stdout is NOT a tty or windows
func IsNoTty() bool {
if runtime.GOOS == "windows" || !isatty.IsTerminal(os.Stdout.Fd()) {
if runtime.GOOS == WIN || !isatty.IsTerminal(os.Stdout.Fd()) {
return true
}
// it is a tty
return false
}
func GetThrottleTime() time.Duration {
return time.Duration(rand.Intn(MaxThrottle-MinThrottle+1)+MinThrottle) * time.Millisecond
}
// look if a key in a map exists, generic variant
func Exists[K comparable, V any](m map[K]V, v K) bool {
if _, ok := m[v]; ok {
return true
}
return false
}