Compare commits

..

11 Commits

Author SHA1 Message Date
T.v.Dein
2c62f9eb17 fix invalid mod load (#14)
Co-authored-by: Thomas von Dein <tom@vondein.org>
2023-12-19 18:27:20 +01:00
T.v.Dein
bff0ae553e Bugfixes (#13)
* several fixes:

- fix #9 + #10: switched to koanf module and dropped support for HCL
- fix #11: disabling colors on windows
- fix #12: fixed race condition in go routine call inside for loop,
  images had been downloaded multiple times
- remove hcl support and use toml format (same thing, better parser)
- update documentation and example config on TOML format of config file
- use Config as arg instead of singular args
- use x/errgroup instead of sync.Waitgroup inside image download loop

---------

Co-authored-by: Thomas von Dein <tom@vondein.org>
2023-12-19 18:23:41 +01:00
T.v.Dein
450d44d129 Dev (#8)
* fixed conf parsing: variables can now be omitted from the config
* fix newlines: use CRLF on windows
* bump version

---------

Co-authored-by: Thomas von Dein <tom@vondein.org>
2023-12-18 20:18:37 +01:00
T.v.Dein
18f7e0fe49 added proper install instructions (#7)
Co-authored-by: Thomas von Dein <tom@vondein.org>
2023-12-18 09:48:00 +01:00
T.v.Dein
def063afe9 Merge pull request #6 from TLINDEN/dev 2023-12-18 09:23:55 +01:00
f1908f02cb bump version 2023-12-18 09:23:18 +01:00
4a528ad9d1 fix #5: add exe extension to built windows binaries 2023-12-18 09:22:08 +01:00
5c1161f227 fix #4, use filepath.Join to create portable path's 2023-12-18 09:21:26 +01:00
bd9d8fdb2c fix version finding 2023-12-17 17:53:01 +01:00
T.v.Dein
1ee886c504 Merge pull request #2 from TLINDEN/dev
re-orgainzied code a little, using go templates instead format string
2023-12-17 17:49:27 +01:00
T.v.Dein
d7b13e8a9a Merge pull request #1 from TLINDEN/dev
added custom template support, added more ad data, use concurrency
2023-12-16 20:35:18 +01:00
34 changed files with 1435 additions and 668 deletions

View File

@@ -1,31 +0,0 @@
---
name: Bug report
about: Create a report to help us improve
title: "[bug-report]"
labels: bug
assignees: TLINDEN
---
**Description**
<!-- Please provide a clear and concise description of the issue: -->
**Steps To Reproduce**
<!-- Please detail the steps to reproduce the behavior, execute kleingebaeck with the -d option: -->
**Expected behavior**
<!-- What did you expect to happen instead? -->
**Version information**
<!--
Please provide as much version information as possible:
- if you have just installed a binary, provide the output of: kleingebaeck --version
- if you installed from source, provide the output of: make show-version
- provide additional details: operating system and version and shell environment
-->
**Additional information**

View File

@@ -1,32 +0,0 @@
---
name: Bugreport Deutsch
about: Erzeuge einen Bugreport
title: "[bug-report-de]"
labels: bug
assignees: TLINDEN
---
**Beschreibung**
<!-- Bitte beschreibe den Fehler klar und möglichst präzise: -->
**Schritte um den Fehler zu reproduzieren**
<!-- Bitte gib detailliert an, welche konkreten Schritte zum Fehler
geführt haben, führe kleingebaeck mit der Option -d aus: -->
**Erwartetes Verhalten**
<!-- Welches Verhalten hast Du ursprünglich erwartet? -->
**Versionsinformation**
<!--
Bitte gib uns so viel Versionsinfos wie möglich:
- wenn Du nur das Programm installiert hast: kleingebaeck --version
- wenn Du von Source installiert hast: make show-version
- bitte gib zusätzliche Details an: Betriebssystem + Version, Shellumgebung etc.
-->
**Zusätzliche Informationen**

View File

@@ -1,23 +0,0 @@
---
name: Feature request
about: Suggest a feature
title: "[feature-request]"
labels: feature-request
assignees: TLINDEN
---
**Description**
<!-- Please provide a clear and concise description of the feature you desire: -->
**Version information**
<!--
Just in case the feature is already present, please provide as
much version information as possible:
- if you have just installed a binary, provide the output of: kleingebaeck --version
- if you installed from source, provide the output of: make show-version
- provide additional details: operating system and version and shell environment
-->

View File

@@ -1,20 +0,0 @@
---
name: Featurerequest Deutsch
about: Empfehle ein neues Feature
title: "[feature-request-de]"
labels: feature-request
assignees: TLINDEN
---
**Beschreibung**
<!-- Bitte beschreibe das gewünschte Feature klar und möglichst präzise: -->
**Versionsinformation**
<!--
Bitte gib uns so viel Versionsinfos wie möglich:
- wenn Du nur das Programm installiert hast: kleingebaeck --version
- wenn Du von Source installiert hast: make show-version
- bitte gib zusätzliche Details an: Betriebssystem + Version, Shellumgebung etc.
-->

View File

@@ -1,8 +0,0 @@
---
name: Note to self
about: Internal bugs and wishes
title: "[bug-report]"
labels: bug
assignees: TLINDEN
---

Binary file not shown.

Before

Width:  |  Height:  |  Size: 139 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 33 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.5 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 199 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 263 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 10 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 232 KiB

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 90 KiB

View File

@@ -1,10 +0,0 @@
version: 2
updates:
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "monthly"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "monthly"

5
.gitignore vendored
View File

@@ -1,8 +1,3 @@
test
kleingebaeck
releases
t/out
.bak
t/httproot/out
t/httproot/kleinanzeigen
t/httproot/favicon.ico

91
Makefile Normal file
View File

@@ -0,0 +1,91 @@
# Copyright © 2023 Thomas von Dein
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# no need to modify anything below
# Name of the binary to build; also the basename of the .pod/.1/.go files.
tool      = kleingebaeck

# Program version: taken from the first line in config.go that contains
# VERSION, using the text between the first pair of double quotes.
VERSION   = $(shell grep VERSION config.go | head -1 | cut -d '"' -f2)

archs     = darwin freebsd linux windows

# Installation prefix and ownership used by the install target.
PREFIX    = /usr/local
UID       = root
GID       = 0

# Non-empty only when `pod2text -h` prints something on stdout; the
# manpage rules below are skipped otherwise.
HAVE_POD := $(shell pod2text -h 2>/dev/null)

# These targets name commands, not files. Declaring them phony keeps them
# working even if a file or directory of the same name exists.
.PHONY: all buildlocal install clean test testfuzzy singletest \
	cover-report goupdate buildall release show-versions lint

all: $(tool).1 $(tool).go buildlocal

# Generate the manpage from the POD source.
%.1: %.pod
ifdef HAVE_POD
	pod2man -c "User Commands" -r 1 -s 1 $*.pod > $*.1
endif

# Generate a Go source file which embeds the text manual in the
# variable `manpage`.
%.go: %.pod
ifdef HAVE_POD
	echo "package main" > $*.go
	echo >> $*.go
	echo "var manpage = \`" >> $*.go
	pod2text $*.pod >> $*.go
	echo "\`" >> $*.go
endif

# Build a statically linked binary for the local platform.
buildlocal:
	CGO_LDFLAGS='-static' go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o $(tool)

# Install binary and manpage below $(PREFIX). The binary goes into
# $(PREFIX)/bin, which is the directory created here.
install: buildlocal
	install -d -o $(UID) -g $(GID) $(PREFIX)/bin
	install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1
	install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/bin/
	install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/

clean:
	rm -rf $(tool) coverage.out testdata

test: clean
	go test ./... $(ARGS)

testfuzzy: clean
	go test -fuzz ./... $(ARGS)

singletest:
	@echo "Call like this: make singletest TEST=TestPrepareColumns ARGS=-v"
	go test -run $(TEST) $(ARGS)

cover-report:
	go test ./... -cover -coverprofile=coverage.out
	go tool cover -html=coverage.out

goupdate:
	go get -t -u=patch ./...

# Build release artifacts via mkrel.sh.
buildall:
	./mkrel.sh $(tool) $(VERSION)

# Publish everything in releases/ as a GitHub release tagged v$(VERSION).
release: buildall
	gh release create v$(VERSION) --generate-notes releases/*

# Print the version information requested in bug reports.
show-versions: buildlocal
	@echo "### kleingebaeck version:"
	@./kleingebaeck -v
	@echo
	@echo "### go module versions:"
	@go list -m all
	@echo
	@echo "### go version used for building:"
	@grep -m 1 go go.mod

lint:
	golangci-lint run -p bugs -p unused

View File

@@ -1,357 +0,0 @@
## Kleingebäck - kleinanzeigen.de Backup
![Kleingebaeck Logo](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/kleingebaecklogo-small.png)
[![Go Report Card](https://goreportcard.com/badge/codeberg.org/scip/kleingebaeck)](https://goreportcard.com/report/codeberg.org/scip/kleingebaeck)
[![status-badge](https://ci.codeberg.org/api/badges/15530/status.svg)](https://ci.codeberg.org/repos/15530)
![GitHub License](https://img.shields.io/github/license/tlinden/kleingebaeck)
[![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://codeberg.org/scip/kleingebaeck/releases)
[![English](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/english.png)](https://codeberg.org/scip/kleingebaeck/raw/branch/main/README.md)
Mit diesem Tool kann man seine Anzeigen bei https://kleinanzeigen.de sichern.
> [!IMPORTANT]
> Diese Software wird jetzt bei Codeberg weitergepflegt: [Codeberg](https://codeberg.org/scip/kleingebaeck/).
Es kann alle Anzeigen eines Users (oder nur eine Ausgewählte)
inklusive der Bilder herunterladen, die in einem Verzeichnis pro
Anzeige gespeichert werden. In dem Verzeichnis wird eine Datei
`Adlisting.txt` erstellt, in der sich die Inhalte der Anzeige wie
Titel, Preis, Text etc befinden. Bilder werden natürlich auch heruntergeladen.
## ACHTUNG - SICHERHEITS-UPDATE
Fertige vorcompilierte Programme älter als Version `v0.3.12` sind von
Schwachstellen in der Behandlung von HTTP und Zertifikaten
betroffen. Falls Du eine ältere Kleingebäck-Version im Einsatz hast,
bitte update auf Version `v0.3.12` oder höher. Bitte lies auch die [Release Notes für
v0.3.12](https://codeberg.org/scip/kleingebaeck/releases/tag/v0.3.12)
für mehr Details.
## Screenshots
Das ist die Hauptseite meines kleinanzeigen.de Accounts:
![Index](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/kleinanzeigen-index.png)
Hier sichere ich meine Anzeigen:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/kleinanzeigen-download.png)
Backupverzeichnis nach dem Download:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/kleinanzeigen-backup.png)
Verzeichnis einer Anzeige:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/kleinanzeigen-ad.png)
**Das gleiche unter Windows:**
Anzeigen Sichern:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/cmd-windows.jpg)
Backupverzeichnis nach dem Download
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/liste-windows.jpg)
Und eine Anzeige:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/adlisting-windows.jpg)
## Installation
Das Tool hat keine weiteren Abhängigkeiten und erfordert auch keine
Anmeldung oder ähnliches. Man lädt sich einfach die ausführbare Datei
für seine Plattform herunter und kann direkt loslegen.
### Installation des vorcompilierten Programms
Auf der Seite [des letzten Releases](https://codeberg.org/scip/kleingebaeck/releases) findet man das Programm für sein Betriebssystem und die Plattform (z.B. Windows + Intel).
Es gibt 2 Varianten:
1. Direkt das fertige Programm für seine Plattform+OS herunterladen,
z.B. `kleingebaeck-linux-amd64-0.0.5`, nach `kleingebaeck`
umbenennen und in ein Verzeichnis kopieren, das im `PATH` ist,
(z.B. nach `$HOME/bin` oder als root nach `/usr/local/bin`).
Um sicher zu gehen, dass an dem Programm nichts verändert wurde, kann
man die Signatur vergleichen. Für jeden Download gibt es eine dazu
passende Signatur, in unserem Beispiel wäre das
`kleingebaeck-linux-amd64-0.0.5.sha256`.
Zum Verifizieren ausführen:
```shell
cat kleingebaeck-linux-amd64-0.0.5.sha256 && sha256sum kleingebaeck-linux-amd64-0.0.5
```
Man sollte zweimal den gleichen SHA256 Hash sehen.
2. Man kann auch einen Tarball (tgz Dateiendung) herunterladen,
auspacken und mit GNU Make installieren:
```shell
tar xvfz kleingebaeck-linux-amd64-0.0.5.tar.gz
cd kleingebaeck-linux-amd64-0.0.5
sudo make install
```
### Installation aus dem Sourcecode
Man muss eine funktionierende Go Buildumgebung in der Version 1.21
installiert haben, um das Programm selber zu compilieren. GNU Make ist
hilfreich, aber nicht unbedingt erforderlich.
Um das Programm zu compilieren, muss man folgende Schritte ausführen:
```shell
git clone https://codeberg.org/scip/kleingebaeck.git
cd kleingebaeck
go mod tidy
make # (oder go build)
sudo make install
```
### Docker image benutzen
Ein fertiges Dockerimage mit der aktuellen Programmversion ist immer
verfügbar. Man kann damit z.B. das Tool testen, bevor man es dauerhaft
benutzen möchte.
Um das Image herunterzuladen:
```
docker pull ghcr.io/tlinden/kleingebaeck:latest
```
Um kleingebäck im Image auszuführen und Daten ins lokale Filesystem zu
sichern, kann man so vorgehen:
```shell
mkdir anzeigen
docker run -u `id -u $USER` -v ./anzeigen:/backup ghcr.io/tlinden/kleingebaeck:latest -u XXX -v
ls -l anzeigen/ein-buch-mit-leeren-seiten
total 792
drwxr-xr-x 2 scip root 4096 Jan 23 12:58 ./
drwxr-xr-x 3 scip scip 4096 Jan 23 12:58 ../
-rw-r--r-- 1 scip root 131650 Jan 23 12:58 1.jpg
-rw-r--r-- 1 scip root 81832 Jan 23 12:58 2.jpg
-rw-r--r-- 1 scip root 134050 Jan 23 12:58 3.jpg
-rw-r--r-- 1 scip root 1166 Jan 23 12:58 Adlisting.txt
```
Hier wird der aktuelle User auf den User im Image gemappt und das
lokale Verzeichnis `anzeigen` nach `/backup` innerhalb des Images
gemountet.
Die Optionen `-u XXX -v` sind kleingebäck Optionen. Ersetze `XXX`
durch Deine tatsächliche kleinanzeigen.de Userid.
Eine Liste verfügbarer Images findet man [hier](https://codeberg.org/scip/kleingebaeck/pkgs/container/kleingebaeck/versions?filters%5Bversion_type%5D=tagged)
## Kommandozeilen Optionen:
```
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
Options:
-u --user <uid> Backup ads from user with uid <uid>.
-d --debug Enable debug output.
-v --verbose Enable verbose output.
-o --outdir <dir> Set output dir (default: current directory)
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
If one or more <ad-listing-url>'s are specified, only backup those,
otherwise backup all ads of the given user.
```
## Konfiguration
Man kann anstelle von Kommandlineoptionen auch eine
Konfigurationsdatei verwenden. Sie befindet sich standardmäßig in
`~/.kleingebaeck` aber man kann mit dem Parameter `-c` auch eine
andere Datei angeben.
Das Format (TOML) ist einfach:
```
user = 1010101
loglevel = verbose
outdir = "test"
```
Im Source gibt es eine Beispieldatei `example.conf` mit Kommentaren.
## Umgebungsvariablen
Man kann darüber hinaus auch Umgebungsvariablen verwenden. Sie
entsprechen den Konfigurationsoptionen, aber gross geschrieben mit dem
Präfix `KLEINGEBAECK_`, z.B.
```shell
% KLEINGEBAECK_OUTDIR=/backup kleingebaeck -v
```
## Benutzung
Um das Tool einsetzen zu können, muss man zunächst seine Userid bei
kleinanzeigen.de herausfinden. Dazu ruft man am besten die Liste
seiner Anzeigen auf, während man NICHT eingeloggt ist:
https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
Der `XXXXX` Teil der URL ist die Userid.
Trage diese Userid in der Konfigurationsdatei ein wie oben
beschrieben. Gib ausserdem das Ausgabeverzeichnis an. Dann einfach nur
`kleingebaeck` ausführen.
Innerhalb des Ausgabeverzeichnisses wird sich dann pro Anzeige ein
Unterverzeichnis befinden. Pro Anzeige gibt es eine Datei
`Adlisting.txt`, die etwa so aussieht:
```default
Title: A book I sell
Price: 99 € VB
Id: 1919191919
Category: Sachbücher
Condition: Sehr Gut
Created: 10.12.2023
This is the description text.
Pay with paypal.
```
Sowie alle Bilder.
Das Format kann man mit der Variable `template` in der Konfiguration
ändern. Die `example.conf` enthält ein Beispiel für das Standard Template.
## Verhalten des Tools
Es gibt einige Dinge über das Verhalten von kleingebäck, über die Du
Bescheid wissen solltest:
- alle HTML Seiten und Bilder werden immer heruntergeladen
- es wird ein (konfigurierbarer) Useragent verwendet
- HTTP Cookies werden beachtet
- bei Fehlern wird dreimal mit unterschiedlichem Abstand erneut
versucht
- Bilder Downloads laufen parallelisiert mit leicht unterschiedlichen
zeitlichen Abständen ab
- Gleich aussehende Bilder werden nicht überschrieben
Der letzte Punkt muss genauer erläutert werden:
Wenn man bei Kleinanzeigen.de eine Anzeige einstellt und Bilder
postet, werden diese dort in ihrer Grösse reduziert (durch Kompression
und Verkleinerung der Bilder usw.). Diese reduzierten Bilder werden
dann von kleingebäck heruntergeladen. Falls Du Deine original Bilder
behalten hast, kannst Du diese danach in das Backupverzeichnis
kopieren. Bei einem erneuten kleingebäck-Lauf werden diese Bilder dann
nicht überschrieben.
Wir verwenden dafür einen Algorithmus namens [distance
hashing](https://github.com/corona10/goimagehash). Dieser Algorithmus
prüft die Ähnlichkeit von Bildern. Diese können in ihrer Auflösung,
Kompression, Farbtiefe und vielem mehr manipuliert worden sein und
trotzdem als das "gleiche Bild" erkannt werden (wohlgemerkt nicht "das
selbe": die Dateien sind durchaus unterschiedlich!). Bis zu einer
Distance von 5 überschreiben wir keine Bilder, weil wir dann davon
ausgehen, dass das lokal Vorhandene das Original ist.
Bitte beachte aber, dass dies KEIN Cachingmechanismus ist: die Bilder
werden trotzdem immer alle heruntergeladen. Das muss so sein, da wir
uns nicht die Dateinamen anschauen können, da kleinanzeigen.de diese
nämlich zu Zahlen umbenennt. Und die Dateinamen können sich auch
ändern, wenn der User in der Anzeige die Bilder umarrangiert hat.
Du kannst dieses Verhalten mit der Option **--force** ausschalten. Du
kannst ausserdem mit der Option **--ignoreerrors** auch alle Fehler
ignorieren, die beim Bilderdownload auftreten könnten.
## Documentation
Die Dokumentation kann man
[online](https://codeberg.org/scip/kleingebaeck/raw/branch/main/kleingebaeck.pod)
oder lokal lesen mit: `kleingebaeck --manual`. Hat man das Tool mit
dem Tarball installiert, funktioniert auch `man kleingebaeck`.
## Kleingebäck?
Der Name kommt von "kleinanzeigen backup", verkürzt "klein back", das
englisch ausgesprochene "back" (deutsch bäck) führt dann zu "Kleingebäck".
## Wo bekommt man Hilfe
Obwohl ich gerne von kleingebäck Benutzern in privaten Mails höre, ist
das doch der beste Weg, die Anfrage zu übersehen und zu vergessen.
Um einen Fehler, ein unerwartetes Verhalten, eine Feature Request oder
einen Patch zu übermitteln, eröffne daher bitte einen Issue unter:
https://codeberg.org/scip/kleingebaeck/issues. Danke!
Bitte gib den fehlgeschlagenen Befehl an und rufe ihn auch mit Debugging
`-d` auf.
## Ähnliche Projekte
Ich konnte kein Projekt finden, das speziell dafür geeignet ist,
Anzeigen bei kleinanzeigen.de zu sichern.
Aber es gibt ein Projekt, mit dem man ebenfalls Backups erstellen
kann: [kleinanzeigen-bot](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/).
Aber Vorsicht: kleinanzeigen.de bekämpft Bots aktiv, mit diesem hier
gibt es regelmäßige Probleme, z.B.:
[issue](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/219).
Das Hauptproblem ist, dass diese Art von Bot sich mit Deinem Account
aktiv einloggt und mit der Seite interagiert. Damit kann die Firma die
Aktivitäten recht einfach Deinem User zuordnen und diesen **sperren**!
Also sei bitte vorsichtig!
**Kleingebäck** erfordert keinen Login, es verwendet lediglich die
öffentlich verfügbare Webseite und ruft diese auf, wie ein normaler
Browser. Tatsächlich gibt es meiner Meinung nach keinen Unterschied zu
einem Browserclient: beide laufen auf Anwenderseite auf Initiative
eines Benutzers. Und mit welchen Browser ich eine Webseite aufrufe,
bleibt immer noch mir überlassen und muss mir nicht von irgendwem
vorgeschrieben werden. Das schliesst die Verwendung von Kleingebäck
mit ein.
Hinzu kommt, dass dieses Tool nicht dazu gedacht ist, rund um die Uhr
zu laufen. Man ruft es ab und zu mal auf, wenn man halt neue Anzeigen
eingestellt hat, vielleicht einmal die Woche oder so. Man weiss ja
selber, wann man was geändert hat. Man benötigt trotzdem den Zugriff
mit dem Browser oder der mobilen App um Kleinanzeigen.de verwalten zu
können.
Meiner Ansicht nach ist das Risiko also sehr minimal, es handelt sich
meiner Meinung nach auch nicht um eine Verletzung der AGBs dort. Aber
das ist nur meine persönliche Meinung, bitte beachtet das. Am Ende
müsst Ihr selbst einschätzen und beurteilen wie hoch Ihr das Risiko
seht und ob Ihr es eingehen möchtet. Für eventuell auftretende
Konsequenzen bin ich nicht verantwortlich. Siehe auch [GPL Lizenz](LICENSE).
Es gibt noch ein weiteres Tool namens
[kleinanzeigen-enhanced](https://kleinanzeigen-enhanced.de/). Das ist
eine kostenpflichtige vollständige Anzeigenverwaltung für
Profinutzer. Man muss eine monatliche Abogebühr bezahlen. Das Tool
ist als Browsererweiterung für Google Chrome implementiert, was
erklärt, warum sie Anzeigen erstellen, ändern und löschen können,
obwohl es gar keine öffentliche API gibt. Sieht nach einer netten
ausgereiften Lösung aus. Mit Backups.
## Copyright und License
Lizensiert unter der GNU GENERAL PUBLIC LICENSE Version 3.
## Autor
T.v.Dein <tom AT vondein DOT org>

198
README.md
View File

@@ -1,17 +1,12 @@
## Kleingebäck - kleinanzeigen.de Backup
![Kleingebaeck Logo](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/kleingebaecklogo-small.png)
![Kleingebaeck Logo](https://github.com/TLINDEN/kleingebaeck/blob/main/.github/assets/kleingebaecklogo-small.png)
[![Go Report Card](https://goreportcard.com/badge/codeberg.org/scip/kleingebaeck)](https://goreportcard.com/report/codeberg.org/scip/kleingebaeck)
[![status-badge](https://ci.codeberg.org/api/badges/15530/status.svg)](https://ci.codeberg.org/repos/15530)
[![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://github.com/tlinden/kleingebaeck/blob/master/LICENSE)
[![Go Report Card](https://goreportcard.com/badge/github.com/tlinden/kleingebaeck)](https://goreportcard.com/report/github.com/tlinden/kleingebaeck)
![GitHub License](https://img.shields.io/github/license/tlinden/kleingebaeck)
[![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://codeberg.org/scip/kleingebaeck/releases)
[![German](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/german.png)](https://codeberg.org/scip/kleingebaeck/raw/branch/main/README-de.md)
[![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://github.com/TLINDEN/kleingebaeck/releases/latest)
[Die deutsche Version des READMEs findet Ihr hier](README-de.md).
> [!IMPORTANT]
> This software is now being maintained on [Codeberg](https://codeberg.org/scip/kleingebaeck/).
This tool can be used to backup ads on the german ad page https://kleinanzeigen.de
@@ -20,47 +15,6 @@ directory, each ad into its own subdirectory. The backup will contain
a textfile `Adlisting.txt` which contains the ad contents as the
title, body, price etc. All images will be downloaded as well.
## CAUTION - SECURITY UPDATE
Binary releases prior to version `v0.3.12` are affected by
vulnerabilities in HTTP and certificate handling. If you are using
such a binary, please update to `v0.3.12` or higher. Please also refer
to the [Release Notes of
v0.3.12](https://codeberg.org/scip/kleingebaeck/releases/tag/v0.3.12)
for more details.
## Screenshots
This is the index of my kleinanzeigen.de Account:
![Index](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/kleinanzeigen-index.png)
Here I download my ads on the commandline:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/kleinanzeigen-download.png)
And this is the backup directory after download:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/kleinanzeigen-backup.png)
Here's a directory for one ad:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/kleinanzeigen-ad.png)
**The same thing under windows:**
Downloading ads:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/cmd-windows.jpg)
Backup directory after download:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/liste-windows.jpg)
And one ad listing directory:
![Download](https://codeberg.org/scip/kleingebaeck/raw/branch/main/.github/assets/adlisting-windows.jpg)
## Installation
The tool doesn't need authentication and doesn't have any
@@ -70,10 +24,10 @@ releases page and you're good to go.
### Installation using a pre-compiled binary
Go to the [latest release
page](https://codeberg.org/scip/kleingebaeck/releases) and
page](https://github.com/TLINDEN/kleingebaeck/releases/latest) and
look for your OS and platform. There are two options to install the binary:
1. Directly download the binary for your platform,
1. Directly download the binary for your platoform,
e.g. `kleingebaeck-linux-amd64-0.0.5`, rename it to `kleingebaeck`
(or whatever you like more!) and put it into your bin dir
(e.g. `$HOME/bin` or as root to `/usr/local/bin`).
@@ -107,56 +61,19 @@ installed - `make`.
To install after building either copy the binary or execute `sudo make install`.
### Using the docker image
A pre-built docker image is available, which you can use to test the
app without installing it. To download:
```shell
docker pull ghcr.io/tlinden/kleingebaeck:latest
```
To execute kleingebaeck inside the image and download ads to a local
directory, do something like this:
```shell
mkdir myads
docker run -u `id -u $USER` -v ./myads:/backup ghcr.io/tlinden/kleingebaeck:latest -u XXX -v
ls -l myads/ein-buch-mit-leeren-seiten
total 792
drwxr-xr-x 2 scip root 4096 Jan 23 12:58 ./
drwxr-xr-x 3 scip scip 4096 Jan 23 12:58 ../
-rw-r--r-- 1 scip root 131650 Jan 23 12:58 1.jpg
-rw-r--r-- 1 scip root 81832 Jan 23 12:58 2.jpg
-rw-r--r-- 1 scip root 134050 Jan 23 12:58 3.jpg
-rw-r--r-- 1 scip root 1166 Jan 23 12:58 Adlisting.txt
```
We map the local user to the one inside the image so the permission
will match. You'll need to create the directory first before executing
docker run. And the local directory `myads` will be mapped to
`/backup` inside the container.
The options `-u XXX -v` are kleingebaeck options, replace `XXX` with
your actual kleinanzeigen.de user id.
A list of available images is [here](https://codeberg.org/scip/kleingebaeck/pkgs/container/kleingebaeck/versions?filters%5Bversion_type%5D=tagged)
## Commandline options:
```
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
Options:
-u --user <uid> Backup ads from user with uid <uid>.
-d --debug Enable debug output.
-v --verbose Enable verbose output.
-o --outdir <dir> Set output dir (default: current directory)
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
--user -u <uid> Backup ads from user with uid <uid>.
--debug -d Enable debug output.
--verbose -v Enable verbose output.
--outdir -o <dir> Set output dir (default: current directory)
--limit -l <num> Limit the ads to download to <num>, default: load all.
--config -c <file> Use config file <file> (default: ~/.kleingebaeck).
--manual -m Show manual.
--help -h Show usage.
If one or more <ad-listing-url>'s are specified, only backup those,
otherwise backup all ads of the given user.
@@ -176,13 +93,6 @@ loglevel = verbose
outdir = "test"
```
## Environment Variables
Kleingebaeck can also be configured using environment variables. Just prefix the config variables with `KLEINGEBAECK_` and put them to upper case. Eg:
```shell
% KLEINGEBAECK_OUTDIR=/backup kleingebaeck -v
```
## Usage
To setup the tool, you need to lookup your userid on
@@ -203,11 +113,9 @@ somewhat like this:
```default
Title: A book I sell
Price: 99 € VB
Shipping: 6,90 €
Id: 1919191919
Category: Sachbücher
Condition: Sehr Gut
Type: Buch
Created: 10.12.2023
This is the description text.
@@ -220,52 +128,6 @@ variable. The supplied sample config contains the default template.
All images will be stored in the same directory.
## Tool Behavior
There are a bunch of things you might want to know about the behavior
of the kleingebäck tool:
- all HTML pages and IMAGEs are always being downloaded
- we use a (customizable) user agent
- we respect HTTP cookies
- in the case of an error, the tool does 3 retries, the time it waits
between tries is longer for each retry
- image download is parallelized using small time differences to look
more natural
- same images are not being overwritten on subsequent download
The latter needs to be elaborated a bit more:
If you publish an ad on kleinanzeigen.de and post images, those images
will be reduced in size by the site (by compressing and down sizing
them). These reduced images will be downloaded by kleingebäck. However,
you may still own the original images and may want to put them into
that backup directory so that you have all things for one ad together.
You can easily do that, because kleingebäck won't overwrite those
original images. It uses something called a distance hash using
[goimagehash](https://github.com/corona10/goimagehash). This
algorithm checks the similarity of images. If an image has been
resized it is still very similar to the original one. We accept a
maximum of a distance of 5, everything above leads to overwrite.
This works with resizes, cropped and otherwise manipulated images as
long as the image still shows the original contents good enough.
Also note, that this is NOT a caching mechanism: the images will be
downloaded anyway during each run. We also can't look at the file
names because kleinanzeigen.de renames all images to numbers. And
those might even change if the user re-arranges the images.
You can override this behavior using the **--force** option. Another
option, **--ignoreerrors**, can be used to ignore all kinds of image
errors.
## Documentation
You can read the documentation [online](https://codeberg.org/scip/kleingebaeck/raw/branch/main/kleingebaeck.pod) or locally once you have installed kleingebaeck with: `kleingebaeck --manual`.
## Kleingebäck?
The name is derived from "kleinanzeigen backup": "klein" (german for
@@ -280,40 +142,12 @@ that's the best way for me to forget to do something.
In order to report a bug, unexpected behavior, feature requests or to
submit a patch, please open an issue on github:
https://codeberg.org/scip/kleingebaeck/issues.
https://github.com/TLINDEN/kleingebaeck/issues.
Please repeat the failing command with debugging enabled `-d` and
include the output in the issue.
## Related projects
I could not find any projects specifically designed to backup
kleinanzeigen.de ads, however there's a bot project which is also able
to download ads:
[kleinanzeigen-bot](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/). However,
be aware that kleinanzeigen.de is actively fighting bots! Look at this
[issue](https://github.com/Second-Hand-Friends/kleinanzeigen-bot/issues/219). The
problem with these kind of bots is, that they login into your account
using your credentials. If the company is able to detect bot activity
they can associate it easily with your account and **lock you
out**. So be careful.
**kleingebäck** doesn't need to login, it just accesses public
available web pages. Kleinanzeigen.de could hardly do anything against
it, once because it is legal. There's no difference between a browser
and a commandline client. Both run on the clientside and it is not
kleinanzeigen.de's decision which software one uses to access their
pages. And second: because you can use it to download any ads, not
just yours. So it is not really clear if the activity is associated in
any way with the ad owner. In addition to that comes the fact that
kleingebäck is just a backup tool. It is not intended to be used on a
daily basis. You cannot use it to view regular ads or maintain your
own ads. You'll need to use the mobile app or the browser page with a
login. So, in my point of view, the risk is very minimal.
There is another Tool available named [kleinanzeigen-enhanced](https://kleinanzeigen-enhanced.de/). It is a complete Ad management system targeting primarily commercial users. You have to pay a monthly fee, perhaps there's also a free version available, but I haven't checked. The tool is implemented as a Chrome browser extension, which explains why it was possible to implement it without an API. It seems to be a nice solution for power users by the looks of it. And it includes backups.
## Copyright and License
## Copyright und License
Licensed under the GNU GENERAL PUBLIC LICENSE version 3.

178
config.go Normal file
View File

@@ -0,0 +1,178 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"errors"
"fmt"
"os"
"path/filepath"
"runtime"
"github.com/knadh/koanf/parsers/toml"
"github.com/knadh/koanf/providers/confmap"
"github.com/knadh/koanf/providers/file"
"github.com/knadh/koanf/providers/posflag"
"github.com/knadh/koanf/v2"
flag "github.com/spf13/pflag"
)
// Program wide constants.
const (
	// VERSION is the program version as printed by --version.
	VERSION string = "0.1.0"

	// Baseuri and Listuri together form the address of a user's ad
	// listing page on kleinanzeigen.de.
	Baseuri string = "https://www.kleinanzeigen.de"
	Listuri string = "/s-bestandsliste.html"

	// Defaultdir is the current directory.
	Defaultdir string = "."

	// DefaultTemplate is the text/template used to render an ad when no
	// template is configured. Lines end with LF.
	DefaultTemplate string = "Title: {{.Title}}\n" +
		"Price: {{.Price}}\n" +
		"Id: {{.Id}}\n" +
		"Category: {{.Category}}\n" +
		"Condition: {{.Condition}}\n" +
		"Created: {{.Created}}\n" +
		"\n" +
		"{{.Text}}\n"

	// DefaultTemplateWin is the same template with CRLF line endings,
	// selected when running on windows.
	DefaultTemplateWin string = "Title: {{.Title}}\r\n" +
		"Price: {{.Price}}\r\n" +
		"Id: {{.Id}}\r\n" +
		"Category: {{.Category}}\r\n" +
		"Condition: {{.Condition}}\r\n" +
		"Created: {{.Created}}\r\n" +
		"\r\n" +
		"{{.Text}}\r\n"

	// Useragent is sent as the User-Agent header with every request.
	Useragent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
)
// Usage is the help text shown for --help and by the flag set's custom
// usage function. It is printed verbatim, so keep it in sync with the
// flags registered in InitConfig.
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
Usage: kleingebaeck [-dvVhmoclu] [<ad-listing-url>,...]
Options:
--user -u <uid> Backup ads from user with uid <uid>.
--debug -d Enable debug output.
--verbose -v Enable verbose output.
--outdir -o <dir> Set output dir (default: current directory)
--limit -l <num> Limit the ads to download to <num>, default: load all.
--config -c <file> Use config file <file> (default: ~/.kleingebaeck).
--manual -m Show manual.
--help -h Show usage.
--version -V Show program version.
If one or more ad listing url's are specified, only backup those,
otherwise backup all ads of the given user.`
// Config carries the merged settings from defaults, config file and
// command line, plus the download counters reported at the end of a run.
// Fields with a koanf tag are filled by InitConfig; the others are set by
// the program itself.
type Config struct {
	Verbose          bool     `koanf:"verbose"` // loglevel=info
	Debug            bool     `koanf:"debug"`   // loglevel=debug
	Showversion      bool     `koanf:"version"` // -V
	Showhelp         bool     `koanf:"help"`    // -h
	Showmanual       bool     `koanf:"manual"`  // -m
	User             int      `koanf:"user"`    // kleinanzeigen.de user id
	Outdir           string   `koanf:"outdir"`
	Template         string   `koanf:"template"`
	Loglevel         string   `koanf:"loglevel"`
	Limit            int      `koanf:"limit"`
	Adlinks          []string // remaining command line arguments
	StatsCountAds    int      // number of ads counted via IncrAds
	StatsCountImages int      // number of images counted via IncrImgs
}

// IncrAds adds one to the ad counter.
func (c *Config) IncrAds() {
	c.StatsCountAds += 1
}

// IncrImgs adds num to the image counter.
func (c *Config) IncrImgs(num int) {
	c.StatsCountImages = c.StatsCountImages + num
}
// InitConfig builds the program configuration from three layers, each one
// overriding the previous: built-in defaults, config file(s) and command
// line flags.
//
// If -c/--config is given only that file is considered, otherwise a list
// of well-known locations is tried. Files that do not exist and
// directories are skipped silently. Any other problem (unreadable file,
// parse error, invalid flag) is returned as an error.
//
// Positional arguments left after flag parsing are stored in
// Config.Adlinks. A loglevel of "verbose" or "debug" from the config file
// switches on Config.Verbose or Config.Debug respectively.
func InitConfig() (*Config, error) {
	k := koanf.New(".")

	// determine template based on os
	template := DefaultTemplate
	if runtime.GOOS == "windows" {
		template = DefaultTemplateWin
	}

	// Load default values using the confmap provider. The keys have to
	// match the koanf tags of Config, otherwise they never reach it.
	defaults := map[string]any{
		"template": template,
		"outdir":   Defaultdir,
		"loglevel": "notice",
		"user":     0,
	}
	if err := k.Load(confmap.Provider(defaults, "."), nil); err != nil {
		return nil, fmt.Errorf("error loading defaults: %w", err)
	}

	// setup custom usage
	f := flag.NewFlagSet("config", flag.ContinueOnError)
	f.Usage = func() {
		fmt.Println(Usage)
		os.Exit(0)
	}

	// parse commandline flags
	f.StringP("config", "c", "", "config file")
	f.StringP("outdir", "o", "", "directory where to store ads")
	f.IntP("user", "u", 0, "user id")
	f.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)")
	f.BoolP("verbose", "v", false, "be verbose")
	f.BoolP("debug", "d", false, "enable debug log")
	f.BoolP("version", "V", false, "show program version")
	f.BoolP("help", "h", false, "show usage")
	f.BoolP("manual", "m", false, "show manual")

	// With ContinueOnError the flag set neither prints nor exits on bad
	// input, so the error has to be handed to the caller.
	if err := f.Parse(os.Args[1:]); err != nil {
		return nil, fmt.Errorf("error parsing flags: %w", err)
	}

	// generate a list of config files to try to load, including the
	// one provided via -c, if any
	configfile, err := f.GetString("config")
	if err != nil {
		return nil, fmt.Errorf("error loading flags: %w", err)
	}

	var configfiles []string
	if configfile != "" {
		configfiles = []string{configfile}
	} else {
		configfiles = []string{
			"/etc/kleingebaeck.conf", "/usr/local/etc/kleingebaeck.conf", // unix variants
		}

		// Only look below the home directory if there is one; joining
		// an empty home would silently yield cwd-relative paths.
		if home, err := os.UserHomeDir(); err == nil {
			configfiles = append(configfiles,
				filepath.Join(home, ".config", "kleingebaeck", "config"),
				filepath.Join(home, ".kleingebaeck"),
			)
		}

		configfiles = append(configfiles, "kleingebaeck.conf")
	}

	// Load the config file[s]
	for _, cfgfile := range configfiles {
		info, err := os.Stat(cfgfile)

		switch {
		case errors.Is(err, os.ErrNotExist):
			// we ignore the file if it doesn't exist
			continue
		case err != nil:
			// e.g. permission denied: info is nil here and must not be used
			return nil, fmt.Errorf("error loading config file: %w", err)
		case info.IsDir():
			continue
		}

		if err := k.Load(file.Provider(cfgfile), toml.Parser()); err != nil {
			return nil, fmt.Errorf("error loading config file: %w", err)
		}
	}

	// command line overrides config file
	if err := k.Load(posflag.Provider(f, ".", k), nil); err != nil {
		return nil, fmt.Errorf("error loading flags: %w", err)
	}

	// fetch values
	conf := &Config{}
	if err := k.Unmarshal("", conf); err != nil {
		return nil, fmt.Errorf("error unmarshalling: %w", err)
	}

	// adjust loglevel
	switch conf.Loglevel {
	case "verbose":
		conf.Verbose = true
	case "debug":
		conf.Debug = true
	}

	// are there any args left on commandline? if so treat them as adlinks
	conf.Adlinks = f.Args()

	return conf, nil
}

30
example.conf Normal file
View File

@@ -0,0 +1,30 @@
#
# kleingebaeck sample configuration file.
# put this to ~/.kleingebaeck.
#
# Comments start with the '#' character.
# kleinanzeigen.de user-id. must be an unquoted number
user = 00000000
# log verbosity. "verbose" is the same as -v, "debug" the same as -d.
# default is "notice". must be a quoted string.
loglevel = "verbose"
# directory where to store downloaded ads. kleingebaeck will try to
# create it. must be a quoted string.
outdir = "test"
# template for stored adlistings. To enable it, remove the comment
# chars up until the last #"""
#template="""
#Title: {{.Title}}
#Price: {{.Price}}
#Id: {{.Id}}
#Category: {{.Category}}
#Condition: {{.Condition}}
#Created: {{.Created}}
#{{.Text}}
# """

30
go.mod Normal file
View File

@@ -0,0 +1,30 @@
module kleingebaeck
go 1.21
require (
astuart.co/goq v1.0.0
github.com/knadh/koanf/parsers/toml v0.1.0
github.com/knadh/koanf/providers/confmap v0.1.0
github.com/knadh/koanf/providers/file v0.1.0
github.com/knadh/koanf/providers/posflag v0.1.0
github.com/knadh/koanf/v2 v2.0.1
github.com/lmittmann/tint v1.0.3
github.com/mattn/go-isatty v0.0.20
github.com/spf13/pflag v1.0.5
)
require (
github.com/PuerkitoBio/goquery v1.5.0 // indirect
github.com/andybalholm/cascadia v1.0.0 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/knadh/koanf/maps v0.1.1 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect
golang.org/x/net v0.0.0-20190606173856-1492cefac77f // indirect
golang.org/x/sync v0.5.0 // indirect
golang.org/x/sys v0.6.0 // indirect
)

57
go.sum Normal file
View File

@@ -0,0 +1,57 @@
astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw=
astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
github.com/PuerkitoBio/goquery v1.5.0 h1:uGvmFXOA73IKluu/F84Xd1tt/z07GYm8X49XKHP7EJk=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/andybalholm/cascadia v1.0.0 h1:hOCXnnZ5A+3eVDX8pvgl4kofXv2ELss0bKcqRySc45o=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
github.com/knadh/koanf/maps v0.1.1 h1:G5TjmUh2D7G2YWf5SQQqSiHRJEjaicvU0KpypqB3NIs=
github.com/knadh/koanf/maps v0.1.1/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI=
github.com/knadh/koanf/parsers/toml v0.1.0 h1:S2hLqS4TgWZYj4/7mI5m1CQQcWurxUz6ODgOub/6LCI=
github.com/knadh/koanf/parsers/toml v0.1.0/go.mod h1:yUprhq6eo3GbyVXFFMdbfZSo928ksS+uo0FFqNMnO18=
github.com/knadh/koanf/providers/confmap v0.1.0 h1:gOkxhHkemwG4LezxxN8DMOFopOPghxRVp7JbIvdvqzU=
github.com/knadh/koanf/providers/confmap v0.1.0/go.mod h1:2uLhxQzJnyHKfxG927awZC7+fyHFdQkd697K4MdLnIU=
github.com/knadh/koanf/providers/file v0.1.0 h1:fs6U7nrV58d3CFAFh8VTde8TM262ObYf3ODrc//Lp+c=
github.com/knadh/koanf/providers/file v0.1.0/go.mod h1:rjJ/nHQl64iYCtAW2QQnF0eSmDEX/YZ/eNFj5yR6BvA=
github.com/knadh/koanf/providers/posflag v0.1.0 h1:mKJlLrKPcAP7Ootf4pBZWJ6J+4wHYujwipe7Ie3qW6U=
github.com/knadh/koanf/providers/posflag v0.1.0/go.mod h1:SYg03v/t8ISBNrMBRMlojH8OsKowbkXV7giIbBVgbz0=
github.com/knadh/koanf/v2 v2.0.1 h1:1dYGITt1I23x8cfx8ZnldtezdyaZtfAuRtIFOiRzK7g=
github.com/knadh/koanf/v2 v2.0.1/go.mod h1:ZeiIlIDXTE7w1lMT6UVcNiRAS2/rCeLn/GdLNvY1Dus=
github.com/lmittmann/tint v1.0.3 h1:W5PHeA2D8bBJVvabNfQD/XW9HPLZK1XoPZH0cq8NouQ=
github.com/lmittmann/tint v1.0.3/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY=
github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190606173856-1492cefac77f h1:IWHgpgFqnL5AhBUBZSgBdjl2vkQUEzcY+JNKWfcgAU0=
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE=
golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

241
kleingebaeck.1 Normal file
View File

@@ -0,0 +1,241 @@
.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings. \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote. \*(C+ will
.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
. ds -- \(*W-
. ds PI pi
. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
. ds L" ""
. ds R" ""
. ds C` ""
. ds C' ""
'br\}
.el\{\
. ds -- \|\(em\|
. ds PI \(*p
. ds L" ``
. ds R" ''
. ds C`
. ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD. Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
. if \nF \{\
. de IX
. tm Index:\\$1\t\\n%\t"\\$2"
..
. if !\nF==2 \{\
. nr % 0
. nr F 2
. \}
. \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear. Run. Save yourself. No user-serviceable parts.
. \" fudge factors for nroff and troff
.if n \{\
. ds #H 0
. ds #V .8m
. ds #F .3m
. ds #[ \f1
. ds #] \fP
.\}
.if t \{\
. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
. ds #V .6m
. ds #F 0
. ds #[ \&
. ds #] \&
.\}
. \" simple accents for nroff and troff
.if n \{\
. ds ' \&
. ds ` \&
. ds ^ \&
. ds , \&
. ds ~ ~
. ds /
.\}
.if t \{\
. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
. \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
. \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
. \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
. ds : e
. ds 8 ss
. ds o a
. ds d- d\h'-1'\(ga
. ds D- D\h'-1'\(hy
. ds th \o'bp'
. ds Th \o'LP'
. ds ae ae
. ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "KLEINGEBAECK 1"
.TH KLEINGEBAECK 1 "2023-12-19" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
kleingebaeck \- kleinanzeigen.de backup tool
.SH "SYNOPSYS"
.IX Header "SYNOPSYS"
.Vb 10
\& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...]
\& Options:
\& \-\-user \-u <uid> Backup ads from user with uid <uid>.
\& \-\-debug \-d Enable debug output.
\& \-\-verbose \-v Enable verbose output.
\& \-\-outdir \-o <dir> Set output dir (default: current directory)
\& \-\-limit \-l <num> Limit the ads to download to <num>, default: load all.
\& \-\-config \-c <file> Use config file <file> (default: ~/.kleingebaeck).
\& \-\-manual \-m Show manual.
\& \-\-help \-h Show usage.
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
This tool can be used to backup ads on the german ad page <https://kleinanzeigen.de>.
.PP
It downloads all (or only the specified ones) ads of one user into a
directory, each ad into its own subdirectory. The backup will contain
a textfile \fBAdlisting.txt\fR which contains the ad contents such as
title, body, price etc. All images will be downloaded as well.
.SH "CONFIGURATION"
.IX Header "CONFIGURATION"
You can create a config file to save typing. By default
\&\f(CW\*(C`~/.kleingebaeck\*(C'\fR is being used but you can specify one with \f(CW\*(C`\-c\*(C'\fR as
well. We use \s-1TOML\s0 as our configuration language. See
<https://toml.io/en/>.
.PP
Format is pretty simple:
.PP
.Vb 10
\& user = 1010101
\& loglevel = verbose
\& outdir = "test"
\& template = """
\& Title: {{.Title}}
\& Price: {{.Price}}
\& Id: {{.Id}}
\& Category: {{.Category}}
\& Condition: {{.Condition}}
\& Created: {{.Created}}
\&
\& {{.Text}}
\& """
.Ve
.PP
Be carefull if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can left out
certain fields and use any formatting you like. Refer to
<https://pkg.go.dev/text/template> for details how to write a
template.
.PP
If you're on windows and want to customize the output directory, put
it into single quotes to avoid the backslashes interpreted as escape
chars like this:
.PP
.Vb 1
\& outdir = \*(AqC:\eData\eAds\*(Aq
.Ve
.SH "SETUP"
.IX Header "SETUP"
To setup the tool, you need to lookup your userid on
kleinanzeigen.de. Go to your ad overview page while \s-1NOT\s0 being logged
in:
.PP
.Vb 1
\& https://www.kleinanzeigen.de/s\-bestandsliste.html?userId=XXXXXX
.Ve
.PP
The \fB\s-1XXXXX\s0\fR part is your userid.
.PP
Put it into the configfile as outlined above. Also specify an output
directory. Then just execute \f(CW\*(C`kleingebaeck\*(C'\fR.
.PP
You can use the \fB\-v\fR option to get verbose output or \fB\-d\fR to enable
debugging.
.SH "BUGS"
.IX Header "BUGS"
In order to report a bug, unexpected behavior, feature requests
or to submit a patch, please open an issue on github:
<https://github.com/TLINDEN/kleingebaeck/issues>.
.PP
Please repeat the failing command with debugging enabled \f(CW\*(C`\-d\*(C'\fR and
include the output in the issue.
.SH "LIMITATIONS"
.IX Header "LIMITATIONS"
The \f(CW\*(C`kleingebaeck\*(C'\fR doesn't currently check if it has downloaded a
file already, so it downloads everything again every time you execute
it. Be aware of it. This will change in the future.
.PP
Also there's currently no parallelization implemented. This will
change in the future.
.SH "LICENSE"
.IX Header "LICENSE"
Licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3.
.SH "Author"
.IX Header "Author"
T.v.Dein <tom \s-1AT\s0 vondein \s-1DOT\s0 org>

97
kleingebaeck.go Normal file
View File

@@ -0,0 +1,97 @@
package main
// manpage holds the plain text manual of the program.
// NOTE(review): the text mirrors kleingebaeck.pod and looks like a
// rendering of it, but the "--version -V" option line of the pod is
// missing here - confirm how this file is produced and regenerate it.
var manpage = `
NAME
kleingebaeck - kleinanzeigen.de backup tool
SYNOPSYS
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
Options:
--user -u <uid> Backup ads from user with uid <uid>.
--debug -d Enable debug output.
--verbose -v Enable verbose output.
--outdir -o <dir> Set output dir (default: current directory)
--limit -l <num> Limit the ads to download to <num>, default: load all.
--config -c <file> Use config file <file> (default: ~/.kleingebaeck).
--manual -m Show manual.
--help -h Show usage.
DESCRIPTION
This tool can be used to backup ads on the german ad page
<https://kleinanzeigen.de>.
It downloads all (or only the specified ones) ads of one user into a
directory, each ad into its own subdirectory. The backup will contain a
textfile Adlisting.txt which contains the ad contents such as title,
body, price etc. All images will be downloaded as well.
CONFIGURATION
You can create a config file to save typing. By default
"~/.kleingebaeck" is being used but you can specify one with "-c" as
well. We use TOML as our configuration language. See
<https://toml.io/en/>.
Format is pretty simple:
user = 1010101
loglevel = verbose
outdir = "test"
template = """
Title: {{.Title}}
Price: {{.Price}}
Id: {{.Id}}
Category: {{.Category}}
Condition: {{.Condition}}
Created: {{.Created}}
{{.Text}}
"""
Be carefull if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can left out
certain fields and use any formatting you like. Refer to
<https://pkg.go.dev/text/template> for details how to write a template.
If you're on windows and want to customize the output directory, put it
into single quotes to avoid the backslashes interpreted as escape chars
like this:
outdir = 'C:\Data\Ads'
SETUP
To setup the tool, you need to lookup your userid on kleinanzeigen.de.
Go to your ad overview page while NOT being logged in:
https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
The XXXXX part is your userid.
Put it into the configfile as outlined above. Also specify an output
directory. Then just execute "kleingebaeck".
You can use the -v option to get verbose output or -d to enable
debugging.
BUGS
In order to report a bug, unexpected behavior, feature requests or to
submit a patch, please open an issue on github:
<https://github.com/TLINDEN/kleingebaeck/issues>.
Please repeat the failing command with debugging enabled "-d" and
include the output in the issue.
LIMITATIONS
The "kleingebaeck" doesn't currently check if it has downloaded a file
already, so it downloads everything again every time you execute it. Be
aware of it. This will change in the future.
Also there's currently no parallelization implemented. This will change
in the future.
LICENSE
Licensed under the GNU GENERAL PUBLIC LICENSE version 3.
Author
T.v.Dein <tom AT vondein DOT org>
`

107
kleingebaeck.pod Normal file
View File

@@ -0,0 +1,107 @@
=head1 NAME
kleingebaeck - kleinanzeigen.de backup tool
=head1 SYNOPSYS
Usage: kleingebaeck [-dvVhmoclu] [<ad-listing-url>,...]
Options:
--user -u <uid> Backup ads from user with uid <uid>.
--debug -d Enable debug output.
--verbose -v Enable verbose output.
--outdir -o <dir> Set output dir (default: current directory)
--limit -l <num> Limit the ads to download to <num>, default: load all.
--config -c <file> Use config file <file> (default: ~/.kleingebaeck).
--manual -m Show manual.
--help -h Show usage.
--version -V Show program version.
=head1 DESCRIPTION
This tool can be used to backup ads on the german ad page L<https://kleinanzeigen.de>.
It downloads all (or only the specified ones) ads of one user into a
directory, each ad into its own subdirectory. The backup will contain
a textfile B<Adlisting.txt> which contains the ad contents such as
title, body, price etc. All images will be downloaded as well.
=head1 CONFIGURATION
You can create a config file to save typing. By default
C<~/.kleingebaeck> is being used but you can specify one with C<-c> as
well. We use TOML as our configuration language. See
L<https://toml.io/en/>.
Format is pretty simple:
user = 1010101
loglevel = "verbose"
outdir = "test"
template = """
Title: {{.Title}}
Price: {{.Price}}
Id: {{.Id}}
Category: {{.Category}}
Condition: {{.Condition}}
Created: {{.Created}}
{{.Text}}
"""
Be careful if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can leave out
certain fields and use any formatting you like. Refer to
L<https://pkg.go.dev/text/template> for details how to write a
template.
If you're on windows and want to customize the output directory, put
it into single quotes to avoid the backslashes interpreted as escape
chars like this:
outdir = 'C:\Data\Ads'
=head1 SETUP
To setup the tool, you need to lookup your userid on
kleinanzeigen.de. Go to your ad overview page while NOT being logged
in:
https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
The B<XXXXX> part is your userid.
Put it into the configfile as outlined above. Also specify an output
directory. Then just execute C<kleingebaeck>.
You can use the B<-v> option to get verbose output or B<-d> to enable
debugging.
=head1 BUGS
In order to report a bug, unexpected behavior, feature requests
or to submit a patch, please open an issue on github:
L<https://github.com/TLINDEN/kleingebaeck/issues>.
Please repeat the failing command with debugging enabled C<-d> and
include the output in the issue.
=head1 LIMITATIONS
The C<kleingebaeck> doesn't currently check if it has downloaded a
file already, so it downloads everything again every time you execute
it. Be aware of it. This will change in the future.
Also there's currently no parallelization implemented. This will
change in the future.
=head1 LICENSE
Licensed under the GNU GENERAL PUBLIC LICENSE version 3.
=head1 Author
T.v.Dein <tom AT vondein DOT org>
=cut

146
main.go Normal file
View File

@@ -0,0 +1,146 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"errors"
"fmt"
"log/slog"
"os"
"runtime/debug"
"github.com/lmittmann/tint"
)
// LevelNotice is the default log level of the program. It sits two steps
// above slog.LevelInfo, so Info and Debug records are suppressed unless
// verbose or debug output is requested.
const LevelNotice = slog.LevelInfo + 2
// main runs the program and hands the status returned by Main to the
// operating system.
func main() {
	exitCode := Main()
	os.Exit(exitCode)
}
// Main is the actual program. It sets up logging, loads the
// configuration, handles the informational flags (version, help, manual)
// and then either scrapes the ad links given on the command line or all
// ads of the configured user.
//
// It returns the process exit code: 0 on success, 1 if anything failed.
// Failures are logged via Die.
func Main() int {
	// The level is a LevelVar so it can be raised after the config has
	// been loaded without rebuilding the handler.
	logLevel := &slog.LevelVar{}
	opts := &tint.Options{
		Level:     logLevel,
		AddSource: false,
		ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr {
			// Remove time from the output
			if a.Key == slog.TimeKey {
				return slog.Attr{}
			}
			return a
		},
		NoColor: IsNoTty(),
	}

	logLevel.Set(LevelNotice)

	var handler slog.Handler = tint.NewHandler(os.Stdout, opts)
	logger := slog.New(handler)
	slog.SetDefault(logger)

	conf, err := InitConfig()
	if err != nil {
		return Die(err)
	}

	if conf.Showversion {
		fmt.Printf("This is kleingebaeck version %s\n", VERSION)
		return 0
	}

	if conf.Showhelp {
		fmt.Println(Usage)
		return 0
	}

	if conf.Showmanual {
		err := man()
		if err != nil {
			return Die(err)
		}
		return 0
	}

	if conf.Verbose {
		logLevel.Set(slog.LevelInfo)
	}

	if conf.Debug {
		// we're using a more verbose logger in debug mode

		// ReadBuildInfo reports false and returns nil when the binary
		// carries no build information, so the result must be checked
		// before it is used.
		goVersion := "unknown"
		if buildInfo, ok := debug.ReadBuildInfo(); ok {
			goVersion = buildInfo.GoVersion
		}

		debugOpts := &tint.Options{
			Level:     logLevel,
			AddSource: true,
			NoColor:   IsNoTty(),
		}

		logLevel.Set(slog.LevelDebug)

		var handler slog.Handler = tint.NewHandler(os.Stdout, debugOpts)
		debuglogger := slog.New(handler).With(
			slog.Group("program_info",
				slog.Int("pid", os.Getpid()),
				slog.String("go_version", goVersion),
			),
		)
		slog.SetDefault(debuglogger)
	}

	slog.Debug("config", "conf", conf)

	// prepare output dir
	err = Mkdir(conf.Outdir)
	if err != nil {
		return Die(err)
	}

	if len(conf.Adlinks) >= 1 {
		// directly backup ad listing[s]
		for _, uri := range conf.Adlinks {
			err := Scrape(conf, uri)
			if err != nil {
				return Die(err)
			}
		}
	} else if conf.User > 0 {
		// backup all ads of the given user (via config or cmdline)
		err := Start(conf)
		if err != nil {
			return Die(err)
		}
	} else {
		return Die(errors.New("invalid or no user id or no ad link specified"))
	}

	if conf.StatsCountAds > 0 {
		adstr := "ads"
		if conf.StatsCountAds == 1 {
			adstr = "ad"
		}
		fmt.Printf("Successfully downloaded %d %s with %d images to %s.\n",
			conf.StatsCountAds, adstr, conf.StatsCountImages, conf.Outdir)
	} else {
		// terminate the line like every other message does
		fmt.Println("No ads found.")
	}

	return 0
}
// Die logs the message of err at error level and returns 1, the exit
// code Main reports for a failed run. err must not be nil.
func Die(err error) int {
	reason := err.Error()
	slog.Error("Failure", "error", reason)

	return 1
}

70
mkrel.sh Executable file
View File

@@ -0,0 +1,70 @@
#!/bin/bash

# Copyright © 2023 Thomas von Dein

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Build release artifacts for every platform listed in DIST.
#
# Usage: mkrel.sh <tool name> <release version>
#
# For each os/arch pair the following files are created below releases/:
#   <tool>-<os>-<arch>-<version>[.exe]    the binary
#   <tool>-<os>-<arch>-<version>.tar.gz   binary, README.md, LICENSE,
#                                         manpage and an install Makefile
#   one .sha256 file for each of the two

# get list with: go tool dist list
DIST="darwin/amd64
freebsd/amd64
linux/amd64
netbsd/amd64
openbsd/amd64
windows/amd64"

tool="$1"
version="$2"

# version is the last argument, so it is empty whenever too few
# arguments were given
if test -z "$version"; then
    echo "Usage: $0 <tool name> <release version>"
    exit 1
fi

rm -rf releases
mkdir -p releases

for D in $DIST; do
    os=${D/\/*/}    # part in front of the slash
    arch=${D/*\//}  # part behind the slash
    binfile="releases/${tool}-${os}-${arch}-${version}"

    if test "$os" = "windows"; then
        binfile="${binfile}.exe"
    fi

    tardir="${tool}-${os}-${arch}-${version}"
    tarfile="releases/${tool}-${os}-${arch}-${version}.tar.gz"

    set -x

    # do not package and checksum anything if the build failed
    GOOS=${os} GOARCH=${arch} go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o "${binfile}" || exit 1

    mkdir -p "${tardir}"

    # the manpage is shipped as well, the Makefile below installs it
    cp "${binfile}" README.md LICENSE "${tool}.1" "${tardir}/"

    # Generate the install Makefile. The binary inside the tarball keeps
    # its release file name, so the Makefile has to refer to that name
    # and install it as plain $(tool) into the bin directory it creates.
    # printf with \t is used for the recipe lines because make insists
    # on real tab characters there.
    {
        printf 'tool   = %s\n' "${tool}"
        printf 'binary = %s\n' "$(basename "${binfile}")"
        printf 'PREFIX = /usr/local\n'
        printf 'UID    = root\n'
        printf 'GID    = 0\n'
        printf '\n'
        printf 'install:\n'
        printf '\tinstall -d -o $(UID) -g $(GID) $(PREFIX)/bin\n'
        printf '\tinstall -d -o $(UID) -g $(GID) $(PREFIX)/man/man1\n'
        printf '\tinstall -o $(UID) -g $(GID) -m 555 $(binary) $(PREFIX)/bin/$(tool)\n'
        printf '\tinstall -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/\n'
    } > "${tardir}/Makefile"

    tar cpzf "${tarfile}" "${tardir}"

    sha256sum "${binfile}" | cut -d' ' -f1 > "${binfile}.sha256"
    sha256sum "${tarfile}" | cut -d' ' -f1 > "${tarfile}.sha256"

    rm -rf "${tardir}"
    set +x
done

226
scrape.go Normal file
View File

@@ -0,0 +1,226 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"path/filepath"
"strings"
"astuart.co/goq"
"golang.org/x/sync/errgroup"
)
// Index is the decoding target for one ad listing page. It is filled
// by goq from the struct tag below.
type Index struct {
// Links holds one entry per element matched by the
// ".text-module-begin a" selector; the "[href]" part of the tag
// presumably selects the href attribute as the value (confirm
// against the goq documentation).
Links []string `goquery:".text-module-begin a,[href]"`
}
// Ad describes a single scraped ad. Fields carrying a goquery tag are
// filled by goq when the ad page is decoded; the untagged fields are
// set by Scrape.
type Ad struct {
Title string `goquery:"h1"`
// Slug and Id are taken from the ad uri by Scrape, not from the page.
Slug string
Id string
// Condition and Category are copied from Meta by Scrape when Meta
// holds exactly two entries.
Condition string
Category string
Price string `goquery:"h2#viewad-price"`
Created string `goquery:"#viewad-extra-info,text"`
// Text is extracted as html; WriteAd later replaces "<br/>" in it
// with line breaks.
Text string `goquery:"p#viewad-description-text,html"`
// Images holds the image sources fetched by ScrapeImages.
Images []string `goquery:".galleryimage-element img,[src]"`
Meta []string `goquery:".addetailslist--detail--value,text"`
}
// LogValue renders the ad as a compact slog group: title, price and
// id plus the number of images and the size of the ad text in bytes,
// so the full ad body does not end up in the log.
func (ad *Ad) LogValue() slog.Value {
	attrs := []slog.Attr{
		slog.String("title", ad.Title),
		slog.String("price", ad.Price),
		slog.String("id", ad.Id),
		slog.Int("imagecount", len(ad.Images)),
		slog.Int("bodysize", len(ad.Text)),
	}

	return slog.GroupValue(attrs...)
}
// Get issues an HTTP GET for uri via client, sending Useragent as the
// User-Agent header. It returns the unread response body, which the
// caller has to close. The response status is logged at debug level
// but not evaluated.
func Get(uri string, client *http.Client) (io.ReadCloser, error) {
	request, err := http.NewRequest(http.MethodGet, uri, nil)
	if err != nil {
		return nil, err
	}
	request.Header.Set("User-Agent", Useragent)

	response, err := client.Do(request)
	if err != nil {
		return nil, err
	}

	slog.Debug("response",
		"code", response.StatusCode,
		"status", response.Status,
		"size", response.ContentLength)

	return response.Body, nil
}
// Start collects the ad links of conf.User from the listing pages,
// following the pagination until a page yields no links, and then
// scrapes every collected ad. If conf.Limit is greater than zero,
// scraping stops after that many ads. The first error aborts the run
// and is returned.
func Start(conf *Config) error {
	client := &http.Client{}
	adlinks := []string{}

	baseuri := fmt.Sprintf("%s%s?userId=%d", Baseuri, Listuri, conf.User)
	uri := baseuri
	page := 1

	slog.Info("fetching ad pages", "user", conf.User)

	for {
		links, err := fetchAdLinks(uri, client)
		if err != nil {
			return err
		}

		// an empty page marks the end of the pagination
		if len(links) == 0 {
			break
		}

		slog.Debug("extracted ad links", "count", len(links))

		for _, href := range links {
			adlinks = append(adlinks, href)
			slog.Debug("ad link", "href", href)
		}

		page++
		uri = fmt.Sprintf("%s&pageNum=%d", baseuri, page)
	}

	for i, adlink := range adlinks {
		if err := Scrape(conf, Baseuri+adlink); err != nil {
			return err
		}

		if conf.Limit > 0 && i == conf.Limit-1 {
			break
		}
	}

	return nil
}

// fetchAdLinks downloads one listing page and returns the ad links
// found on it. The response body is closed before returning, so a
// long pagination does not keep one open response per page around.
func fetchAdLinks(uri string, client *http.Client) ([]string, error) {
	slog.Debug("fetching page", "uri", uri)

	body, err := Get(uri, client)
	if err != nil {
		return nil, err
	}
	defer body.Close()

	var index Index
	if err := goq.NewDecoder(body).Decode(&index); err != nil {
		return nil, err
	}

	return index.Links, nil
}
// Scrape fetches the ad at uri (the full ad uri), decodes it into an
// Ad, writes the listing below c.Outdir using c.Template, bumps the ad
// counter of c and finally downloads the ad images.
func Scrape(c *Config, uri string) error {
	// slug and id are the fifth and sixth "/" separated uri elements
	segments := strings.Split(uri, "/")
	if len(segments) < 6 {
		return errors.New("invalid uri")
	}

	ad := &Ad{
		Slug: segments[4],
		Id:   segments[5],
	}

	slog.Debug("fetching ad page", "uri", uri)

	body, err := Get(uri, &http.Client{})
	if err != nil {
		return err
	}
	defer body.Close()

	// let goquery/goq fill in the tagged fields
	if err := goq.NewDecoder(body).Decode(&ad); err != nil {
		return err
	}

	// only use the meta values if both of them are present
	if len(ad.Meta) == 2 {
		ad.Category, ad.Condition = ad.Meta[0], ad.Meta[1]
	}

	slog.Debug("extracted ad listing", "ad", ad)

	if err := WriteAd(c.Outdir, ad, c.Template); err != nil {
		return err
	}

	c.IncrAds()

	return ScrapeImages(c, ad)
}
// ScrapeImages downloads all images of ad concurrently and stores
// them as 1.jpg, 2.jpg, ... inside the ad directory below c.Outdir.
// It waits for all downloads, returns the first error reported by the
// group and, on success, adds the number of images to the counter of c.
func ScrapeImages(c *Config, ad *Ad) error {
	var downloads errgroup.Group

	for idx, imguri := range ad.Images {
		// per-iteration copies for the closure below
		imguri := imguri
		target := filepath.Join(c.Outdir, ad.Slug, fmt.Sprintf("%d.jpg", idx+1))

		downloads.Go(func() error {
			if err := Getimage(imguri, target); err != nil {
				return err
			}

			slog.Info("wrote ad image", "image", target)

			return nil
		})
	}

	if err := downloads.Wait(); err != nil {
		return err
	}

	c.IncrImgs(len(ad.Images))

	return nil
}
// Getimage downloads the image at uri and stores it in fileName. Any
// response status other than 200 is reported as an error.
func Getimage(uri, fileName string) error {
	slog.Debug("fetching ad image", "uri", uri)

	resp, err := http.Get(uri)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return errors.New("received non 200 response code")
	}

	return WriteImage(fileName, resp.Body)
}

78
store.go Normal file
View File

@@ -0,0 +1,78 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"io"
"log/slog"
"os"
"path/filepath"
"runtime"
"strings"
tpl "text/template"
)
// WriteAd renders ad through the given text/template source and
// stores the result as Adlisting.txt inside the directory dir/<slug>,
// which is created if required.
//
// As a side effect every "<br/>" in ad.Text is replaced by the
// platform line break (CRLF on windows, LF elsewhere) before
// rendering.
//
// It returns the first error hit while creating the directory or the
// file, parsing or executing the template, or closing the file.
func WriteAd(dir string, ad *Ad, template string) (err error) {
	// prepare output dir
	dir = filepath.Join(dir, ad.Slug)
	if err = Mkdir(dir); err != nil {
		return err
	}

	// write ad file
	listingfile := filepath.Join(dir, "Adlisting.txt")

	f, err := os.Create(listingfile)
	if err != nil {
		return err
	}

	// release the descriptor on every path; a close error must not
	// get lost, since it can signal that data was not written out
	defer func() {
		if cerr := f.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	newline := "\n"
	if runtime.GOOS == "windows" {
		newline = "\r\n"
	}
	ad.Text = strings.ReplaceAll(ad.Text, "<br/>", newline)

	tmpl, err := tpl.New("adlisting").Parse(template)
	if err != nil {
		return err
	}

	if err = tmpl.Execute(f, ad); err != nil {
		return err
	}

	slog.Info("wrote ad listing", "listingfile", listingfile)

	return nil
}
// WriteImage creates (or truncates) filename and copies everything
// readable from reader into it. The reader itself is not closed here;
// that remains the caller's responsibility.
//
// It returns the error of creating the file, of copying, or - if both
// succeeded - of closing the file, so a failed final write does not
// go unnoticed.
func WriteImage(filename string, reader io.ReadCloser) (err error) {
	file, err := os.Create(filename)
	if err != nil {
		return err
	}

	defer func() {
		if cerr := file.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(file, reader)

	return err
}

68
util.go Normal file
View File

@@ -0,0 +1,68 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bytes"
"errors"
"os"
"os/exec"
"runtime"
"github.com/mattn/go-isatty"
)
// Mkdir makes sure that dir exists as a directory, creating it along
// with any missing parents (mode os.ModePerm before umask).
//
// It returns nil if dir already is a directory, and an error if dir
// exists but is not a directory, if it cannot be inspected, or if it
// cannot be created.
func Mkdir(dir string) error {
	info, err := os.Stat(dir)

	switch {
	case err == nil:
		// something is there already, make sure it is usable
		if !info.IsDir() {
			return errors.New(dir + " exists but is not a directory")
		}

		return nil
	case errors.Is(err, os.ErrNotExist):
		return os.MkdirAll(dir, os.ModePerm)
	default:
		// e.g. permission problems: do not pretend the dir is fine
		return err
	}
}
// man shows the manual page by running "less -" with the content of
// manpage on its standard input; the pager's stdout and stderr are
// wired to those of the current process. The error of running the
// pager, if any, is returned.
func man() error {
	pager := exec.Command("less", "-")

	pager.Stdin = bytes.NewBuffer([]byte(manpage))
	pager.Stdout = os.Stdout
	pager.Stderr = os.Stderr

	return pager.Run()
}
// IsNoTty reports whether the program runs on windows or its stdout
// is not a terminal. It returns false only for a non-windows system
// with a terminal on stdout.
func IsNoTty() bool {
	return runtime.GOOS == "windows" || !isatty.IsTerminal(os.Stdout.Fd())
}