moved to codeberg

This commit is contained in:
2025-11-05 08:40:09 +01:00
parent 230bbf3d53
commit 704450bc44
50 changed files with 6 additions and 4084 deletions

View File

@@ -1,96 +0,0 @@
prSections:
- title: Responsible PRs
filters: repo:tlinden/kleingebaeck is:open NOT dependabot
layout:
repoName:
hidden: true
- title: Responsible Dependabot PRs
filters: repo:tlinden/kleingebaeck is:open dependabot
layout:
repoName:
hidden: true
issuesSections:
- title: Responsible Issues
filters: is:open repo:tlinden/kleingebaeck -author:@me
layout:
repoName:
hidden: true
- title: Note-to-Self Issues
filters: is:open repo:tlinden/kleingebaeck author:@me
layout:
creator:
hidden: true
repoName:
hidden: true
defaults:
preview:
open: false
width: 100
keybindings:
universal:
- key: "shift+down"
builtin: pageDown
- key: "shift+up"
builtin: pageUp
prs:
- key: g
name: gitu
command: >
cd {{.RepoPath}} && /home/scip/bin/gitu
- key: M
name: squash-merge
command: gh pr merge --rebase --squash --admin --repo {{.RepoName}} {{.PrNumber}}
- key: i
name: show ci checks
command: gh pr checks --repo {{.RepoName}} {{.PrNumber}} | glow -p
- key: e
name: edit pr
command: ~/.config/gh-dash/edit-gh-pr {{.RepoName}} {{.PrNumber}}
- key: E
name: open repo in emacs
command: emacsclient {{.RepoPath}} &
issues:
- key: v
name: view
command: gh issue view --repo {{.RepoName}} {{.IssueNumber}} | glow -p
- key: l
name: add label
command: gh issue --repo {{.RepoName}} edit {{.IssueNumber}} --add-label $(gum choose bug enhancement question dependencies wontfix)
- key: L
name: remove label
command: gh issue --repo {{.RepoName}} edit {{.IssueNumber}} --remove-label $(gum choose bug enhancement question dependencies wontfix)
- key: E
name: open repo in emacs
command: emacsclient {{.RepoPath}} &
theme:
ui:
sectionsShowCount: true
table:
compact: false
showSeparator: true
colors:
text:
primary: "#E2E1ED"
secondary: "#6770cb"
inverted: "#242347"
faint: "#b0793b"
warning: "#E0AF68"
success: "#3DF294"
background:
selected: "#1B1B33"
border:
primary: "#383B5B"
secondary: "#39386B"
faint: "#8d3e0b"
repoPaths:
:owner/:repo: ~/dev/:repo
pager:
diff: delta

View File

@@ -1,69 +0,0 @@
# vim: set ts=2 sw=2 tw=0 fo=cnqoj
version: 2
before:
hooks:
- go mod tidy
gitea_urls:
api: https://codeberg.org/api/v1
download: https://codeberg.org
builds:
- env:
- CGO_ENABLED=0
goos:
- linux
- windows
- darwin
- freebsd
archives:
- formats: [tar.gz]
# this name template makes the OS and Arch compatible with the results of `uname`.
name_template: >-
{{ .ProjectName }}_
{{- title .Os }}_
{{- if eq .Arch "amd64" }}x86_64
{{- else if eq .Arch "386" }}i386
{{- else }}{{ .Arch }}{{ end }}
{{- if .Arm }}v{{ .Arm }}{{ end }}_{{ .Tag }}
# use zip for windows archives
format_overrides:
- goos: windows
formats: [zip]
- goos: linux
formats: [tar.gz,binary]
files:
- src: "*.md"
strip_parent: true
- src: "docs/*"
strip_parent: true
- src: Makefile.dist
dst: Makefile
wrap_in_directory: true
changelog:
sort: asc
filters:
exclude:
- "^docs:"
- "^test:"
groups:
- title: Improved
regexp: '^.*?(feat|add|new)(\([[:word:]]+\))??!?:.+$'
order: 0
- title: Fixed
regexp: '^.*?(bug|fix)(\([[:word:]]+\))??!?:.+$'
order: 1
- title: Changed
order: 999
release:
header: "# Release Notes"
footer: >-
---
Full Changelog: [{{ .PreviousTag }}...{{ .Tag }}](https://codeberg.org/scip/kleingebaeck/compare/{{ .PreviousTag }}...{{ .Tag }})

View File

@@ -1,36 +0,0 @@
matrix:
platform:
- linux/amd64
goversion:
- 1.24
labels:
platform: ${platform}
steps:
build:
when:
event: [push]
image: golang:${goversion}
commands:
- go get
- go build
linter:
when:
event: [push]
image: golang:${goversion}
commands:
- curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/HEAD/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.5.0
- golangci-lint --version
- golangci-lint run ./...
depends_on: [build]
test:
when:
event: [push]
image: golang:${goversion}
commands:
- go get
- go test -v -cover
depends_on: [build,linter]

View File

@@ -1,32 +0,0 @@
# https://woodpecker-ci.org/plugins/docker-buildx
# enable Package unit and go to /scip/-/packages after building to link to proj
variables:
- &repo codeberg.org/${CI_REPO_OWNER}/kleingebaeck
steps:
dryrun:
image: docker.io/woodpeckerci/plugin-docker-buildx:latest
settings:
dockerfile: Dockerfile
platforms: linux/amd64
dry_run: true
repo: *repo
tags: latest
when:
event: [pull_request]
publish:
image: docker.io/woodpeckerci/plugin-docker-buildx:latest
settings:
dockerfile: Dockerfile
platforms: linux/amd64
repo: *repo
registry: codeberg.org
tags: latest,${CI_COMMIT_SHA:0:8},${CI_COMMIT_TAG}
username: ${CI_REPO_OWNER}
password:
from_secret: REGISTRY_TOKEN
when:
event: [tag]
branch: main

View File

@@ -1,15 +0,0 @@
# build release
labels:
platform: linux/amd64
steps:
goreleaser:
image: goreleaser/goreleaser
when:
event: [tag]
environment:
GITEA_TOKEN:
from_secret: DEPLOY_TOKEN
commands:
- goreleaser release --clean --verbose

View File

@@ -1,114 +0,0 @@
# No Code of Conduct
*TL;DR:* This project does **NOT** have a so called Code of Conduct,
nor will it ever have one.
## The Rant
The reasons are somewhat complicated and I'll try my best to document
them here.
Ethical codes or rules come along like laws. But how is ethical or
moral behavior defined? And who defines which behavior is ethical and
which is not? Certainly not me.
Unless you live in a dictatorship (and more than half of the
population on planet earth do as of this writing), laws come into
existence by democratic procedures. Laws cover almost every aspect of
life in a society. Laws allow and forbid behavior and laws sanction
infringements.
A software project like this one on the other hand is not a society.
There are not enough people involved to form democratic
structures. And there will always be a minority of users who have the
right to commit or reject code. How could any maintainer of a software
project dare to decree rules upon others? Actually, am I, the current
maintainer of this very project authorized to do so?
I think the answer to this question clearly is NO.
The issue is being complicated by the fact, that open source
development these days happens on a planetary scale. And this planet
houses hundreds if not thousands of different cultures, philosophies,
ideologies and worldviews. The answer to many ethical questions will
in most cases be vague and nebulous.
One's joke will always be another one's insult.
Then there is the problem of language. I myself am not a native
English speaker, but I publish everything in English. I am able
to communicate with most people in the open source community because
of that. But I am certainly not able to understand everything and
everyone. There might be nuances to a sentence I don't sense, there
might be sarcastic connotations I don't understand or references to
historical figures, events or traditions I don't know and never have
heard of.
Judging over other peoples online behavior looks like a titanic task
to me. It is just not my job to judge others, I am not legitimized or
authorized to do so and I am not interested in this kind of business.
Another huge problem with ethical rules is that you need to outline
and enforce sanctions on those who violate the rules. But since I am
not an elected authority how would I be able to do this? I don't
know. And what happens if someone complains about myself? Shall I
remove myself from my own project? Come on!
Last but not least there's the law. So, let's say someone in india
writes something insulting to some other developer in an issue. Of
course german law does not apply to indian people. Moreover, the
insult might actually not be an insult in india. In the end, nothing
would happen. Under normal circumstances, maintainers would
eventually delete the posting, ban the user or remove push privileges
etc.
But then, is there a way for the offending user to defend himself? Of
course not, since neither indian or german law alone applies. I cannot
go to a german court and sue the guy and he cannot do the same in
india. Or - we possibly could but the judges in both countries would
just laugh and close the case.
That being said, I don't have the power nor the tools, nor the
authority to enforce serious sanctions of any meaningful kind against
others. Therefore I cannot outline any rules whatsoever.
And let's not even start talking about these undemocratic "committees"
many projects are forming to circumvent this problem. Some projects
even include external entities like a lawyer or some bureaucrat
somewhere just to have the ability to complain against a committee
member. What a mess!
## So, what are the ethical rules within this project then?
Well, there are none.
This project is about code, not society. It doesn't matter where you
come from, how you look, how you think, what you believe, who your
friends are, what you said or did sometime in the past. I don't even
care if you are a human being. You are an alien so bored that you need
to submit code on github? Fine with me. You're a convicted criminal? I
don't give a shit!
**The only thing I am interested here is Code and only Code.**
So if anything happens here I don't like or I am obliged by (german!)
law to act on, I will decide on a case to case basis what to do. And
unfortunately, since this is the nature of a github project, you
cannot complain, object or protest. I am very sorry!
If you will, let's at least outline these:
- Please - just please - behave towards others as you'd expect others
to behave towards yourself.
- Don't judge others for any reason.
- Only judge the code.
But these are not rules, only a friendly appeal to you as a developer
and user.
Thanks a lot!

View File

@@ -1,93 +0,0 @@
## Project Goals
The goal of this project is to build a small tool which helps in
maintaining backups of the german ad site kleinanzeigen.de. It should
be small, fast and easy to understand.
There will be no GUI, no web interface, no public API of some sort, no
builtin interpreter.
The programming language used for this project will always be
[GOLANG](https://go.dev/) with the exception of the documentation
([Perl POD](https://perldoc.perl.org/perlpod)) and the Makefile.
# Contributing
You can contribute to this project in various ways:
## Open an issue
If you encounter a problem or don't understand how the program works
or if you think the documentation is unclear, please don't hesitate to
open an issue.
Please add as much information about the case as possible, such as:
- Your environment (operating system etc)
- kleingebaeck version (`kleingebaeck --version`)
- Commandline used. Please replace sensitive information with mock data!
- Repeat the command with debugging enabled (`-d` flag)
- Actual program output, Please replace sensitive information with mock data!
- Expected program output.
- Error message - if any.
Be aware that I am working on this (and some others) project in my
spare time which is scarce. Therefore please don't expect me to
respond to your query within hours or even days. Be patient, but I
WILL respond.
## Pull Requests
Code and documentation help is always much appreciated! Please follow
these guidelines to successfully contribute:
- Every pull request shall be based on latest `development`
branch. `main` is only used for releases.
- Execute the unit tests before committing: `make test`. There shall
be no errors.
- Strive to be backwards compatible so that users who are already
using the program don't have to change their habits - unless it is
really necessary.
- Try to add a unit test for your fix, addition or modification.
- Don't ever change existing unit tests!
- Add a meaningful and comprehensive rationale about your contribution:
- Why do you think it might be useful for others?
- What did you actually change or add?
- Is there an open issue which this PR fixes and if so, please link
to that issue.
- [Re-]format your code with `gofmt -s`.
- Avoid unnecessary dependencies, especially for very small functions.
- **If** a new dependency is being added, it must be compatible with
our [license agreement](LICENSE).
- You need to accept that the code or documentation you contribute
will be redistributed under the terms of said license agreement. If
your contribution is considerably large or if you contribute
regularly, then feel free to add your name (and if you want your
email address) to the *AUTHORS* section of the
[manpage](kleingebaeck.pod).
- Adhere to the above mentioned project goals.
- If you are unsure if your addition or change will be accepted,
better ask before starting coding. Open an issue about your proposal
and let's discuss it! That way we avoid doing unnecessary work on
both sides.
Each pull request will be carefully reviewed and if it is a useful
addition it will be accepted. However, please be prepared that
sometimes a PR will be rejected. The reasons may vary and will be
documented. Perhaps the above guidelines are not matched, or the
addition seems to be not so useful from my perspective, maybe there
are too many changes or there might be changes I don't even
understand.
But whatever happens: your contribution is always welcome!

View File

@@ -1,27 +0,0 @@
# Build stage: compile the binary with the project's Makefile.
FROM golang:1.24-alpine AS builder

RUN apk update
RUN apk upgrade
# git and make are required by the build
RUN apk add --no-cache git make

RUN git --version

WORKDIR /work

COPY go.mod .
COPY . .
RUN go mod download
RUN make

# Runtime stage: only the compiled binary is copied over.
FROM alpine:latest
LABEL maintainer="Thomas von Dein <git@daemon.de>"

WORKDIR /app
COPY --from=builder /work/kleingebaeck /app/kleingebaeck

# key=value form; the space separated "ENV key value" form is legacy
ENV KLEINGEBAECK_OUTDIR=/backup
ENV LANG=C.UTF-8

# run as an unprivileged user
USER 1001:1001

# without arguments the container prints the usage message
ENTRYPOINT ["/app/kleingebaeck"]
CMD ["-h"]

101
Makefile
View File

@@ -1,101 +0,0 @@
# Copyright © 2023 Thomas von Dein
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# no need to modify anything below
tool = kleingebaeck
VERSION = $(shell grep VERSION config.go | head -1 | cut -d '"' -f2)
archs = darwin freebsd linux windows
PREFIX = /usr/local
UID = root
GID = 0
HAVE_POD := $(shell pod2text -h 2>/dev/null)
all: $(tool).1 $(tool).go buildlocal
%.1: %.pod
ifdef HAVE_POD
pod2man -c "User Commands" -r 1 -s 1 $*.pod > $*.1
endif
%.go: %.pod
ifdef HAVE_POD
echo "package main" > $*.go
echo >> $*.go
echo "var manpage = \`" >> $*.go
pod2text $*.pod >> $*.go
echo "\`" >> $*.go
endif
buildlocal:
CGO_LDFLAGS='-static' go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o $(tool)
# Install the binary and its manpage below $(PREFIX). The binary goes
# into the bin directory created here; previously it was copied to
# $(PREFIX)/sbin/, a directory this target never created.
install: buildlocal
	install -d -o $(UID) -g $(GID) $(PREFIX)/bin
	install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1
	install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/bin/
	install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/
clean:
rm -rf $(tool) coverage.out testdata t/out
test: clean
mkdir -p t/out
go test ./... $(ARGS)
testlint: test lint
lint:
golangci-lint run
lint-full:
golangci-lint run --enable-all --exclude-use-default --disable exhaustivestruct,exhaustruct,depguard,interfacer,deadcode,golint,structcheck,scopelint,varcheck,ifshort,maligned,nosnakecase,godot,funlen,gofumpt,cyclop,noctx,gochecknoglobals,paralleltest
gocritic check -enableAll *.go
testfuzzy: clean
go test -fuzz ./... $(ARGS)
singletest:
@echo "Call like this: make singletest TEST=TestPrepareColumns ARGS=-v"
go test -run $(TEST) $(ARGS)
cover-report:
go test ./... -cover -coverprofile=coverage.out
go tool cover -html=coverage.out
goupdate:
go get -t -u=patch ./...
buildall:
./mkrel.sh $(tool) $(VERSION)
release:
gh release create v$(VERSION) --generate-notes
show-versions: buildlocal
@echo "### kleingebaeck version:"
@./kleingebaeck -V
@echo
@echo "### go module versions:"
@go list -m all
@echo
@echo "### go version used for building:"
@grep -m 1 go go.mod
# lint:
# golangci-lint run -p bugs -p unused

View File

@@ -1,20 +0,0 @@
# -*-make-*-
#
# Makefile shipped inside the release archives (packaged as "Makefile").
# It only installs the prebuilt binary, the manpage and the markdown
# docs; nothing is compiled here.
.PHONY: install all

# name of the binary and manpage contained in the archive
tool = kleingebaeck
PREFIX = /usr/local
UID = root
GID = 0

all:
	@echo "Type 'sudo make install' to install the tool."
	@echo "To change prefix, type 'sudo make install PREFIX=/opt'"

# The binary is installed into the bin directory created below.
install:
	install -d -o $(UID) -g $(GID) $(PREFIX)/bin
	install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1
	install -d -o $(UID) -g $(GID) $(PREFIX)/share/doc
	install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/bin/
	install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/
	install -o $(UID) -g $(GID) -m 444 *.md $(PREFIX)/share/doc/

View File

@@ -9,6 +9,9 @@
[![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://codeberg.org/scip/kleingebaeck/releases)
[![English](https://codeberg.org/scip/kleingebaeck/raw/branch/.github/assets/english.png)](https://codeberg.org/scip/kleingebaeck/raw/branch/README.md)
> [!IMPORTANT]
> Diese Software wird jetzt bei Codeberg weitergepflegt: [Codeberg](https://codeberg.org/scip/kleingebaeck/).
Mit diesem Tool kann man seine Anzeigen bei https://kleinanzeigen.de sichern.
Es kann alle Anzeigen eines Users (oder nur eine Ausgewählte)

View File

@@ -9,6 +9,9 @@
[![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://codeberg.org/scip/kleingebaeck/releases)
[![German](https://codeberg.org/scip/kleingebaeck/raw/branch/.github/assets/german.png)](https://codeberg.org/scip/kleingebaeck/raw/branch/README-de.md)
> [!IMPORTANT]
> This software is now being maintained on [Codeberg](https://codeberg.org/scip/kleingebaeck/).
[Die deutsche Version des READMEs findet Ihr hier](README-de.md).
This tool can be used to backup ads on the german ad page https://kleinanzeigen.de

View File

@@ -1,17 +0,0 @@
# Security Policy
## Supported Versions
Only the latest release is supported. If you find an issue (any
issue!), please check with the latest release first.
## Reporting a Vulnerability
I don't agree with the "responsible disclosure" process most projects
(and companies) follow these days.
So, if you find a vulnerability of any kind, please just open an
[issue](https://codeberg.org/scip/kleingebaeck/issues). Please add
all details required to reproduce the vulnerability. You won't be chased.
That's just all about it.

163
ad.go
View File

@@ -1,163 +0,0 @@
/*
Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bufio"
"log/slog"
"strings"
"time"
)
// Index holds the link targets selected from a page through the
// goquery tag below.
// NOTE(review): presumably these are the ad links of a listing page and
// the tag has the form "<css selector>,<value source>" as understood by
// an HTML unmarshaller used elsewhere — confirm against the scraper.
type Index struct {
Links []string `goquery:".text-module-begin a,[href]"`
}
// Ad describes a single ad. Fields carrying a `goquery` tag are
// presumably filled by an HTML unmarshaller from the ad page (the tag
// looks like "<css selector>,<value source>" — confirm); the remaining
// fields are set by code, some of them by the methods of this type.
type Ad struct {
Title string `goquery:"h1"`
Slug string
ID string
Details string `goquery:".addetailslist--detail,text"` // raw attribute lines, parsed by DecodeAttributes
Attributes map[string]string // processed afterwards (filled by DecodeAttributes)
Condition string // post processed from details for backward compatibility
Type string // post processed from details for backward compatibility
Color string // post processed from details for backward compatibility
Material string // post processed from details for backward compatibility
Category string
CategoryTree []string `goquery:".breadcrump-link,text"`
Price string `goquery:"h2#viewad-price"`
Created string `goquery:"#viewad-extra-info,text"` // parsed as day.month.year by CalculateExpire
Text string `goquery:"p#viewad-description-text,html"`
Images []string `goquery:".galleryimage-element img,[src]"`
Shipping string `goquery:".boxedarticle--details--shipping,text"` // not always filled
Expire string // set by CalculateExpire
// runtime computed
Year, Day, Month string
}
// LogValue implements slog.LogValuer: when an ad is handed to a slog
// call it is rendered as a group of its key properties. Images and
// the description text are summarized by count and size only.
func (ad *Ad) LogValue() slog.Value {
	summary := []slog.Attr{
		slog.String("title", ad.Title),
		slog.String("price", ad.Price),
		slog.String("id", ad.ID),
		slog.Int("imagecount", len(ad.Images)),
		slog.Int("bodysize", len(ad.Text)),
		slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
		slog.String("created", ad.Created),
		slog.String("expire", ad.Expire),
		slog.String("shipping", ad.Shipping),
		slog.String("details", ad.Details),
	}

	return slog.GroupValue(summary...)
}
// Incomplete reports whether the ad lacks one of the fields that are
// treated as mandatory: Category, Created or Text. Whether the site
// really guarantees them is not known; images and the remaining meta
// data are considered optional, and so is the price, because there
// are ads for gifts as well.
//
// Mind the polarity: true means "ad is incomplete", false means
// "ad is complete".
func (ad *Ad) Incomplete() bool {
	return ad.Category == "" || ad.Created == "" || ad.Text == ""
}
// CalculateExpire derives Expire from Created by adding ExpireDays
// days. Both values use the day.month.year notation. If Created is
// empty or cannot be parsed, Expire is left untouched.
func (ad *Ad) CalculateExpire() {
	const layout = "02.01.2006"

	if ad.Created == "" {
		return
	}

	created, err := time.Parse(layout, ad.Created)
	if err != nil {
		// best effort: keep whatever Expire currently holds
		return
	}

	ad.Expire = created.AddDate(0, 0, ExpireDays).Format(layout)
}
/*
DecodeAttributes parses the raw ad details into ad.Attributes.

Background, see https://codeberg.org/scip/kleingebaeck/issues/117:
the HTML delivered by kleinanzeigen.de has no css attribute for the
keys of the ad details, so key=>value pairs cannot be selected
directly and have to be parsed manually.

After the goq run ad.Details contains alternating key and value
lines, possibly separated by blank lines:

	Art
	Weitere Kinderzimmermöbel
	Farbe
	Holz
	Zustand
	In Ordnung

Every non-blank line is trimmed; the first one of a pair is the key,
the second one its value. A trailing key without a value is dropped.
Some well known keys are additionally copied into static members for
backward compatibility reasons. Finally the "+ Versand ab " prefix is
removed from ad.Shipping.
*/
func (ad *Ad) DecodeAttributes() {
	parsed := map[string]string{}
	pending := "" // key still waiting for its value; empty while a key is expected

	lines := bufio.NewScanner(strings.NewReader(ad.Details))
	for lines.Scan() {
		text := strings.TrimSpace(lines.Text())

		switch {
		case text == "":
			// blank lines carry no information
		case pending == "":
			pending = text
		default:
			parsed[pending] = text
			pending = ""
		}
	}

	ad.Attributes = parsed

	// static members, only overwritten when the attribute is present
	legacy := []struct {
		key    string
		target *string
	}{
		{"Zustand", &ad.Condition},
		{"Farbe", &ad.Color},
		{"Art", &ad.Type},
		{"Material", &ad.Material},
	}

	for _, field := range legacy {
		if Exists(ad.Attributes, field.key) {
			*field.target = ad.Attributes[field.key]
		}
	}

	slog.Debug("parsed attributes", "attributes", ad.Attributes)

	ad.Shipping = strings.Replace(ad.Shipping, "+ Versand ab ", "", 1)
}

251
config.go
View File

@@ -1,251 +0,0 @@
/*
Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"strings"
"github.com/knadh/koanf/parsers/toml"
"github.com/knadh/koanf/providers/confmap"
"github.com/knadh/koanf/providers/env"
"github.com/knadh/koanf/providers/file"
"github.com/knadh/koanf/providers/posflag"
"github.com/knadh/koanf/v2"
flag "github.com/spf13/pflag"
)
// Program-wide fixed values and built-in defaults.
const (
// Release number of the program. The Makefile obtains it by grepping
// this file for the first line containing the identifier below, so no
// earlier line (comments included) may spell that identifier out.
VERSION string = "0.3.23"
// Base URL of the site; also used as the URL cookies are attached to.
Baseuri string = "https://www.kleinanzeigen.de"
// NOTE(review): presumably the path of a user's ad listing, appended
// to Baseuri — confirm against the scraper.
Listuri string = "/s-bestandsliste.html"
Defaultdir string = "."
/*
Also possible: loop through .Attributes:
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
*/
// Default ad template with LF line endings; InitConfig uses it on every
// OS except Windows.
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nShipping: {{.Shipping}}\nId: {{.ID}}\n" +
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" +
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
// Same template with CRLF line endings, used by InitConfig on Windows.
// NOTE(review): this variant labels the expiry line "Expires:" while
// the LF variant uses "Expire:" — confirm whether that is intended.
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nShipping: {{.Shipping}}\r\nId: {{.ID}}\r\n" +
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" +
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
// Default value of the "useragent" setting, sent as User-Agent header.
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
"AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
// Default value of the "adnametemplate" setting.
DefaultAdNameTemplate string = "{{.Slug}}"
// Default value of the "outdir" setting.
DefaultOutdirTemplate string = "."
// for image download throttling
// NOTE(review): unit and exact use are not visible here — confirm.
MinThrottle int = 2
MaxThrottle int = 20
// we extract the slug from the uri
// NOTE(review): presumably the index of the URI part holding the slug
// — confirm against the code splitting the URI.
SlugURIPartNum int = 6
// We have to calculate the ad expiry because the real value can
// only be seen by logged in users. The initial ad lifetime is 120
// days. It can be extended by the user 8 days before expire by 60
// days. But this is unknown to us, so we'll stick with our 120
// days. They may be wrong for older ads. Don't rely on it!
ExpireDays int = 120
// Value of runtime.GOOS on Windows, compared against in InitConfig.
WIN string = "windows"
)
// DirsVisited maps a string key to a counter.
// NOTE(review): judging by the name it counts how often an output
// directory has been used. It is declared without a value here, so it
// is nil until some other code assigns a map; writing to it before
// that would panic — confirm where it is initialized.
var DirsVisited map[string]int
// Usage is the help message printed by the custom usage callback
// installed in InitConfig. The synopsis lists every short flag defined
// there, including -f.
const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool.
Usage: kleingebaeck [-dvVhmoclfu] [<ad-listing-url>,...]
Options:
-u --user <uid> Backup ads from user with uid <uid>.
-d --debug Enable debug output.
-v --verbose Enable verbose output.
-o --outdir <dir> Set output dir (default: current directory)
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Overwrite images and ads even if they already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
If one or more ad listing url's are specified, only backup those,
otherwise backup all ads of the given user.`
// Config is the effective program configuration as assembled by
// InitConfig. Fields with a `koanf` tag are unmarshalled from the
// merged settings under that key; the untagged fields are filled or
// updated by code at runtime.
type Config struct {
Verbose bool `koanf:"verbose"` // -v, also set when loglevel is "verbose"
Debug bool `koanf:"debug"` // -d, also set when loglevel is "debug"
Showversion bool `koanf:"version"` // -V
Showhelp bool `koanf:"help"` // -h
Showmanual bool `koanf:"manual"` // -m
User int `koanf:"user"` // -u
Outdir string `koanf:"outdir"` // -o
Template string `koanf:"template"`
Adnametemplate string `koanf:"adnametemplate"`
Loglevel string `koanf:"loglevel"`
Limit int `koanf:"limit"` // -l, 0 means unlimited
IgnoreErrors bool `koanf:"ignoreerrors"`
ForceDownload bool `koanf:"force"` // -f
UserAgent string `koanf:"useragent"` // conf only
Adlinks []string // positional command line arguments left after flag parsing
StatsCountAds int // incremented by IncrAds
StatsCountImages int // increased by IncrImgs
}
// IncrAds increments the ad counter StatsCountAds by one.
func (c *Config) IncrAds() {
c.StatsCountAds++
}
// IncrImgs adds num to the image counter StatsCountImages.
func (c *Config) IncrImgs(num int) {
c.StatsCountImages += num
}
// InitConfig assembles the program configuration. The sources are
// loaded in this order, later ones overriding earlier ones: built-in
// defaults, config file(s), environment variables prefixed with
// KLEINGEBAECK_ and finally the command line flags.
//
// output is the writer the usage message is printed to by the custom
// usage callback. Arguments left over after flag parsing are stored in
// Config.Adlinks.
//
// An error is returned if the flags cannot be parsed, a config file
// cannot be inspected or loaded, or loading/unmarshalling any of the
// other sources fails.
func InitConfig(output io.Writer) (*Config, error) {
	kloader := koanf.New(".")

	// determine template based on os
	template := DefaultTemplate
	if runtime.GOOS == WIN {
		template = DefaultTemplateWin
	}

	// Load default values using the confmap provider. The keys have to
	// match the koanf tags of Config, hence "user" (not "userid").
	defaults := map[string]any{
		"template":       template,
		"outdir":         DefaultOutdirTemplate,
		"loglevel":       "notice",
		"user":           0,
		"adnametemplate": DefaultAdNameTemplate,
		"useragent":      DefaultUserAgent,
	}

	if err := kloader.Load(confmap.Provider(defaults, "."), nil); err != nil {
		return nil, fmt.Errorf("failed to load default values into koanf: %w", err)
	}

	// setup custom usage
	flagset := flag.NewFlagSet("config", flag.ContinueOnError)
	flagset.Usage = func() {
		if _, err := fmt.Fprintln(output, Usage); err != nil {
			panic(err)
		}

		os.Exit(0)
	}

	// parse commandline flags
	flagset.StringP("config", "c", "", "config file")
	flagset.StringP("outdir", "o", "", "directory where to store ads")
	flagset.IntP("user", "u", 0, "user id")
	flagset.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)")
	flagset.BoolP("verbose", "v", false, "be verbose")
	flagset.BoolP("debug", "d", false, "enable debug log")
	flagset.BoolP("version", "V", false, "show program version")
	flagset.BoolP("help", "h", false, "show usage")
	flagset.BoolP("manual", "m", false, "show manual")
	flagset.BoolP("force", "f", false, "force")
	flagset.BoolP("ignoreerrors", "", false, "ignore image download HTTP errors")

	if err := flagset.Parse(os.Args[1:]); err != nil {
		return nil, fmt.Errorf("failed to parse program arguments: %w", err)
	}

	// generate a list of config files to try to load, including the
	// one provided via -c, if any
	var configfiles []string

	// the flag is defined above, so the lookup error can be ignored
	configfile, _ := flagset.GetString("config")

	if configfile != "" {
		configfiles = []string{configfile}
	} else {
		configfiles = []string{
			"/etc/kleingebaeck.conf", "/usr/local/etc/kleingebaeck.conf", // unix variants
		}

		// Only look at the per-user locations when the home directory is
		// known. Joining them onto an empty home would silently turn
		// them into paths relative to the current directory.
		if home, err := os.UserHomeDir(); err == nil {
			configfiles = append(configfiles,
				filepath.Join(home, ".config", "kleingebaeck", "config"),
				filepath.Join(home, ".kleingebaeck"),
			)
		}

		configfiles = append(configfiles, "kleingebaeck.conf")
	}

	// Load the config file[s]
	for _, cfgfile := range configfiles {
		info, err := os.Stat(cfgfile)
		if err != nil {
			// ignore non-existent files, but bail out on any other errors
			// NOTE(review): this also silently skips a missing file that
			// was explicitly requested via -c — confirm that is wanted.
			if !os.IsNotExist(err) {
				return nil, fmt.Errorf("failed to stat config file: %w", err)
			}

			continue
		}

		// directories of that name are skipped
		if info.IsDir() {
			continue
		}

		if err := kloader.Load(file.Provider(cfgfile), toml.Parser()); err != nil {
			return nil, fmt.Errorf("error loading config file: %w", err)
		}
	}

	// env overrides config file; KLEINGEBAECK_FOO_BAR becomes key foo.bar
	envkey := func(s string) string {
		return strings.ReplaceAll(strings.ToLower(
			strings.TrimPrefix(s, "KLEINGEBAECK_")), "_", ".")
	}

	if err := kloader.Load(env.Provider("KLEINGEBAECK_", ".", envkey), nil); err != nil {
		return nil, fmt.Errorf("error loading environment: %w", err)
	}

	// command line overrides env
	if err := kloader.Load(posflag.Provider(flagset, ".", kloader), nil); err != nil {
		return nil, fmt.Errorf("error loading flags: %w", err)
	}

	// fetch values
	conf := &Config{}
	if err := kloader.Unmarshal("", conf); err != nil {
		return nil, fmt.Errorf("error unmarshalling: %w", err)
	}

	// adjust loglevel
	switch conf.Loglevel {
	case "verbose":
		conf.Verbose = true
	case "debug":
		conf.Debug = true
	}

	// are there any args left on commandline? if so treat them as adlinks
	conf.Adlinks = flagset.Args()

	return conf, nil
}

View File

@@ -1,22 +0,0 @@
version: "3.9"
services:
init:
image: alpine:latest
user: "root"
group_add:
- '${GROUP_ID}'
volumes:
- ${OUTDIR}:/backup
command: chown -R ${USER_ID}:${USER_ID} /backup
kleingebaeck:
container_name: kleingebaeck
user: "${USER_ID}:${USER_ID}"
volumes:
- ${OUTDIR}:/backup
working_dir: /backup
build: .
image: kleingebaeck:latest
depends_on:
init:
condition: service_completed_successfully

View File

@@ -1,48 +0,0 @@
#
# kleingebaeck sample configuration file.
# put this to ~/.kleingebaeck.
#
# Comments start with the '#' character.
# kleinanzeigen.de user-id. must be an unquoted number
user = 00000000
# enable verbose output (same as -v), may be true or false.
# other values: notice or debug
loglevel = "verbose"
# directory where to store downloaded ads. kleingebaeck will try to
# create it. must be a quoted string. You can also include a couple of
# template variables, e.g:
# outdir = "test-{{.Year}}-{{.Month}}-{{.Day}}"
outdir = "test"
# template for stored adlistings.
template="""
Title: {{.Title}}
Price: {{.Price}}
Shipping: {{.Shipping}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Type: {{.Type}}
Created: {{.Created}}
{{.Text}}
"""
# Ads may contain more attributes than just the Condition. To print
# all attributes, loop over all of them:
template="""
Title: {{.Title}}
Price: {{.Price}}
Id: {{.ID}}
Category: {{.Category}}
{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}
{{ end }}
Type: {{.Type}}
Created: {{.Created}}
{{.Text}}
"""

104
fetch.go
View File

@@ -1,104 +0,0 @@
/*
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"errors"
"fmt"
"io"
"log/slog"
"net/http"
"net/http/cookiejar"
"net/url"
)
// Fetcher is a convenient wrapper to fetch some web content. It
// bundles the program configuration, the HTTP client used for all
// requests and the cookies carried over from one request to the next.
type Fetcher struct {
// program configuration; Get reads the User-Agent from it and
// Getimage consults IgnoreErrors
Config *Config
// HTTP client (with cookie jar) used to execute all requests
Client *http.Client
// cookies of the most recent successful response; Get puts them
// into the client's jar before sending the next request
Cookies []*http.Cookie
}
// NewFetcher creates a Fetcher for the given configuration. The HTTP
// client it contains uses a fresh cookie jar and the logging/retrying
// transport. An error is only returned if the cookie jar cannot be
// created.
func NewFetcher(conf *Config) (*Fetcher, error) {
	cookies, err := cookiejar.New(nil)
	if err != nil {
		return nil, fmt.Errorf("failed to create a cookie jar obj: %w", err)
	}

	client := &http.Client{
		Transport: &loggingTransport{}, // implemented in http.go
		Jar:       cookies,
	}

	fetcher := &Fetcher{
		Config:  conf,
		Client:  client,
		Cookies: []*http.Cookie{},
	}

	return fetcher, nil
}
// Get fetches uri via HTTP GET and returns the response body. The
// configured User-Agent is sent along, as well as the cookies
// remembered from the previous response (if any); the cookies of the
// new response are remembered for the next call.
//
// Any status other than 200 is reported as an error. The caller is
// responsible for closing the returned body.
func (f *Fetcher) Get(uri string) (io.ReadCloser, error) {
	req, err := http.NewRequest(http.MethodGet, uri, http.NoBody)
	if err != nil {
		return nil, fmt.Errorf("failed to create a new HTTP request obj: %w", err)
	}

	req.Header.Set("User-Agent", f.Config.UserAgent)

	if len(f.Cookies) > 0 {
		// the jar must not be fed with a nil URL, so bail out if the
		// base uri cannot be parsed
		uriobj, err := url.Parse(Baseuri)
		if err != nil {
			return nil, fmt.Errorf("failed to parse base uri %s: %w", Baseuri, err)
		}

		slog.Debug("have cookies, sending them",
			"sample-cookie-name", f.Cookies[0].Name,
			"sample-cookie-expire", f.Cookies[0].Expires,
		)

		f.Client.Jar.SetCookies(uriobj, f.Cookies)
	}

	res, err := f.Client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("failed to initiate HTTP request to %s: %w", uri, err)
	}

	if res.StatusCode != http.StatusOK {
		// the body is not handed to the caller in this case, so it has
		// to be closed here, otherwise the connection leaks
		if err := res.Body.Close(); err != nil {
			slog.Debug("failed to close HTTP response body", "uri", uri, "error", err)
		}

		return nil, errors.New("could not get page via HTTP")
	}

	slog.Debug("got cookies?", "cookies", res.Cookies())

	f.Cookies = res.Cookies()

	return res.Body, nil
}
// Getimage fetches an ad image and returns its body. If the download
// fails and errors shall be ignored per configuration, the failure is
// only logged and both return values are nil, so callers have to check
// the body for nil as well.
func (f *Fetcher) Getimage(uri string) (io.ReadCloser, error) {
	slog.Debug("fetching ad image", "uri", uri)

	body, err := f.Get(uri)

	switch {
	case err == nil:
		return body, nil
	case f.Config.IgnoreErrors:
		slog.Info("Failed to download image, error ignored", "error", err.Error())
		return nil, nil
	default:
		return nil, err
	}
}

41
go.mod
View File

@@ -1,41 +0,0 @@
module kleingebaeck
go 1.24.0
toolchain go1.24.5
require (
astuart.co/goq v1.0.0
github.com/corona10/goimagehash v1.1.0
github.com/inconshreveable/mousetrap v1.1.0
github.com/jarcoal/httpmock v1.4.1
github.com/knadh/koanf/parsers/toml v0.1.0
github.com/knadh/koanf/providers/confmap v1.0.0
github.com/knadh/koanf/providers/env v1.1.0
github.com/knadh/koanf/providers/file v1.2.0
github.com/knadh/koanf/providers/posflag v1.0.1
github.com/knadh/koanf/v2 v2.3.0
github.com/lmittmann/tint v1.1.2
github.com/mattn/go-isatty v0.0.20
github.com/spf13/pflag v1.0.10
github.com/tlinden/yadu v0.1.3
golang.org/x/image v0.31.0
golang.org/x/sync v0.17.0
)
require (
github.com/PuerkitoBio/goquery v1.5.1 // indirect
github.com/andybalholm/cascadia v1.1.0 // indirect
github.com/fatih/color v1.16.0 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/knadh/koanf/maps v0.1.2 // indirect
github.com/mattn/go-colorable v0.1.14 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect
golang.org/x/net v0.38.0 // indirect
golang.org/x/sys v0.32.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

88
go.sum
View File

@@ -1,88 +0,0 @@
astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw=
astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno=
github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg=
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
github.com/corona10/goimagehash v1.1.0 h1:teNMX/1e+Wn/AYSbLHX8mj+mF9r60R1kBeqE9MkoYwI=
github.com/corona10/goimagehash v1.1.0/go.mod h1:VkvE0mLn84L4aF8vCb6mafVajEb6QYMHl2ZJLn0mOGI=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jarcoal/httpmock v1.4.1 h1:0Ju+VCFuARfFlhVXFc2HxlcQkfB+Xq12/EotHko+x2A=
github.com/jarcoal/httpmock v1.4.1/go.mod h1:ftW1xULwo+j0R0JJkJIIi7UKigZUXCLLanykgjwBXL0=
github.com/knadh/koanf/maps v0.1.2 h1:RBfmAW5CnZT+PJ1CVc1QSJKf4Xu9kxfQgYVQSu8hpbo=
github.com/knadh/koanf/maps v0.1.2/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI=
github.com/knadh/koanf/parsers/toml v0.1.0 h1:S2hLqS4TgWZYj4/7mI5m1CQQcWurxUz6ODgOub/6LCI=
github.com/knadh/koanf/parsers/toml v0.1.0/go.mod h1:yUprhq6eo3GbyVXFFMdbfZSo928ksS+uo0FFqNMnO18=
github.com/knadh/koanf/providers/confmap v1.0.0 h1:mHKLJTE7iXEys6deO5p6olAiZdG5zwp8Aebir+/EaRE=
github.com/knadh/koanf/providers/confmap v1.0.0/go.mod h1:txHYHiI2hAtF0/0sCmcuol4IDcuQbKTybiB1nOcUo1A=
github.com/knadh/koanf/providers/env v1.1.0 h1:U2VXPY0f+CsNDkvdsG8GcsnK4ah85WwWyJgef9oQMSc=
github.com/knadh/koanf/providers/env v1.1.0/go.mod h1:QhHHHZ87h9JxJAn2czdEl6pdkNnDh/JS1Vtsyt65hTY=
github.com/knadh/koanf/providers/file v1.2.0 h1:hrUJ6Y9YOA49aNu/RSYzOTFlqzXSCpmYIDXI7OJU6+U=
github.com/knadh/koanf/providers/file v1.2.0/go.mod h1:bp1PM5f83Q+TOUu10J/0ApLBd9uIzg+n9UgthfY+nRA=
github.com/knadh/koanf/providers/posflag v1.0.1 h1:EnMxHSrPkYCFnKgBUl5KBgrjed8gVFrcXDzaW4l/C6Y=
github.com/knadh/koanf/providers/posflag v1.0.1/go.mod h1:3Wn3+YG3f4ljzRyCUgIwH7G0sZ1pMjCOsNBovrbKmAk=
github.com/knadh/koanf/v2 v2.3.0 h1:Qg076dDRFHvqnKG97ZEsi9TAg2/nFTa9hCdcSa1lvlM=
github.com/knadh/koanf/v2 v2.3.0/go.mod h1:gRb40VRAbd4iJMYYD5IxZ6hfuopFcXBpc9bbQpZwo28=
github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/lmittmann/tint v1.1.2 h1:2CQzrL6rslrsyjqLDwD11bZ5OpLBPU+g3G/r5LSfS8w=
github.com/lmittmann/tint v1.1.2/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/maxatome/go-testdeep v1.14.0 h1:rRlLv1+kI8eOI3OaBXZwb3O7xY3exRzdW5QyX48g9wI=
github.com/maxatome/go-testdeep v1.14.0/go.mod h1:lPZc/HAcJMP92l7yI6TRz1aZN5URwUBUAfUNvrclaNM=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ=
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/tlinden/yadu v0.1.3 h1:5cRCUmj+l5yvlM2irtpFBIJwVV2DPEgYSaWvF19FtcY=
github.com/tlinden/yadu v0.1.3/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/image v0.31.0 h1:mLChjE2MV6g1S7oqbXC0/UcKijjm5fnJLUYKIYrLESA=
golang.org/x/image v0.31.0/go.mod h1:R9ec5Lcp96v9FTF+ajwaH3uGxPH4fKfHHAVbUILxghA=
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20=
golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo=
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

140
http.go
View File

@@ -1,140 +0,0 @@
/*
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bytes"
"fmt"
"io"
"log/slog"
"math"
"math/rand"
"net/http"
"time"
)
// An artificial "ID" is added to each HTTP request and the
// corresponding response for debugging purposes, so that the pair of
// them can be associated more easily in the debug output.

// letters is the alphabet the request IDs are drawn from.
var letters = []rune("ABCDEF0123456789")
// IDLEN is the number of characters of a request ID.
const IDLEN int = 8
// RetryCount is the maximum number of retries after a failed request,
// that is if err!=nil or on one of the HTTP status codes listed in
// shouldRetry.
const RetryCount = 3
// getid returns a random request ID consisting of IDLEN characters
// taken from letters. It is only used to tag debug output.
func getid() string {
	id := make([]rune, 0, IDLEN)

	for len(id) < IDLEN {
		id = append(id, letters[rand.Intn(len(letters))])
	}

	return string(id)
}
// loggingTransport is used to inject debug logging and to implement
// retries. It carries no state; its RoundTrip method does the work.
type loggingTransport struct{}
// backoff returns the escalating delay to wait before a retry:
// 2^retries seconds, i.e. 1s, 2s, 4s, ... for retries 0, 1, 2, ...
func backoff(retries int) time.Duration {
	seconds := math.Pow(2, float64(retries))

	return time.Duration(seconds) * time.Second
}
// shouldRetry decides if a request is worth another attempt: that's
// the case for any error and for the HTTP status codes 502, 503 and
// 504. resp is only looked at if err is nil.
func shouldRetry(err error, resp *http.Response) bool {
	if err != nil {
		return true
	}

	switch resp.StatusCode {
	case http.StatusBadGateway,
		http.StatusServiceUnavailable,
		http.StatusGatewayTimeout:
		return true
	default:
		return false
	}
}
// drainBody reads the remaining response body and closes it. A body
// needs to be drained and closed, otherwise the underlying connection
// can't be reused for the next request. A nil response or a response
// without body is accepted, nothing happens then.
//
// Draining is best effort: it is done on the retry path right after a
// failed request, so a read or close error is only logged at debug
// level instead of taking the whole program down.
func drainBody(resp *http.Response) {
	if resp == nil || resp.Body == nil {
		return
	}

	if _, err := io.Copy(io.Discard, resp.Body); err != nil {
		slog.Debug("failed to drain HTTP response body", "error", err)
	}

	// close in any case, even if the body could not be read completely
	if err := resp.Body.Close(); err != nil {
		slog.Debug("failed to close HTTP response body", "error", err)
	}
}
// RoundTrip is the actual logging transport with retries. It sends the
// request using http.DefaultTransport, logs request and response at
// debug level (tagged with a random id so that both can be correlated)
// and retries up to RetryCount times with an escalating delay as long
// as shouldRetry asks for it.
//
// The request body is read into memory once so that it can be replayed
// on every attempt. An error is returned if the body cannot be read, if
// the request context is done while waiting for the next attempt or if
// the last attempt failed. A response with a retryable status which
// persists after the last attempt is returned to the caller as is.
func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) {
	// just required for debugging
	requestid := getid()

	// clone the request body, put into request on retry
	var bodyBytes []byte

	if req.Body != nil {
		payload, err := io.ReadAll(req.Body)

		// a transport has to close the request body; the original one
		// is not needed anymore, no matter if it could be read or not
		if cerr := req.Body.Close(); cerr != nil {
			slog.Debug("failed to close HTTP request body", "id", requestid, "error", cerr)
		}

		if err != nil {
			return nil, fmt.Errorf("failed to read HTTP request body for %s: %w", req.URL, err)
		}

		bodyBytes = payload
		req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
	}

	slog.Debug("REQUEST", "id", requestid, "uri", req.URL, "host", req.Host)

	// first try
	resp, err := http.DefaultTransport.RoundTrip(req)
	if err == nil {
		slog.Debug("RESPONSE", "id", requestid, "status", resp.StatusCode,
			"contentlength", resp.ContentLength)
	}

	// enter retry check and loop, if first req were successful, leave loop immediately
	for retries := 0; shouldRetry(err, resp) && retries < RetryCount; retries++ {
		// consume any response to reuse the connection.
		drainBody(resp)

		// wait before the next attempt, but give up as soon as the
		// caller cancels the request
		select {
		case <-req.Context().Done():
			return nil, fmt.Errorf("giving up retrying %s: %w", req.URL, req.Context().Err())
		case <-time.After(backoff(retries)):
		}

		// clone the request body again
		if req.Body != nil {
			req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
		}

		// actual retry
		resp, err = http.DefaultTransport.RoundTrip(req)
		if err == nil {
			slog.Debug("RESPONSE", "id", requestid, "status", resp.StatusCode,
				"contentlength", resp.ContentLength, "retry", retries)
		}
	}

	if err != nil {
		// there is no usable response if the transport reported an error
		return nil, fmt.Errorf("failed to get HTTP response for %s: %w", req.URL, err)
	}

	return resp, nil
}

191
image.go
View File

@@ -1,191 +0,0 @@
/*
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bytes"
"fmt"
"image"
_ "image/gif"
_ "image/jpeg"
_ "image/png"
"log/slog"
"os"
"path/filepath"
_ "golang.org/x/image/webp"
"github.com/corona10/goimagehash"
)
// MaxDistance is the exclusive upper bound of the diff hash distance
// below which two images are considered the same.
const MaxDistance = 3
// Image represents a single ad image along with the meta data needed
// to store it and to detect duplicates.
type Image struct {
// file name including the extension derived from the image format
Filename string
// diff hash of the image, unset until CalcHash has been called
Hash *goimagehash.ImageHash
// raw image data
Data *bytes.Reader
// location the image has been fetched from, empty for images read
// from the file system
URI string
// image format name as used for the file extension (e.g. "jpg"),
// not an actual MIME type
Mime string
}
// LogValue is used for logging to avoid printing Data. It returns a
// group containing the file name, the uri and - if it has already been
// calculated - the hash of the image. NewImage returns images without
// a hash, so the hash must not be dereferenced unconditionally.
func (img *Image) LogValue() slog.Value {
	attrs := []slog.Attr{
		slog.String("filename", img.Filename),
		slog.String("uri", img.URI),
	}

	if img.Hash != nil {
		attrs = append(attrs, slog.String("hash", img.Hash.ToString()))
	}

	return slog.GroupValue(attrs...)
}
// Cache holds the diff hashes of all images of an ad
type Cache []*goimagehash.ImageHash
// NewImage wraps raw image data into an Image. The image format is
// detected from the data itself using image.DecodeConfig and gets
// appended to filename as file extension, the reader is rewound to the
// start afterwards. filename comes from the scraper, it contains
// directory/base w/o suffix. The hash of the image is not calculated
// here.
func NewImage(buf *bytes.Reader, filename, uri string) (*Image, error) {
	_, format, err := image.DecodeConfig(buf)
	if err != nil {
		return nil, fmt.Errorf("failed to decode image: %w", err)
	}

	// detecting the format consumed data, so start over
	if _, err = buf.Seek(0, 0); err != nil {
		return nil, fmt.Errorf("failed to seek(0) on image buffer: %w", err)
	}

	switch format {
	case "jpeg":
		// the format is being used as file extension. "jpg" has been
		// used in the past, so stay with it to be backwards
		// compatible.
		format = "jpg"
	case "":
		return nil, fmt.Errorf("failed to process image: unknown or unsupported image format (supported: jpg,png,gif,webp)")
	}

	img := &Image{
		Filename: filename + "." + format,
		URI:      uri,
		Data:     buf,
		Mime:     format,
	}

	slog.Debug("image MIME", "mime", img.Mime)

	return img, nil
}
// CalcHash decodes the image data and stores the diff hash of the
// image in img.Hash. It fails if the data cannot be decoded or if the
// hash cannot be calculated.
//
// NOTE(review): decoding reads from img.Data and the reader is not
// rewound here - presumably callers seek back before using the data
// again, confirm.
func (img *Image) CalcHash() error {
	decoded, format, err := image.Decode(img.Data)

	switch {
	case err != nil:
		return fmt.Errorf("failed to decode image: %w", err)
	case format == "":
		return fmt.Errorf("failed to decode image: unknown or unsupported image format (supported: jpg,png,gif,webp)")
	}

	diffhash, err := goimagehash.DifferenceHash(decoded)
	if err != nil {
		return fmt.Errorf("failed to calculate diff hash of image: %w", err)
	}

	img.Hash = diffhash

	return nil
}
// Similar reports if the image is similar enough to the image
// represented by hash to be considered the same, that is if the
// distance of both hashes is below MaxDistance. If the distance cannot
// be computed, the images are considered different.
func (img *Image) Similar(hash *goimagehash.ImageHash) bool {
	distance, err := img.Hash.Distance(hash)
	if err != nil {
		slog.Debug("failed to compute diff hash distance", "error", err)

		return false
	}

	if distance >= MaxDistance {
		return false
	}

	// only matches are being logged
	slog.Debug("distance computation", "image-A", img.Hash.ToString(),
		"image-B", hash.ToString(), "distance", distance)

	return true
}
// SimilarExists reports if the cache contains at least one hash the
// current image is similar to. The search stops at the first match.
func (img *Image) SimilarExists(cache Cache) bool {
	for idx := range cache {
		if img.Similar(cache[idx]) {
			return true
		}
	}

	return false
}
// ReadImages computes the diff hashes of all JPG images found in the
// ad directory addir and returns them as Cache. If dont is true
// (forced download, -f given), an empty cache is returned; the
// directory is being read in this case nevertheless, so it has to
// exist. The first image which cannot be read, decoded or hashed
// aborts the whole run with an error.
//
// NOTE(review): only files with a jpg/jpeg extension are considered,
// images stored in other formats do not end up in the cache - confirm
// that this is intended.
func ReadImages(addir string, dont bool) (Cache, error) {
	entries, err := os.ReadDir(addir)
	if err != nil {
		return nil, fmt.Errorf("failed to read ad directory contents: %w", err)
	}

	hashes := Cache{}

	if dont {
		// forced download, -f given
		return hashes, nil
	}

	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}

		switch filepath.Ext(entry.Name()) {
		case ".jpg", ".jpeg", ".JPG", ".JPEG":
			// an image we care about
		default:
			continue
		}

		filename := filepath.Join(addir, entry.Name())

		data, err := ReadImage(filename)
		if err != nil {
			return nil, err
		}

		img, err := NewImage(bytes.NewReader(data.Bytes()), filename, "")
		if err != nil {
			return nil, err
		}

		if err := img.CalcHash(); err != nil {
			return nil, err
		}

		if img.Hash != nil {
			slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString())
		}

		hashes = append(hashes, img.Hash)
	}

	return hashes, nil
}

View File

@@ -1,386 +0,0 @@
.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42)
.\"
.\" Standard preamble:
.\" ========================================================================
.de Sp \" Vertical space (when we can't use .PP)
.if t .sp .5v
.if n .sp
..
.de Vb \" Begin verbatim text
.ft CW
.nf
.ne \\$1
..
.de Ve \" End verbatim text
.ft R
.fi
..
.\" Set up some character translations and predefined strings. \*(-- will
.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
.\" double quote, and \*(R" will give a right double quote. \*(C+ will
.\" give a nicer C++. Capital omega is used to do unbreakable dashes and
.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff,
.\" nothing in troff, for use with C<>.
.tr \(*W-
.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
.ie n \{\
. ds -- \(*W-
. ds PI pi
. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
. ds L" ""
. ds R" ""
. ds C` ""
. ds C' ""
'br\}
.el\{\
. ds -- \|\(em\|
. ds PI \(*p
. ds L" ``
. ds R" ''
. ds C`
. ds C'
'br\}
.\"
.\" Escape single quotes in literal strings from groff's Unicode transform.
.ie \n(.g .ds Aq \(aq
.el .ds Aq '
.\"
.\" If the F register is >0, we'll generate index entries on stderr for
.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
.\" entries marked with X<> in POD. Of course, you'll have to process the
.\" output yourself in some meaningful fashion.
.\"
.\" Avoid warning from groff about undefined register 'F'.
.de IX
..
.nr rF 0
.if \n(.g .if rF .nr rF 1
.if (\n(rF:(\n(.g==0)) \{\
. if \nF \{\
. de IX
. tm Index:\\$1\t\\n%\t"\\$2"
..
. if !\nF==2 \{\
. nr % 0
. nr F 2
. \}
. \}
.\}
.rr rF
.\"
.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
.\" Fear. Run. Save yourself. No user-serviceable parts.
. \" fudge factors for nroff and troff
.if n \{\
. ds #H 0
. ds #V .8m
. ds #F .3m
. ds #[ \f1
. ds #] \fP
.\}
.if t \{\
. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
. ds #V .6m
. ds #F 0
. ds #[ \&
. ds #] \&
.\}
. \" simple accents for nroff and troff
.if n \{\
. ds ' \&
. ds ` \&
. ds ^ \&
. ds , \&
. ds ~ ~
. ds /
.\}
.if t \{\
. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
.\}
. \" troff and (daisy-wheel) nroff accents
.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
.ds ae a\h'-(\w'a'u*4/10)'e
.ds Ae A\h'-(\w'A'u*4/10)'E
. \" corrections for vroff
.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
. \" for low resolution devices (crt and lpr)
.if \n(.H>23 .if \n(.V>19 \
\{\
. ds : e
. ds 8 ss
. ds o a
. ds d- d\h'-1'\(ga
. ds D- D\h'-1'\(hy
. ds th \o'bp'
. ds Th \o'LP'
. ds ae ae
. ds Ae AE
.\}
.rm #[ #] #H #V #F C
.\" ========================================================================
.\"
.IX Title "KLEINGEBAECK 1"
.TH KLEINGEBAECK 1 "2025-02-27" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
.nh
.SH "NAME"
kleingebaeck \- kleinanzeigen.de backup tool
.SH "SYNOPSIS"
.IX Header "SYNOPSIS"
.Vb 10
\& Usage: kleingebaeck [\-dvVhmoc] [<ad\-listing\-url>,...]
\& Options:
\& \-u \-\-user <uid> Backup ads from user with uid <uid>.
\& \-d \-\-debug Enable debug output.
\& \-v \-\-verbose Enable verbose output.
\& \-o \-\-outdir <dir> Set output dir (default: current directory)
\& \-l \-\-limit <num> Limit the ads to download to <num>, default: load all.
\& \-c \-\-config <file> Use config file <file> (default: ~/.kleingebaeck).
\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
\& \-f \-\-force Overwrite images and ads even if they already exist.
\& \-m \-\-manual Show manual.
\& \-h \-\-help Show usage.
\& \-V \-\-version Show program version.
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
This tool can be used to backup ads on the german ad page <https://kleinanzeigen.de>.
.PP
It downloads all (or only the specified ones) ads of one user into a
directory, each ad into its own subdirectory. The backup will contain
a textfile \fBAdlisting.txt\fR which contains the ad contents such as
title, body, price etc. All images will be downloaded as well.
.SH "CONFIGURATION"
.IX Header "CONFIGURATION"
You can create a config file to save typing. By default
\&\f(CW\*(C`~/.kleingebaeck\*(C'\fR is being used but you can specify one with \f(CW\*(C`\-c\*(C'\fR as
well. We use \s-1TOML\s0 as our configuration language. See
<https://toml.io/en/>.
.PP
Format is pretty simple:
.PP
.Vb 11
\& user = 1010101
\& loglevel = "verbose"
\& outdir = "test"
\& useragent = "Mozilla/5.0"
\& template = """
\& Title: {{.Title}}
\& Price: {{.Price}}
\& Id: {{.ID}}
\& Category: {{.Category}}
\& Condition: {{.Condition}}
\& Created: {{.Created}}
\&
\& {{.Text}}
\& """
.Ve
.PP
Be careful if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can leave out
certain fields and use any formatting you like. Refer to
<https://pkg.go.dev/text/template> for details how to write a
template. Also read the \s-1TEMPLATES\s0 section below.
.PP
If you're on windows and want to customize the output directory, put
it into single quotes to avoid the backslashes interpreted as escape
chars like this:
.PP
.Vb 1
\& outdir = \*(AqC:\eData\eAds\*(Aq
.Ve
.SH "TEMPLATES"
.IX Header "TEMPLATES"
Various parts of the configuration can be modified using templates:
the output directory, the ad directory and the ad listing itself.
.SS "\s-1OUTPUT DIR TEMPLATE\s0"
.IX Subsection "OUTPUT DIR TEMPLATE"
The config variable \f(CW\*(C`outdir\*(C'\fR or the command line parameter \f(CW\*(C`\-o\*(C'\fR take a
template which may contain:
.ie n .IP """{{.Year}}""" 4
.el .IP "\f(CW{{.Year}}\fR" 4
.IX Item "{{.Year}}"
.PD 0
.ie n .IP """{{.Month}}""" 4
.el .IP "\f(CW{{.Month}}\fR" 4
.IX Item "{{.Month}}"
.ie n .IP """{{.Day}}""" 4
.el .IP "\f(CW{{.Day}}\fR" 4
.IX Item "{{.Day}}"
.PD
.PP
That way you can create a new output directory for every backup
run. For example:
.PP
.Vb 1
\& outdir = "/home/backups/ads\-{{.Year}}\-{{.Month}}\-{{.Day}}"
.Ve
.PP
Or using the command line flag:
.PP
.Vb 1
\& \-o "/home/backups/ads\-{{.Year}}\-{{.Month}}\-{{.Day}}"
.Ve
.PP
The default value is \f(CW\*(C`.\*(C'\fR \- the current directory.
.SS "\s-1AD DIRECTORY TEMPLATE\s0"
.IX Subsection "AD DIRECTORY TEMPLATE"
The ad directory name can be modified using the following ad values:
.IP "{{.Price}}" 4
.IX Item "{{.Price}}"
.PD 0
.IP "{{.ID}}" 4
.IX Item "{{.ID}}"
.IP "{{.Category}}" 4
.IX Item "{{.Category}}"
.IP "{{.Condition}}" 4
.IX Item "{{.Condition}}"
.IP "{{.Created}}" 4
.IX Item "{{.Created}}"
.IP "{{.Slug}}" 4
.IX Item "{{.Slug}}"
.IP "{{.Text}}" 4
.IX Item "{{.Text}}"
.PD
.PP
It can only be configured in the config file. By default only
\&\f(CW\*(C`{{.Slug}}\*(C'\fR is being used, this is the title of the ad in url format.
.SS "\s-1AD NAME TEMPLATE\s0"
.IX Subsection "AD NAME TEMPLATE"
The name of the directory per ad can be tuned as well:
.ie n .IP """{{.Year}}""" 4
.el .IP "\f(CW{{.Year}}\fR" 4
.IX Item "{{.Year}}"
.PD 0
.ie n .IP """{{.Month}}""" 4
.el .IP "\f(CW{{.Month}}\fR" 4
.IX Item "{{.Month}}"
.ie n .IP """{{.Day}}""" 4
.el .IP "\f(CW{{.Day}}\fR" 4
.IX Item "{{.Day}}"
.ie n .IP """{{.Slug}}""" 4
.el .IP "\f(CW{{.Slug}}\fR" 4
.IX Item "{{.Slug}}"
.ie n .IP """{{.Category}}""" 4
.el .IP "\f(CW{{.Category}}\fR" 4
.IX Item "{{.Category}}"
.ie n .IP """{{.ID}}""" 4
.el .IP "\f(CW{{.ID}}\fR" 4
.IX Item "{{.ID}}"
.PD
.SS "\s-1AD TEMPLATE\s0"
.IX Subsection "AD TEMPLATE"
The ad listing itself can be modified as well, using the same
variables as the ad name template above.
.PP
This is the default template:
.PP
.Vb 8
\& Title: {{.Title}}
\& Price: {{.Price}}
\& Id: {{.ID}}
\& Category: {{.Category}}
\& Condition: {{.Condition}}
\& Type: {{.Type}}
\& Created: {{.Created}}
\& Expire: {{.Expire}}
\&
\& {{.Text}}
.Ve
.PP
The config parameter to modify is \f(CW\*(C`template\*(C'\fR. See example.conf in the
source repository. Please take care, since this is a multiline
string. This is how it shall look if you modify it:
.PP
.Vb 2
\& template="""
\& Title: {{.Title}}
\&
\& {{.Text}}
\& """
.Ve
.PP
That is, the content between the two \f(CW"""\fR chars is the template.
.SH "SETUP"
.IX Header "SETUP"
To setup the tool, you need to lookup your userid on
kleinanzeigen.de. Go to your ad overview page while \s-1NOT\s0 being logged
in:
.PP
.Vb 1
\& https://www.kleinanzeigen.de/s\-bestandsliste.html?userId=XXXXXX
.Ve
.PP
The \fB\s-1XXXXX\s0\fR part is your userid.
.PP
Put it into the configfile as outlined above. Also specify an output
directory. Then just execute \f(CW\*(C`kleingebaeck\*(C'\fR.
.PP
You can use the \fB\-v\fR option to get verbose output or \fB\-d\fR to enable
debugging.
.SH "ENVIRONMENT VARIABLES"
.IX Header "ENVIRONMENT VARIABLES"
The following environment variables are considered:
.PP
.Vb 7
\& KLEINGEBAECK_USER
\& KLEINGEBAECK_DEBUG
\& KLEINGEBAECK_VERBOSE
\& KLEINGEBAECK_OUTDIR
\& KLEINGEBAECK_LIMIT
\& KLEINGEBAECK_CONFIG
\& KLEINGEBAECK_IGNOREERRORS
.Ve
.PP
Please note, that they take precedence over config file, but
commandline flags take precedence over env!
.SH "BUGS"
.IX Header "BUGS"
In order to report a bug, unexpected behavior, feature requests
or to submit a patch, please open an issue on Codeberg:
<https://codeberg.org/scip/kleingebaeck/issues>.
.PP
Please repeat the failing command with debugging enabled \f(CW\*(C`\-d\*(C'\fR and
include the output in the issue.
.SH "LIMITATIONS"
.IX Header "LIMITATIONS"
The \f(CW\*(C`kleingebaeck\*(C'\fR doesn't currently check if it has downloaded a
file already, so it downloads everything again every time you execute
it. Be aware of it. This will change in the future.
.PP
Also there's currently no parallelization implemented. This will
change in the future.
.SH "LICENSE"
.IX Header "LICENSE"
Copyright 2023\-2025 Thomas von Dein
.PP
This program is free software: you can redistribute it and/or modify
it under the terms of the \s-1GNU\s0 General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
.PP
This program is distributed in the hope that it will be useful,
but \s-1WITHOUT ANY WARRANTY\s0; without even the implied warranty of
\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS FOR A PARTICULAR PURPOSE.\s0 See the
\&\s-1GNU\s0 General Public License for more details.
.PP
You should have received a copy of the \s-1GNU\s0 General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
.SH "Author"
.IX Header "Author"
T.v.Dein <tom \s-1AT\s0 vondein \s-1DOT\s0 org>

View File

@@ -1,205 +0,0 @@
package main
// manpage holds the plain-text user manual of kleingebaeck, embedded
// into the binary as a raw string.
//
// NOTE(review): the text mirrors the POD documentation; presumably it
// is generated from it, so keep both in sync when editing.
var manpage = `
NAME
kleingebaeck - kleinanzeigen.de backup tool
SYNOPSIS
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
Options:
-u --user <uid> Backup ads from user with uid <uid>.
-d --debug Enable debug output.
-v --verbose Enable verbose output.
-o --outdir <dir> Set output dir (default: current directory)
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Overwrite images and ads even if they already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
DESCRIPTION
This tool can be used to backup ads on the german ad page
<https://kleinanzeigen.de>.
It downloads all (or only the specified ones) ads of one user into a
directory, each ad into its own subdirectory. The backup will contain a
textfile Adlisting.txt which contains the ad contents such as title,
body, price etc. All images will be downloaded as well.
CONFIGURATION
You can create a config file to save typing. By default
"~/.kleingebaeck" is being used but you can specify one with "-c" as
well. We use TOML as our configuration language. See
<https://toml.io/en/>.
Format is pretty simple:
user = 1010101
loglevel = verbose
outdir = "test"
useragent = "Mozilla/5.0"
template = """
Title: {{.Title}}
Price: {{.Price}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Created: {{.Created}}
{{.Text}}
"""
Be careful if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can leave out
certain fields and use any formatting you like. Refer to
<https://pkg.go.dev/text/template> for details how to write a template.
Also read the TEMPLATES section below.
If you're on windows and want to customize the output directory, put it
into single quotes to avoid the backslashes interpreted as escape chars
like this:
outdir = 'C:\Data\Ads'
TEMPLATES
Various parts of the configuration can be modified using templates: the
output directory, the ad directory and the ad listing itself.
OUTPUT DIR TEMPLATE
The config variable "outdir" or the command line parameter "-o" take a
template which may contain:
"{{.Year}}"
"{{.Month}}"
"{{.Day}}"
That way you can create a new output directory for every backup run. For
example:
outdir = "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
Or using the command line flag:
-o "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
The default value is "." - the current directory.
AD DIRECTORY TEMPLATE
The ad directory name can be modified using the following ad values:
{{.Price}}
{{.ID}}
{{.Category}}
{{.Condition}}
{{.Created}}
{{.Slug}}
{{.Text}}
It can only be configured in the config file. By default only
"{{.Slug}}" is being used, this is the title of the ad in url format.
AD NAME TEMPLATE
The name of the directory per ad can be tuned as well:
"{{.Year}}"
"{{.Month}}"
"{{.Day}}"
"{{.Slug}}"
"{{.Category}}"
"{{.ID}}"
AD TEMPLATE
The ad listing itself can be modified as well, using the same variables
as the ad name template above.
This is the default template:
Title: {{.Title}}
Price: {{.Price}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Type: {{.Type}}
Created: {{.Created}}
Expire: {{.Expire}}
{{.Text}}
The config parameter to modify is "template". See example.conf in the
source repository. Please take care, since this is a multiline string.
This is how it shall look if you modify it:
template="""
Title: {{.Title}}
{{.Text}}
"""
That is, the content between the two """ chars is the template.
SETUP
To setup the tool, you need to lookup your userid on kleinanzeigen.de.
Go to your ad overview page while NOT being logged in:
https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
The XXXXX part is your userid.
Put it into the configfile as outlined above. Also specify an output
directory. Then just execute "kleingebaeck".
You can use the -v option to get verbose output or -d to enable
debugging.
ENVIRONMENT VARIABLES
The following environment variables are considered:
KLEINGEBAECK_USER
KLEINGEBAECK_DEBUG
KLEINGEBAECK_VERBOSE
KLEINGEBAECK_OUTDIR
KLEINGEBAECK_LIMIT
KLEINGEBAECK_CONFIG
KLEINGEBAECK_IGNOREERRORS
Please note, that they take precedence over config file, but commandline
flags take precedence over env!
BUGS
In order to report a bug, unexpected behavior, feature requests or to
submit a patch, please open an issue on Codeberg:
<https://codeberg.org/scip/kleingebaeck/issues>.
Please repeat the failing command with debugging enabled "-d" and
include the output in the issue.
LIMITATIONS
The "kleingebaeck" doesn't currently check if it has downloaded a file
already, so it downloads everything again every time you execute it. Be
aware of it. This will change in the future.
Also there's currently no parallelization implemented. This will change
in the future.
LICENSE
Copyright 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation, either version 3 of the License, or (at your
option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.
You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
Author
T.v.Dein <tom AT vondein DOT org>
`

View File

@@ -1,247 +0,0 @@
=head1 NAME
kleingebaeck - kleinanzeigen.de backup tool
=head1 SYNOPSIS
Usage: kleingebaeck [-dvVhmoc] [<ad-listing-url>,...]
Options:
-u --user <uid> Backup ads from user with uid <uid>.
-d --debug Enable debug output.
-v --verbose Enable verbose output.
-o --outdir <dir> Set output dir (default: current directory)
-l --limit <num> Limit the ads to download to <num>, default: load all.
-c --config <file> Use config file <file> (default: ~/.kleingebaeck).
--ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup.
-f --force Overwrite images and ads even if they already exist.
-m --manual Show manual.
-h --help Show usage.
-V --version Show program version.
=head1 DESCRIPTION
This tool can be used to backup ads on the german ad page L<https://kleinanzeigen.de>.
It downloads all (or only the specified ones) ads of one user into a
directory, each ad into its own subdirectory. The backup will contain
a textfile B<Adlisting.txt> which contains the ad contents such as
title, body, price etc. All images will be downloaded as well.
=head1 CONFIGURATION
You can create a config file to save typing. By default
C<~/.kleingebaeck> is being used but you can specify one with C<-c> as
well. We use TOML as our configuration language. See
L<https://toml.io/en/>.
Format is pretty simple:
user = 1010101
loglevel = verbose
outdir = "test"
useragent = "Mozilla/5.0"
template = """
Title: {{.Title}}
Price: {{.Price}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Created: {{.Created}}
{{.Text}}
"""
Be careful if you want to change the template. The variable is a
multiline string surrounded by three double quotes. You can leave out
certain fields and use any formatting you like. Refer to
L<https://pkg.go.dev/text/template> for details how to write a
template. Also read the TEMPLATES section below.
If you're on windows and want to customize the output directory, put
it into single quotes to avoid the backslashes interpreted as escape
chars like this:
outdir = 'C:\Data\Ads'
=head1 TEMPLATES
Various parts of the configuration can be modified using templates:
the output directory, the ad directory and the ad listing itself.
=head2 OUTPUT DIR TEMPLATE
The config variable C<outdir> or the command line parameter C<-o> take a
template which may contain:
=over
=item C<{{.Year}}>
=item C<{{.Month}}>
=item C<{{.Day}}>
=back
That way you can create a new output directory for every backup
run. For example:
outdir = "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
Or using the command line flag:
-o "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}"
The default value is C<.> - the current directory.
=head2 AD DIRECTORY TEMPLATE
The ad directory name can be modified using the following ad values:
=over
=item {{.Price}}
=item {{.ID}}
=item {{.Category}}
=item {{.Condition}}
=item {{.Created}}
=item {{.Slug}}
=item {{.Text}}
=back
It can only be configured in the config file. By default only
C<{{.Slug}}> is being used, this is the title of the ad in url format.
=head2 AD NAME TEMPLATE
The name of the directory per ad can be tuned as well:
=over
=item C<{{.Year}}>
=item C<{{.Month}}>
=item C<{{.Day}}>
=item C<{{.Slug}}>
=item C<{{.Category}}>
=item C<{{.ID}}>
=back
=head2 AD TEMPLATE
The ad listing itself can be modified as well, using the same
variables as the ad name template above.
This is the default template:
Title: {{.Title}}
Price: {{.Price}}
Id: {{.ID}}
Category: {{.Category}}
Condition: {{.Condition}}
Type: {{.Type}}
Created: {{.Created}}
Expire: {{.Expire}}
{{.Text}}
The config parameter to modify is C<template>. See example.conf in the
source repository. Please take care, since this is a multiline
string. This is how it shall look if you modify it:
template="""
Title: {{.Title}}
{{.Text}}
"""
That is, the content between the two C<"""> chars is the template.
=head1 SETUP
To setup the tool, you need to lookup your userid on
kleinanzeigen.de. Go to your ad overview page while NOT being logged
in:
https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
The B<XXXXXX> part is your userid.
Put it into the configfile as outlined above. Also specify an output
directory. Then just execute C<kleingebaeck>.
You can use the B<-v> option to get verbose output or B<-d> to enable
debugging.
=head1 ENVIRONMENT VARIABLES
The following environment variables are considered:
KLEINGEBAECK_USER
KLEINGEBAECK_DEBUG
KLEINGEBAECK_VERBOSE
KLEINGEBAECK_OUTDIR
KLEINGEBAECK_LIMIT
KLEINGEBAECK_CONFIG
KLEINGEBAECK_IGNOREERRORS
Please note, that they take precedence over config file, but
commandline flags take precedence over env!
=head1 BUGS
In order to report a bug, unexpected behavior, feature requests
or to submit a patch, please open an issue on Codeberg:
L<https://codeberg.org/scip/kleingebaeck/issues>.
Please repeat the failing command with debugging enabled C<-d> and
include the output in the issue.
=head1 LIMITATIONS
The C<kleingebaeck> doesn't currently check if it has downloaded a
file already, so it downloads everything again every time you execute
it. Be aware of it. This will change in the future.
Also there's currently no parallelization implemented. This will
change in the future.
=head1 LICENSE
Copyright 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see L<http://www.gnu.org/licenses/>.
=head1 Author
T.v.Dein <tom AT vondein DOT org>
=cut

203
main.go
View File

@@ -1,203 +0,0 @@
/*
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bufio"
"errors"
"fmt"
"io"
"log/slog"
"os"
"runtime"
"runtime/debug"
"github.com/inconshreveable/mousetrap"
"github.com/lmittmann/tint"
"github.com/tlinden/yadu"
)
// LevelNotice is a custom log level located between slog.LevelInfo (0)
// and slog.LevelWarn (4). Main installs it as the default level, so
// Info and Debug records stay hidden unless verbose or debug mode
// lowers the threshold.
const LevelNotice = slog.Level(2)
// main is the process entry point: it runs Main against stdout and
// passes the status it returns on to the operating system.
func main() {
	status := Main(os.Stdout)
	os.Exit(status)
}
// init guards against a common mistake on Windows: if the user double
// clicked the exe file from explorer, the console window would vanish
// before anything could be read. In that case we print a hint and then
// block until a line has been entered, which gives the user time to
// read it.
//
// It panics if reading from stdin fails.
func init() {
	// nothing to do on other platforms or when started from a shell
	if runtime.GOOS != "windows" || !mousetrap.StartedByExplorer() {
		return
	}

	fmt.Println("Do not double click kleingebaeck.exe!")
	fmt.Println("Please open a command shell and run it from there.")
	fmt.Println()
	// ReadString only returns once a newline arrives, so ask for
	// ENTER explicitly instead of "any key".
	fmt.Print("Press ENTER to quit: ")

	if _, err := bufio.NewReader(os.Stdin).ReadString('\n'); err != nil {
		panic(err)
	}
}
// Main is the actual program, separated from main() so that it can be
// driven with an arbitrary writer. All regular output and all log
// records go to output.
//
// It sets up logging, loads the configuration, handles the
// informational flags (version, help, manual) and then backs up either
// the ad links given in the configuration or all ads of the configured
// user.
//
// It returns the process exit code: 0 on success, 1 (via Die) on any
// failure. It panics if writing to output fails.
func Main(output io.Writer) int {
	logLevel := &slog.LevelVar{}
	opts := &tint.Options{
		Level:     logLevel,
		AddSource: false,
		ReplaceAttr: func(groups []string, attr slog.Attr) slog.Attr {
			// Remove time from the output
			if attr.Key == slog.TimeKey {
				return slog.Attr{}
			}

			return attr
		},
		NoColor: IsNoTty(),
	}

	logLevel.Set(LevelNotice)

	handler := tint.NewHandler(output, opts)
	logger := slog.New(handler)
	slog.SetDefault(logger)

	conf, err := InitConfig(output)
	if err != nil {
		return Die(err)
	}

	if conf.Showversion {
		_, err := fmt.Fprintf(output, "This is kleingebaeck version %s\n", VERSION)
		if err != nil {
			panic(err)
		}

		return 0
	}

	if conf.Showhelp {
		_, err := fmt.Fprintln(output, Usage)
		if err != nil {
			panic(err)
		}

		return 0
	}

	if conf.Showmanual {
		err := man()
		if err != nil {
			return Die(err)
		}

		return 0
	}

	if conf.Verbose {
		logLevel.Set(slog.LevelInfo)
	}

	if conf.Debug {
		// we're using a more verbose logger in debug mode

		// debug.ReadBuildInfo returns ok == false (and no usable info)
		// when the binary carries no build information, so do not
		// dereference its result blindly; the runtime knows its
		// version in any case.
		goVersion := runtime.Version()
		if buildInfo, ok := debug.ReadBuildInfo(); ok && buildInfo != nil {
			goVersion = buildInfo.GoVersion
		}

		debugOpts := &yadu.Options{
			Level:     logLevel,
			AddSource: true,
			//NoColor: IsNoTty(),
		}

		logLevel.Set(slog.LevelDebug)

		debugHandler := yadu.NewHandler(output, debugOpts)
		debuglogger := slog.New(debugHandler).With(
			slog.Group("program_info",
				slog.Int("pid", os.Getpid()),
				slog.String("go_version", goVersion),
			),
		)
		slog.SetDefault(debuglogger)
	}

	slog.Debug("config", "conf", conf)

	// prepare output dir
	outdir, err := OutDirName(conf)
	if err != nil {
		return Die(err)
	}

	conf.Outdir = outdir

	// used for all HTTP requests
	fetch, err := NewFetcher(conf)
	if err != nil {
		return Die(err)
	}

	// setup ad dir registry, needed to check for duplicates
	DirsVisited = make(map[string]int)

	switch {
	case len(conf.Adlinks) >= 1:
		// directly backup ad listing[s]
		for _, uri := range conf.Adlinks {
			err := ScrapeAd(fetch, uri)
			if err != nil {
				return Die(err)
			}
		}
	case conf.User > 0:
		// backup all ads of the given user (via config or cmdline)
		err := ScrapeUser(fetch)
		if err != nil {
			return Die(err)
		}
	default:
		return Die(errors.New("invalid or no user id or no ad link specified"))
	}

	if conf.StatsCountAds > 0 {
		adstr := "ads"

		if conf.StatsCountAds == 1 {
			adstr = "ad"
		}

		_, err := fmt.Fprintf(output, "Successfully downloaded %d %s with %d images to %s.\n",
			conf.StatsCountAds, adstr, conf.StatsCountImages, conf.Outdir)
		if err != nil {
			panic(err)
		}
	} else {
		_, err := fmt.Fprintf(output, "No ads found.")
		if err != nil {
			panic(err)
		}
	}

	return 0
}
// Die reports err as a "Failure" record on the default slog logger and
// returns 1, the exit status callers hand back to the operating system.
// err must not be nil.
func Die(err error) int {
	const exitFailure = 1

	reason := err.Error()
	slog.Error("Failure", "error", reason)

	return exitFailure
}

View File

@@ -1,594 +0,0 @@
/*
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bytes"
"errors"
"fmt"
"net/http"
"os"
"strings"
"testing"
tpl "text/template"
"github.com/jarcoal/httpmock"
)
// LISTTPL is the template for a mocked ad list page, aka:
// https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX
// It ranges over the data it is executed with and renders one link
// (slug, id and title) per element.
// Note, that this HTML code is reduced to the max, so that it only
// contains the stuff required to satisfy goquery
const LISTTPL string = `<!DOCTYPE html>
<html lang="de" >
<head>
<title>Ads</title>
</head>
<body>
{{ range . }}
<h2 class="text-module-begin">
<a class="ellipsis"
href="/s-anzeige/{{ .Slug }}/{{ .ID }}">{{ .Title }}</a>
</h2>
{{ end }}
</body>
</html>
`
// ADTPL is the template for a mocked ad page, aka:
// https://www.kleinanzeigen.de/s-anzeige/ad-text-slug/1010101010
// It renders the category, images, title, price, creation date,
// condition, color, type and text of a single ad.
// Note, that this HTML code is reduced to the max, so that it only
// contains the stuff required to satisfy goquery
//
// NOTE(review): the doctype below lacks its leading "<!" (compare
// LISTTPL); presumably the HTML parser tolerates this - confirm
// whether it is intentional.
const ADTPL string = `DOCTYPE html>
<html lang="de">
<head>
<title>Ad Listing</title>
</head>
<body>
<div class="l-container-row">
<div id="vap-brdcrmb" class="breadcrump">
<a class="breadcrump-link" itemprop="url" href="/" title="Kleinanzeigen ">
<span itemprop="title">Kleinanzeigen </span>
</a>
<a class="breadcrump-link" itemprop="url" href="/egal">
<span itemprop="title">{{ .Category }}</span></a>
</div>
</div>
{{ range $image := .Images }}
<div class="galleryimage-element" data-ix="3">
<img src="{{ $image }}"/>
</div>
{{ end }}
<h1 id="viewad-title" class="boxedarticle--title" itemprop="name" data-soldlabel="Verkauft">
{{ .Title }}</h1>
<div class="boxedarticle--flex--container">
<h2 class="boxedarticle--price" id="viewad-price">
{{ .Price }}</h2>
</div>
<div id="viewad-extra-info" class="boxedarticle--details--full">
<div><i class="icon icon-small icon-calendar-gray-simple"></i><span>{{ .Created }}</span></div>
</div>
<div class="splitlinebox l-container-row" id="viewad-details">
<ul class="addetailslist">
<li class="addetailslist--detail">
Zustand<span class="addetailslist--detail--value" >
{{ .Condition }}</span>
Farbe<span class="addetailslist--detail--value" >
{{ .Color }}</span>
Art<span class="addetailslist--detail--value" >
{{ .Type }}</span>
</li>
</ul>
</div>
<div class="l-container last-paragraph-no-margin-bottom">
<p id="viewad-description-text" class="text-force-linebreak " itemprop="description">
{{ .Text }}
</p>
</div>
</body>
</html>
`
// EMPTYPAGE is a page with an empty head and body, i.e. without any ad
// data. It contains no template actions.
//
// NOTE(review): like ADTPL, the doctype lacks its leading "<!" -
// confirm whether this is intentional.
const EMPTYPAGE string = `DOCTYPE html>
<html lang="de">
<head></head>
<body></body>
</html>
`
// URIs requested by the negative test cases in invalidtests.
const (
// ad uri below the regular /s-anzeige/ path, named "empty"
EMPTYURI string = `https://www.kleinanzeigen.de/s-anzeige/empty/1`
// ad uri below the regular /s-anzeige/ path, named "503"
INVALID503URI string = `https://www.kleinanzeigen.de/s-anzeige/503/1`
// uri using /anzeige/ instead of /s-anzeige/
INVALIDPATHURI string = `https://www.kleinanzeigen.de/anzeige/name/1`
// like INVALIDPATHURI but with additional path elements
INVALID404URI string = `https://www.kleinanzeigen.de/anzeige/name/1/foo/bar`
// uri on a foreign host
INVALIDURI string = `https://foo.bar/weird/things`
)
// base is the command line prefix shared by most test cases: the
// program name plus the config file t/config-empty.conf.
var base = "kleingebaeck -c t/config-empty.conf"
// Tests describes one command line test case.
type Tests struct {
// name identifies the case in failure messages
name string
// args is the complete command line; the test functions split it
// at single spaces and assign the result to os.Args
args string
// expect is a substring which must occur in the captured output
expect string
// exitcode is the value Main is expected to return
exitcode int
}
// tests are the command line test cases executed by TestMain, with
// the mock responders of InitValidSources() registered.
var tests = []Tests{
{
name: "version",
args: base + " -V",
expect: "This is",
exitcode: 0,
},
{
name: "help",
args: base + " -h",
expect: "Usage:",
exitcode: 0,
},
{
name: "debug",
args: base + " -d",
expect: "error: invalid or no user id or no ad link specified",
exitcode: 1,
},
{
name: "debug-check-programinfo",
args: base + " -d",
expect: "pid:",
exitcode: 1,
},
{
name: "no-args-no-user",
args: base,
expect: "invalid or no user id",
exitcode: 1,
},
{
name: "download-single-ad",
args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1",
expect: "Successfully downloaded 1 ad with 2 images to t/out",
exitcode: 0,
},
{
name: "download-single-ad-verbose",
args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -v",
expect: "wrote ad listing",
exitcode: 0,
},
{
name: "download-single-ad-debug",
args: base + " -o t/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -d",
expect: "DEBUG: extracted ad listing",
exitcode: 0,
},
{
name: "download-all-ads",
args: base + " -o t/out -u 1",
expect: "Successfully downloaded 7 ads with 16 images to t/out",
exitcode: 0,
},
{
name: "download-all-ads-using-config",
args: "kleingebaeck -c t/fullconfig.conf",
expect: "Successfully downloaded 7 ads with 16 images to t/out",
exitcode: 0,
},
}
// invalidtests are the command line test cases executed by
// TestMainInvalids, with the mock responders of InitInvalidSources()
// registered. Every case is expected to fail with exit code 1.
var invalidtests = []Tests{
{
name: "empty-ad",
args: base + " " + EMPTYURI,
expect: "could not extract ad data from page, got empty struct",
exitcode: 1,
},
{
name: "invalid-ad",
args: base + " " + INVALIDURI,
expect: "invalid uri",
exitcode: 1,
},
{
name: "invalid-path",
args: base + " " + INVALIDPATHURI,
expect: "could not extract ad data from page, got empty struct",
exitcode: 1,
},
{
name: "404",
args: base + " " + INVALID404URI,
expect: "could not get page via HTTP",
exitcode: 1,
},
{
name: "outdir-no-exists",
args: base + " -o t/foo/bar/out https://www.kleinanzeigen.de/s-anzeige/first-ad/1 -v",
expect: "Failure",
exitcode: 1,
},
{
name: "wrong-flag",
args: base + " -X",
expect: "unknown shorthand flag: 'X' in -X",
exitcode: 1,
},
{
name: "no-config",
args: "kleingebaeck -c t/invalid.conf",
expect: "error loading config file",
exitcode: 1,
},
{
name: "503",
args: base + " " + INVALID503URI,
expect: "could not get page via HTTP",
exitcode: 1,
},
}
// AdConfig holds the values of one mocked ad. It is the data the HTML
// templates LISTTPL and ADTPL are executed with.
type AdConfig struct {
Title string
Slug string
ID string
Price string
Category string
Condition string
Type string
Color string
Created string
Text string
Images []string // files in ./t/
}
// adsrc contains the mocked ads. It is used to generate the ad
// listings and ad pages returned by httpmock using templates, and to
// verify the downloaded ads afterwards (see TestMain).
var adsrc = []AdConfig{
{
Title: "First Ad",
ID: "1", Price: "5€",
Category: "Klimbim",
Text: "Thing to sale",
Slug: "first-ad",
Condition: "Sehr Gut",
Color: "Grün",
Type: "Ball",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Secnd Ad",
ID: "2", Price: "5€",
Category: "Kram",
Text: "Thing to sale",
Slug: "second-ad",
Condition: "Gut",
Color: "Lila",
Type: "Schoki",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Third Ad",
ID: "3",
Price: "5€",
Category: "Kuddelmuddel",
Text: "Thing to sale",
Slug: "third-ad",
Condition: "In Ordnung",
Color: "Blau",
Type: "Auto",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Forth Ad",
ID: "4",
Price: "5€",
Category: "Krempel",
Text: "Thing to sale",
Slug: "fourth-ad",
Condition: "Neu",
Color: "Rot",
Type: "Spielzeut",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Fifth Ad",
ID: "5",
Price: "5€",
Category: "Kladderadatsch",
Text: "Thing to sale",
Slug: "fifth-ad",
Condition: "Sehr Gut",
Color: "Braun",
Type: "Parteibuch",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
Title: "Sixth Ad",
ID: "6",
Price: "5€",
Category: "Klunker",
Text: "Thing to sale",
Slug: "sixth-ad",
Condition: "Sehr Gut",
Color: "Silber",
Type: "Ring",
Created: "Yesterday",
Images: []string{"t/1.jpg", "t/2.jpg"},
},
{
// the only ad with images in formats other than jpg
Title: "Ad with multiple img formats",
ID: "7",
Price: "5€",
Category: "Klunker",
Text: "Thing to sale",
Slug: "seventh-ad",
Condition: "Sehr Gut",
Color: "Gelpb",
Type: "Schmuck",
Created: "Yesterday",
Images: []string{"t/1.png", "t/1.gif", "t/1.webp", "t/1.jpg"},
},
}
// An Adsource is used to construct a httpmock responder for a
// particular url. So, the code (scrape.go) scrapes
// https://kleinanzeigen.de, but in reality httpmock captures the
// request and responds with our mock data
type Adsource struct {
// uri is the url the responder gets registered for
uri string
// content is the response body
content string
// status is the HTTP status code of the response; SetIntercept
// treats the zero value as 200
status int
}
// GetTemplate renders htmltemplate and returns the resulting HTML.
//
// The data handed to the template depends on adconfig: if its ID is
// empty, the template is executed with the list adconfigs (ad listing
// page), otherwise with adconfig itself (single ad page). adconfig must
// not be nil. Any template parse or execution error causes a panic.
func GetTemplate(adconfigs []AdConfig, adconfig *AdConfig, htmltemplate string) string {
	parsed, err := tpl.New("template").Parse(htmltemplate)
	if err != nil {
		panic(err)
	}

	// an ad without ID is the signal to render a listing
	var data any = adconfig
	if adconfig.ID == "" {
		data = adconfigs
	}

	var rendered bytes.Buffer

	if err := parsed.Execute(&rendered, data); err != nil {
		panic(err)
	}

	return rendered.String()
}
// InitValidSources assembles the mock sources for the positive tests:
// three ad listing pages of user 1 (the first with three ads, the
// second with the remaining four, the third empty) followed by one ad
// page for every entry of adsrc.
func InitValidSources() []Adsource {
	// an AdConfig without ID signals GetTemplate() to render a listing
	listing := AdConfig{}

	// first listing page; the following pages only add pageNum
	firstPage := fmt.Sprintf("%s%s?userId=1", Baseuri, Listuri)

	sources := []Adsource{
		{
			uri:     firstPage,
			content: GetTemplate(adsrc[0:3], &listing, LISTTPL),
		},
		{
			uri:     firstPage + "&pageNum=2",
			content: GetTemplate(adsrc[3:7], &listing, LISTTPL),
		},
		{
			// page 3 is valid but contains no ads
			uri:     firstPage + "&pageNum=3",
			content: GetTemplate([]AdConfig{}, &listing, LISTTPL),
		},
	}

	// one page per ad
	for idx := range adsrc {
		ad := adsrc[idx]

		sources = append(sources, Adsource{
			uri:     fmt.Sprintf("%s/s-anzeige/%s/%s", Baseuri, ad.Slug, ad.ID),
			content: GetTemplate(nil, &ad, ADTPL),
		})
	}

	return sources
}
// InitInvalidSources assembles the mock sources for the negative
// tests: pages which are empty, foreign, on a wrong path or answered
// with an HTTP error status.
func InitInvalidSources() []Adsource {
	blank := AdConfig{}

	// render is a shortcut, none of the pages uses template data
	render := func(html string) string {
		return GetTemplate(nil, &blank, html)
	}

	sources := make([]Adsource, 0, 5)

	// valid ad page but without content
	sources = append(sources, Adsource{
		uri:     fmt.Sprintf("%s/s-anzeige/empty/1", Baseuri),
		content: render(EMPTYPAGE),
	})

	// some random foreign webpage
	sources = append(sources, Adsource{
		uri:     INVALIDURI,
		content: render("<html>foo</html>"),
	})

	// some invalid page path
	sources = append(sources, Adsource{
		uri:     fmt.Sprintf("%s/anzeige/name/1", Baseuri),
		content: render("<html></html>"),
	})

	// some none-ad page
	sources = append(sources, Adsource{
		uri:     fmt.Sprintf("%s/anzeige/name/1/foo/bar", Baseuri),
		content: render("<html>HTTP 404: /eine-anzeige/ does not exist!</html>"),
		status:  http.StatusNotFound,
	})

	// valid ad page but 503
	sources = append(sources, Adsource{
		uri:     fmt.Sprintf("%s/s-anzeige/503/1", Baseuri),
		content: render("<html>HTTP 503: service unavailable</html>"),
		status:  http.StatusServiceUnavailable,
	})

	return sources
}
// GetImage returns the raw content of the test image stored at path.
// It panics if the file cannot be read.
func GetImage(path string) []byte {
	raw, err := os.ReadFile(path)
	if err == nil {
		return raw
	}

	panic(err)
}
// SetIntercept registers a httpmock GET responder for every source in
// ads and for the test images referenced by the mocked ads. All
// responses carry a session cookie header. Sources without status are
// answered with 200.
func SetIntercept(ads []Adsource) {
	cookie := http.Header{}
	cookie.Add("Set-Cookie", "session=permanent")

	for _, source := range ads {
		code := source.status
		if code == 0 {
			code = 200
		}

		responder := httpmock.NewStringResponder(code, source.content).HeaderAdd(cookie)
		httpmock.RegisterResponder("GET", source.uri, responder)
	}

	// the image files used by the ads, served under their file path
	images := []string{"t/1.jpg", "t/2.jpg", "t/1.png", "t/1.gif", "t/1.webp"}

	for _, file := range images {
		responder := httpmock.NewBytesResponder(200, GetImage(file)).HeaderAdd(cookie)
		httpmock.RegisterResponder("GET", file, responder)
	}
}
// VerifyAd checks the Adlisting.txt written for advertisement below
// t/out. The trimmed file content must equal the concatenation of
// title, price, id, the category prefixed with "Kleinanzeigen => ",
// condition and creation date.
//
// It returns an error if the ad directory name cannot be determined,
// the file cannot be read or the content differs.
func VerifyAd(advertisement *AdConfig) error {
	expected := strings.Join([]string{
		advertisement.Title,
		advertisement.Price,
		advertisement.ID,
		"Kleinanzeigen => ",
		advertisement.Category,
		advertisement.Condition,
		advertisement.Created,
	}, "")

	// prepare ad dir name using DefaultAdNameTemplate
	conf := Config{Adnametemplate: "{{ .Slug }}"}
	scraped := Ad{Slug: advertisement.Slug, ID: advertisement.ID}

	addir, err := AdDirName(&conf, &scraped)
	if err != nil {
		return err
	}

	written, err := os.ReadFile(fmt.Sprintf("t/out/%s/Adlisting.txt", addir))
	if err != nil {
		return fmt.Errorf("unable to read adlisting file: %w", err)
	}

	if strings.TrimSpace(string(written)) == expected {
		return nil
	}

	return errors.New(fmt.Sprintf("ad content doesn't match.\nExpect: %s\n Got: %s\n", expected, written))
}
// TestMain runs all cases of tests against the mocked valid sources,
// checking exit code and output of Main, and verifies the downloaded
// ad listings afterwards.
func TestMain(t *testing.T) {
	// Main reads its arguments from os.Args, restore them when done
	savedArgs := os.Args
	defer func() { os.Args = savedArgs }()

	httpmock.Activate()
	defer httpmock.DeactivateAndReset()

	// prepare httpmock responders
	SetIntercept(InitValidSources())

	// run commandline tests
	for _, tcase := range tests {
		var captured bytes.Buffer

		os.Args = strings.Split(tcase.args, " ")

		if code := Main(&captured); code != tcase.exitcode {
			t.Errorf("%s with cmd <%s> did not exit with %d but %d",
				tcase.name, tcase.args, tcase.exitcode, code)
		}

		if output := captured.String(); !strings.Contains(output, tcase.expect) {
			t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n",
				tcase.name, tcase.args, tcase.expect, output)
		}
	}

	// verify if downloaded ads match
	for idx := range adsrc {
		ad := adsrc[idx]

		if err := VerifyAd(&ad); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMainInvalids runs all cases of invalidtests against the mocked
// invalid sources and checks exit code and output of Main.
func TestMainInvalids(t *testing.T) {
	// Main reads its arguments from os.Args, restore them when done
	savedArgs := os.Args
	defer func() { os.Args = savedArgs }()

	httpmock.Activate()
	defer httpmock.DeactivateAndReset()

	// prepare httpmock responders
	SetIntercept(InitInvalidSources())

	// run commandline tests
	for _, tcase := range invalidtests {
		var captured bytes.Buffer

		os.Args = strings.Split(tcase.args, " ")

		if code := Main(&captured); code != tcase.exitcode {
			t.Errorf("%s with cmd <%s> did not exit with %d but %d",
				tcase.name, tcase.args, tcase.exitcode, code)
		}

		if output := captured.String(); !strings.Contains(output, tcase.expect) {
			t.Errorf("%s with cmd <%s> output did not match.\nExpect: %s\n Got: %s\n",
				tcase.name, tcase.args, tcase.expect, output)
		}
	}
}

View File

@@ -1,81 +0,0 @@
#!/bin/bash
# Copyright © 2023 Thomas von Dein
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Build release artifacts for every platform listed in DIST.
#
# Usage: mkrel.sh <tool name> <release version>
#
# For each os/arch pair this creates below releases/:
#   - the binary (with .exe suffix on windows)
#   - a tarball containing the binary, README.md, LICENSE and a Makefile
#   - a .sha256 file for the binary and for the tarball
# The releases directory is removed and recreated on every run.

# get list with: go tool dist list
DIST="darwin/amd64
freebsd/amd64
linux/amd64
netbsd/amd64
openbsd/amd64
windows/amd64
freebsd/arm64
linux/arm64
netbsd/arm64
openbsd/arm64
windows/arm64"

tool="$1"
version="$2"

# version is the second argument, so this also catches a missing tool name
if test -z "$version"; then
    echo "Usage: $0 <tool name> <release version>"
    exit 1
fi

rm -rf releases
mkdir -p releases

for D in $DIST; do
    # split "os/arch"
    os=${D/\/*/}
    arch=${D/*\//}
    binfile="releases/${tool}-${os}-${arch}-${version}"
    pie=""

    if test "$os" = "windows"; then
        binfile="${binfile}.exe"
    fi

    # position independent executable only for linux/amd64
    if test "$D" = "linux/amd64"; then
        pie="-buildmode=pie"
    fi

    tardir="${tool}-${os}-${arch}-${version}"
    tarfile="releases/${tool}-${os}-${arch}-${version}.tar.gz"

    set -x
    GOOS=${os} GOARCH=${arch} go build -tags osusergo,netgo -ldflags "-extldflags=-static -w" --trimpath $pie -o ${binfile}
    strip --strip-all ${binfile}
    mkdir -p ${tardir}
    cp ${binfile} README.md LICENSE ${tardir}/

    # The Makefile shipped inside the tarball. The recipe lines must
    # start with a tab. The binary goes into $(PREFIX)/bin, which is
    # the directory the install target creates.
    echo 'tool = kleingebaeck
PREFIX = /usr/local
UID = root
GID = 0
install:
	install -d -o $(UID) -g $(GID) $(PREFIX)/bin
	install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1
	install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/bin/
	install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/' > ${tardir}/Makefile

    tar cpzf ${tarfile} ${tardir}
    sha256sum ${binfile} | cut -d' ' -f1 > ${binfile}.sha256
    sha256sum ${tarfile} | cut -d' ' -f1 > ${tarfile}.sha256
    rm -rf ${tardir}
    set +x
done

254
scrape.go
View File

@@ -1,254 +0,0 @@
/*
Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bytes"
"fmt"
"log/slog"
"path/filepath"
"strconv"
"strings"
"time"
"astuart.co/goq"
"golang.org/x/sync/errgroup"
)
// ScrapeUser backs up the ads of the user configured in fetch.Config.
//
// It extracts the ad links from all ad listing pages (that is: it
// follows the pagination until a page yields no links) and scrapes
// every linked ad afterwards. If fetch.Config.Limit is greater than 0,
// at most that many ad links are processed.
//
// The first error encountered while fetching, decoding or scraping is
// returned.
func ScrapeUser(fetch *Fetcher) error {
	adlinks := []string{}

	baseuri := fmt.Sprintf("%s%s?userId=%d", Baseuri, Listuri, fetch.Config.User)
	page := 1
	uri := baseuri

	slog.Info("fetching ad pages", "user", fetch.Config.User)

	for {
		slog.Debug("fetching page", "uri", uri)

		links, err := scrapeIndexPage(fetch, uri)
		if err != nil {
			return err
		}

		// a page without ad links marks the end of the pagination
		if len(links) == 0 {
			break
		}

		slog.Debug("extracted ad links", "count", len(links))

		for _, href := range links {
			adlinks = append(adlinks, href)
			slog.Debug("ad link", "href", href)
		}

		page++
		uri = baseuri + "&pageNum=" + strconv.Itoa(page)
	}

	for index, adlink := range adlinks {
		err := ScrapeAd(fetch, Baseuri+adlink)
		if err != nil {
			return err
		}

		if fetch.Config.Limit > 0 && index == fetch.Config.Limit-1 {
			break
		}
	}

	return nil
}

// scrapeIndexPage fetches the ad listing page at uri and returns the
// ad links found on it. It exists as a function of its own so that the
// response body is closed as soon as the page has been decoded instead
// of piling up deferred closes until all pages have been fetched.
func scrapeIndexPage(fetch *Fetcher, uri string) ([]string, error) {
	body, err := fetch.Get(uri)
	if err != nil {
		return nil, err
	}

	defer func() {
		if err := body.Close(); err != nil {
			panic(err)
		}
	}()

	var index Index

	if err := goq.NewDecoder(body).Decode(&index); err != nil {
		return nil, fmt.Errorf("failed to goquery decode HTML index body: %w", err)
	}

	return index.Links, nil
}
// ScrapeAd backs up a single ad. uri is the full uri of the ad; slug
// and id of the ad are taken from its path elements.
//
// The ad page is fetched and decoded, the listing is written into the
// ad directory and finally the images of the ad are downloaded. Ads
// for which CheckAdVisited denies processing are skipped silently.
//
// An error is returned if the uri has too few elements, the page
// cannot be fetched or decoded, no ad data could be extracted or one
// of the write/download steps fails.
func ScrapeAd(fetch *Fetcher, uri string) error {
	today := time.Now()

	// extract slug and id from uri
	segments := strings.Split(uri, "/")
	if len(segments) < SlugURIPartNum {
		return fmt.Errorf("invalid uri: %s", uri)
	}

	ad := &Ad{
		Year:  today.Format("2006"),
		Month: today.Format("01"),
		Day:   today.Format("02"),
		Slug:  segments[4],
		ID:    segments[5],
	}

	// get the ad
	slog.Debug("fetching ad page", "uri", uri)

	body, err := fetch.Get(uri)
	if err != nil {
		return err
	}

	defer func() {
		if closeErr := body.Close(); closeErr != nil {
			panic(closeErr)
		}
	}()

	// extract ad contents with goquery/goq
	if err := goq.NewDecoder(body).Decode(&ad); err != nil {
		return fmt.Errorf("failed to goquery decode HTML ad body: %w", err)
	}

	if len(ad.CategoryTree) > 0 {
		ad.Category = strings.Join(ad.CategoryTree, " => ")
	}

	if ad.Incomplete() {
		slog.Debug("got ad", "ad", ad)

		return fmt.Errorf("could not extract ad data from page, got empty struct")
	}

	ad.DecodeAttributes()
	ad.CalculateExpire()

	// prepare ad dir name
	addir, err := AdDirName(fetch.Config, ad)
	if err != nil {
		return err
	}

	// nothing to do if this ad must not be processed (again)
	if !CheckAdVisited(fetch.Config, addir) {
		return nil
	}

	// write listing
	if err := WriteAd(fetch.Config, ad, addir); err != nil {
		return err
	}

	// tell the user
	slog.Debug("extracted ad listing", "ad", ad)

	// stats
	fetch.Config.IncrAds()

	// register for later checks
	DirsVisited[addir] = 1

	return ScrapeImages(fetch, ad, addir)
}
// ScrapeImages downloads all images of an ad, one goroutine per image,
// and stores them inside the ad directory below the configured output
// directory. The files are named by their 1-based position, the suffix
// is appended by NewImage().
//
// Unless ForceDownload is set, an image is not written if
// SimilarExists() finds a match among the images already present in
// the ad directory. The first error of any download is returned.
func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error {
	adpath := filepath.Join(fetch.Config.Outdir, addir)

	// scan existing images, if any
	cache, err := ReadImages(adpath, fetch.Config.ForceDownload)
	if err != nil {
		return err
	}

	egroup := new(errgroup.Group)
	position := 1

	for _, imguri := range advertisement.Images {
		// per-iteration copy for the closure below
		imguri := imguri

		// we append the suffix later in NewImage() based on image format
		basefilename := filepath.Join(adpath, strconv.Itoa(position))

		egroup.Go(func() error {
			// wait a little
			throttle := GetThrottleTime()
			time.Sleep(throttle)

			body, err := fetch.Getimage(imguri)
			if err != nil {
				return err
			}

			// slurp the whole image into memory
			var buf bytes.Buffer

			if _, err := buf.ReadFrom(body); err != nil {
				return fmt.Errorf("failed to read from image buffer: %w", err)
			}

			reader := bytes.NewReader(buf.Bytes())

			picture, err := NewImage(reader, basefilename, imguri)
			if err != nil {
				return err
			}

			if err := picture.CalcHash(); err != nil {
				return err
			}

			if !fetch.Config.ForceDownload && picture.SimilarExists(cache) {
				slog.Debug("similar image exists, not written", "uri", picture.URI)

				return nil
			}

			// rewind the reader before the image gets written
			if _, err := reader.Seek(0, 0); err != nil {
				return fmt.Errorf("failed to seek(0) on image reader: %w", err)
			}

			if err := WriteImage(picture.Filename, reader); err != nil {
				return err
			}

			slog.Debug("wrote image", "image", picture, "size", buf.Len(), "throttle", throttle)

			return nil
		})

		position++
	}

	if err := egroup.Wait(); err != nil {
		return fmt.Errorf("failed to finalize error waitgroup: %w", err)
	}

	fetch.Config.IncrImgs(len(advertisement.Images))

	return nil
}

185
store.go
View File

@@ -1,185 +0,0 @@
/*
Copyright © 2023-2025 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bytes"
"fmt"
"log/slog"
"os"
"path/filepath"
"runtime"
"strings"
tpl "text/template"
"time"
)
// OutdirData holds the date parts which are handed to the outdir
// template in OutDirName().
type OutdirData struct {
	Year  string
	Day   string
	Month string
}
// OutDirName renders conf.Outdir as a text template and returns the
// result. The template gets the current year, month and day (see
// OutdirData).
func OutDirName(conf *Config) (string, error) {
	tmpl, err := tpl.New("outdir").Parse(conf.Outdir)
	if err != nil {
		return "", fmt.Errorf("failed to parse outdir template: %w", err)
	}

	today := time.Now()

	var rendered bytes.Buffer

	err = tmpl.Execute(&rendered, OutdirData{
		Year:  today.Format("2006"),
		Month: today.Format("01"),
		Day:   today.Format("02"),
	})
	if err != nil {
		return "", fmt.Errorf("failed to execute outdir template: %w", err)
	}

	return rendered.String(), nil
}
// AdDirName renders conf.Adnametemplate as a text template with the
// given ad as data and returns the resulting directory name.
func AdDirName(conf *Config, advertisement *Ad) (string, error) {
	tmpl, err := tpl.New("adname").Parse(conf.Adnametemplate)
	if err != nil {
		return "", fmt.Errorf("failed to parse adname template: %w", err)
	}

	var rendered bytes.Buffer

	if err := tmpl.Execute(&rendered, advertisement); err != nil {
		return "", fmt.Errorf("failed to execute adname template: %w", err)
	}

	return rendered.String(), nil
}
// WriteAd renders the ad with the listing template (conf.Template) and
// writes the result to Adlisting.txt inside the ad directory addir
// below conf.Outdir. The directory is created if required.
//
// As a side effect "<br/>" markers in advertisement.Text are replaced
// by line breaks matching the operating system.
//
// An error is returned if the template cannot be parsed or executed,
// or if the directory or the file cannot be created, written or
// closed.
func WriteAd(conf *Config, advertisement *Ad, addir string) (err error) {
	// parse the template first, so that a broken template does not
	// leave an empty or truncated listing file behind
	tmpl, err := tpl.New("adlisting").Parse(conf.Template)
	if err != nil {
		return fmt.Errorf("failed to parse adlisting template: %w", err)
	}

	// prepare output dir
	dir := filepath.Join(conf.Outdir, addir)

	err = Mkdir(dir)
	if err != nil {
		return err
	}

	// write ad file
	listingfile := filepath.Join(dir, "Adlisting.txt")

	listingfd, err := os.Create(listingfile)
	if err != nil {
		return fmt.Errorf("failed to create Adlisting.txt: %w", err)
	}

	defer func() {
		// a failed close may mean that the listing has not been written
		// completely, so report it unless an earlier error is pending
		if cerr := listingfd.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("failed to close Adlisting.txt: %w", cerr)
		}
	}()

	newline := "\n"
	if runtime.GOOS == WIN {
		newline = "\r\n"
	}

	advertisement.Text = strings.ReplaceAll(advertisement.Text, "<br/>", newline)

	err = tmpl.Execute(listingfd, advertisement)
	if err != nil {
		return fmt.Errorf("failed to execute adlisting template: %w", err)
	}

	slog.Info("wrote ad listing", "listingfile", listingfile)

	return nil
}
// WriteImage writes the remaining content of reader to filename. The
// file is created or truncated if it already exists.
//
// An error is returned if the file cannot be created, written or
// closed.
func WriteImage(filename string, reader *bytes.Reader) (err error) {
	file, err := os.Create(filename)
	if err != nil {
		return fmt.Errorf("failed to open image file: %w", err)
	}

	defer func() {
		// a failed close may mean that the image has not been written
		// completely, so report it unless an earlier error is pending
		if cerr := file.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("failed to close image file: %w", cerr)
		}
	}()

	if _, err = reader.WriteTo(file); err != nil {
		return fmt.Errorf("failed to write to image file: %w", err)
	}

	return nil
}
// ReadImage reads the image file filename completely into memory and
// returns its content as a buffer.
//
// An error is returned if filename does not exist, is a directory or
// cannot be read.
func ReadImage(filename string) (*bytes.Buffer, error) {
	if !fileExists(filename) {
		return nil, fmt.Errorf("image %s does not exist", filename)
	}

	data, err := os.ReadFile(filename)
	if err != nil {
		return nil, fmt.Errorf("failed to read image file: %w", err)
	}

	// hand the data over to the buffer directly, no need to copy it
	return bytes.NewBuffer(data), nil
}
// fileExists reports whether filename can be stat'ed and is not a
// directory. Any stat error counts as "does not exist".
func fileExists(filename string) bool {
	if info, err := os.Stat(filename); err == nil {
		return !info.IsDir()
	}

	return false
}
// CheckAdVisited looks up whether the ad directory adname has already
// been registered in DirsVisited and decides how to go on. It returns
// true if the ad shall be written (not seen yet, or seen but
// conf.ForceDownload is set) and false if it shall be skipped.
func CheckAdVisited(conf *Config, adname string) bool {
	switch {
	case !Exists(DirsVisited, adname):
		// not seen so far, just write it
		return true
	case conf.ForceDownload:
		slog.Warn("an ad with the same name has already been downloaded, overwriting", "addir", adname)

		return true
	default:
		// don't overwrite
		slog.Warn("an ad with the same name has already been downloaded, skipping (use -f to overwrite)", "addir", adname)

		return false
	}
}

View File

@@ -1,39 +0,0 @@
/*
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bytes"
"testing"
)
// TestWriteImage covers WriteImage() on its own. The function is also
// called in scrape.go, which is being tested by TestMain() in
// main_test.go, but for unknown reasons it doesn't show up in the
// coverage report, hence this dedicated test.
func TestWriteImage(t *testing.T) {
	t.Parallel()

	const target = "t/out/t.jpg"

	payload := bytes.NewReader([]byte{1, 2, 3, 4, 5, 6, 7, 8})

	if err := WriteImage(target, payload); err != nil {
		t.Errorf("Could not write mock image to %s: %s", target, err.Error())
	}
}

BIN
t/1.gif

Binary file not shown.

Before

Width:  |  Height:  |  Size: 62 B

BIN
t/1.jpg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1001 B

BIN
t/1.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.2 KiB

BIN
t/1.webp

Binary file not shown.

Before

Width:  |  Height:  |  Size: 4.3 KiB

BIN
t/2.jpg

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1002 B

View File

@@ -1,6 +0,0 @@
# Empty config for the Main() unit tests. It forces the tests NOT to
# use a possibly existing ~/.kleingebaeck!
template="""
{{.Title}}{{.Price}}{{.ID}}{{.Category}}{{.Condition}}{{.Created}}
"""

View File

@@ -1,6 +0,0 @@
user = 1
loglevel = "verbose"
outdir = "t/out"
template="""
{{.Title}}{{.Price}}{{.ID}}{{.Category}}{{.Condition}}{{.Created}}
"""

View File

@@ -1,13 +0,0 @@
# Mock http server
Install ehfs from https://github.com/mjpclab/extra-http-file-server/.
Install p2cli from https://github.com/wrouesnel/p2cli.
Run `templates/render.sh` to build the file structure.
Run `server.sh` to start the http server.
To scrape an ad from it, use a URL like this:
http://localhost:8080/s-anzeige/first-ad/111-11-111

Binary file not shown.

Before

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 28 KiB

View File

@@ -1,4 +0,0 @@
#!/bin/sh
# Start the mock http server (ehfs) on localhost:8080.
#
# The two -a options presumably map the url paths /s-anzeige and
# /api/v1/prod-ads/images/fc to the local directories ./kleinanzeigen
# and ./img (verify against the ehfs documentation). -I index.html
# looks like the default index file served for a directory.
ehfs -a :/s-anzeige:./kleinanzeigen \
-a :/api/v1/prod-ads/images/fc:./img \
-l localhost:8080 -I index.html

View File

@@ -1,50 +0,0 @@
<!DOCTYPE html>
<html lang="de">
<head>
<title>Ad Listing</title>
</head>
<body>
<div class="l-container-row">
<div id="vap-brdcrmb" class="breadcrump">
<a class="breadcrump-link" itemprop="url" href="/" title="Kleinanzeigen ">
<span itemprop="title">Kleinanzeigen </span>
</a>
<a class="breadcrump-link" itemprop="url" href="/egal">
<span itemprop="title">{{ category }}</span></a>
</div>
</div>
{% for image in images %}
<div class="galleryimage-element" data-ix="3">
<img src="http://localhost:8080/api/v1/prod-ads/images/fc/{{ image.id }}?rule=$_59.JPG"/>
</div>
{% endfor %}
<h1 id="viewad-title" class="boxedarticle--title" itemprop="name" data-soldlabel="Verkauft">
{{ title }}</h1>
<div class="boxedarticle--flex--container">
<h2 class="boxedarticle--price" id="viewad-price">
{{ price }}</h2>
</div>
<div id="viewad-extra-info" class="boxedarticle--details--full">
<div><i class="icon icon-small icon-calendar-gray-simple"></i><span>{{ created }}</span></div>
</div>
<div class="splitlinebox l-container-row" id="viewad-details">
<ul class="addetailslist">
<li class="addetailslist--detail">
Zustand<span class="addetailslist--detail--value" >
{{ condition }}</span>
</li>
</ul>
</div>
<div class="l-container last-paragraph-no-margin-bottom">
<p id="viewad-description-text" class="text-force-linebreak " itemprop="description">
{{ text }}
</p>
</div>
</body>
</html>

View File

@@ -1,15 +0,0 @@
<!DOCTYPE html>
<html lang="de" >
<head>
<title>Ads</title>
</head>
<body>
{% for ad in ads %}
<h2 class="text-module-begin">
<a class="ellipsis"
href="/s-anzeige/{{ ad.slug }}/{{ ad.id }}">{{ ad.title }}</a>
</h2>
{% endfor %}
</body>
</html>

View File

@@ -1,15 +0,0 @@
#!/bin/sh -x
# Render the mock site from vars.yaml: one listing page plus one
# index.html per ad below ../kleinanzeigen.

base="../kleinanzeigen"

# start from scratch so that no stale pages survive a re-render
rm -rf "$base"
mkdir -p "$base"

echo "Generating /s-bestandsliste.html"
p2cli -t index.tpl -i vars.yaml > "$base/s-bestandsliste.html"

# NOTE: the indices are hard-coded and must match the ads in vars.yaml
for idx in 0 1; do
    slug=$(yq ".ads[$idx].slug" < vars.yaml)
    id=$(yq ".ads[$idx].id" < vars.yaml)

    mkdir -p "$base/$slug/$id"
    yq ".ads[$idx]" < vars.yaml | p2cli -t ad.tpl -f yaml > "$base/$slug/$id/index.html"
done

View File

@@ -1,27 +0,0 @@
ads:
- slug: first-ad
id: 111-11-111
title: First Ad
price: "19 €"
condition: "Sehr gut"
category: "Weitere Elektronik"
created: 21.12.2023
images:
- id: fcf6d664-5258-42c2-bf58-d1b8e9221574
- id: fcf6d664-5258-42c2-bf58-as43as5d43as
text: |
Zu Verkaufen.
Zahlung nur Paypal.
- slug: second-ad
id: 222-22-222
title: Second Ad
price: "200 €"
condition: "Sehr gut"
category: "Elektronik"
created: 21.12.2023
images:
- id: cdas4sd5-5258-42c2-bf58-d1b8e9221574
- id: cdas4sd5-5258-42c2-bf58-as43as5d43as
text: |
Zu Verkaufen.
Zahlung nur Überweisung.

View File

@@ -1 +0,0 @@
user = "

85
util.go
View File

@@ -1,85 +0,0 @@
/*
Copyright © 2023-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
"bytes"
"errors"
"fmt"
"math/rand"
"os"
"os/exec"
"runtime"
"time"
"github.com/mattn/go-isatty"
)
// Mkdir makes sure that the directory dir exists. Missing parent
// directories are created as well.
//
// It returns nil if dir already is a directory or has been created
// successfully. An error is returned if dir exists but is not a
// directory, if it cannot be examined or if the creation fails.
func Mkdir(dir string) error {
	info, err := os.Stat(dir)
	if err == nil {
		if !info.IsDir() {
			return fmt.Errorf("failed to create directory %s: path exists and is not a directory", dir)
		}

		return nil
	}

	// anything but "does not exist" (e.g. permission denied) must not
	// be reported as success
	if !errors.Is(err, os.ErrNotExist) {
		return fmt.Errorf("failed to examine directory %s: %w", dir, err)
	}

	if err := os.MkdirAll(dir, os.ModePerm); err != nil {
		return fmt.Errorf("failed to create directory %s: %w", dir, err)
	}

	return nil
}
// man displays the embedded manual page by feeding it to the stdin of
// "less", which is connected to our stdout and stderr.
func man() error {
	pager := exec.Command("less", "-")

	pager.Stdin = bytes.NewBufferString(manpage)
	pager.Stdout = os.Stdout
	pager.Stderr = os.Stderr

	if err := pager.Run(); err != nil {
		return fmt.Errorf("failed to execute 'less': %w", err)
	}

	return nil
}
// IsNoTty returns true if we are running on windows or if stdout is
// NOT a terminal, false otherwise.
func IsNoTty() bool {
	if runtime.GOOS == WIN {
		return true
	}

	// on all other systems it depends on stdout being a tty
	return !isatty.IsTerminal(os.Stdout.Fd())
}
// GetThrottleTime returns a random duration between MinThrottle and
// MaxThrottle milliseconds (both inclusive).
func GetThrottleTime() time.Duration {
	span := MaxThrottle - MinThrottle + 1
	millis := rand.Intn(span) + MinThrottle

	return time.Duration(millis) * time.Millisecond
}
// Exists reports whether the key v is present in the map m, generic
// variant. A nil map contains no keys.
func Exists[K comparable, V any](m map[K]V, v K) bool {
	_, found := m[v]

	return found
}