From 704450bc44ce574ace95b5ff06f84e5babc41270 Mon Sep 17 00:00:00 2001 From: Thomas von Dein Date: Wed, 5 Nov 2025 08:40:09 +0100 Subject: [PATCH] moved to codeberg --- .gh-dash.yml | 96 --- .goreleaser.yaml | 69 -- .woodpecker/build.yaml | 36 -- .woodpecker/image.yaml | 32 - .woodpecker/release.yaml | 15 - CODE_OF_CONDUCT.md | 114 ---- CONTRIBUTING.md | 93 --- Dockerfile | 27 - Makefile | 101 --- Makefile.dist | 20 - README-de.md | 3 + README.md | 3 + SECURITY.md | 17 - ad.go | 163 ----- config.go | 251 -------- docker-compose.yaml | 22 - example.conf | 48 -- fetch.go | 104 --- go.mod | 41 -- go.sum | 88 --- http.go | 140 ----- image.go | 191 ------ kleingebaeck.1 | 386 ------------ kleingebaeck.go | 205 ------ kleingebaeck.pod | 247 -------- main.go | 203 ------ main_test.go | 594 ------------------ mkrel.sh | 81 --- scrape.go | 254 -------- store.go | 185 ------ store_test.go | 39 -- t/1.gif | Bin 62 -> 0 bytes t/1.jpg | Bin 1001 -> 0 bytes t/1.png | Bin 4259 -> 0 bytes t/1.webp | Bin 4372 -> 0 bytes t/2.jpg | Bin 1002 -> 0 bytes t/config-empty.conf | 6 - t/fullconfig.conf | 6 - t/httproot/README.md | 13 - .../img/cdas4sd5-5258-42c2-bf58-as43as5d43as | Bin 37820 -> 0 bytes .../img/cdas4sd5-5258-42c2-bf58-d1b8e9221574 | Bin 28922 -> 0 bytes .../img/fcf6d664-5258-42c2-bf58-as43as5d43as | Bin 25539 -> 0 bytes .../img/fcf6d664-5258-42c2-bf58-d1b8e9221574 | Bin 28811 -> 0 bytes t/httproot/serve.sh | 4 - t/httproot/templates/ad.tpl | 50 -- t/httproot/templates/index.tpl | 15 - t/httproot/templates/render.sh | 15 - t/httproot/templates/vars.yaml | 27 - t/invalid.conf | 1 - util.go | 85 --- 50 files changed, 6 insertions(+), 4084 deletions(-) delete mode 100644 .gh-dash.yml delete mode 100644 .goreleaser.yaml delete mode 100644 .woodpecker/build.yaml delete mode 100644 .woodpecker/image.yaml delete mode 100644 .woodpecker/release.yaml delete mode 100644 CODE_OF_CONDUCT.md delete mode 100644 CONTRIBUTING.md delete mode 100644 Dockerfile delete mode 100644 Makefile delete mode 100644 Makefile.dist delete mode 100644 SECURITY.md delete mode 100644 ad.go delete mode 100644 config.go delete mode 100644 docker-compose.yaml delete mode 100644 example.conf delete mode 100644 fetch.go delete mode 100644 go.mod delete mode 100644 go.sum delete mode 100644 http.go delete mode 100644 image.go delete mode 100644 kleingebaeck.1 delete mode 100644 kleingebaeck.go delete mode 100644 kleingebaeck.pod delete mode 100644 main.go delete mode 100644 main_test.go delete mode 100755 mkrel.sh delete mode 100644 scrape.go delete mode 100644 store.go delete mode 100644 store_test.go delete mode 100644 t/1.gif delete mode 100644 t/1.jpg delete mode 100644 t/1.png delete mode 100644 t/1.webp delete mode 100644 t/2.jpg delete mode 100644 t/config-empty.conf delete mode 100644 t/fullconfig.conf delete mode 100644 t/httproot/README.md delete mode 100644 t/httproot/img/cdas4sd5-5258-42c2-bf58-as43as5d43as delete mode 100644 t/httproot/img/cdas4sd5-5258-42c2-bf58-d1b8e9221574 delete mode 100644 t/httproot/img/fcf6d664-5258-42c2-bf58-as43as5d43as delete mode 100644 t/httproot/img/fcf6d664-5258-42c2-bf58-d1b8e9221574 delete mode 100755 t/httproot/serve.sh delete mode 100644 t/httproot/templates/ad.tpl delete mode 100644 t/httproot/templates/index.tpl delete mode 100755 t/httproot/templates/render.sh delete mode 100644 t/httproot/templates/vars.yaml delete mode 100644 t/invalid.conf delete mode 100644 util.go diff --git a/.gh-dash.yml b/.gh-dash.yml deleted file mode 100644 index e62c39a..0000000 --- a/.gh-dash.yml +++ /dev/null @@ -1,96 +0,0 @@ -prSections: - - title: Responsible PRs - filters: repo:tlinden/kleingebaeck is:open NOT dependabot - layout: - repoName: - hidden: true - - - title: Responsible Dependabot PRs - filters: repo:tlinden/kleingebaeck is:open dependabot - layout: - repoName: - hidden: true - -issuesSections: - - title: Responsible Issues - filters: is:open repo:tlinden/kleingebaeck -author:@me - layout: - repoName: - hidden: true - - - title: Note-to-Self Issues - filters: is:open repo:tlinden/kleingebaeck author:@me - layout: - creator: - hidden: true - repoName: - hidden: true - -defaults: - preview: - open: false - width: 100 - -keybindings: - universal: - - key: "shift+down" - builtin: pageDown - - key: "shift+up" - builtin: pageUp - prs: - - key: g - name: gitu - command: > - cd {{.RepoPath}} && /home/scip/bin/gitu - - key: M - name: squash-merge - command: gh pr merge --rebase --squash --admin --repo {{.RepoName}} {{.PrNumber}} - - key: i - name: show ci checks - command: gh pr checks --repo {{.RepoName}} {{.PrNumber}} | glow -p - - key: e - name: edit pr - command: ~/.config/gh-dash/edit-gh-pr {{.RepoName}} {{.PrNumber}} - - key: E - name: open repo in emacs - command: emacsclient {{.RepoPath}} & - issues: - - key: v - name: view - command: gh issue view --repo {{.RepoName}} {{.IssueNumber}} | glow -p - - key: l - name: add label - command: gh issue --repo {{.RepoName}} edit {{.IssueNumber}} --add-label $(gum choose bug enhancement question dependencies wontfix) - - key: L - name: remove label - command: gh issue --repo {{.RepoName}} edit {{.IssueNumber}} --remove-label $(gum choose bug enhancement question dependencies wontfix) - - key: E - name: open repo in emacs - command: emacsclient {{.RepoPath}} & - -theme: - ui: - sectionsShowCount: true - table: - compact: false - showSeparator: true - colors: - text: - primary: "#E2E1ED" - secondary: "#6770cb" - inverted: "#242347" - faint: "#b0793b" - warning: "#E0AF68" - success: "#3DF294" - background: - selected: "#1B1B33" - border: - primary: "#383B5B" - secondary: "#39386B" - faint: "#8d3e0b" - -repoPaths: - :owner/:repo: ~/dev/:repo - -pager: - diff: delta diff --git a/.goreleaser.yaml b/.goreleaser.yaml deleted file mode 100644 index 0e35092..0000000 --- a/.goreleaser.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# vim: set ts=2 sw=2 tw=0 fo=cnqoj - -version: 2 - -before: - hooks: - - go mod tidy - -gitea_urls: - api: https://codeberg.org/api/v1 - download: https://codeberg.org - -builds: - - env: - - CGO_ENABLED=0 - goos: - - linux - - windows - - darwin - - freebsd - -archives: - - formats: [tar.gz] - # this name template makes the OS and Arch compatible with the results of `uname`. - name_template: >- - {{ .ProjectName }}_ - {{- title .Os }}_ - {{- if eq .Arch "amd64" }}x86_64 - {{- else if eq .Arch "386" }}i386 - {{- else }}{{ .Arch }}{{ end }} - {{- if .Arm }}v{{ .Arm }}{{ end }}_{{ .Tag }} - # use zip for windows archives - format_overrides: - - goos: windows - formats: [zip] - - goos: linux - formats: [tar.gz,binary] - files: - - src: "*.md" - strip_parent: true - - src: "docs/*" - strip_parent: true - - src: Makefile.dist - dst: Makefile - wrap_in_directory: true - -changelog: - sort: asc - filters: - exclude: - - "^docs:" - - "^test:" - groups: - - title: Improved - regexp: '^.*?(feat|add|new)(\([[:word:]]+\))??!?:.+$' - order: 0 - - title: Fixed - regexp: '^.*?(bug|fix)(\([[:word:]]+\))??!?:.+$' - order: 1 - - title: Changed - order: 999 - -release: - header: "# Release Notes" - footer: >- - - --- - - Full Changelog: [{{ .PreviousTag }}...{{ .Tag }}](https://codeberg.org/scip/epuppy/compare/{{ .PreviousTag }}...{{ .Tag }}) diff --git a/.woodpecker/build.yaml b/.woodpecker/build.yaml deleted file mode 100644 index e20cc6f..0000000 --- a/.woodpecker/build.yaml +++ /dev/null @@ -1,36 +0,0 @@ -matrix: - platform: - - linux/amd64 - goversion: - - 1.24 - -labels: - platform: ${platform} - -steps: - build: - when: - event: [push] - image: golang:${goversion} - commands: - - go get - - go build - - linter: - when: - event: [push] - image: golang:${goversion} - commands: - - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/HEAD/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.5.0 - - golangci-lint --version - - golangci-lint run ./... - depends_on: [build] - - test: - when: - event: [push] - image: golang:${goversion} - commands: - - go get - - go test -v -cover - depends_on: [build,linter] diff --git a/.woodpecker/image.yaml b/.woodpecker/image.yaml deleted file mode 100644 index 80e11f6..0000000 --- a/.woodpecker/image.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# https://woodpecker-ci.org/plugins/docker-buildx -# enable Package unit and go to /scip/-/packages after building to link to proj - -variables: - - &repo codeberg.org/${CI_REPO_OWNER}/kleingebaeck - -steps: - dryrun: - image: docker.io/woodpeckerci/plugin-docker-buildx:latest - settings: - dockerfile: Dockerfile - platforms: linux/amd64 - dry_run: true - repo: *repo - tags: latest - when: - event: [pull_request] - - publish: - image: docker.io/woodpeckerci/plugin-docker-buildx:latest - settings: - dockerfile: Dockerfile - platforms: linux/amd64 - repo: *repo - registry: codeberg.org - tags: latest,${CI_COMMIT_SHA:0:8},${CI_COMMIT_TAG} - username: ${CI_REPO_OWNER} - password: - from_secret: REGISTRY_TOKEN - when: - event: [tag] - branch: main diff --git a/.woodpecker/release.yaml b/.woodpecker/release.yaml deleted file mode 100644 index 916c008..0000000 --- a/.woodpecker/release.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# build release - -labels: - platform: linux/amd64 - -steps: - goreleaser: - image: goreleaser/goreleaser - when: - event: [tag] - environment: - GITEA_TOKEN: - from_secret: DEPLOY_TOKEN - commands: - - goreleaser release --clean --verbose diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 9f94a1a..0000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,114 +0,0 @@ -# No Code of Conduct - -*TL;DR:* This project does **NOT** have a so called Code of Conduct, -nor will it ever have one. - -## The Rant - -The reasons are somewhat complicated and I'll try my best to document -them here. - -Ethical codes or rules come along like laws. But how is ethical or -moral behavior defined? And who defines which behavior is ethical and -which is not? Certainly not me. - -Unless you live in a dictatorship (and more than half of the -population on planet earth do as of this writing), laws come into -existence by democratic procedures. Laws cover almost every aspect of -live in a society. Laws allow and forbid behavior and laws sanction -infringements. - -A software project like this one on the other hand is not a society. -There are not enough people involved to form democratic -structures. And there will always be a minority of users who have the -right to commit or reject code. How could any maintainer of a software -project dare to decree rules upon others? Actually, am I, the current -maintainer of this very project authorized to do so? - -I think the anser to this question clearly is NO. - -The issue is being complicated by the fact, that open source -development these days happens on a planetary scale. And this planet -houses hundreds if not thousands of different cultures, philosophies, -ideologies and worldviews. The answer to many ethical questions will -in most cases be vague and nebulous. - -Ones joke will always be another ones insult. - -Then there is the problem of language. I myself am not an english -native, but I publish everyting using the english language. I am able -to communicate with most people in the open source community because -of that. But I am certainly not able to understand everything and -everyone. There might be nuances to a sentence I don't sense, there -might be sarcastic connotations I don't understand or references to -historical figures, events or traditions I don't know and never have -heard of. - -Judging over other peoples online behavior looks like a titanic task -to me. It is just not my job to judge others, I am not legitimized or -authorized to do so and I am not interested in this kind of business. - -Another huge problem with ethical rules is that you need to outline -and enforce sanctions on those who violate the rules. But since I am -not an elected authority how would I be able to do this? I don't -know. And what happens if someone complains about myself? Shall I -remove myself from my own project? Come on! - -Last but not least there's the law. So, let's say someone in india -writes something insulting to some other developer in an issue. Of -course german law does not apply to indian people. Moreover, the -insult might actually not be an insult in india. In the end, nothing -would happen. Under normal circumstances, maintainers would -eventually delete the posting, ban the user or remove push privileges -etc. - -But then, is there a way for the offending user to defend himself? Of -course not, since neither indian or german law alone applies. I cannot -go to a german court and sue the guy and he cannot do the same in -india. Or - we possibly could but the judges in both countries would -just laugh and close the case. - -That being said, I don't have the power nor the tools, nor the -authority to enforce serious sanctions of any meaningful kind against -others. Therefore I cannot outline any rules whatsoever. - -And let's not even start talking about these undemocratic "comitees" -many projects are forming to circumvent this problem. Some projects -even include external entities like a lawyer or some bureaucrat -somewhere just to have the ability to complain against a comitee -member. What a mess! - - - -## So, what are the ethical rules within this project then? - -Well, there are none. - -This project is about code, not society. It doesn't matter where you -come from, how you look, how you think, what you believe, who your -friends are, whay you said or did sometime in the past. I don't even -care if you are a human being. You are an alien so bored that you need -to submit code on github? Fine with me. You're a convicted criminal? I -don't give a shit! - -**The only thing I am interested here is Code and only Code.** - -So if anyhing happens here I don't like or I am obliged by (german!) -law to act on, I will decide on a case to case basis what to do. And -unfortunately, since this is the nature of a github project, you -cannot complain, object or protest. I am very sorry! - -If you will, let's at least outline these: - -- Please - just please - behave towards others as you'd expect others - to behave towards yourself. - -- Don't judge others for any reason. - -- Only judge the code. - -But these are not rules, only a friendly appeal to you as a developer -and user. - - -Thanks a lot! diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index b9edc78..0000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,93 +0,0 @@ -## Project Goals - -The goal of this project is to build a small tool which helps in -maintaining backups of the german ad site kleinanzeigen.de. It should -be small, fast and easy to understand. - -There will be no GUI, no web interface, no public API of some sort, no -builtin interpreter. - -The programming language used for this project will always be -[GOLANG](https://go.dev/) with the exception of the documentation -([Perl POD](https://perldoc.perl.org/perlpod)) and the Makefile. - -# Contributing - -You can contribute to this project in various ways: - -## Open an issue - -If you encounter a problem or don't understand how the program works -or if you think the documentation is unclear, please don't hesitate to -open an issue. - -Please add as much information about the case as possible, such as: - -- Your environment (operating system etc) -- kleingebaeck version (`kleingebaeck --version`) -- Commandline used. Please replace sensitive information with mock data! -- Repeat the command with debugging enabled (`-d` flag) -- Actual program output, Please replace sensitive information with mock data! -- Expected program output. -- Error message - if any. - -Be aware that I am working on this (and some others) project in my -spare time which is scarce. Therefore please don't expect me to -respond to your query within hours or even days. Be patient, but I -WILL respond. - -## Pull Requests - -Code and documentation help is always much appreciated! Please follow -thes guidelines to successfully contribute: - -- Every pull request shall be based on latest `development` - branch. `main` is only used for releases. - -- Execute the unit tests before committing: `make test`. There shall - be no errors. - -- Strive to be backwards compatible so that users who are already - using the program don't have to change their habits - unless it is - really neccessary. - -- Try to add a unit test for your fix, addition or modification. - -- Don't ever change existing unit tests! - -- Add a meaningful and comprehensive rationale about your contribution: - - Why do you think it might be useful for others? - - What did you actually change or add? - - Is there an open issue which this PR fixes and if so, please link - to that issue. - -- [Re-]format your code with `gofmt -s`. - -- Avoid unneccesary dependencies, especially for very small functions. - -- **If** a new dependency is being added, it must be compatible with - our [license agreement](LICENSE). - -- You need to accept that the code or documentation you contribute - will be redistributed under the terms of said license agreement. If - your contribution is considerably large or if you contribute - regularly, then feel free to add your name (and if you want your - email address) to the *AUTHORS* section of the - [manpage](kleingebaeck.pod). - -- Adhere to the above mentioned project goals. - -- If you are unsure if your addition or change will be accepted, - better ask before starting coding. Open an issue about your proposal - and let's discuss it! That way we avoid doing unnessesary work on - both sides. - -Each pull request will be carefully reviewed and if it is a useful -addition it will be accepted. However, please be prepared that -sometimes a PR will be rejected. The reasons may vary and will be -documented. Perhaps the above guidelines are not matched, or the -addition seems to be not so useful from my perspective, maybe there -are too much changes or there might be changes I don't even -understand. - -But whatever happens: your contribution is always welcome! diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 2439c45..0000000 --- a/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -FROM golang:1.24-alpine as builder - -RUN apk update -RUN apk upgrade -RUN apk add --no-cache git make - -RUN git --version - -WORKDIR /work - -COPY go.mod . -COPY . . -RUN go mod download -RUN make - -FROM alpine:latest -LABEL maintainer="Thomas von Dein " - -WORKDIR /app -COPY --from=builder /work/kleingebaeck /app/kleingebaeck - -ENV KLEINGEBAECK_OUTDIR /backup -ENV LANG C.UTF-8 -USER 1001:1001 - -ENTRYPOINT ["/app/kleingebaeck"] -CMD ["-h"] diff --git a/Makefile b/Makefile deleted file mode 100644 index 4b08dd1..0000000 --- a/Makefile +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2023 Thomas von Dein - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . - - -# -# no need to modify anything below -tool = kleingebaeck -VERSION = $(shell grep VERSION config.go | head -1 | cut -d '"' -f2) -archs = darwin freebsd linux windows -PREFIX = /usr/local -UID = root -GID = 0 -HAVE_POD := $(shell pod2text -h 2>/dev/null) - -all: $(tool).1 $(tool).go buildlocal - -%.1: %.pod -ifdef HAVE_POD - pod2man -c "User Commands" -r 1 -s 1 $*.pod > $*.1 -endif - -%.go: %.pod -ifdef HAVE_POD - echo "package main" > $*.go - echo >> $*.go - echo "var manpage = \`" >> $*.go - pod2text $*.pod >> $*.go - echo "\`" >> $*.go -endif - -buildlocal: - CGO_LDFLAGS='-static' go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o $(tool) - -install: buildlocal - install -d -o $(UID) -g $(GID) $(PREFIX)/bin - install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1 - install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/sbin/ - install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/ - -clean: - rm -rf $(tool) coverage.out testdata t/out - -test: clean - mkdir -p t/out - go test ./... $(ARGS) - -testlint: test lint - -lint: - golangci-lint run - -lint-full: - golangci-lint run --enable-all --exclude-use-default --disable exhaustivestruct,exhaustruct,depguard,interfacer,deadcode,golint,structcheck,scopelint,varcheck,ifshort,maligned,nosnakecase,godot,funlen,gofumpt,cyclop,noctx,gochecknoglobals,paralleltest - gocritic check -enableAll *.go - -testfuzzy: clean - go test -fuzz ./... $(ARGS) - -singletest: - @echo "Call like this: make singletest TEST=TestPrepareColumns ARGS=-v" - go test -run $(TEST) $(ARGS) - -cover-report: - go test ./... -cover -coverprofile=coverage.out - go tool cover -html=coverage.out - -goupdate: - go get -t -u=patch ./... - -buildall: - ./mkrel.sh $(tool) $(VERSION) - -release: - gh release create v$(VERSION) --generate-notes - -show-versions: buildlocal - @echo "### kleingebaeck version:" - @./kleingebaeck -V - - @echo - @echo "### go module versions:" - @go list -m all - - @echo - @echo "### go version used for building:" - @grep -m 1 go go.mod - -# lint: -# golangci-lint run -p bugs -p unused diff --git a/Makefile.dist b/Makefile.dist deleted file mode 100644 index cb76bca..0000000 --- a/Makefile.dist +++ /dev/null @@ -1,20 +0,0 @@ -# -*-make-*- - -.PHONY: install all - -tool = rpn -PREFIX = /usr/local -UID = root -GID = 0 - -all: - @echo "Type 'sudo make install' to install the tool." - @echo "To change prefix, type 'sudo make install PREFIX=/opt'" - -install: - install -d -o $(UID) -g $(GID) $(PREFIX)/bin - install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1 - install -d -o $(UID) -g $(GID) $(PREFIX)/share/doc - install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/sbin/ - install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/ - install -o $(UID) -g $(GID) -m 444 *.md $(PREFIX)/share/doc/ diff --git a/README-de.md b/README-de.md index 5a10401..896680e 100644 --- a/README-de.md +++ b/README-de.md @@ -9,6 +9,9 @@ [![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://codeberg.org/scip/kleingebaeck/releases) [![English](https://codeberg.org/scip/kleingebaeck/raw/branch/.github/assets/english.png)](https://codeberg.org/scip/kleingebaeck/raw/branch/README.md) +> [!IMPORTANT] +> Diese Software wird jetzt bei Codeberg weitergepflegt: [Codeberg](https://codeberg.org/scip/kleingebaeck/). + Mit diesem Tool kann man seine Anzeigen bei https://kleinanzeigen.de sichern. Es kann alle Anzeigen eines Users (oder nur eine Ausgewählte) diff --git a/README.md b/README.md index ad0d4d1..b6676d3 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,9 @@ [![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://codeberg.org/scip/kleingebaeck/releases) [![German](https://codeberg.org/scip/kleingebaeck/raw/branch/.github/assets/german.png)](https://codeberg.org/scip/kleingebaeck/raw/branch/README-de.md) +> [!IMPORTANT] +> This software is now being maintained on [Codeberg](https://codeberg.org/scip/kleingebaeck/). + [Die deutsche Version des READMEs findet Ihr hier](README-de.md). This tool can be used to backup ads on the german ad page https://kleinanzeigen.de diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index 4293c23..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,17 +0,0 @@ -# Security Policy - -## Supported Versions - -Only the latest release is supported. If you find an issue (any -issue!), please check with the latest release first. - -## Reporting a Vulnerability - -I don't agree with the "responsible disclosure" process most projects -(and companies) work these days. - -So, if you find a vulnerability of any kind, please just open an -[issue](https://codeberg.org/scip/kleingebaeck/issues). Please add -all details required to reproduce the vulnerability. You won't be chased. - -That's just all about it. diff --git a/ad.go b/ad.go deleted file mode 100644 index c32e268..0000000 --- a/ad.go +++ /dev/null @@ -1,163 +0,0 @@ -/* -Copyright © 2023-2025 Thomas von Dein - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -package main - -import ( - "bufio" - "log/slog" - "strings" - "time" -) - -type Index struct { - Links []string `goquery:".text-module-begin a,[href]"` -} - -type Ad struct { - Title string `goquery:"h1"` - Slug string - ID string - Details string `goquery:".addetailslist--detail,text"` - Attributes map[string]string // processed afterwards - Condition string // post processed from details for backward compatibility - Type string // post processed from details for backward compatibility - Color string // post processed from details for backward compatibility - Material string // post processed from details for backward compatibility - Category string - CategoryTree []string `goquery:".breadcrump-link,text"` - Price string `goquery:"h2#viewad-price"` - Created string `goquery:"#viewad-extra-info,text"` - Text string `goquery:"p#viewad-description-text,html"` - Images []string `goquery:".galleryimage-element img,[src]"` - Shipping string `goquery:".boxedarticle--details--shipping,text"` // not always filled - Expire string - - // runtime computed - Year, Day, Month string -} - -// Used by slog to pretty print an ad -func (ad *Ad) LogValue() slog.Value { - return slog.GroupValue( - slog.String("title", ad.Title), - slog.String("price", ad.Price), - slog.String("id", ad.ID), - slog.Int("imagecount", len(ad.Images)), - slog.Int("bodysize", len(ad.Text)), - slog.String("categorytree", strings.Join(ad.CategoryTree, "+")), - slog.String("created", ad.Created), - slog.String("expire", ad.Expire), - slog.String("shipping", ad.Shipping), - slog.String("details", ad.Details), - ) -} - -// check for completeness. I erected these fields to be mandatory -// (though I really don't know if they really are). I consider images -// and meta optional. So, if either of the checked fields here is -// empty we return an error. All the checked fields are extracted -// using goquery. However, I think price is optional since there are -// ads for gifts as well. -// -// Note: we return true for "ad is incomplete" and false for "ad is complete"! -func (ad *Ad) Incomplete() bool { - if ad.Category == "" || ad.Created == "" || ad.Text == "" { - return true - } - - return false -} - -func (ad *Ad) CalculateExpire() { - if ad.Created != "" { - ts, err := time.Parse("02.01.2006", ad.Created) - if err == nil { - ad.Expire = ts.AddDate(0, 0, ExpireDays).Format("02.01.2006") - } - } -} - -/* -Decode attributes like color or condition. See -https://codeberg.org/scip/kleingebaeck/issues/117 -for more details. In short: the HTML delivered by -kleinanzeigen.de has no css attribute for the keys -so we cannot extract key=>value mappings of the -ad details but have to parse them manually. - -The ad.Details member contains this after goq run: - -Art - - Weitere Kinderzimmermöbel - - Farbe - Holz - - Zustand - In Ordnung - -We parse this into ad.Attributes and fill in some -static members for backward compatibility reasons. -*/ -func (ad *Ad) DecodeAttributes() { - rd := strings.NewReader(ad.Details) - scanner := bufio.NewScanner(rd) - - isattr := true - attr := "" - attrmap := map[string]string{} - - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - - if line == "" { - continue - } - - if isattr { - attr = line - } else { - attrmap[attr] = line - } - - isattr = !isattr - } - - ad.Attributes = attrmap - - if Exists(ad.Attributes, "Zustand") { - ad.Condition = ad.Attributes["Zustand"] - } - - if Exists(ad.Attributes, "Farbe") { - ad.Color = ad.Attributes["Farbe"] - } - - if Exists(ad.Attributes, "Art") { - ad.Type = ad.Attributes["Art"] - } - - if Exists(ad.Attributes, "Material") { - ad.Material = ad.Attributes["Material"] - } - - slog.Debug("parsed attributes", "attributes", ad.Attributes) - - ad.Shipping = strings.Replace(ad.Shipping, "+ Versand ab ", "", 1) -} diff --git a/config.go b/config.go deleted file mode 100644 index ecce57c..0000000 --- a/config.go +++ /dev/null @@ -1,251 +0,0 @@ -/* -Copyright © 2023-2025 Thomas von Dein - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ -package main - -import ( - "fmt" - "io" - "os" - "path/filepath" - "runtime" - "strings" - - "github.com/knadh/koanf/parsers/toml" - "github.com/knadh/koanf/providers/confmap" - "github.com/knadh/koanf/providers/env" - "github.com/knadh/koanf/providers/file" - "github.com/knadh/koanf/providers/posflag" - "github.com/knadh/koanf/v2" - flag "github.com/spf13/pflag" -) - -const ( - VERSION string = "0.3.23" - Baseuri string = "https://www.kleinanzeigen.de" - Listuri string = "/s-bestandsliste.html" - Defaultdir string = "." - - /* - Also possible: loop through .Attributes: - - DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + - "Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" + - "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" - - */ - DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nShipping: {{.Shipping}}\nId: {{.ID}}\n" + - "Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" + - "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" - - DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nShipping: {{.Shipping}}\r\nId: {{.ID}}\r\n" + - "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" + - "Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n" - - DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + - "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36" - - DefaultAdNameTemplate string = "{{.Slug}}" - - DefaultOutdirTemplate string = "." - - // for image download throttling - MinThrottle int = 2 - MaxThrottle int = 20 - - // we extract the slug from the uri - SlugURIPartNum int = 6 - - // We have to calculate the ad expiry because the real value can - // only be seen by logged in users. The initial ad lifetime is 120 - // days. It can be extended by the user 8 days before expire by 60 - // days. But this is unknown to us, so we'll stick with our 120 - // days. They may be wrong for older ads. Don't rely on it! - ExpireDays int = 120 - - WIN string = "windows" -) - -var DirsVisited map[string]int - -const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool. - -Usage: kleingebaeck [-dvVhmoclu] [,...] - -Options: --u --user Backup ads from user with uid . --d --debug Enable debug output. --v --verbose Enable verbose output. --o --outdir Set output dir (default: current directory) --l --limit Limit the ads to download to , default: load all. --c --config Use config file (default: ~/.kleingebaeck). - --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. --f --force Overwrite images and ads even if the already exist. --m --manual Show manual. --h --help Show usage. --V --version Show program version. - -If one or more ad listing url's are specified, only backup those, -otherwise backup all ads of the given user.` - -type Config struct { - Verbose bool `koanf:"verbose"` // loglevel=info - Debug bool `koanf:"debug"` // loglevel=debug - Showversion bool `koanf:"version"` // -v - Showhelp bool `koanf:"help"` // -h - Showmanual bool `koanf:"manual"` // -m - User int `koanf:"user"` - Outdir string `koanf:"outdir"` - Template string `koanf:"template"` - Adnametemplate string `koanf:"adnametemplate"` - Loglevel string `koanf:"loglevel"` - Limit int `koanf:"limit"` - IgnoreErrors bool `koanf:"ignoreerrors"` - ForceDownload bool `koanf:"force"` - UserAgent string `koanf:"useragent"` // conf only - Adlinks []string - StatsCountAds int - StatsCountImages int -} - -func (c *Config) IncrAds() { - c.StatsCountAds++ -} - -func (c *Config) IncrImgs(num int) { - c.StatsCountImages += num -} - -// load commandline flags and config file -func InitConfig(output io.Writer) (*Config, error) { - var kloader = koanf.New(".") - - // determine template based on os - template := DefaultTemplate - if runtime.GOOS == WIN { - template = DefaultTemplateWin - } - - // Load default values using the confmap provider. - if err := kloader.Load(confmap.Provider(map[string]interface{}{ - "template": template, - "outdir": DefaultOutdirTemplate, - "loglevel": "notice", - "userid": 0, - "adnametemplate": DefaultAdNameTemplate, - "useragent": DefaultUserAgent, - }, "."), nil); err != nil { - return nil, fmt.Errorf("failed to load default values into koanf: %w", err) - } - - // setup custom usage - flagset := flag.NewFlagSet("config", flag.ContinueOnError) - flagset.Usage = func() { - _, err := fmt.Fprintln(output, Usage) - if err != nil { - panic(err) - } - - os.Exit(0) - } - - // parse commandline flags - flagset.StringP("config", "c", "", "config file") - flagset.StringP("outdir", "o", "", "directory where to store ads") - flagset.IntP("user", "u", 0, "user id") - flagset.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)") - flagset.BoolP("verbose", "v", false, "be verbose") - flagset.BoolP("debug", "d", false, "enable debug log") - flagset.BoolP("version", "V", false, "show program version") - flagset.BoolP("help", "h", false, "show usage") - flagset.BoolP("manual", "m", false, "show manual") - flagset.BoolP("force", "f", false, "force") - flagset.BoolP("ignoreerrors", "", false, "ignore image download HTTP errors") - - if err := flagset.Parse(os.Args[1:]); err != nil { - return nil, fmt.Errorf("failed to parse program arguments: %w", err) - } - - // generate a list of config files to try to load, including the - // one provided via -c, if any - var configfiles []string - - configfile, _ := flagset.GetString("config") - home, _ := os.UserHomeDir() - - if configfile != "" { - configfiles = []string{configfile} - } else { - configfiles = []string{ - "/etc/kleingebaeck.conf", "/usr/local/etc/kleingebaeck.conf", // unix variants - filepath.Join(home, ".config", "kleingebaeck", "config"), - filepath.Join(home, ".kleingebaeck"), - "kleingebaeck.conf", - } - } - - // Load the config file[s] - for _, cfgfile := range configfiles { - path, err := os.Stat(cfgfile) - - if err != nil { - // ignore non-existent files, but bail out on any other errors - if !os.IsNotExist(err) { - return nil, fmt.Errorf("failed to stat config file: %w", err) - } - - continue - } - - if !path.IsDir() { - if err := kloader.Load(file.Provider(cfgfile), toml.Parser()); err != nil { - return nil, fmt.Errorf("error loading config file: %w", err) - } - } - } - - // env overrides config file - if err := kloader.Load(env.Provider("KLEINGEBAECK_", ".", func(s string) string { - return strings.ReplaceAll(strings.ToLower( - strings.TrimPrefix(s, "KLEINGEBAECK_")), "_", ".") - }), nil); err != nil { - return nil, fmt.Errorf("error loading environment: %w", err) - } - - // command line overrides env - if err := kloader.Load(posflag.Provider(flagset, ".", kloader), nil); err != nil { - return nil, fmt.Errorf("error loading flags: %w", err) - } - - // fetch values - conf := &Config{} - if err := kloader.Unmarshal("", &conf); err != nil { - return nil, fmt.Errorf("error unmarshalling: %w", err) - } - - // adjust loglevel - switch conf.Loglevel { - case "verbose": - conf.Verbose = true - case "debug": - conf.Debug = true - } - - // are there any args left on commandline? if so threat them as adlinks - conf.Adlinks = flagset.Args() - - return conf, nil -} diff --git a/docker-compose.yaml b/docker-compose.yaml deleted file mode 100644 index 8cef53a..0000000 --- a/docker-compose.yaml +++ /dev/null @@ -1,22 +0,0 @@ -version: "3.9" -services: - init: - image: alpine:latest - user: "root" - group_add: - - '${GROUP_ID}' - volumes: - - ${OUTDIR}:/backup - command: chown -R ${USER_ID}:${USER_ID} /backup - - kleingebaeck: - container_name: kleingebaeck - user: "${USER_ID}:${USER_ID}" - volumes: - - ${OUTDIR}:/backup - working_dir: /backup - build: . - image: kleingebaeck:latest - depends_on: - init: - condition: service_completed_successfully diff --git a/example.conf b/example.conf deleted file mode 100644 index 3b3fbb5..0000000 --- a/example.conf +++ /dev/null @@ -1,48 +0,0 @@ -# -# kleingebaeck sample configuration file. -# put this to ~/.kleingebaeck. -# -# Comments start with the '#' character. - -# kleinanzeigen.de user-id. must be an unquoted number -user = 00000000 - -# enable verbose output (same as -v), may be true or false. -# other values: notice or debug -loglevel = "verbose" - -# directory where to store downloaded ads. kleingebaeck will try to -# create it. must be a quoted string. You can also include a couple of -# template variables, e.g: -# outdir = "test-{{.Year}}-{{.Month}}-{{.Day}}" -outdir = "test" - -# template for stored adlistings. -template=""" -Title: {{.Title}} -Price: {{.Price}} -Shipping: {{.Shipping}} -Id: {{.Id}} -Category: {{.Category}} -Condition: {{.Condition}} -Type: {{.Type}} -Created: {{.Created}} - -{{.Text}} -""" - -# Ads may contain more attributes than just the Condition. To print -# all attributes, loop over all of them: - -template=""" -Title: {{.Title}} -Price: {{.Price}} -Id: {{.Id}} -Category: {{.Category}} -{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }} -{{ end }} -Type: {{.Type}} -Created: {{.Created}} - -{{.Text}} -""" diff --git a/fetch.go b/fetch.go deleted file mode 100644 index bfdac24..0000000 --- a/fetch.go +++ /dev/null @@ -1,104 +0,0 @@ -/* -Copyright © 2023-2024 Thomas von Dein - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -package main - -import ( - "errors" - "fmt" - "io" - "log/slog" - "net/http" - "net/http/cookiejar" - "net/url" -) - -// convenient wrapper to fetch some web content -type Fetcher struct { - Config *Config - Client *http.Client - Cookies []*http.Cookie -} - -func NewFetcher(conf *Config) (*Fetcher, error) { - jar, err := cookiejar.New(nil) - if err != nil { - return nil, fmt.Errorf("failed to create a cookie jar obj: %w", err) - } - - return &Fetcher{ - Client: &http.Client{ - Transport: &loggingTransport{}, // implemented in http.go - Jar: jar, - }, - Config: conf, - Cookies: []*http.Cookie{}, - }, - nil -} - -func (f *Fetcher) Get(uri string) (io.ReadCloser, error) { - req, err := http.NewRequest(http.MethodGet, uri, http.NoBody) - if err != nil { - return nil, fmt.Errorf("failed to create a new HTTP request obj: %w", err) - } - - req.Header.Set("User-Agent", f.Config.UserAgent) - - if len(f.Cookies) > 0 { - uriobj, _ := url.Parse(Baseuri) - - slog.Debug("have cookies, sending them", - "sample-cookie-name", f.Cookies[0].Name, - "sample-cookie-expire", f.Cookies[0].Expires, - ) - - f.Client.Jar.SetCookies(uriobj, f.Cookies) - } - - res, err := f.Client.Do(req) - if err != nil { - return nil, fmt.Errorf("failed to initiate HTTP request to %s: %w", uri, err) - } - - if res.StatusCode != http.StatusOK { - return nil, errors.New("could not get page via HTTP") - } - - slog.Debug("got cookies?", "cookies", res.Cookies()) - f.Cookies = res.Cookies() - - return res.Body, nil -} - -// fetch an image -func (f *Fetcher) Getimage(uri string) (io.ReadCloser, error) { - slog.Debug("fetching ad image", "uri", uri) - - body, err := f.Get(uri) - if err != nil { - if f.Config.IgnoreErrors { - slog.Info("Failed to download image, error ignored", "error", err.Error()) - - return nil, nil - } - - return nil, err - } - - return body, nil -} diff --git a/go.mod b/go.mod deleted file mode 100644 index c40ed15..0000000 --- a/go.mod +++ /dev/null @@ -1,41 +0,0 @@ -module kleingebaeck - -go 1.24.0 - -toolchain go1.24.5 - -require ( - astuart.co/goq v1.0.0 - github.com/corona10/goimagehash v1.1.0 - github.com/inconshreveable/mousetrap v1.1.0 - github.com/jarcoal/httpmock v1.4.1 - github.com/knadh/koanf/parsers/toml v0.1.0 - github.com/knadh/koanf/providers/confmap v1.0.0 - github.com/knadh/koanf/providers/env v1.1.0 - github.com/knadh/koanf/providers/file v1.2.0 - github.com/knadh/koanf/providers/posflag v1.0.1 - github.com/knadh/koanf/v2 v2.3.0 - github.com/lmittmann/tint v1.1.2 - github.com/mattn/go-isatty v0.0.20 - github.com/spf13/pflag v1.0.10 - github.com/tlinden/yadu v0.1.3 - golang.org/x/image v0.31.0 - golang.org/x/sync v0.17.0 -) - -require ( - github.com/PuerkitoBio/goquery v1.5.1 // indirect - github.com/andybalholm/cascadia v1.1.0 // indirect - github.com/fatih/color v1.16.0 // indirect - github.com/fsnotify/fsnotify v1.9.0 // indirect - github.com/go-viper/mapstructure/v2 v2.4.0 // indirect - github.com/knadh/koanf/maps v0.1.2 // indirect - github.com/mattn/go-colorable v0.1.14 // indirect - github.com/mitchellh/copystructure v1.2.0 // indirect - github.com/mitchellh/reflectwalk v1.0.2 // indirect - github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect - github.com/pelletier/go-toml v1.9.5 // indirect - golang.org/x/net v0.38.0 // indirect - golang.org/x/sys v0.32.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect -) diff --git a/go.sum b/go.sum deleted file mode 100644 index cb6641c..0000000 --- a/go.sum +++ /dev/null @@ -1,88 +0,0 @@ -astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw= -astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno= -github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= -github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= -github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= -github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= -github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= -github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= -github.com/corona10/goimagehash v1.1.0 h1:teNMX/1e+Wn/AYSbLHX8mj+mF9r60R1kBeqE9MkoYwI= -github.com/corona10/goimagehash v1.1.0/go.mod h1:VkvE0mLn84L4aF8vCb6mafVajEb6QYMHl2ZJLn0mOGI= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= -github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= -github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= -github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= -github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= -github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= -github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= -github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/jarcoal/httpmock v1.4.1 h1:0Ju+VCFuARfFlhVXFc2HxlcQkfB+Xq12/EotHko+x2A= -github.com/jarcoal/httpmock v1.4.1/go.mod h1:ftW1xULwo+j0R0JJkJIIi7UKigZUXCLLanykgjwBXL0= -github.com/knadh/koanf/maps v0.1.2 h1:RBfmAW5CnZT+PJ1CVc1QSJKf4Xu9kxfQgYVQSu8hpbo= -github.com/knadh/koanf/maps v0.1.2/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI= -github.com/knadh/koanf/parsers/toml v0.1.0 h1:S2hLqS4TgWZYj4/7mI5m1CQQcWurxUz6ODgOub/6LCI= -github.com/knadh/koanf/parsers/toml v0.1.0/go.mod h1:yUprhq6eo3GbyVXFFMdbfZSo928ksS+uo0FFqNMnO18= -github.com/knadh/koanf/providers/confmap v1.0.0 h1:mHKLJTE7iXEys6deO5p6olAiZdG5zwp8Aebir+/EaRE= -github.com/knadh/koanf/providers/confmap v1.0.0/go.mod h1:txHYHiI2hAtF0/0sCmcuol4IDcuQbKTybiB1nOcUo1A= -github.com/knadh/koanf/providers/env v1.1.0 h1:U2VXPY0f+CsNDkvdsG8GcsnK4ah85WwWyJgef9oQMSc= -github.com/knadh/koanf/providers/env v1.1.0/go.mod h1:QhHHHZ87h9JxJAn2czdEl6pdkNnDh/JS1Vtsyt65hTY= -github.com/knadh/koanf/providers/file v1.2.0 h1:hrUJ6Y9YOA49aNu/RSYzOTFlqzXSCpmYIDXI7OJU6+U= -github.com/knadh/koanf/providers/file v1.2.0/go.mod h1:bp1PM5f83Q+TOUu10J/0ApLBd9uIzg+n9UgthfY+nRA= -github.com/knadh/koanf/providers/posflag v1.0.1 h1:EnMxHSrPkYCFnKgBUl5KBgrjed8gVFrcXDzaW4l/C6Y= -github.com/knadh/koanf/providers/posflag v1.0.1/go.mod h1:3Wn3+YG3f4ljzRyCUgIwH7G0sZ1pMjCOsNBovrbKmAk= -github.com/knadh/koanf/v2 v2.3.0 h1:Qg076dDRFHvqnKG97ZEsi9TAg2/nFTa9hCdcSa1lvlM= -github.com/knadh/koanf/v2 v2.3.0/go.mod h1:gRb40VRAbd4iJMYYD5IxZ6hfuopFcXBpc9bbQpZwo28= -github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/lmittmann/tint v1.1.2 h1:2CQzrL6rslrsyjqLDwD11bZ5OpLBPU+g3G/r5LSfS8w= -github.com/lmittmann/tint v1.1.2/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE= -github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= -github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= -github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= -github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/maxatome/go-testdeep v1.14.0 h1:rRlLv1+kI8eOI3OaBXZwb3O7xY3exRzdW5QyX48g9wI= -github.com/maxatome/go-testdeep v1.14.0/go.mod h1:lPZc/HAcJMP92l7yI6TRz1aZN5URwUBUAfUNvrclaNM= -github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= -github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= -github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= -github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= -github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ= -github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= -github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= -github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= -github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/tlinden/yadu v0.1.3 h1:5cRCUmj+l5yvlM2irtpFBIJwVV2DPEgYSaWvF19FtcY= -github.com/tlinden/yadu v0.1.3/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/image v0.31.0 h1:mLChjE2MV6g1S7oqbXC0/UcKijjm5fnJLUYKIYrLESA= -golang.org/x/image v0.31.0/go.mod h1:R9ec5Lcp96v9FTF+ajwaH3uGxPH4fKfHHAVbUILxghA= -golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= -golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= -golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= -golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= -golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/http.go b/http.go deleted file mode 100644 index 2c57758..0000000 --- a/http.go +++ /dev/null @@ -1,140 +0,0 @@ -/* -Copyright © 2023-2024 Thomas von Dein - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -package main - -import ( - "bytes" - "fmt" - "io" - "log/slog" - "math" - "math/rand" - "net/http" - "time" -) - -// I add an artificial "ID" to each HTTP request and the corresponding -// respose for debugging purposes so that the pair of them can be -// easier associated in debug output -var letters = []rune("ABCDEF0123456789") - -const IDLEN int = 8 - -// retry after HTTP 50x errors or err!=nil -const RetryCount = 3 - -func getid() string { - b := make([]rune, IDLEN) - for i := range b { - b[i] = letters[rand.Intn(len(letters))] - } - - return string(b) -} - -// used to inject debug log and implement retries -type loggingTransport struct{} - -// escalating timeout, $retry^2 seconds -func backoff(retries int) time.Duration { - return time.Duration(math.Pow(2, float64(retries))) * time.Second -} - -// only retry in case of errors or certain non 200 HTTP codes -func shouldRetry(err error, resp *http.Response) bool { - if err != nil { - return true - } - - if resp.StatusCode == http.StatusBadGateway || - resp.StatusCode == http.StatusServiceUnavailable || - resp.StatusCode == http.StatusGatewayTimeout { - return true - } - - return false -} - -// Body needs to be drained, otherwise we can't reuse the http.Response -func drainBody(resp *http.Response) { - if resp != nil { - if resp.Body != nil { - _, err := io.Copy(io.Discard, resp.Body) - if err != nil { - // unable to copy data? uff! - panic(err) - } - - if err := resp.Body.Close(); err != nil { - panic(err) - } - } - } -} - -// the actual logging transport with retries -func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) { - // just required for debugging - requestid := getid() - - // clone the request body, put into request on retry - var bodyBytes []byte - if req.Body != nil { - bodyBytes, _ = io.ReadAll(req.Body) - req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) - } - - slog.Debug("REQUEST", "id", requestid, "uri", req.URL, "host", req.Host) - - // first try - resp, err := http.DefaultTransport.RoundTrip(req) - if err == nil { - slog.Debug("RESPONSE", "id", requestid, "status", resp.StatusCode, - "contentlength", resp.ContentLength) - } - - // enter retry check and loop, if first req were successful, leave loop immediately - retries := 0 - for shouldRetry(err, resp) && retries < RetryCount { - time.Sleep(backoff(retries)) - - // consume any response to reuse the connection. - drainBody(resp) - - // clone the request body again - if req.Body != nil { - req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) - } - - // actual retry - resp, err = http.DefaultTransport.RoundTrip(req) - - if err == nil { - slog.Debug("RESPONSE", "id", requestid, "status", resp.StatusCode, - "contentlength", resp.ContentLength, "retry", retries) - } - - retries++ - } - - if err != nil { - return resp, fmt.Errorf("failed to get HTTP response for %s: %w", req.URL, err) - } - - return resp, nil -} diff --git a/image.go b/image.go deleted file mode 100644 index 2e56d24..0000000 --- a/image.go +++ /dev/null @@ -1,191 +0,0 @@ -/* -Copyright © 2023-2024 Thomas von Dein - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -package main - -import ( - "bytes" - "fmt" - "image" - _ "image/gif" - _ "image/jpeg" - _ "image/png" - "log/slog" - "os" - "path/filepath" - - _ "golang.org/x/image/webp" - - "github.com/corona10/goimagehash" -) - -const MaxDistance = 3 - -type Image struct { - Filename string - Hash *goimagehash.ImageHash - Data *bytes.Reader - URI string - Mime string -} - -// used for logging to avoid printing Data -func (img *Image) LogValue() slog.Value { - return slog.GroupValue( - slog.String("filename", img.Filename), - slog.String("uri", img.URI), - slog.String("hash", img.Hash.ToString()), - ) -} - -// holds all images of an ad -type Cache []*goimagehash.ImageHash - -// filename comes from the scraper, it contains directory/base w/o suffix -func NewImage(buf *bytes.Reader, filename, uri string) (*Image, error) { - _, imgconfig, err := image.DecodeConfig(buf) - if err != nil { - return nil, fmt.Errorf("failed to decode image: %w", err) - } - - _, err = buf.Seek(0, 0) - if err != nil { - return nil, fmt.Errorf("failed to seek(0) on image buffer: %w", err) - } - - if imgconfig == "jpeg" { - // we're using the format as file extension, but have used - // "jpg" in the past, so to be backwards compatible, stay with - // it. - imgconfig = "jpg" - } - - if imgconfig == "" { - return nil, fmt.Errorf("failed to process image: unknown or unsupported image format (supported: jpg,png,gif,webp)") - } - - filename += "." + imgconfig - - img := &Image{ - Filename: filename, - URI: uri, - Data: buf, - Mime: imgconfig, - } - - slog.Debug("image MIME", "mime", img.Mime) - - return img, nil -} - -// Calculate diff hash of the image -func (img *Image) CalcHash() error { - jpgdata, format, err := image.Decode(img.Data) - if err != nil { - return fmt.Errorf("failed to decode image: %w", err) - } - - if format == "" { - return fmt.Errorf("failed to decode image: unknown or unsupported image format (supported: jpg,png,gif,webp)") - } - - hash1, err := goimagehash.DifferenceHash(jpgdata) - if err != nil { - return fmt.Errorf("failed to calculate diff hash of image: %w", err) - } - - img.Hash = hash1 - - return nil -} - -// checks if 2 images are similar enough to be considered the same -func (img *Image) Similar(hash *goimagehash.ImageHash) bool { - distance, err := img.Hash.Distance(hash) - if err != nil { - slog.Debug("failed to compute diff hash distance", "error", err) - - return false - } - - if distance < MaxDistance { - slog.Debug("distance computation", "image-A", img.Hash.ToString(), - "image-B", hash.ToString(), "distance", distance) - - return true - } - - return false -} - -// check current image against all known hashes. -func (img *Image) SimilarExists(cache Cache) bool { - for _, otherimg := range cache { - if img.Similar(otherimg) { - return true - } - } - - return false -} - -// read all JPG images in a ad directory, compute diff hashes and -// store the results in the slice Images -func ReadImages(addir string, dont bool) (Cache, error) { - files, err := os.ReadDir(addir) - if err != nil { - return nil, fmt.Errorf("failed to read ad directory contents: %w", err) - } - - cache := Cache{} - - if dont { - // forced download, -f given - return cache, nil - } - - for _, file := range files { - ext := filepath.Ext(file.Name()) - if !file.IsDir() && (ext == ".jpg" || ext == ".jpeg" || ext == ".JPG" || ext == ".JPEG") { - filename := filepath.Join(addir, file.Name()) - - data, err := ReadImage(filename) - if err != nil { - return nil, err - } - - reader := bytes.NewReader(data.Bytes()) - - img, err := NewImage(reader, filename, "") - if err != nil { - return nil, err - } - - if err := img.CalcHash(); err != nil { - return nil, err - } - - if img.Hash != nil { - slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString()) - } - - cache = append(cache, img.Hash) - } - } - - return cache, nil -} diff --git a/kleingebaeck.1 b/kleingebaeck.1 deleted file mode 100644 index 5762a7d..0000000 --- a/kleingebaeck.1 +++ /dev/null @@ -1,386 +0,0 @@ -.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42) -.\" -.\" Standard preamble: -.\" ======================================================================== -.de Sp \" Vertical space (when we can't use .PP) -.if t .sp .5v -.if n .sp -.. -.de Vb \" Begin verbatim text -.ft CW -.nf -.ne \\$1 -.. -.de Ve \" End verbatim text -.ft R -.fi -.. -.\" Set up some character translations and predefined strings. \*(-- will -.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left -.\" double quote, and \*(R" will give a right double quote. \*(C+ will -.\" give a nicer C++. Capital omega is used to do unbreakable dashes and -.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, -.\" nothing in troff, for use with C<>. -.tr \(*W- -.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' -.ie n \{\ -. ds -- \(*W- -. ds PI pi -. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch -. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch -. ds L" "" -. ds R" "" -. ds C` "" -. ds C' "" -'br\} -.el\{\ -. ds -- \|\(em\| -. ds PI \(*p -. ds L" `` -. ds R" '' -. ds C` -. ds C' -'br\} -.\" -.\" Escape single quotes in literal strings from groff's Unicode transform. -.ie \n(.g .ds Aq \(aq -.el .ds Aq ' -.\" -.\" If the F register is >0, we'll generate index entries on stderr for -.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index -.\" entries marked with X<> in POD. Of course, you'll have to process the -.\" output yourself in some meaningful fashion. -.\" -.\" Avoid warning from groff about undefined register 'F'. -.de IX -.. -.nr rF 0 -.if \n(.g .if rF .nr rF 1 -.if (\n(rF:(\n(.g==0)) \{\ -. if \nF \{\ -. de IX -. tm Index:\\$1\t\\n%\t"\\$2" -.. -. if !\nF==2 \{\ -. nr % 0 -. nr F 2 -. \} -. \} -.\} -.rr rF -.\" -.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). -.\" Fear. Run. Save yourself. No user-serviceable parts. -. \" fudge factors for nroff and troff -.if n \{\ -. ds #H 0 -. ds #V .8m -. ds #F .3m -. ds #[ \f1 -. ds #] \fP -.\} -.if t \{\ -. ds #H ((1u-(\\\\n(.fu%2u))*.13m) -. ds #V .6m -. ds #F 0 -. ds #[ \& -. ds #] \& -.\} -. \" simple accents for nroff and troff -.if n \{\ -. ds ' \& -. ds ` \& -. ds ^ \& -. ds , \& -. ds ~ ~ -. ds / -.\} -.if t \{\ -. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" -. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' -. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' -. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' -. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' -. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' -.\} -. \" troff and (daisy-wheel) nroff accents -.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' -.ds 8 \h'\*(#H'\(*b\h'-\*(#H' -.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] -.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' -.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' -.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] -.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] -.ds ae a\h'-(\w'a'u*4/10)'e -.ds Ae A\h'-(\w'A'u*4/10)'E -. \" corrections for vroff -.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' -.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' -. \" for low resolution devices (crt and lpr) -.if \n(.H>23 .if \n(.V>19 \ -\{\ -. ds : e -. ds 8 ss -. ds o a -. ds d- d\h'-1'\(ga -. ds D- D\h'-1'\(hy -. ds th \o'bp' -. ds Th \o'LP' -. ds ae ae -. ds Ae AE -.\} -.rm #[ #] #H #V #F C -.\" ======================================================================== -.\" -.IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2025-02-27" "1" "User Commands" -.\" For nroff, turn off justification. Always turn off hyphenation; it makes -.\" way too many mistakes in technical documents. -.if n .ad l -.nh -.SH "NAME" -kleingebaeck \- kleinanzeigen.de backup tool -.SH "SYNOPSYS" -.IX Header "SYNOPSYS" -.Vb 10 -\& Usage: kleingebaeck [\-dvVhmoc] [,...] -\& Options: -\& \-u \-\-user Backup ads from user with uid . -\& \-d \-\-debug Enable debug output. -\& \-v \-\-verbose Enable verbose output. -\& \-o \-\-outdir Set output dir (default: current directory) -\& \-l \-\-limit Limit the ads to download to , default: load all. -\& \-c \-\-config Use config file (default: ~/.kleingebaeck). -\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. -\& \-f \-\-force Overwrite images and ads even if the already exist. -\& \-m \-\-manual Show manual. -\& \-h \-\-help Show usage. -\& \-V \-\-version Show program version. -.Ve -.SH "DESCRIPTION" -.IX Header "DESCRIPTION" -This tool can be used to backup ads on the german ad page . -.PP -It downloads all (or only the specified ones) ads of one user into a -directory, each ad into its own subdirectory. The backup will contain -a textfile \fBAdlisting.txt\fR which contains the ad contents such as -title, body, price etc. All images will be downloaded as well. -.SH "CONFIGURATION" -.IX Header "CONFIGURATION" -You can create a config file to save typing. By default -\&\f(CW\*(C`~/.kleingebaeck\*(C'\fR is being used but you can specify one with \f(CW\*(C`\-c\*(C'\fR as -well. We use \s-1TOML\s0 as our configuration language. See -. -.PP -Format is pretty simple: -.PP -.Vb 11 -\& user = 1010101 -\& loglevel = verbose -\& outdir = "test" -\& useragent = "Mozilla/5.0" -\& template = """ -\& Title: {{.Title}} -\& Price: {{.Price}} -\& Id: {{.ID}} -\& Category: {{.Category}} -\& Condition: {{.Condition}} -\& Created: {{.Created}} -\& -\& {{.Text}} -\& """ -.Ve -.PP -Be careful if you want to change the template. The variable is a -multiline string surrounded by three double quotes. You can left out -certain fields and use any formatting you like. Refer to - for details how to write a -template. Also read the \s-1TEMPLATES\s0 section below. -.PP -If you're on windows and want to customize the output directory, put -it into single quotes to avoid the backslashes interpreted as escape -chars like this: -.PP -.Vb 1 -\& outdir = \*(AqC:\eData\eAds\*(Aq -.Ve -.SH "TEMPLATES" -.IX Header "TEMPLATES" -Various parts of the configuration can be modified using templates: -the output directory, the ad directory and the ad listing itself. -.SS "\s-1OUTPUT DIR TEMPLATE\s0" -.IX Subsection "OUTPUT DIR TEMPLATE" -The config varialbe \f(CW\*(C`outdir\*(C'\fR or the command line parameter \f(CW\*(C`\-o\*(C'\fR take a -template which may contain: -.ie n .IP """{{.Year}}""" 4 -.el .IP "\f(CW{{.Year}}\fR" 4 -.IX Item "{{.Year}}" -.PD 0 -.ie n .IP """{{.Month}}""" 4 -.el .IP "\f(CW{{.Month}}\fR" 4 -.IX Item "{{.Month}}" -.ie n .IP """{{.Day}}""" 4 -.el .IP "\f(CW{{.Day}}\fR" 4 -.IX Item "{{.Day}}" -.PD -.PP -That way you can create a new output directory for every backup -run. For example: -.PP -.Vb 1 -\& outdir = "/home/backups/ads\-{{.Year}}\-{{.Month}}\-{{.Day}}" -.Ve -.PP -Or using the command line flag: -.PP -.Vb 1 -\& \-o "/home/backups/ads\-{{.Year}}\-{{.Month}}\-{{.Day}}" -.Ve -.PP -The default value is \f(CW\*(C`.\*(C'\fR \- the current directory. -.SS "\s-1AD DIRECTORY TEMPLATE\s0" -.IX Subsection "AD DIRECTORY TEMPLATE" -The ad directory name can be modified using the following ad values: -.IP "{{.Price}}" 4 -.IX Item "{{.Price}}" -.PD 0 -.IP "{{.ID}}" 4 -.IX Item "{{.ID}}" -.IP "{{.Category}}" 4 -.IX Item "{{.Category}}" -.IP "{{.Condition}}" 4 -.IX Item "{{.Condition}}" -.IP "{{.Created}}" 4 -.IX Item "{{.Created}}" -.IP "{{.Slug}}" 4 -.IX Item "{{.Slug}}" -.IP "{{.Text}}" 4 -.IX Item "{{.Text}}" -.PD -.PP -It can only be configured in the config file. By default only -\&\f(CW\*(C`{{.Slug}}\*(C'\fR is being used, this is the title of the ad in url format. -.SS "\s-1AD NAME TEMPLATE\s0" -.IX Subsection "AD NAME TEMPLATE" -The name of the directory per ad can be tuned as well: -.ie n .IP """{{.Year}}""" 4 -.el .IP "\f(CW{{.Year}}\fR" 4 -.IX Item "{{.Year}}" -.PD 0 -.ie n .IP """{{.Month}}""" 4 -.el .IP "\f(CW{{.Month}}\fR" 4 -.IX Item "{{.Month}}" -.ie n .IP """{{.Day}}""" 4 -.el .IP "\f(CW{{.Day}}\fR" 4 -.IX Item "{{.Day}}" -.ie n .IP """{{.Slug}}""" 4 -.el .IP "\f(CW{{.Slug}}\fR" 4 -.IX Item "{{.Slug}}" -.ie n .IP """{{.Category}}""" 4 -.el .IP "\f(CW{{.Category}}\fR" 4 -.IX Item "{{.Category}}" -.ie n .IP """{{.ID}}""" 4 -.el .IP "\f(CW{{.ID}}\fR" 4 -.IX Item "{{.ID}}" -.PD -.SS "\s-1AD TEMPLATE\s0" -.IX Subsection "AD TEMPLATE" -The ad listing itself can be modified as well, using the same -variables as the ad name template above. -.PP -This is the default template: -.PP -.Vb 8 -\& Title: {{.Title}} -\& Price: {{.Price}} -\& Id: {{.ID}} -\& Category: {{.Category}} -\& Condition: {{.Condition}} -\& Type: {{.Type}} -\& Created: {{.Created}} -\& Expire: {{.Expire}} -\& -\& {{.Text}} -.Ve -.PP -The config parameter to modify is \f(CW\*(C`template\*(C'\fR. See example.conf in the -source repository. Please take care, since this is a multiline -string. This is how it shall look if you modify it: -.PP -.Vb 2 -\& template=""" -\& Title: {{.Title}} -\& -\& {{.Text}} -\& """ -.Ve -.PP -That is, the content between the two \f(CW"""\fR chars is the template. -.SH "SETUP" -.IX Header "SETUP" -To setup the tool, you need to lookup your userid on -kleinanzeigen.de. Go to your ad overview page while \s-1NOT\s0 being logged -in: -.PP -.Vb 1 -\& https://www.kleinanzeigen.de/s\-bestandsliste.html?userId=XXXXXX -.Ve -.PP -The \fB\s-1XXXXX\s0\fR part is your userid. -.PP -Put it into the configfile as outlined above. Also specify an output -directory. Then just execute \f(CW\*(C`kleingebaeck\*(C'\fR. -.PP -You can use the \fB\-v\fR option to get verbose output or \fB\-d\fR to enable -debugging. -.SH "ENVIRONMENT VARIABLES" -.IX Header "ENVIRONMENT VARIABLES" -The following environment variables are considered: -.PP -.Vb 7 -\& KLEINGEBAECK_USER -\& KLEINGEBAECK_DEBUG -\& KLEINGEBAECK_VERBOSE -\& KLEINGEBAECK_OUTDIR -\& KLEINGEBAECK_LIMIT -\& KLEINGEBAECK_CONFIG -\& KLEINGEBAECK_IGNOREERRORS -.Ve -.PP -Please note, that they take precedence over config file, but -commandline flags take precedence over env! -.SH "BUGS" -.IX Header "BUGS" -In order to report a bug, unexpected behavior, feature requests -or to submit a patch, please open an issue on github: -. -.PP -Please repeat the failing command with debugging enabled \f(CW\*(C`\-d\*(C'\fR and -include the output in the issue. -.SH "LIMITATIONS" -.IX Header "LIMITATIONS" -The \f(CW\*(C`kleingebaeck\*(C'\fR doesn't currently check if it has downloaded a -file already, so it downloads everything again every time you execute -it. Be aware of it. This will change in the future. -.PP -Also there's currently no parallelization implemented. This will -change in the future. -.SH "LICENSE" -.IX Header "LICENSE" -Copyright 2023\-2025 Thomas von Dein -.PP -This program is free software: you can redistribute it and/or modify -it under the terms of the \s-1GNU\s0 General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. -.PP -This program is distributed in the hope that it will be useful, -but \s-1WITHOUT ANY WARRANTY\s0; without even the implied warranty of -\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS FOR A PARTICULAR PURPOSE.\s0 See the -\&\s-1GNU\s0 General Public License for more details. -.PP -You should have received a copy of the \s-1GNU\s0 General Public License -along with this program. If not, see . -.SH "Author" -.IX Header "Author" -T.v.Dein diff --git a/kleingebaeck.go b/kleingebaeck.go deleted file mode 100644 index df62ba5..0000000 --- a/kleingebaeck.go +++ /dev/null @@ -1,205 +0,0 @@ -package main - -var manpage = ` -NAME - kleingebaeck - kleinanzeigen.de backup tool - -SYNOPSYS - Usage: kleingebaeck [-dvVhmoc] [,...] - Options: - -u --user Backup ads from user with uid . - -d --debug Enable debug output. - -v --verbose Enable verbose output. - -o --outdir Set output dir (default: current directory) - -l --limit Limit the ads to download to , default: load all. - -c --config Use config file (default: ~/.kleingebaeck). - --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. - -f --force Overwrite images and ads even if the already exist. - -m --manual Show manual. - -h --help Show usage. - -V --version Show program version. - -DESCRIPTION - This tool can be used to backup ads on the german ad page - . - - It downloads all (or only the specified ones) ads of one user into a - directory, each ad into its own subdirectory. The backup will contain a - textfile Adlisting.txt which contains the ad contents such as title, - body, price etc. All images will be downloaded as well. - -CONFIGURATION - You can create a config file to save typing. By default - "~/.kleingebaeck" is being used but you can specify one with "-c" as - well. We use TOML as our configuration language. See - . - - Format is pretty simple: - - user = 1010101 - loglevel = verbose - outdir = "test" - useragent = "Mozilla/5.0" - template = """ - Title: {{.Title}} - Price: {{.Price}} - Id: {{.ID}} - Category: {{.Category}} - Condition: {{.Condition}} - Created: {{.Created}} - - {{.Text}} - """ - - Be careful if you want to change the template. The variable is a - multiline string surrounded by three double quotes. You can left out - certain fields and use any formatting you like. Refer to - for details how to write a template. - Also read the TEMPLATES section below. - - If you're on windows and want to customize the output directory, put it - into single quotes to avoid the backslashes interpreted as escape chars - like this: - - outdir = 'C:\Data\Ads' - -TEMPLATES - Various parts of the configuration can be modified using templates: the - output directory, the ad directory and the ad listing itself. - - OUTPUT DIR TEMPLATE - The config varialbe "outdir" or the command line parameter "-o" take a - template which may contain: - - "{{.Year}}" - "{{.Month}}" - "{{.Day}}" - - That way you can create a new output directory for every backup run. For - example: - - outdir = "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}" - - Or using the command line flag: - - -o "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}" - - The default value is "." - the current directory. - - AD DIRECTORY TEMPLATE - The ad directory name can be modified using the following ad values: - - {{.Price}} - {{.ID}} - {{.Category}} - {{.Condition}} - {{.Created}} - {{.Slug}} - {{.Text}} - - It can only be configured in the config file. By default only - "{{.Slug}}" is being used, this is the title of the ad in url format. - - AD NAME TEMPLATE - The name of the directory per ad can be tuned as well: - - "{{.Year}}" - "{{.Month}}" - "{{.Day}}" - "{{.Slug}}" - "{{.Category}}" - "{{.ID}}" - - AD TEMPLATE - The ad listing itself can be modified as well, using the same variables - as the ad name template above. - - This is the default template: - - Title: {{.Title}} - Price: {{.Price}} - Id: {{.ID}} - Category: {{.Category}} - Condition: {{.Condition}} - Type: {{.Type}} - Created: {{.Created}} - Expire: {{.Expire}} - - {{.Text}} - - The config parameter to modify is "template". See example.conf in the - source repository. Please take care, since this is a multiline string. - This is how it shall look if you modify it: - - template=""" - Title: {{.Title}} - - {{.Text}} - """ - - That is, the content between the two """ chars is the template. - -SETUP - To setup the tool, you need to lookup your userid on kleinanzeigen.de. - Go to your ad overview page while NOT being logged in: - - https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX - - The XXXXX part is your userid. - - Put it into the configfile as outlined above. Also specify an output - directory. Then just execute "kleingebaeck". - - You can use the -v option to get verbose output or -d to enable - debugging. - -ENVIRONMENT VARIABLES - The following environment variables are considered: - - KLEINGEBAECK_USER - KLEINGEBAECK_DEBUG - KLEINGEBAECK_VERBOSE - KLEINGEBAECK_OUTDIR - KLEINGEBAECK_LIMIT - KLEINGEBAECK_CONFIG - KLEINGEBAECK_IGNOREERRORS - - Please note, that they take precedence over config file, but commandline - flags take precedence over env! - -BUGS - In order to report a bug, unexpected behavior, feature requests or to - submit a patch, please open an issue on github: - . - - Please repeat the failing command with debugging enabled "-d" and - include the output in the issue. - -LIMITATIONS - The "kleingebaeck" doesn't currently check if it has downloaded a file - already, so it downloads everything again every time you execute it. Be - aware of it. This will change in the future. - - Also there's currently no parallelization implemented. This will change - in the future. - -LICENSE - Copyright 2023-2025 Thomas von Dein - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation, either version 3 of the License, or (at your - option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General - Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program. If not, see . - -Author - T.v.Dein - -` diff --git a/kleingebaeck.pod b/kleingebaeck.pod deleted file mode 100644 index 6426a6e..0000000 --- a/kleingebaeck.pod +++ /dev/null @@ -1,247 +0,0 @@ -=head1 NAME - -kleingebaeck - kleinanzeigen.de backup tool - -=head1 SYNOPSYS - - Usage: kleingebaeck [-dvVhmoc] [,...] - Options: - -u --user Backup ads from user with uid . - -d --debug Enable debug output. - -v --verbose Enable verbose output. - -o --outdir Set output dir (default: current directory) - -l --limit Limit the ads to download to , default: load all. - -c --config Use config file (default: ~/.kleingebaeck). - --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. - -f --force Overwrite images and ads even if the already exist. - -m --manual Show manual. - -h --help Show usage. - -V --version Show program version. - -=head1 DESCRIPTION - -This tool can be used to backup ads on the german ad page L. - -It downloads all (or only the specified ones) ads of one user into a -directory, each ad into its own subdirectory. The backup will contain -a textfile B which contains the ad contents such as -title, body, price etc. All images will be downloaded as well. - -=head1 CONFIGURATION - -You can create a config file to save typing. By default -C<~/.kleingebaeck> is being used but you can specify one with C<-c> as -well. We use TOML as our configuration language. See -L. - -Format is pretty simple: - - user = 1010101 - loglevel = verbose - outdir = "test" - useragent = "Mozilla/5.0" - template = """ - Title: {{.Title}} - Price: {{.Price}} - Id: {{.ID}} - Category: {{.Category}} - Condition: {{.Condition}} - Created: {{.Created}} - - {{.Text}} - """ - -Be careful if you want to change the template. The variable is a -multiline string surrounded by three double quotes. You can left out -certain fields and use any formatting you like. Refer to -L for details how to write a -template. Also read the TEMPLATES section below. - -If you're on windows and want to customize the output directory, put -it into single quotes to avoid the backslashes interpreted as escape -chars like this: - - outdir = 'C:\Data\Ads' - -=head1 TEMPLATES - -Various parts of the configuration can be modified using templates: -the output directory, the ad directory and the ad listing itself. - -=head2 OUTPUT DIR TEMPLATE - -The config varialbe C or the command line parameter C<-o> take a -template which may contain: - -=over - -=item C<{{.Year}}> - -=item C<{{.Month}}> - -=item C<{{.Day}}> - -=back - -That way you can create a new output directory for every backup -run. For example: - - outdir = "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}" - -Or using the command line flag: - - -o "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}" - -The default value is C<.> - the current directory. - -=head2 AD DIRECTORY TEMPLATE - -The ad directory name can be modified using the following ad values: - -=over - -=item {{.Price}} - -=item {{.ID}} - -=item {{.Category}} - -=item {{.Condition}} - -=item {{.Created}} - -=item {{.Slug}} - -=item {{.Text}} - -=back - -It can only be configured in the config file. By default only -C<{{.Slug}}> is being used, this is the title of the ad in url format. - -=head2 AD NAME TEMPLATE - -The name of the directory per ad can be tuned as well: - -=over - -=item C<{{.Year}}> - -=item C<{{.Month}}> - -=item C<{{.Day}}> - -=item C<{{.Slug}}> - -=item C<{{.Category}}> - -=item C<{{.ID}}> - - -=back - -=head2 AD TEMPLATE - -The ad listing itself can be modified as well, using the same -variables as the ad name template above. - -This is the default template: - - Title: {{.Title}} - Price: {{.Price}} - Id: {{.ID}} - Category: {{.Category}} - Condition: {{.Condition}} - Type: {{.Type}} - Created: {{.Created}} - Expire: {{.Expire}} - - {{.Text}} - -The config parameter to modify is C