diff --git a/.gh-dash.yml b/.gh-dash.yml deleted file mode 100644 index e62c39a..0000000 --- a/.gh-dash.yml +++ /dev/null @@ -1,96 +0,0 @@ -prSections: - - title: Responsible PRs - filters: repo:tlinden/kleingebaeck is:open NOT dependabot - layout: - repoName: - hidden: true - - - title: Responsible Dependabot PRs - filters: repo:tlinden/kleingebaeck is:open dependabot - layout: - repoName: - hidden: true - -issuesSections: - - title: Responsible Issues - filters: is:open repo:tlinden/kleingebaeck -author:@me - layout: - repoName: - hidden: true - - - title: Note-to-Self Issues - filters: is:open repo:tlinden/kleingebaeck author:@me - layout: - creator: - hidden: true - repoName: - hidden: true - -defaults: - preview: - open: false - width: 100 - -keybindings: - universal: - - key: "shift+down" - builtin: pageDown - - key: "shift+up" - builtin: pageUp - prs: - - key: g - name: gitu - command: > - cd {{.RepoPath}} && /home/scip/bin/gitu - - key: M - name: squash-merge - command: gh pr merge --rebase --squash --admin --repo {{.RepoName}} {{.PrNumber}} - - key: i - name: show ci checks - command: gh pr checks --repo {{.RepoName}} {{.PrNumber}} | glow -p - - key: e - name: edit pr - command: ~/.config/gh-dash/edit-gh-pr {{.RepoName}} {{.PrNumber}} - - key: E - name: open repo in emacs - command: emacsclient {{.RepoPath}} & - issues: - - key: v - name: view - command: gh issue view --repo {{.RepoName}} {{.IssueNumber}} | glow -p - - key: l - name: add label - command: gh issue --repo {{.RepoName}} edit {{.IssueNumber}} --add-label $(gum choose bug enhancement question dependencies wontfix) - - key: L - name: remove label - command: gh issue --repo {{.RepoName}} edit {{.IssueNumber}} --remove-label $(gum choose bug enhancement question dependencies wontfix) - - key: E - name: open repo in emacs - command: emacsclient {{.RepoPath}} & - -theme: - ui: - sectionsShowCount: true - table: - compact: false - showSeparator: true - colors: - text: - primary: "#E2E1ED" - 
secondary: "#6770cb" - inverted: "#242347" - faint: "#b0793b" - warning: "#E0AF68" - success: "#3DF294" - background: - selected: "#1B1B33" - border: - primary: "#383B5B" - secondary: "#39386B" - faint: "#8d3e0b" - -repoPaths: - :owner/:repo: ~/dev/:repo - -pager: - diff: delta diff --git a/.goreleaser.yaml b/.goreleaser.yaml deleted file mode 100644 index 0e35092..0000000 --- a/.goreleaser.yaml +++ /dev/null @@ -1,69 +0,0 @@ -# vim: set ts=2 sw=2 tw=0 fo=cnqoj - -version: 2 - -before: - hooks: - - go mod tidy - -gitea_urls: - api: https://codeberg.org/api/v1 - download: https://codeberg.org - -builds: - - env: - - CGO_ENABLED=0 - goos: - - linux - - windows - - darwin - - freebsd - -archives: - - formats: [tar.gz] - # this name template makes the OS and Arch compatible with the results of `uname`. - name_template: >- - {{ .ProjectName }}_ - {{- title .Os }}_ - {{- if eq .Arch "amd64" }}x86_64 - {{- else if eq .Arch "386" }}i386 - {{- else }}{{ .Arch }}{{ end }} - {{- if .Arm }}v{{ .Arm }}{{ end }}_{{ .Tag }} - # use zip for windows archives - format_overrides: - - goos: windows - formats: [zip] - - goos: linux - formats: [tar.gz,binary] - files: - - src: "*.md" - strip_parent: true - - src: "docs/*" - strip_parent: true - - src: Makefile.dist - dst: Makefile - wrap_in_directory: true - -changelog: - sort: asc - filters: - exclude: - - "^docs:" - - "^test:" - groups: - - title: Improved - regexp: '^.*?(feat|add|new)(\([[:word:]]+\))??!?:.+$' - order: 0 - - title: Fixed - regexp: '^.*?(bug|fix)(\([[:word:]]+\))??!?:.+$' - order: 1 - - title: Changed - order: 999 - -release: - header: "# Release Notes" - footer: >- - - --- - - Full Changelog: [{{ .PreviousTag }}...{{ .Tag }}](https://codeberg.org/scip/epuppy/compare/{{ .PreviousTag }}...{{ .Tag }}) diff --git a/.woodpecker/build.yaml b/.woodpecker/build.yaml deleted file mode 100644 index e20cc6f..0000000 --- a/.woodpecker/build.yaml +++ /dev/null @@ -1,36 +0,0 @@ -matrix: - platform: - - linux/amd64 - goversion: - - 
1.24 - -labels: - platform: ${platform} - -steps: - build: - when: - event: [push] - image: golang:${goversion} - commands: - - go get - - go build - - linter: - when: - event: [push] - image: golang:${goversion} - commands: - - curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/HEAD/install.sh | sh -s -- -b $(go env GOPATH)/bin v2.5.0 - - golangci-lint --version - - golangci-lint run ./... - depends_on: [build] - - test: - when: - event: [push] - image: golang:${goversion} - commands: - - go get - - go test -v -cover - depends_on: [build,linter] diff --git a/.woodpecker/image.yaml b/.woodpecker/image.yaml deleted file mode 100644 index 80e11f6..0000000 --- a/.woodpecker/image.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# https://woodpecker-ci.org/plugins/docker-buildx -# enable Package unit and go to /scip/-/packages after building to link to proj - -variables: - - &repo codeberg.org/${CI_REPO_OWNER}/kleingebaeck - -steps: - dryrun: - image: docker.io/woodpeckerci/plugin-docker-buildx:latest - settings: - dockerfile: Dockerfile - platforms: linux/amd64 - dry_run: true - repo: *repo - tags: latest - when: - event: [pull_request] - - publish: - image: docker.io/woodpeckerci/plugin-docker-buildx:latest - settings: - dockerfile: Dockerfile - platforms: linux/amd64 - repo: *repo - registry: codeberg.org - tags: latest,${CI_COMMIT_SHA:0:8},${CI_COMMIT_TAG} - username: ${CI_REPO_OWNER} - password: - from_secret: REGISTRY_TOKEN - when: - event: [tag] - branch: main diff --git a/.woodpecker/release.yaml b/.woodpecker/release.yaml deleted file mode 100644 index 916c008..0000000 --- a/.woodpecker/release.yaml +++ /dev/null @@ -1,15 +0,0 @@ -# build release - -labels: - platform: linux/amd64 - -steps: - goreleaser: - image: goreleaser/goreleaser - when: - event: [tag] - environment: - GITEA_TOKEN: - from_secret: DEPLOY_TOKEN - commands: - - goreleaser release --clean --verbose diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 
9f94a1a..0000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,114 +0,0 @@ -# No Code of Conduct - -*TL;DR:* This project does **NOT** have a so called Code of Conduct, -nor will it ever have one. - -## The Rant - -The reasons are somewhat complicated and I'll try my best to document -them here. - -Ethical codes or rules come along like laws. But how is ethical or -moral behavior defined? And who defines which behavior is ethical and -which is not? Certainly not me. - -Unless you live in a dictatorship (and more than half of the -population on planet earth do as of this writing), laws come into -existence by democratic procedures. Laws cover almost every aspect of -live in a society. Laws allow and forbid behavior and laws sanction -infringements. - -A software project like this one on the other hand is not a society. -There are not enough people involved to form democratic -structures. And there will always be a minority of users who have the -right to commit or reject code. How could any maintainer of a software -project dare to decree rules upon others? Actually, am I, the current -maintainer of this very project authorized to do so? - -I think the anser to this question clearly is NO. - -The issue is being complicated by the fact, that open source -development these days happens on a planetary scale. And this planet -houses hundreds if not thousands of different cultures, philosophies, -ideologies and worldviews. The answer to many ethical questions will -in most cases be vague and nebulous. - -Ones joke will always be another ones insult. - -Then there is the problem of language. I myself am not an english -native, but I publish everyting using the english language. I am able -to communicate with most people in the open source community because -of that. But I am certainly not able to understand everything and -everyone. 
There might be nuances to a sentence I don't sense, there -might be sarcastic connotations I don't understand or references to -historical figures, events or traditions I don't know and never have -heard of. - -Judging over other peoples online behavior looks like a titanic task -to me. It is just not my job to judge others, I am not legitimized or -authorized to do so and I am not interested in this kind of business. - -Another huge problem with ethical rules is that you need to outline -and enforce sanctions on those who violate the rules. But since I am -not an elected authority how would I be able to do this? I don't -know. And what happens if someone complains about myself? Shall I -remove myself from my own project? Come on! - -Last but not least there's the law. So, let's say someone in india -writes something insulting to some other developer in an issue. Of -course german law does not apply to indian people. Moreover, the -insult might actually not be an insult in india. In the end, nothing -would happen. Under normal circumstances, maintainers would -eventually delete the posting, ban the user or remove push privileges -etc. - -But then, is there a way for the offending user to defend himself? Of -course not, since neither indian or german law alone applies. I cannot -go to a german court and sue the guy and he cannot do the same in -india. Or - we possibly could but the judges in both countries would -just laugh and close the case. - -That being said, I don't have the power nor the tools, nor the -authority to enforce serious sanctions of any meaningful kind against -others. Therefore I cannot outline any rules whatsoever. - -And let's not even start talking about these undemocratic "comitees" -many projects are forming to circumvent this problem. Some projects -even include external entities like a lawyer or some bureaucrat -somewhere just to have the ability to complain against a comitee -member. What a mess! 
- - - -## So, what are the ethical rules within this project then? - -Well, there are none. - -This project is about code, not society. It doesn't matter where you -come from, how you look, how you think, what you believe, who your -friends are, whay you said or did sometime in the past. I don't even -care if you are a human being. You are an alien so bored that you need -to submit code on github? Fine with me. You're a convicted criminal? I -don't give a shit! - -**The only thing I am interested here is Code and only Code.** - -So if anyhing happens here I don't like or I am obliged by (german!) -law to act on, I will decide on a case to case basis what to do. And -unfortunately, since this is the nature of a github project, you -cannot complain, object or protest. I am very sorry! - -If you will, let's at least outline these: - -- Please - just please - behave towards others as you'd expect others - to behave towards yourself. - -- Don't judge others for any reason. - -- Only judge the code. - -But these are not rules, only a friendly appeal to you as a developer -and user. - - -Thanks a lot! diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index b9edc78..0000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,93 +0,0 @@ -## Project Goals - -The goal of this project is to build a small tool which helps in -maintaining backups of the german ad site kleinanzeigen.de. It should -be small, fast and easy to understand. - -There will be no GUI, no web interface, no public API of some sort, no -builtin interpreter. - -The programming language used for this project will always be -[GOLANG](https://go.dev/) with the exception of the documentation -([Perl POD](https://perldoc.perl.org/perlpod)) and the Makefile. 
- -# Contributing - -You can contribute to this project in various ways: - -## Open an issue - -If you encounter a problem or don't understand how the program works -or if you think the documentation is unclear, please don't hesitate to -open an issue. - -Please add as much information about the case as possible, such as: - -- Your environment (operating system etc) -- kleingebaeck version (`kleingebaeck --version`) -- Commandline used. Please replace sensitive information with mock data! -- Repeat the command with debugging enabled (`-d` flag) -- Actual program output, Please replace sensitive information with mock data! -- Expected program output. -- Error message - if any. - -Be aware that I am working on this (and some others) project in my -spare time which is scarce. Therefore please don't expect me to -respond to your query within hours or even days. Be patient, but I -WILL respond. - -## Pull Requests - -Code and documentation help is always much appreciated! Please follow -thes guidelines to successfully contribute: - -- Every pull request shall be based on latest `development` - branch. `main` is only used for releases. - -- Execute the unit tests before committing: `make test`. There shall - be no errors. - -- Strive to be backwards compatible so that users who are already - using the program don't have to change their habits - unless it is - really neccessary. - -- Try to add a unit test for your fix, addition or modification. - -- Don't ever change existing unit tests! - -- Add a meaningful and comprehensive rationale about your contribution: - - Why do you think it might be useful for others? - - What did you actually change or add? - - Is there an open issue which this PR fixes and if so, please link - to that issue. - -- [Re-]format your code with `gofmt -s`. - -- Avoid unneccesary dependencies, especially for very small functions. - -- **If** a new dependency is being added, it must be compatible with - our [license agreement](LICENSE). 
- -- You need to accept that the code or documentation you contribute - will be redistributed under the terms of said license agreement. If - your contribution is considerably large or if you contribute - regularly, then feel free to add your name (and if you want your - email address) to the *AUTHORS* section of the - [manpage](kleingebaeck.pod). - -- Adhere to the above mentioned project goals. - -- If you are unsure if your addition or change will be accepted, - better ask before starting coding. Open an issue about your proposal - and let's discuss it! That way we avoid doing unnessesary work on - both sides. - -Each pull request will be carefully reviewed and if it is a useful -addition it will be accepted. However, please be prepared that -sometimes a PR will be rejected. The reasons may vary and will be -documented. Perhaps the above guidelines are not matched, or the -addition seems to be not so useful from my perspective, maybe there -are too much changes or there might be changes I don't even -understand. - -But whatever happens: your contribution is always welcome! diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 2439c45..0000000 --- a/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -FROM golang:1.24-alpine as builder - -RUN apk update -RUN apk upgrade -RUN apk add --no-cache git make - -RUN git --version - -WORKDIR /work - -COPY go.mod . -COPY . . 
-RUN go mod download -RUN make - -FROM alpine:latest -LABEL maintainer="Thomas von Dein " - -WORKDIR /app -COPY --from=builder /work/kleingebaeck /app/kleingebaeck - -ENV KLEINGEBAECK_OUTDIR /backup -ENV LANG C.UTF-8 -USER 1001:1001 - -ENTRYPOINT ["/app/kleingebaeck"] -CMD ["-h"] diff --git a/Makefile b/Makefile deleted file mode 100644 index 4b08dd1..0000000 --- a/Makefile +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2023 Thomas von Dein - -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. - -# You should have received a copy of the GNU General Public License -# along with this program. If not, see . 
- - -# -# no need to modify anything below -tool = kleingebaeck -VERSION = $(shell grep VERSION config.go | head -1 | cut -d '"' -f2) -archs = darwin freebsd linux windows -PREFIX = /usr/local -UID = root -GID = 0 -HAVE_POD := $(shell pod2text -h 2>/dev/null) - -all: $(tool).1 $(tool).go buildlocal - -%.1: %.pod -ifdef HAVE_POD - pod2man -c "User Commands" -r 1 -s 1 $*.pod > $*.1 -endif - -%.go: %.pod -ifdef HAVE_POD - echo "package main" > $*.go - echo >> $*.go - echo "var manpage = \`" >> $*.go - pod2text $*.pod >> $*.go - echo "\`" >> $*.go -endif - -buildlocal: - CGO_LDFLAGS='-static' go build -tags osusergo,netgo -ldflags "-extldflags=-static" -o $(tool) - -install: buildlocal - install -d -o $(UID) -g $(GID) $(PREFIX)/bin - install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1 - install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/sbin/ - install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/ - -clean: - rm -rf $(tool) coverage.out testdata t/out - -test: clean - mkdir -p t/out - go test ./... $(ARGS) - -testlint: test lint - -lint: - golangci-lint run - -lint-full: - golangci-lint run --enable-all --exclude-use-default --disable exhaustivestruct,exhaustruct,depguard,interfacer,deadcode,golint,structcheck,scopelint,varcheck,ifshort,maligned,nosnakecase,godot,funlen,gofumpt,cyclop,noctx,gochecknoglobals,paralleltest - gocritic check -enableAll *.go - -testfuzzy: clean - go test -fuzz ./... $(ARGS) - -singletest: - @echo "Call like this: make singletest TEST=TestPrepareColumns ARGS=-v" - go test -run $(TEST) $(ARGS) - -cover-report: - go test ./... -cover -coverprofile=coverage.out - go tool cover -html=coverage.out - -goupdate: - go get -t -u=patch ./... 
- -buildall: - ./mkrel.sh $(tool) $(VERSION) - -release: - gh release create v$(VERSION) --generate-notes - -show-versions: buildlocal - @echo "### kleingebaeck version:" - @./kleingebaeck -V - - @echo - @echo "### go module versions:" - @go list -m all - - @echo - @echo "### go version used for building:" - @grep -m 1 go go.mod - -# lint: -# golangci-lint run -p bugs -p unused diff --git a/Makefile.dist b/Makefile.dist deleted file mode 100644 index cb76bca..0000000 --- a/Makefile.dist +++ /dev/null @@ -1,20 +0,0 @@ -# -*-make-*- - -.PHONY: install all - -tool = rpn -PREFIX = /usr/local -UID = root -GID = 0 - -all: - @echo "Type 'sudo make install' to install the tool." - @echo "To change prefix, type 'sudo make install PREFIX=/opt'" - -install: - install -d -o $(UID) -g $(GID) $(PREFIX)/bin - install -d -o $(UID) -g $(GID) $(PREFIX)/man/man1 - install -d -o $(UID) -g $(GID) $(PREFIX)/share/doc - install -o $(UID) -g $(GID) -m 555 $(tool) $(PREFIX)/sbin/ - install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/ - install -o $(UID) -g $(GID) -m 444 *.md $(PREFIX)/share/doc/ diff --git a/README-de.md b/README-de.md index 5a10401..896680e 100644 --- a/README-de.md +++ b/README-de.md @@ -9,6 +9,9 @@ [![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://codeberg.org/scip/kleingebaeck/releases) [![English](https://codeberg.org/scip/kleingebaeck/raw/branch/.github/assets/english.png)](https://codeberg.org/scip/kleingebaeck/raw/branch/README.md) +> [!IMPORTANT] +> Diese Software wird jetzt auf [Codeberg](https://codeberg.org/scip/kleingebaeck/) weitergepflegt. + Mit diesem Tool kann man seine Anzeigen bei https://kleinanzeigen.de sichern. 
Es kann alle Anzeigen eines Users (oder nur eine Ausgewählte) diff --git a/README.md b/README.md index ad0d4d1..b6676d3 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,9 @@ [![GitHub release](https://img.shields.io/github/v/release/tlinden/kleingebaeck?color=%2300a719)](https://codeberg.org/scip/kleingebaeck/releases) [![German](https://codeberg.org/scip/kleingebaeck/raw/branch/.github/assets/german.png)](https://codeberg.org/scip/kleingebaeck/raw/branch/README-de.md) +> [!IMPORTANT] +> This software is now being maintained on [Codeberg](https://codeberg.org/scip/kleingebaeck/). + [Die deutsche Version des READMEs findet Ihr hier](README-de.md). This tool can be used to backup ads on the german ad page https://kleinanzeigen.de diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index 4293c23..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,17 +0,0 @@ -# Security Policy - -## Supported Versions - -Only the latest release is supported. If you find an issue (any -issue!), please check with the latest release first. - -## Reporting a Vulnerability - -I don't agree with the "responsible disclosure" process most projects -(and companies) work these days. - -So, if you find a vulnerability of any kind, please just open an -[issue](https://codeberg.org/scip/kleingebaeck/issues). Please add -all details required to reproduce the vulnerability. You won't be chased. - -That's just all about it. diff --git a/ad.go b/ad.go deleted file mode 100644 index c32e268..0000000 --- a/ad.go +++ /dev/null @@ -1,163 +0,0 @@ -/* -Copyright © 2023-2025 Thomas von Dein - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -package main - -import ( - "bufio" - "log/slog" - "strings" - "time" -) - -type Index struct { - Links []string `goquery:".text-module-begin a,[href]"` -} - -type Ad struct { - Title string `goquery:"h1"` - Slug string - ID string - Details string `goquery:".addetailslist--detail,text"` - Attributes map[string]string // processed afterwards - Condition string // post processed from details for backward compatibility - Type string // post processed from details for backward compatibility - Color string // post processed from details for backward compatibility - Material string // post processed from details for backward compatibility - Category string - CategoryTree []string `goquery:".breadcrump-link,text"` - Price string `goquery:"h2#viewad-price"` - Created string `goquery:"#viewad-extra-info,text"` - Text string `goquery:"p#viewad-description-text,html"` - Images []string `goquery:".galleryimage-element img,[src]"` - Shipping string `goquery:".boxedarticle--details--shipping,text"` // not always filled - Expire string - - // runtime computed - Year, Day, Month string -} - -// Used by slog to pretty print an ad -func (ad *Ad) LogValue() slog.Value { - return slog.GroupValue( - slog.String("title", ad.Title), - slog.String("price", ad.Price), - slog.String("id", ad.ID), - slog.Int("imagecount", len(ad.Images)), - slog.Int("bodysize", len(ad.Text)), - slog.String("categorytree", strings.Join(ad.CategoryTree, "+")), - slog.String("created", ad.Created), - slog.String("expire", ad.Expire), - slog.String("shipping", ad.Shipping), - slog.String("details", ad.Details), - ) -} - -// check for completeness. 
I erected these fields to be mandatory -// (though I really don't know if they really are). I consider images -// and meta optional. So, if either of the checked fields here is -// empty we return an error. All the checked fields are extracted -// using goquery. However, I think price is optional since there are -// ads for gifts as well. -// -// Note: we return true for "ad is incomplete" and false for "ad is complete"! -func (ad *Ad) Incomplete() bool { - if ad.Category == "" || ad.Created == "" || ad.Text == "" { - return true - } - - return false -} - -func (ad *Ad) CalculateExpire() { - if ad.Created != "" { - ts, err := time.Parse("02.01.2006", ad.Created) - if err == nil { - ad.Expire = ts.AddDate(0, 0, ExpireDays).Format("02.01.2006") - } - } -} - -/* -Decode attributes like color or condition. See -https://codeberg.org/scip/kleingebaeck/issues/117 -for more details. In short: the HTML delivered by -kleinanzeigen.de has no css attribute for the keys -so we cannot extract key=>value mappings of the -ad details but have to parse them manually. - -The ad.Details member contains this after goq run: - -Art - - Weitere Kinderzimmermöbel - - Farbe - Holz - - Zustand - In Ordnung - -We parse this into ad.Attributes and fill in some -static members for backward compatibility reasons. 
-*/ -func (ad *Ad) DecodeAttributes() { - rd := strings.NewReader(ad.Details) - scanner := bufio.NewScanner(rd) - - isattr := true - attr := "" - attrmap := map[string]string{} - - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - - if line == "" { - continue - } - - if isattr { - attr = line - } else { - attrmap[attr] = line - } - - isattr = !isattr - } - - ad.Attributes = attrmap - - if Exists(ad.Attributes, "Zustand") { - ad.Condition = ad.Attributes["Zustand"] - } - - if Exists(ad.Attributes, "Farbe") { - ad.Color = ad.Attributes["Farbe"] - } - - if Exists(ad.Attributes, "Art") { - ad.Type = ad.Attributes["Art"] - } - - if Exists(ad.Attributes, "Material") { - ad.Material = ad.Attributes["Material"] - } - - slog.Debug("parsed attributes", "attributes", ad.Attributes) - - ad.Shipping = strings.Replace(ad.Shipping, "+ Versand ab ", "", 1) -} diff --git a/config.go b/config.go deleted file mode 100644 index ecce57c..0000000 --- a/config.go +++ /dev/null @@ -1,251 +0,0 @@ -/* -Copyright © 2023-2025 Thomas von Dein - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . 
-*/ -package main - -import ( - "fmt" - "io" - "os" - "path/filepath" - "runtime" - "strings" - - "github.com/knadh/koanf/parsers/toml" - "github.com/knadh/koanf/providers/confmap" - "github.com/knadh/koanf/providers/env" - "github.com/knadh/koanf/providers/file" - "github.com/knadh/koanf/providers/posflag" - "github.com/knadh/koanf/v2" - flag "github.com/spf13/pflag" -) - -const ( - VERSION string = "0.3.23" - Baseuri string = "https://www.kleinanzeigen.de" - Listuri string = "/s-bestandsliste.html" - Defaultdir string = "." - - /* - Also possible: loop through .Attributes: - - DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" + - "Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" + - "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" - - */ - DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nShipping: {{.Shipping}}\nId: {{.ID}}\n" + - "Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" + - "Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n" - - DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nShipping: {{.Shipping}}\r\nId: {{.ID}}\r\n" + - "Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" + - "Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n" - - DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " + - "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36" - - DefaultAdNameTemplate string = "{{.Slug}}" - - DefaultOutdirTemplate string = "." - - // for image download throttling - MinThrottle int = 2 - MaxThrottle int = 20 - - // we extract the slug from the uri - SlugURIPartNum int = 6 - - // We have to calculate the ad expiry because the real value can - // only be seen by logged in users. The initial ad lifetime is 120 - // days. It can be extended by the user 8 days before expire by 60 - // days. 
But this is unknown to us, so we'll stick with our 120 - // days. They may be wrong for older ads. Don't rely on it! - ExpireDays int = 120 - - WIN string = "windows" -) - -var DirsVisited map[string]int - -const Usage string = `This is kleingebaeck, the kleinanzeigen.de backup tool. - -Usage: kleingebaeck [-dvVhmoclu] [,...] - -Options: --u --user Backup ads from user with uid . --d --debug Enable debug output. --v --verbose Enable verbose output. --o --outdir Set output dir (default: current directory) --l --limit Limit the ads to download to , default: load all. --c --config Use config file (default: ~/.kleingebaeck). - --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. --f --force Overwrite images and ads even if the already exist. --m --manual Show manual. --h --help Show usage. --V --version Show program version. - -If one or more ad listing url's are specified, only backup those, -otherwise backup all ads of the given user.` - -type Config struct { - Verbose bool `koanf:"verbose"` // loglevel=info - Debug bool `koanf:"debug"` // loglevel=debug - Showversion bool `koanf:"version"` // -v - Showhelp bool `koanf:"help"` // -h - Showmanual bool `koanf:"manual"` // -m - User int `koanf:"user"` - Outdir string `koanf:"outdir"` - Template string `koanf:"template"` - Adnametemplate string `koanf:"adnametemplate"` - Loglevel string `koanf:"loglevel"` - Limit int `koanf:"limit"` - IgnoreErrors bool `koanf:"ignoreerrors"` - ForceDownload bool `koanf:"force"` - UserAgent string `koanf:"useragent"` // conf only - Adlinks []string - StatsCountAds int - StatsCountImages int -} - -func (c *Config) IncrAds() { - c.StatsCountAds++ -} - -func (c *Config) IncrImgs(num int) { - c.StatsCountImages += num -} - -// load commandline flags and config file -func InitConfig(output io.Writer) (*Config, error) { - var kloader = koanf.New(".") - - // determine template based on os - template := DefaultTemplate - if runtime.GOOS == WIN { - template = DefaultTemplateWin - } 
- - // Load default values using the confmap provider. - if err := kloader.Load(confmap.Provider(map[string]interface{}{ - "template": template, - "outdir": DefaultOutdirTemplate, - "loglevel": "notice", - "userid": 0, - "adnametemplate": DefaultAdNameTemplate, - "useragent": DefaultUserAgent, - }, "."), nil); err != nil { - return nil, fmt.Errorf("failed to load default values into koanf: %w", err) - } - - // setup custom usage - flagset := flag.NewFlagSet("config", flag.ContinueOnError) - flagset.Usage = func() { - _, err := fmt.Fprintln(output, Usage) - if err != nil { - panic(err) - } - - os.Exit(0) - } - - // parse commandline flags - flagset.StringP("config", "c", "", "config file") - flagset.StringP("outdir", "o", "", "directory where to store ads") - flagset.IntP("user", "u", 0, "user id") - flagset.IntP("limit", "l", 0, "limit ads to be downloaded (default 0, unlimited)") - flagset.BoolP("verbose", "v", false, "be verbose") - flagset.BoolP("debug", "d", false, "enable debug log") - flagset.BoolP("version", "V", false, "show program version") - flagset.BoolP("help", "h", false, "show usage") - flagset.BoolP("manual", "m", false, "show manual") - flagset.BoolP("force", "f", false, "force") - flagset.BoolP("ignoreerrors", "", false, "ignore image download HTTP errors") - - if err := flagset.Parse(os.Args[1:]); err != nil { - return nil, fmt.Errorf("failed to parse program arguments: %w", err) - } - - // generate a list of config files to try to load, including the - // one provided via -c, if any - var configfiles []string - - configfile, _ := flagset.GetString("config") - home, _ := os.UserHomeDir() - - if configfile != "" { - configfiles = []string{configfile} - } else { - configfiles = []string{ - "/etc/kleingebaeck.conf", "/usr/local/etc/kleingebaeck.conf", // unix variants - filepath.Join(home, ".config", "kleingebaeck", "config"), - filepath.Join(home, ".kleingebaeck"), - "kleingebaeck.conf", - } - } - - // Load the config file[s] - for _, cfgfile := 
range configfiles { - path, err := os.Stat(cfgfile) - - if err != nil { - // ignore non-existent files, but bail out on any other errors - if !os.IsNotExist(err) { - return nil, fmt.Errorf("failed to stat config file: %w", err) - } - - continue - } - - if !path.IsDir() { - if err := kloader.Load(file.Provider(cfgfile), toml.Parser()); err != nil { - return nil, fmt.Errorf("error loading config file: %w", err) - } - } - } - - // env overrides config file - if err := kloader.Load(env.Provider("KLEINGEBAECK_", ".", func(s string) string { - return strings.ReplaceAll(strings.ToLower( - strings.TrimPrefix(s, "KLEINGEBAECK_")), "_", ".") - }), nil); err != nil { - return nil, fmt.Errorf("error loading environment: %w", err) - } - - // command line overrides env - if err := kloader.Load(posflag.Provider(flagset, ".", kloader), nil); err != nil { - return nil, fmt.Errorf("error loading flags: %w", err) - } - - // fetch values - conf := &Config{} - if err := kloader.Unmarshal("", &conf); err != nil { - return nil, fmt.Errorf("error unmarshalling: %w", err) - } - - // adjust loglevel - switch conf.Loglevel { - case "verbose": - conf.Verbose = true - case "debug": - conf.Debug = true - } - - // are there any args left on commandline? if so threat them as adlinks - conf.Adlinks = flagset.Args() - - return conf, nil -} diff --git a/docker-compose.yaml b/docker-compose.yaml deleted file mode 100644 index 8cef53a..0000000 --- a/docker-compose.yaml +++ /dev/null @@ -1,22 +0,0 @@ -version: "3.9" -services: - init: - image: alpine:latest - user: "root" - group_add: - - '${GROUP_ID}' - volumes: - - ${OUTDIR}:/backup - command: chown -R ${USER_ID}:${USER_ID} /backup - - kleingebaeck: - container_name: kleingebaeck - user: "${USER_ID}:${USER_ID}" - volumes: - - ${OUTDIR}:/backup - working_dir: /backup - build: . 
- image: kleingebaeck:latest - depends_on: - init: - condition: service_completed_successfully diff --git a/example.conf b/example.conf deleted file mode 100644 index 3b3fbb5..0000000 --- a/example.conf +++ /dev/null @@ -1,48 +0,0 @@ -# -# kleingebaeck sample configuration file. -# put this to ~/.kleingebaeck. -# -# Comments start with the '#' character. - -# kleinanzeigen.de user-id. must be an unquoted number -user = 00000000 - -# enable verbose output (same as -v), may be true or false. -# other values: notice or debug -loglevel = "verbose" - -# directory where to store downloaded ads. kleingebaeck will try to -# create it. must be a quoted string. You can also include a couple of -# template variables, e.g: -# outdir = "test-{{.Year}}-{{.Month}}-{{.Day}}" -outdir = "test" - -# template for stored adlistings. -template=""" -Title: {{.Title}} -Price: {{.Price}} -Shipping: {{.Shipping}} -Id: {{.Id}} -Category: {{.Category}} -Condition: {{.Condition}} -Type: {{.Type}} -Created: {{.Created}} - -{{.Text}} -""" - -# Ads may contain more attributes than just the Condition. To print -# all attributes, loop over all of them: - -template=""" -Title: {{.Title}} -Price: {{.Price}} -Id: {{.Id}} -Category: {{.Category}} -{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }} -{{ end }} -Type: {{.Type}} -Created: {{.Created}} - -{{.Text}} -""" diff --git a/fetch.go b/fetch.go deleted file mode 100644 index bfdac24..0000000 --- a/fetch.go +++ /dev/null @@ -1,104 +0,0 @@ -/* -Copyright © 2023-2024 Thomas von Dein - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -package main - -import ( - "errors" - "fmt" - "io" - "log/slog" - "net/http" - "net/http/cookiejar" - "net/url" -) - -// convenient wrapper to fetch some web content -type Fetcher struct { - Config *Config - Client *http.Client - Cookies []*http.Cookie -} - -func NewFetcher(conf *Config) (*Fetcher, error) { - jar, err := cookiejar.New(nil) - if err != nil { - return nil, fmt.Errorf("failed to create a cookie jar obj: %w", err) - } - - return &Fetcher{ - Client: &http.Client{ - Transport: &loggingTransport{}, // implemented in http.go - Jar: jar, - }, - Config: conf, - Cookies: []*http.Cookie{}, - }, - nil -} - -func (f *Fetcher) Get(uri string) (io.ReadCloser, error) { - req, err := http.NewRequest(http.MethodGet, uri, http.NoBody) - if err != nil { - return nil, fmt.Errorf("failed to create a new HTTP request obj: %w", err) - } - - req.Header.Set("User-Agent", f.Config.UserAgent) - - if len(f.Cookies) > 0 { - uriobj, _ := url.Parse(Baseuri) - - slog.Debug("have cookies, sending them", - "sample-cookie-name", f.Cookies[0].Name, - "sample-cookie-expire", f.Cookies[0].Expires, - ) - - f.Client.Jar.SetCookies(uriobj, f.Cookies) - } - - res, err := f.Client.Do(req) - if err != nil { - return nil, fmt.Errorf("failed to initiate HTTP request to %s: %w", uri, err) - } - - if res.StatusCode != http.StatusOK { - return nil, errors.New("could not get page via HTTP") - } - - slog.Debug("got cookies?", "cookies", res.Cookies()) - f.Cookies = res.Cookies() - - return res.Body, nil -} - -// fetch an image -func (f *Fetcher) Getimage(uri string) (io.ReadCloser, error) { - slog.Debug("fetching ad image", "uri", uri) - - body, err := f.Get(uri) - if err != nil { - if f.Config.IgnoreErrors { - slog.Info("Failed to download image, error ignored", "error", err.Error()) - - return nil, nil - } - - return nil, err - } - 
- return body, nil -} diff --git a/go.mod b/go.mod deleted file mode 100644 index c40ed15..0000000 --- a/go.mod +++ /dev/null @@ -1,41 +0,0 @@ -module kleingebaeck - -go 1.24.0 - -toolchain go1.24.5 - -require ( - astuart.co/goq v1.0.0 - github.com/corona10/goimagehash v1.1.0 - github.com/inconshreveable/mousetrap v1.1.0 - github.com/jarcoal/httpmock v1.4.1 - github.com/knadh/koanf/parsers/toml v0.1.0 - github.com/knadh/koanf/providers/confmap v1.0.0 - github.com/knadh/koanf/providers/env v1.1.0 - github.com/knadh/koanf/providers/file v1.2.0 - github.com/knadh/koanf/providers/posflag v1.0.1 - github.com/knadh/koanf/v2 v2.3.0 - github.com/lmittmann/tint v1.1.2 - github.com/mattn/go-isatty v0.0.20 - github.com/spf13/pflag v1.0.10 - github.com/tlinden/yadu v0.1.3 - golang.org/x/image v0.31.0 - golang.org/x/sync v0.17.0 -) - -require ( - github.com/PuerkitoBio/goquery v1.5.1 // indirect - github.com/andybalholm/cascadia v1.1.0 // indirect - github.com/fatih/color v1.16.0 // indirect - github.com/fsnotify/fsnotify v1.9.0 // indirect - github.com/go-viper/mapstructure/v2 v2.4.0 // indirect - github.com/knadh/koanf/maps v0.1.2 // indirect - github.com/mattn/go-colorable v0.1.14 // indirect - github.com/mitchellh/copystructure v1.2.0 // indirect - github.com/mitchellh/reflectwalk v1.0.2 // indirect - github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect - github.com/pelletier/go-toml v1.9.5 // indirect - golang.org/x/net v0.38.0 // indirect - golang.org/x/sys v0.32.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect -) diff --git a/go.sum b/go.sum deleted file mode 100644 index cb6641c..0000000 --- a/go.sum +++ /dev/null @@ -1,88 +0,0 @@ -astuart.co/goq v1.0.0 h1:nnYIhu/Z/j0VaX9Dp+pmh2Uh7ldEz6XfgSg+bAY5Yrw= -astuart.co/goq v1.0.0/go.mod h1:+fokcnFrO8Pw2fj8drdStJvzoMFebJH69rw8IC21rno= -github.com/PuerkitoBio/goquery v1.5.0/go.mod h1:qD2PgZ9lccMbQlc7eEOjaeRlFQON7xY8kdmcsrnKqMg= -github.com/PuerkitoBio/goquery v1.5.1 
h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= -github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= -github.com/andybalholm/cascadia v1.0.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= -github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= -github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= -github.com/corona10/goimagehash v1.1.0 h1:teNMX/1e+Wn/AYSbLHX8mj+mF9r60R1kBeqE9MkoYwI= -github.com/corona10/goimagehash v1.1.0/go.mod h1:VkvE0mLn84L4aF8vCb6mafVajEb6QYMHl2ZJLn0mOGI= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= -github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= -github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= -github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= -github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= -github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= -github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= -github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= -github.com/jarcoal/httpmock v1.4.1 h1:0Ju+VCFuARfFlhVXFc2HxlcQkfB+Xq12/EotHko+x2A= -github.com/jarcoal/httpmock v1.4.1/go.mod h1:ftW1xULwo+j0R0JJkJIIi7UKigZUXCLLanykgjwBXL0= -github.com/knadh/koanf/maps v0.1.2 h1:RBfmAW5CnZT+PJ1CVc1QSJKf4Xu9kxfQgYVQSu8hpbo= -github.com/knadh/koanf/maps v0.1.2/go.mod h1:npD/QZY3V6ghQDdcQzl1W4ICNVTkohC8E73eI2xW4yI= -github.com/knadh/koanf/parsers/toml v0.1.0 
h1:S2hLqS4TgWZYj4/7mI5m1CQQcWurxUz6ODgOub/6LCI= -github.com/knadh/koanf/parsers/toml v0.1.0/go.mod h1:yUprhq6eo3GbyVXFFMdbfZSo928ksS+uo0FFqNMnO18= -github.com/knadh/koanf/providers/confmap v1.0.0 h1:mHKLJTE7iXEys6deO5p6olAiZdG5zwp8Aebir+/EaRE= -github.com/knadh/koanf/providers/confmap v1.0.0/go.mod h1:txHYHiI2hAtF0/0sCmcuol4IDcuQbKTybiB1nOcUo1A= -github.com/knadh/koanf/providers/env v1.1.0 h1:U2VXPY0f+CsNDkvdsG8GcsnK4ah85WwWyJgef9oQMSc= -github.com/knadh/koanf/providers/env v1.1.0/go.mod h1:QhHHHZ87h9JxJAn2czdEl6pdkNnDh/JS1Vtsyt65hTY= -github.com/knadh/koanf/providers/file v1.2.0 h1:hrUJ6Y9YOA49aNu/RSYzOTFlqzXSCpmYIDXI7OJU6+U= -github.com/knadh/koanf/providers/file v1.2.0/go.mod h1:bp1PM5f83Q+TOUu10J/0ApLBd9uIzg+n9UgthfY+nRA= -github.com/knadh/koanf/providers/posflag v1.0.1 h1:EnMxHSrPkYCFnKgBUl5KBgrjed8gVFrcXDzaW4l/C6Y= -github.com/knadh/koanf/providers/posflag v1.0.1/go.mod h1:3Wn3+YG3f4ljzRyCUgIwH7G0sZ1pMjCOsNBovrbKmAk= -github.com/knadh/koanf/v2 v2.3.0 h1:Qg076dDRFHvqnKG97ZEsi9TAg2/nFTa9hCdcSa1lvlM= -github.com/knadh/koanf/v2 v2.3.0/go.mod h1:gRb40VRAbd4iJMYYD5IxZ6hfuopFcXBpc9bbQpZwo28= -github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/lmittmann/tint v1.1.2 h1:2CQzrL6rslrsyjqLDwD11bZ5OpLBPU+g3G/r5LSfS8w= -github.com/lmittmann/tint v1.1.2/go.mod h1:HIS3gSy7qNwGCj+5oRjAutErFBl4BzdQP6cJZ0NfMwE= -github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= -github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= -github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= -github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= -github.com/maxatome/go-testdeep v1.14.0 
h1:rRlLv1+kI8eOI3OaBXZwb3O7xY3exRzdW5QyX48g9wI= -github.com/maxatome/go-testdeep v1.14.0/go.mod h1:lPZc/HAcJMP92l7yI6TRz1aZN5URwUBUAfUNvrclaNM= -github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= -github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= -github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= -github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw= -github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ= -github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8= -github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= -github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= -github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/tlinden/yadu v0.1.3 h1:5cRCUmj+l5yvlM2irtpFBIJwVV2DPEgYSaWvF19FtcY= -github.com/tlinden/yadu v0.1.3/go.mod h1:l3bRmHKL9zGAR6pnBHY2HRPxBecf7L74BoBgOOpTcUA= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/image v0.31.0 h1:mLChjE2MV6g1S7oqbXC0/UcKijjm5fnJLUYKIYrLESA= -golang.org/x/image 
v0.31.0/go.mod h1:R9ec5Lcp96v9FTF+ajwaH3uGxPH4fKfHHAVbUILxghA= -golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20190606173856-1492cefac77f/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= -golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= -golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= -golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.32.0 h1:s77OFDvIQeibCmezSnk/q6iAfkdiQaJi4VzroCFrN20= -golang.org/x/sys v0.32.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/http.go b/http.go deleted file mode 100644 index 2c57758..0000000 --- a/http.go +++ /dev/null @@ -1,140 +0,0 @@ -/* -Copyright © 2023-2024 Thomas von Dein - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License 
as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -package main - -import ( - "bytes" - "fmt" - "io" - "log/slog" - "math" - "math/rand" - "net/http" - "time" -) - -// I add an artificial "ID" to each HTTP request and the corresponding -// respose for debugging purposes so that the pair of them can be -// easier associated in debug output -var letters = []rune("ABCDEF0123456789") - -const IDLEN int = 8 - -// retry after HTTP 50x errors or err!=nil -const RetryCount = 3 - -func getid() string { - b := make([]rune, IDLEN) - for i := range b { - b[i] = letters[rand.Intn(len(letters))] - } - - return string(b) -} - -// used to inject debug log and implement retries -type loggingTransport struct{} - -// escalating timeout, $retry^2 seconds -func backoff(retries int) time.Duration { - return time.Duration(math.Pow(2, float64(retries))) * time.Second -} - -// only retry in case of errors or certain non 200 HTTP codes -func shouldRetry(err error, resp *http.Response) bool { - if err != nil { - return true - } - - if resp.StatusCode == http.StatusBadGateway || - resp.StatusCode == http.StatusServiceUnavailable || - resp.StatusCode == http.StatusGatewayTimeout { - return true - } - - return false -} - -// Body needs to be drained, otherwise we can't reuse the http.Response -func drainBody(resp *http.Response) { - if resp != nil { - if resp.Body != nil { - _, err := io.Copy(io.Discard, resp.Body) - if err != nil { - // unable to copy data? uff! 
- panic(err) - } - - if err := resp.Body.Close(); err != nil { - panic(err) - } - } - } -} - -// the actual logging transport with retries -func (t *loggingTransport) RoundTrip(req *http.Request) (*http.Response, error) { - // just required for debugging - requestid := getid() - - // clone the request body, put into request on retry - var bodyBytes []byte - if req.Body != nil { - bodyBytes, _ = io.ReadAll(req.Body) - req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) - } - - slog.Debug("REQUEST", "id", requestid, "uri", req.URL, "host", req.Host) - - // first try - resp, err := http.DefaultTransport.RoundTrip(req) - if err == nil { - slog.Debug("RESPONSE", "id", requestid, "status", resp.StatusCode, - "contentlength", resp.ContentLength) - } - - // enter retry check and loop, if first req were successful, leave loop immediately - retries := 0 - for shouldRetry(err, resp) && retries < RetryCount { - time.Sleep(backoff(retries)) - - // consume any response to reuse the connection. - drainBody(resp) - - // clone the request body again - if req.Body != nil { - req.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) - } - - // actual retry - resp, err = http.DefaultTransport.RoundTrip(req) - - if err == nil { - slog.Debug("RESPONSE", "id", requestid, "status", resp.StatusCode, - "contentlength", resp.ContentLength, "retry", retries) - } - - retries++ - } - - if err != nil { - return resp, fmt.Errorf("failed to get HTTP response for %s: %w", req.URL, err) - } - - return resp, nil -} diff --git a/image.go b/image.go deleted file mode 100644 index 2e56d24..0000000 --- a/image.go +++ /dev/null @@ -1,191 +0,0 @@ -/* -Copyright © 2023-2024 Thomas von Dein - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. 
- -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see . -*/ - -package main - -import ( - "bytes" - "fmt" - "image" - _ "image/gif" - _ "image/jpeg" - _ "image/png" - "log/slog" - "os" - "path/filepath" - - _ "golang.org/x/image/webp" - - "github.com/corona10/goimagehash" -) - -const MaxDistance = 3 - -type Image struct { - Filename string - Hash *goimagehash.ImageHash - Data *bytes.Reader - URI string - Mime string -} - -// used for logging to avoid printing Data -func (img *Image) LogValue() slog.Value { - return slog.GroupValue( - slog.String("filename", img.Filename), - slog.String("uri", img.URI), - slog.String("hash", img.Hash.ToString()), - ) -} - -// holds all images of an ad -type Cache []*goimagehash.ImageHash - -// filename comes from the scraper, it contains directory/base w/o suffix -func NewImage(buf *bytes.Reader, filename, uri string) (*Image, error) { - _, imgconfig, err := image.DecodeConfig(buf) - if err != nil { - return nil, fmt.Errorf("failed to decode image: %w", err) - } - - _, err = buf.Seek(0, 0) - if err != nil { - return nil, fmt.Errorf("failed to seek(0) on image buffer: %w", err) - } - - if imgconfig == "jpeg" { - // we're using the format as file extension, but have used - // "jpg" in the past, so to be backwards compatible, stay with - // it. - imgconfig = "jpg" - } - - if imgconfig == "" { - return nil, fmt.Errorf("failed to process image: unknown or unsupported image format (supported: jpg,png,gif,webp)") - } - - filename += "." 
+ imgconfig - - img := &Image{ - Filename: filename, - URI: uri, - Data: buf, - Mime: imgconfig, - } - - slog.Debug("image MIME", "mime", img.Mime) - - return img, nil -} - -// Calculate diff hash of the image -func (img *Image) CalcHash() error { - jpgdata, format, err := image.Decode(img.Data) - if err != nil { - return fmt.Errorf("failed to decode image: %w", err) - } - - if format == "" { - return fmt.Errorf("failed to decode image: unknown or unsupported image format (supported: jpg,png,gif,webp)") - } - - hash1, err := goimagehash.DifferenceHash(jpgdata) - if err != nil { - return fmt.Errorf("failed to calculate diff hash of image: %w", err) - } - - img.Hash = hash1 - - return nil -} - -// checks if 2 images are similar enough to be considered the same -func (img *Image) Similar(hash *goimagehash.ImageHash) bool { - distance, err := img.Hash.Distance(hash) - if err != nil { - slog.Debug("failed to compute diff hash distance", "error", err) - - return false - } - - if distance < MaxDistance { - slog.Debug("distance computation", "image-A", img.Hash.ToString(), - "image-B", hash.ToString(), "distance", distance) - - return true - } - - return false -} - -// check current image against all known hashes. 
-func (img *Image) SimilarExists(cache Cache) bool { - for _, otherimg := range cache { - if img.Similar(otherimg) { - return true - } - } - - return false -} - -// read all JPG images in a ad directory, compute diff hashes and -// store the results in the slice Images -func ReadImages(addir string, dont bool) (Cache, error) { - files, err := os.ReadDir(addir) - if err != nil { - return nil, fmt.Errorf("failed to read ad directory contents: %w", err) - } - - cache := Cache{} - - if dont { - // forced download, -f given - return cache, nil - } - - for _, file := range files { - ext := filepath.Ext(file.Name()) - if !file.IsDir() && (ext == ".jpg" || ext == ".jpeg" || ext == ".JPG" || ext == ".JPEG") { - filename := filepath.Join(addir, file.Name()) - - data, err := ReadImage(filename) - if err != nil { - return nil, err - } - - reader := bytes.NewReader(data.Bytes()) - - img, err := NewImage(reader, filename, "") - if err != nil { - return nil, err - } - - if err := img.CalcHash(); err != nil { - return nil, err - } - - if img.Hash != nil { - slog.Debug("Caching image from file system", "image", img, "hash", img.Hash.ToString()) - } - - cache = append(cache, img.Hash) - } - } - - return cache, nil -} diff --git a/kleingebaeck.1 b/kleingebaeck.1 deleted file mode 100644 index 5762a7d..0000000 --- a/kleingebaeck.1 +++ /dev/null @@ -1,386 +0,0 @@ -.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42) -.\" -.\" Standard preamble: -.\" ======================================================================== -.de Sp \" Vertical space (when we can't use .PP) -.if t .sp .5v -.if n .sp -.. -.de Vb \" Begin verbatim text -.ft CW -.nf -.ne \\$1 -.. -.de Ve \" End verbatim text -.ft R -.fi -.. -.\" Set up some character translations and predefined strings. \*(-- will -.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left -.\" double quote, and \*(R" will give a right double quote. \*(C+ will -.\" give a nicer C++. 
Capital omega is used to do unbreakable dashes and -.\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, -.\" nothing in troff, for use with C<>. -.tr \(*W- -.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' -.ie n \{\ -. ds -- \(*W- -. ds PI pi -. if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch -. if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch -. ds L" "" -. ds R" "" -. ds C` "" -. ds C' "" -'br\} -.el\{\ -. ds -- \|\(em\| -. ds PI \(*p -. ds L" `` -. ds R" '' -. ds C` -. ds C' -'br\} -.\" -.\" Escape single quotes in literal strings from groff's Unicode transform. -.ie \n(.g .ds Aq \(aq -.el .ds Aq ' -.\" -.\" If the F register is >0, we'll generate index entries on stderr for -.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index -.\" entries marked with X<> in POD. Of course, you'll have to process the -.\" output yourself in some meaningful fashion. -.\" -.\" Avoid warning from groff about undefined register 'F'. -.de IX -.. -.nr rF 0 -.if \n(.g .if rF .nr rF 1 -.if (\n(rF:(\n(.g==0)) \{\ -. if \nF \{\ -. de IX -. tm Index:\\$1\t\\n%\t"\\$2" -.. -. if !\nF==2 \{\ -. nr % 0 -. nr F 2 -. \} -. \} -.\} -.rr rF -.\" -.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). -.\" Fear. Run. Save yourself. No user-serviceable parts. -. \" fudge factors for nroff and troff -.if n \{\ -. ds #H 0 -. ds #V .8m -. ds #F .3m -. ds #[ \f1 -. ds #] \fP -.\} -.if t \{\ -. ds #H ((1u-(\\\\n(.fu%2u))*.13m) -. ds #V .6m -. ds #F 0 -. ds #[ \& -. ds #] \& -.\} -. \" simple accents for nroff and troff -.if n \{\ -. ds ' \& -. ds ` \& -. ds ^ \& -. ds , \& -. ds ~ ~ -. ds / -.\} -.if t \{\ -. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" -. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' -. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' -. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' -. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' -. 
ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' -.\} -. \" troff and (daisy-wheel) nroff accents -.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' -.ds 8 \h'\*(#H'\(*b\h'-\*(#H' -.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] -.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' -.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' -.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] -.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] -.ds ae a\h'-(\w'a'u*4/10)'e -.ds Ae A\h'-(\w'A'u*4/10)'E -. \" corrections for vroff -.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' -.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' -. \" for low resolution devices (crt and lpr) -.if \n(.H>23 .if \n(.V>19 \ -\{\ -. ds : e -. ds 8 ss -. ds o a -. ds d- d\h'-1'\(ga -. ds D- D\h'-1'\(hy -. ds th \o'bp' -. ds Th \o'LP' -. ds ae ae -. ds Ae AE -.\} -.rm #[ #] #H #V #F C -.\" ======================================================================== -.\" -.IX Title "KLEINGEBAECK 1" -.TH KLEINGEBAECK 1 "2025-02-27" "1" "User Commands" -.\" For nroff, turn off justification. Always turn off hyphenation; it makes -.\" way too many mistakes in technical documents. -.if n .ad l -.nh -.SH "NAME" -kleingebaeck \- kleinanzeigen.de backup tool -.SH "SYNOPSYS" -.IX Header "SYNOPSYS" -.Vb 10 -\& Usage: kleingebaeck [\-dvVhmoc] [,...] -\& Options: -\& \-u \-\-user Backup ads from user with uid . -\& \-d \-\-debug Enable debug output. -\& \-v \-\-verbose Enable verbose output. -\& \-o \-\-outdir Set output dir (default: current directory) -\& \-l \-\-limit Limit the ads to download to , default: load all. -\& \-c \-\-config Use config file (default: ~/.kleingebaeck). -\& \-\-ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. -\& \-f \-\-force Overwrite images and ads even if the already exist. -\& \-m \-\-manual Show manual. 
-\& \-h \-\-help Show usage. -\& \-V \-\-version Show program version. -.Ve -.SH "DESCRIPTION" -.IX Header "DESCRIPTION" -This tool can be used to backup ads on the german ad page . -.PP -It downloads all (or only the specified ones) ads of one user into a -directory, each ad into its own subdirectory. The backup will contain -a textfile \fBAdlisting.txt\fR which contains the ad contents such as -title, body, price etc. All images will be downloaded as well. -.SH "CONFIGURATION" -.IX Header "CONFIGURATION" -You can create a config file to save typing. By default -\&\f(CW\*(C`~/.kleingebaeck\*(C'\fR is being used but you can specify one with \f(CW\*(C`\-c\*(C'\fR as -well. We use \s-1TOML\s0 as our configuration language. See -. -.PP -Format is pretty simple: -.PP -.Vb 11 -\& user = 1010101 -\& loglevel = verbose -\& outdir = "test" -\& useragent = "Mozilla/5.0" -\& template = """ -\& Title: {{.Title}} -\& Price: {{.Price}} -\& Id: {{.ID}} -\& Category: {{.Category}} -\& Condition: {{.Condition}} -\& Created: {{.Created}} -\& -\& {{.Text}} -\& """ -.Ve -.PP -Be careful if you want to change the template. The variable is a -multiline string surrounded by three double quotes. You can left out -certain fields and use any formatting you like. Refer to - for details how to write a -template. Also read the \s-1TEMPLATES\s0 section below. -.PP -If you're on windows and want to customize the output directory, put -it into single quotes to avoid the backslashes interpreted as escape -chars like this: -.PP -.Vb 1 -\& outdir = \*(AqC:\eData\eAds\*(Aq -.Ve -.SH "TEMPLATES" -.IX Header "TEMPLATES" -Various parts of the configuration can be modified using templates: -the output directory, the ad directory and the ad listing itself. 
-.SS "\s-1OUTPUT DIR TEMPLATE\s0" -.IX Subsection "OUTPUT DIR TEMPLATE" -The config varialbe \f(CW\*(C`outdir\*(C'\fR or the command line parameter \f(CW\*(C`\-o\*(C'\fR take a -template which may contain: -.ie n .IP """{{.Year}}""" 4 -.el .IP "\f(CW{{.Year}}\fR" 4 -.IX Item "{{.Year}}" -.PD 0 -.ie n .IP """{{.Month}}""" 4 -.el .IP "\f(CW{{.Month}}\fR" 4 -.IX Item "{{.Month}}" -.ie n .IP """{{.Day}}""" 4 -.el .IP "\f(CW{{.Day}}\fR" 4 -.IX Item "{{.Day}}" -.PD -.PP -That way you can create a new output directory for every backup -run. For example: -.PP -.Vb 1 -\& outdir = "/home/backups/ads\-{{.Year}}\-{{.Month}}\-{{.Day}}" -.Ve -.PP -Or using the command line flag: -.PP -.Vb 1 -\& \-o "/home/backups/ads\-{{.Year}}\-{{.Month}}\-{{.Day}}" -.Ve -.PP -The default value is \f(CW\*(C`.\*(C'\fR \- the current directory. -.SS "\s-1AD DIRECTORY TEMPLATE\s0" -.IX Subsection "AD DIRECTORY TEMPLATE" -The ad directory name can be modified using the following ad values: -.IP "{{.Price}}" 4 -.IX Item "{{.Price}}" -.PD 0 -.IP "{{.ID}}" 4 -.IX Item "{{.ID}}" -.IP "{{.Category}}" 4 -.IX Item "{{.Category}}" -.IP "{{.Condition}}" 4 -.IX Item "{{.Condition}}" -.IP "{{.Created}}" 4 -.IX Item "{{.Created}}" -.IP "{{.Slug}}" 4 -.IX Item "{{.Slug}}" -.IP "{{.Text}}" 4 -.IX Item "{{.Text}}" -.PD -.PP -It can only be configured in the config file. By default only -\&\f(CW\*(C`{{.Slug}}\*(C'\fR is being used, this is the title of the ad in url format. 
-.SS "\s-1AD NAME TEMPLATE\s0" -.IX Subsection "AD NAME TEMPLATE" -The name of the directory per ad can be tuned as well: -.ie n .IP """{{.Year}}""" 4 -.el .IP "\f(CW{{.Year}}\fR" 4 -.IX Item "{{.Year}}" -.PD 0 -.ie n .IP """{{.Month}}""" 4 -.el .IP "\f(CW{{.Month}}\fR" 4 -.IX Item "{{.Month}}" -.ie n .IP """{{.Day}}""" 4 -.el .IP "\f(CW{{.Day}}\fR" 4 -.IX Item "{{.Day}}" -.ie n .IP """{{.Slug}}""" 4 -.el .IP "\f(CW{{.Slug}}\fR" 4 -.IX Item "{{.Slug}}" -.ie n .IP """{{.Category}}""" 4 -.el .IP "\f(CW{{.Category}}\fR" 4 -.IX Item "{{.Category}}" -.ie n .IP """{{.ID}}""" 4 -.el .IP "\f(CW{{.ID}}\fR" 4 -.IX Item "{{.ID}}" -.PD -.SS "\s-1AD TEMPLATE\s0" -.IX Subsection "AD TEMPLATE" -The ad listing itself can be modified as well, using the same -variables as the ad name template above. -.PP -This is the default template: -.PP -.Vb 8 -\& Title: {{.Title}} -\& Price: {{.Price}} -\& Id: {{.ID}} -\& Category: {{.Category}} -\& Condition: {{.Condition}} -\& Type: {{.Type}} -\& Created: {{.Created}} -\& Expire: {{.Expire}} -\& -\& {{.Text}} -.Ve -.PP -The config parameter to modify is \f(CW\*(C`template\*(C'\fR. See example.conf in the -source repository. Please take care, since this is a multiline -string. This is how it shall look if you modify it: -.PP -.Vb 2 -\& template=""" -\& Title: {{.Title}} -\& -\& {{.Text}} -\& """ -.Ve -.PP -That is, the content between the two \f(CW"""\fR chars is the template. -.SH "SETUP" -.IX Header "SETUP" -To setup the tool, you need to lookup your userid on -kleinanzeigen.de. Go to your ad overview page while \s-1NOT\s0 being logged -in: -.PP -.Vb 1 -\& https://www.kleinanzeigen.de/s\-bestandsliste.html?userId=XXXXXX -.Ve -.PP -The \fB\s-1XXXXX\s0\fR part is your userid. -.PP -Put it into the configfile as outlined above. Also specify an output -directory. Then just execute \f(CW\*(C`kleingebaeck\*(C'\fR. -.PP -You can use the \fB\-v\fR option to get verbose output or \fB\-d\fR to enable -debugging. 
-.SH "ENVIRONMENT VARIABLES" -.IX Header "ENVIRONMENT VARIABLES" -The following environment variables are considered: -.PP -.Vb 7 -\& KLEINGEBAECK_USER -\& KLEINGEBAECK_DEBUG -\& KLEINGEBAECK_VERBOSE -\& KLEINGEBAECK_OUTDIR -\& KLEINGEBAECK_LIMIT -\& KLEINGEBAECK_CONFIG -\& KLEINGEBAECK_IGNOREERRORS -.Ve -.PP -Please note that they take precedence over the config file, but -commandline flags take precedence over env! -.SH "BUGS" -.IX Header "BUGS" -In order to report a bug, unexpected behavior, feature requests -or to submit a patch, please open an issue on github: -. -.PP -Please repeat the failing command with debugging enabled \f(CW\*(C`\-d\*(C'\fR and -include the output in the issue. -.SH "LIMITATIONS" -.IX Header "LIMITATIONS" -The \f(CW\*(C`kleingebaeck\*(C'\fR doesn't currently check if it has downloaded a -file already, so it downloads everything again every time you execute -it. Be aware of it. This will change in the future. -.PP -Also there's currently no parallelization implemented. This will -change in the future. -.SH "LICENSE" -.IX Header "LICENSE" -Copyright 2023\-2025 Thomas von Dein -.PP -This program is free software: you can redistribute it and/or modify -it under the terms of the \s-1GNU\s0 General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. -.PP -This program is distributed in the hope that it will be useful, -but \s-1WITHOUT ANY WARRANTY\s0; without even the implied warranty of -\&\s-1MERCHANTABILITY\s0 or \s-1FITNESS FOR A PARTICULAR PURPOSE.\s0 See the -\&\s-1GNU\s0 General Public License for more details. -.PP -You should have received a copy of the \s-1GNU\s0 General Public License -along with this program. If not, see <https://www.gnu.org/licenses/>.
-.SH "Author" -.IX Header "Author" -T.v.Dein diff --git a/kleingebaeck.go b/kleingebaeck.go deleted file mode 100644 index df62ba5..0000000 --- a/kleingebaeck.go +++ /dev/null @@ -1,205 +0,0 @@ -package main - -var manpage = ` -NAME - kleingebaeck - kleinanzeigen.de backup tool - -SYNOPSYS - Usage: kleingebaeck [-dvVhmoc] [,...] - Options: - -u --user Backup ads from user with uid . - -d --debug Enable debug output. - -v --verbose Enable verbose output. - -o --outdir Set output dir (default: current directory) - -l --limit Limit the ads to download to , default: load all. - -c --config Use config file (default: ~/.kleingebaeck). - --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. - -f --force Overwrite images and ads even if the already exist. - -m --manual Show manual. - -h --help Show usage. - -V --version Show program version. - -DESCRIPTION - This tool can be used to backup ads on the german ad page - . - - It downloads all (or only the specified ones) ads of one user into a - directory, each ad into its own subdirectory. The backup will contain a - textfile Adlisting.txt which contains the ad contents such as title, - body, price etc. All images will be downloaded as well. - -CONFIGURATION - You can create a config file to save typing. By default - "~/.kleingebaeck" is being used but you can specify one with "-c" as - well. We use TOML as our configuration language. See - . - - Format is pretty simple: - - user = 1010101 - loglevel = verbose - outdir = "test" - useragent = "Mozilla/5.0" - template = """ - Title: {{.Title}} - Price: {{.Price}} - Id: {{.ID}} - Category: {{.Category}} - Condition: {{.Condition}} - Created: {{.Created}} - - {{.Text}} - """ - - Be careful if you want to change the template. The variable is a - multiline string surrounded by three double quotes. You can left out - certain fields and use any formatting you like. Refer to - for details how to write a template. - Also read the TEMPLATES section below. 
- - If you're on windows and want to customize the output directory, put it - into single quotes to avoid the backslashes interpreted as escape chars - like this: - - outdir = 'C:\Data\Ads' - -TEMPLATES - Various parts of the configuration can be modified using templates: the - output directory, the ad directory and the ad listing itself. - - OUTPUT DIR TEMPLATE - The config varialbe "outdir" or the command line parameter "-o" take a - template which may contain: - - "{{.Year}}" - "{{.Month}}" - "{{.Day}}" - - That way you can create a new output directory for every backup run. For - example: - - outdir = "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}" - - Or using the command line flag: - - -o "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}" - - The default value is "." - the current directory. - - AD DIRECTORY TEMPLATE - The ad directory name can be modified using the following ad values: - - {{.Price}} - {{.ID}} - {{.Category}} - {{.Condition}} - {{.Created}} - {{.Slug}} - {{.Text}} - - It can only be configured in the config file. By default only - "{{.Slug}}" is being used, this is the title of the ad in url format. - - AD NAME TEMPLATE - The name of the directory per ad can be tuned as well: - - "{{.Year}}" - "{{.Month}}" - "{{.Day}}" - "{{.Slug}}" - "{{.Category}}" - "{{.ID}}" - - AD TEMPLATE - The ad listing itself can be modified as well, using the same variables - as the ad name template above. - - This is the default template: - - Title: {{.Title}} - Price: {{.Price}} - Id: {{.ID}} - Category: {{.Category}} - Condition: {{.Condition}} - Type: {{.Type}} - Created: {{.Created}} - Expire: {{.Expire}} - - {{.Text}} - - The config parameter to modify is "template". See example.conf in the - source repository. Please take care, since this is a multiline string. - This is how it shall look if you modify it: - - template=""" - Title: {{.Title}} - - {{.Text}} - """ - - That is, the content between the two """ chars is the template. 
- -SETUP - To setup the tool, you need to lookup your userid on kleinanzeigen.de. - Go to your ad overview page while NOT being logged in: - - https://www.kleinanzeigen.de/s-bestandsliste.html?userId=XXXXXX - - The XXXXX part is your userid. - - Put it into the configfile as outlined above. Also specify an output - directory. Then just execute "kleingebaeck". - - You can use the -v option to get verbose output or -d to enable - debugging. - -ENVIRONMENT VARIABLES - The following environment variables are considered: - - KLEINGEBAECK_USER - KLEINGEBAECK_DEBUG - KLEINGEBAECK_VERBOSE - KLEINGEBAECK_OUTDIR - KLEINGEBAECK_LIMIT - KLEINGEBAECK_CONFIG - KLEINGEBAECK_IGNOREERRORS - - Please note, that they take precedence over config file, but commandline - flags take precedence over env! - -BUGS - In order to report a bug, unexpected behavior, feature requests or to - submit a patch, please open an issue on github: - . - - Please repeat the failing command with debugging enabled "-d" and - include the output in the issue. - -LIMITATIONS - The "kleingebaeck" doesn't currently check if it has downloaded a file - already, so it downloads everything again every time you execute it. Be - aware of it. This will change in the future. - - Also there's currently no parallelization implemented. This will change - in the future. - -LICENSE - Copyright 2023-2025 Thomas von Dein - - This program is free software: you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by the - Free Software Foundation, either version 3 of the License, or (at your - option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General - Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program. If not, see . 
- -Author - T.v.Dein - -` diff --git a/kleingebaeck.pod b/kleingebaeck.pod deleted file mode 100644 index 6426a6e..0000000 --- a/kleingebaeck.pod +++ /dev/null @@ -1,247 +0,0 @@ -=head1 NAME - -kleingebaeck - kleinanzeigen.de backup tool - -=head1 SYNOPSIS - - Usage: kleingebaeck [-dvVhmoc] [,...] - Options: - -u --user Backup ads from user with uid . - -d --debug Enable debug output. - -v --verbose Enable verbose output. - -o --outdir Set output dir (default: current directory) - -l --limit Limit the ads to download to , default: load all. - -c --config Use config file (default: ~/.kleingebaeck). - --ignoreerrors Ignore HTTP errors, may lead to incomplete ad backup. - -f --force Overwrite images and ads even if they already exist. - -m --manual Show manual. - -h --help Show usage. - -V --version Show program version. - -=head1 DESCRIPTION - -This tool can be used to back up ads on the German ad page L. - -It downloads all (or only the specified ones) ads of one user into a -directory, each ad into its own subdirectory. The backup will contain -a textfile B<Adlisting.txt> which contains the ad contents such as -title, body, price etc. All images will be downloaded as well. - -=head1 CONFIGURATION - -You can create a config file to save typing. By default -C<~/.kleingebaeck> is being used but you can specify one with C<-c> as -well. We use TOML as our configuration language. See -L. - -Format is pretty simple: - - user = 1010101 - loglevel = verbose - outdir = "test" - useragent = "Mozilla/5.0" - template = """ - Title: {{.Title}} - Price: {{.Price}} - Id: {{.ID}} - Category: {{.Category}} - Condition: {{.Condition}} - Created: {{.Created}} - - {{.Text}} - """ - -Be careful if you want to change the template. The variable is a -multiline string surrounded by three double quotes. You can leave out -certain fields and use any formatting you like. Refer to -L for details on how to write a -template. Also read the TEMPLATES section below.
- -If you're on windows and want to customize the output directory, put -it into single quotes to avoid the backslashes interpreted as escape -chars like this: - - outdir = 'C:\Data\Ads' - -=head1 TEMPLATES - -Various parts of the configuration can be modified using templates: -the output directory, the ad directory and the ad listing itself. - -=head2 OUTPUT DIR TEMPLATE - -The config varialbe C or the command line parameter C<-o> take a -template which may contain: - -=over - -=item C<{{.Year}}> - -=item C<{{.Month}}> - -=item C<{{.Day}}> - -=back - -That way you can create a new output directory for every backup -run. For example: - - outdir = "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}" - -Or using the command line flag: - - -o "/home/backups/ads-{{.Year}}-{{.Month}}-{{.Day}}" - -The default value is C<.> - the current directory. - -=head2 AD DIRECTORY TEMPLATE - -The ad directory name can be modified using the following ad values: - -=over - -=item {{.Price}} - -=item {{.ID}} - -=item {{.Category}} - -=item {{.Condition}} - -=item {{.Created}} - -=item {{.Slug}} - -=item {{.Text}} - -=back - -It can only be configured in the config file. By default only -C<{{.Slug}}> is being used, this is the title of the ad in url format. - -=head2 AD NAME TEMPLATE - -The name of the directory per ad can be tuned as well: - -=over - -=item C<{{.Year}}> - -=item C<{{.Month}}> - -=item C<{{.Day}}> - -=item C<{{.Slug}}> - -=item C<{{.Category}}> - -=item C<{{.ID}}> - - -=back - -=head2 AD TEMPLATE - -The ad listing itself can be modified as well, using the same -variables as the ad name template above. - -This is the default template: - - Title: {{.Title}} - Price: {{.Price}} - Id: {{.ID}} - Category: {{.Category}} - Condition: {{.Condition}} - Type: {{.Type}} - Created: {{.Created}} - Expire: {{.Expire}} - - {{.Text}} - -The config parameter to modify is C