mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-17 04:21:00 +01:00
attempt to fix #117. code works, tests are still failing
This commit is contained in:
14
ad.go
14
ad.go
@@ -31,10 +31,8 @@ type Ad struct {
|
|||||||
Title string `goquery:"h1"`
|
Title string `goquery:"h1"`
|
||||||
Slug string
|
Slug string
|
||||||
ID string
|
ID string
|
||||||
Details []string `goquery:".addetailslist--detail--value,text"`
|
Details string `goquery:".addetailslist--detail,text"`
|
||||||
Condition string // post processed from details
|
Attributes map[string]string // processed afterwards
|
||||||
Type string // post processed from details
|
|
||||||
Color string // post processed from details
|
|
||||||
Category string
|
Category string
|
||||||
CategoryTree []string `goquery:".breadcrump-link,text"`
|
CategoryTree []string `goquery:".breadcrump-link,text"`
|
||||||
Price string `goquery:"h2#viewad-price"`
|
Price string `goquery:"h2#viewad-price"`
|
||||||
@@ -53,19 +51,11 @@ func (ad *Ad) LogValue() slog.Value {
|
|||||||
slog.Int("imagecount", len(ad.Images)),
|
slog.Int("imagecount", len(ad.Images)),
|
||||||
slog.Int("bodysize", len(ad.Text)),
|
slog.Int("bodysize", len(ad.Text)),
|
||||||
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
|
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
|
||||||
slog.String("condition", ad.Condition),
|
|
||||||
slog.String("created", ad.Created),
|
slog.String("created", ad.Created),
|
||||||
slog.String("expire", ad.Expire),
|
slog.String("expire", ad.Expire),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
// static set of conditions available, used for post processing details
|
|
||||||
var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"}
|
|
||||||
var COLORS = []string{"Beige", "Blau", "Braun", "Bunt", "Burgunderrot",
|
|
||||||
"Creme", "Gelb", "Gold", "Grau", "Grün", "Holz", "Khaki", "Lavelndel",
|
|
||||||
"Lila", "Orange", "Pink", "Print", "Rot", "Schwarz", "Silber",
|
|
||||||
"Transparent", "Türkis", "Weiß", "Sonstige"}
|
|
||||||
|
|
||||||
// check for completeness. I declared these fields to be mandatory
|
// check for completeness. I declared these fields to be mandatory
|
||||||
// (though I really don't know if they really are). I consider images
|
// (though I really don't know if they really are). I consider images
|
||||||
// and meta optional. So, if either of the checked fields here is
|
// and meta optional. So, if either of the checked fields here is
|
||||||
|
|||||||
@@ -40,11 +40,11 @@ const (
|
|||||||
Defaultdir string = "."
|
Defaultdir string = "."
|
||||||
|
|
||||||
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
|
DefaultTemplate string = "Title: {{.Title}}\nPrice: {{.Price}}\nId: {{.ID}}\n" +
|
||||||
"Category: {{.Category}}\nCondition: {{.Condition}}\nType: {{.Type}}\nColor: {{.Color}}\n" +
|
"Category: {{.Category}}\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\n{{ end }}" +
|
||||||
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
|
"Created: {{.Created}}\nExpire: {{.Expire}}\n\n{{.Text}}\n"
|
||||||
|
|
||||||
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
|
DefaultTemplateWin string = "Title: {{.Title}}\r\nPrice: {{.Price}}\r\nId: {{.ID}}\r\n" +
|
||||||
"Category: {{.Category}}\r\nCondition: {{.Condition}}\r\nType: {{.Type}}\r\nColor: {{.Color}}\r\n" +
|
"Category: {{.Category}}\r\n{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}\r\n{{ end }}\r\n" +
|
||||||
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
|
"Created: {{.Created}}\r\nExpires: {{.Expire}}\r\n\r\n{{.Text}}\r\n"
|
||||||
|
|
||||||
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
DefaultUserAgent string = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) " +
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ outdir = "test"
|
|||||||
#Price: {{.Price}}
|
#Price: {{.Price}}
|
||||||
#Id: {{.Id}}
|
#Id: {{.Id}}
|
||||||
#Category: {{.Category}}
|
#Category: {{.Category}}
|
||||||
#Condition: {{.Condition}}
|
#Condition: {{.Attributes.Condition}}
|
||||||
#Type: {{.Type}}
|
#Type: {{.Type}}
|
||||||
#Created: {{.Created}}
|
#Created: {{.Created}}
|
||||||
|
|
||||||
|
|||||||
@@ -45,9 +45,8 @@ Format is pretty simple:
|
|||||||
Price: {{.Price}}
|
Price: {{.Price}}
|
||||||
Id: {{.ID}}
|
Id: {{.ID}}
|
||||||
Category: {{.Category}}
|
Category: {{.Category}}
|
||||||
Condition: {{.Condition}}
|
{{ range $key,$val := .Attributes }}{{ $key }}: {{ $val }}
|
||||||
Type: {{.Type}}
|
{{ end }}
|
||||||
Color: {{.Color}}
|
|
||||||
Created: {{.Created}}
|
Created: {{.Created}}
|
||||||
|
|
||||||
{{.Text}}
|
{{.Text}}
|
||||||
|
|||||||
42
scrape.go
42
scrape.go
@@ -18,11 +18,11 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"slices"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -125,16 +125,7 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
|
|||||||
return fmt.Errorf("could not extract ad data from page, got empty struct")
|
return fmt.Errorf("could not extract ad data from page, got empty struct")
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, detail := range advertisement.Details {
|
advertisement.Attributes = DecodeAttributes(advertisement.Details)
|
||||||
switch {
|
|
||||||
case slices.Contains(CONDITIONS, detail):
|
|
||||||
advertisement.Condition = detail
|
|
||||||
case slices.Contains(COLORS, detail):
|
|
||||||
advertisement.Color = detail
|
|
||||||
default:
|
|
||||||
advertisement.Type = detail
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
advertisement.CalculateExpire()
|
advertisement.CalculateExpire()
|
||||||
|
|
||||||
@@ -167,6 +158,35 @@ func ScrapeAd(fetch *Fetcher, uri string) error {
|
|||||||
return ScrapeImages(fetch, advertisement, addir)
|
return ScrapeImages(fetch, advertisement, addir)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DecodeAttributes parses the text of an ad's detail list into a
// name/value map.
//
// The input is processed line by line. Every line is trimmed of
// surrounding whitespace and blank lines are skipped; the remaining
// lines are read as alternating attribute name and attribute value.
// A trailing name that has no value line is dropped, and if a name
// occurs more than once the last value wins. The returned map is
// never nil.
func DecodeAttributes(attributes string) map[string]string {
	scanner := bufio.NewScanner(strings.NewReader(attributes))

	// By default a Scanner gives up on lines longer than
	// bufio.MaxScanTokenSize, which would silently lose every
	// attribute from that line onwards. Allow a single line to be as
	// large as the whole input so the scan cannot fail that way.
	scanner.Buffer(nil, len(attributes)+1)

	attrmap := map[string]string{}
	name := ""
	expectName := true

	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if line == "" {
			continue
		}

		if expectName {
			name = line
		} else {
			attrmap[name] = line
		}

		// Non-blank lines strictly alternate between name and value.
		expectName = !expectName
	}

	return attrmap
}
|
||||||
|
|
||||||
func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error {
|
func ScrapeImages(fetch *Fetcher, advertisement *Ad, addir string) error {
|
||||||
// fetch images
|
// fetch images
|
||||||
img := 1
|
img := 1
|
||||||
|
|||||||
Reference in New Issue
Block a user