mirror of
https://codeberg.org/scip/kleingebaeck.git
synced 2025-12-16 20:11:01 +01:00
Generic attributes (#120)
* fix #117: use a generic attribute parser, still support fixed attrs
This commit is contained in:
80
ad.go
80
ad.go
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -31,10 +32,12 @@ type Ad struct {
|
||||
Title string `goquery:"h1"`
|
||||
Slug string
|
||||
ID string
|
||||
Details []string `goquery:".addetailslist--detail--value,text"`
|
||||
Condition string // post processed from details
|
||||
Type string // post processed from details
|
||||
Color string // post processed from details
|
||||
Details string `goquery:".addetailslist--detail,text"`
|
||||
Attributes map[string]string // processed afterwards
|
||||
Condition string // post processed from details for backward compatibility
|
||||
Type string // post processed from details for backward compatibility
|
||||
Color string // post processed from details for backward compatibility
|
||||
Material string // post processed from details for backward compatibility
|
||||
Category string
|
||||
CategoryTree []string `goquery:".breadcrump-link,text"`
|
||||
Price string `goquery:"h2#viewad-price"`
|
||||
@@ -53,19 +56,11 @@ func (ad *Ad) LogValue() slog.Value {
|
||||
slog.Int("imagecount", len(ad.Images)),
|
||||
slog.Int("bodysize", len(ad.Text)),
|
||||
slog.String("categorytree", strings.Join(ad.CategoryTree, "+")),
|
||||
slog.String("condition", ad.Condition),
|
||||
slog.String("created", ad.Created),
|
||||
slog.String("expire", ad.Expire),
|
||||
)
|
||||
}
|
||||
|
||||
// CONDITIONS is the static set of conditions available, used for post
// processing details.
var CONDITIONS = []string{"Neu", "Gut", "Sehr Gut", "In Ordnung"}

// COLORS is the static set of colors available, used for post
// processing details. The entries must be spelled exactly like the
// values found in the ad details, otherwise a comparison against them
// never matches.
var COLORS = []string{"Beige", "Blau", "Braun", "Bunt", "Burgunderrot",
	"Creme", "Gelb", "Gold", "Grau", "Grün", "Holz", "Khaki", "Lavendel",
	"Lila", "Orange", "Pink", "Print", "Rot", "Schwarz", "Silber",
	"Transparent", "Türkis", "Weiß", "Sonstige"}
|
||||
|
||||
// check for completeness. I declared these fields to be mandatory
|
||||
// (though I really don't know if they really are). I consider images
|
||||
// and meta optional. So, if either of the checked fields here is
|
||||
@@ -90,3 +85,64 @@ func (ad *Ad) CalculateExpire() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Decode attributes like color or condition. See
|
||||
https://github.com/TLINDEN/kleingebaeck/issues/117
|
||||
for more details. In short: the HTML delivered by
|
||||
kleinanzeigen.de has no css attribute for the keys
|
||||
so we cannot extract key=>value mappings of the
|
||||
ad details but have to parse them manually.
|
||||
|
||||
The ad.Details member contains this after goq run:
|
||||
|
||||
Art
|
||||
|
||||
Weitere Kinderzimmermöbel
|
||||
|
||||
Farbe
|
||||
Holz
|
||||
|
||||
Zustand
|
||||
In Ordnung
|
||||
|
||||
We parse this into ad.Attributes and fill in some
|
||||
static members for backward compatibility reasons.
|
||||
*/
|
||||
func (ad *Ad) DecodeAttributes() {
|
||||
rd := strings.NewReader(ad.Details)
|
||||
scanner := bufio.NewScanner(rd)
|
||||
|
||||
isattr := true
|
||||
attr := ""
|
||||
attrmap := map[string]string{}
|
||||
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if isattr {
|
||||
attr = line
|
||||
} else {
|
||||
attrmap[attr] = line
|
||||
}
|
||||
|
||||
isattr = !isattr
|
||||
}
|
||||
|
||||
ad.Attributes = attrmap
|
||||
|
||||
switch {
|
||||
case Exists(ad.Attributes, "Zustand"):
|
||||
ad.Condition = ad.Attributes["Zustand"]
|
||||
case Exists(ad.Attributes, "Farbe"):
|
||||
ad.Color = ad.Attributes["Farbe"]
|
||||
case Exists(ad.Attributes, "Art"):
|
||||
ad.Type = ad.Attributes["Type"]
|
||||
case Exists(ad.Attributes, "Material"):
|
||||
ad.Material = ad.Attributes["Material"]
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user