mirror of
https://codeberg.org/scip/epuppy.git
synced 2025-12-17 04:20:59 +01:00
56 lines
1.2 KiB
Go
56 lines
1.2 KiB
Go
package epub
|
|
|
|
import (
	"encoding/xml"
	"errors"
	"fmt"
	"regexp"
	"strings"
)
|
|
|
|
var (
|
|
cleantitle = regexp.MustCompile(`(?s)<head>.*</head>`)
|
|
cleanmarkup = regexp.MustCompile(`<[^<>]+>`)
|
|
cleanentities = regexp.MustCompile(`&.+;`)
|
|
cleancomments = regexp.MustCompile(`/*.*/`)
|
|
cleanspace = regexp.MustCompile(`^\s*`)
|
|
cleanh1 = regexp.MustCompile(`<h[1-6].*</h[1-6]>`)
|
|
)
|
|
|
|
// Content nav-point content
|
|
type Content struct {
|
|
Src string `xml:"src,attr" json:"src"`
|
|
Empty bool
|
|
Body string
|
|
Title string
|
|
XML []byte
|
|
}
|
|
|
|
func (c *Content) String(content []byte) error {
|
|
title := Title{}
|
|
|
|
err := xml.Unmarshal(content, &title)
|
|
if err != nil {
|
|
if !strings.HasPrefix(err.Error(), "XML syntax error") {
|
|
return fmt.Errorf("XML parser error %w", err)
|
|
}
|
|
}
|
|
|
|
c.Title = strings.TrimSpace(title.Content)
|
|
|
|
txt := cleantitle.ReplaceAllString(string(content), "")
|
|
txt = cleanh1.ReplaceAllString(txt, "")
|
|
txt = cleanmarkup.ReplaceAllString(txt, "")
|
|
txt = cleanentities.ReplaceAllString(txt, " ")
|
|
txt = cleancomments.ReplaceAllString(txt, "")
|
|
txt = strings.TrimSpace(txt)
|
|
|
|
c.Body = cleanspace.ReplaceAllString(txt, "")
|
|
c.XML = content
|
|
|
|
if len(c.Body) == 0 {
|
|
c.Empty = true
|
|
}
|
|
|
|
return nil
|
|
}
|