Parserfixes std (#11)

* clean svg and cdata
* refactored ebook preparation, separated from calling the pager
* added better unit tests
* add free ebooks for testing
This commit is contained in:
T.v.Dein
2025-10-21 21:57:12 +02:00
committed by GitHub
parent f524083210
commit 238972f11f
22 changed files with 155 additions and 27 deletions

View File

@@ -10,7 +10,8 @@ import (
// Package-level regular expressions, compiled once at init time.
//
// NOTE(review): this block is shown as a diff hunk without +/- markers, so
// `newlines` appears twice. Judging from the hunk header (-10,7 +10,8) the
// first `newlines` line is presumably the removed version and the second
// its replacement, with `cleansvg` newly added — confirm against the
// repository.
var (
// cleanentitles matches named character references made of lower-case
// ASCII letters only (e.g. `&nbsp;`). Numeric references such as `&#160;`
// and names containing upper-case letters or digits are not matched.
cleanentitles = regexp.MustCompile(`&[a-z]+;`)
// empty matches input that consists solely of the characters in the
// class (whitespace); (?s) is set although the pattern contains no `.`.
empty = regexp.MustCompile(`(?s)^[\s ]*$`)
// newlines matches one or more consecutive CR/LF characters.
newlines = regexp.MustCompile(`[\r\n]+`)
// newlines matches one or more consecutive whitespace characters.
// NOTE(review): `\s` already includes `\r` and `\n`, so listing them in
// the class is redundant; the pattern is equivalent to `\s+`.
newlines = regexp.MustCompile(`[\r\n\s]+`)
// cleansvg matches an `<svg ... </svg>` span or a `<![CDATA[ ... ]]>`
// section.
// NOTE(review): without the (?s) flag `.` does not match a newline, so a
// span that extends over several lines is not matched. `.+` is also
// greedy: on a single line it runs from the first opener to the last
// closer, taking everything in between with it.
cleansvg = regexp.MustCompile(`(<svg.+</svg>|<!\[CDATA\[.+\]\]>)`)
// cleanmarkup matches a single tag-like span: `<`, one or more characters
// that are neither `<` nor `>`, then `>`.
cleanmarkup = regexp.MustCompile(`<[^<>]+>`)
)
@@ -27,7 +28,8 @@ type Content struct {
func (c *Content) String(content []byte) error {
doc, err := xmlquery.Parse(
strings.NewReader(
cleanentitles.ReplaceAllString(string(content), " ")))
cleansvg.ReplaceAllString(
cleanentitles.ReplaceAllString(string(content), " "), "")))
if err != nil {
return err
}