fix XML parsing (#2)

- Use antchfx/xmlquery for easier XML parsing. No more regexp wrangling, and the results are much more reliable across a variety of ebooks.
- Fix chapter selection: look for `<?xml[...]`, which is much more reliable.
- Add option `-x` to dump the XML ebook source for debugging.
This commit is contained in:
T.v.Dein
2025-10-16 18:57:05 +02:00
committed by GitHub
parent 90d30cb3e1
commit b50c6acff0
13 changed files with 143 additions and 71 deletions

View File

@@ -2,7 +2,6 @@ package cmd
import (
"fmt"
"log"
"os"
"path/filepath"
"strings"
@@ -33,17 +32,11 @@ func ViewText(conf *Config) (int, error) {
}
func ViewEpub(conf *Config) (int, error) {
book, err := epub.Open(conf.Document)
book, err := epub.Open(conf.Document, conf.XML)
if err != nil {
return 0, err
}
defer func() {
if err := book.Close(); err != nil {
log.Fatal(err)
}
}()
buf := strings.Builder{}
head := strings.Builder{}
@@ -59,9 +52,20 @@ func ViewEpub(conf *Config) (int, error) {
head.WriteString(" ")
}
// FIXME: since the switch to book.Files() in epub.Open() this
// returns invalid chapter numbering
fetchByContent(conf, &buf, book)
if conf.Dump {
return fmt.Println(buf.String())
}
return Pager(conf, head.String(), buf.String())
}
// FIXME: since the switch to book.Files() in epub.Open() this
// returns invalid chapter numbering
func fetchByContent(conf *Config, buf *strings.Builder, book *epub.Book) bool {
chapter := 1
var gotbody bool
for _, content := range book.Content {
if len(content.Body) > 0 {
@@ -81,13 +85,10 @@ func ViewEpub(conf *Config) (int, error) {
buf.WriteString("\r\n\r\n\r\n\r\n")
chapter++
gotbody = true
}
}
if conf.Dump {
return fmt.Println(buf.String())
}
return Pager(conf, head.String(), buf.String())
return gotbody
}