fix XML parsing (#2)

- Use antchfx/xmlquery for easier XML parsing. No more regexp wrangling, and the result is much more reliable across a variety of ebooks. Much good.
- Fix chapter selection: look for `<?xml[...]`, which is much more reliable
- Add option `-x` to dump the XML ebook source for debugging
This commit is contained in:
T.v.Dein
2025-10-16 18:57:05 +02:00
committed by GitHub
parent 90d30cb3e1
commit b50c6acff0
13 changed files with 143 additions and 71 deletions

View File

@@ -16,7 +16,7 @@ import (
)
const (
Version string = `v0.0.2`
Version string = `v0.0.3`
Usage string = `Usage epuppy [options] <epub file>
Options:
@@ -25,6 +25,7 @@ Options:
-n --line-numbers add line numbers
-c --config <file> use config <file>
-t --txt dump readable content to STDOUT
-x --xml dump source xml to STDOUT
-d --debug enable debugging
-h --help show help message
-v --version show program version`
@@ -37,6 +38,7 @@ type Config struct {
Darkmode bool `koanf:"dark"` // -D
LineNumbers bool `koanf:"line-numbers"` // -n
Dump bool `koanf:"txt"` // -t
XML bool `koanf:"xml"` // -x
Config string `koanf:"config"` // -c
ColorDark ColorSetting `koanf:"colordark"` // comes from config file only
ColorLight ColorSetting `koanf:"colorlight"` // comes from config file only
@@ -66,6 +68,7 @@ func InitConfig(output io.Writer) (*Config, error) {
flagset.BoolP("store-progress", "s", false, "store reading progress")
flagset.BoolP("line-numbers", "n", false, "add line numbers")
flagset.BoolP("txt", "t", false, "dump readable content to STDOUT")
flagset.BoolP("xml", "x", false, "dump xml to STDOUT")
flagset.StringP("config", "c", "", "read config from file")
if err := flagset.Parse(os.Args[1:]); err != nil {

View File

@@ -2,7 +2,6 @@ package cmd
import (
"fmt"
"log"
"os"
"path/filepath"
"strings"
@@ -33,17 +32,11 @@ func ViewText(conf *Config) (int, error) {
}
func ViewEpub(conf *Config) (int, error) {
book, err := epub.Open(conf.Document)
book, err := epub.Open(conf.Document, conf.XML)
if err != nil {
return 0, err
}
defer func() {
if err := book.Close(); err != nil {
log.Fatal(err)
}
}()
buf := strings.Builder{}
head := strings.Builder{}
@@ -59,9 +52,20 @@ func ViewEpub(conf *Config) (int, error) {
head.WriteString(" ")
}
// FIXME: since the switch to book.Files() in epub.Open() this
// returns invalid chapter numbering
fetchByContent(conf, &buf, book)
if conf.Dump {
return fmt.Println(buf.String())
}
return Pager(conf, head.String(), buf.String())
}
// FIXME: since the switch to book.Files() in epub.Open() this
// returns invalid chapter numbering
func fetchByContent(conf *Config, buf *strings.Builder, book *epub.Book) bool {
chapter := 1
var gotbody bool
for _, content := range book.Content {
if len(content.Body) > 0 {
@@ -81,13 +85,10 @@ func ViewEpub(conf *Config) (int, error) {
buf.WriteString("\r\n\r\n\r\n\r\n")
chapter++
gotbody = true
}
}
if conf.Dump {
return fmt.Println(buf.String())
}
return Pager(conf, head.String(), buf.String())
return gotbody
}