fix XML parsing (#2)

- Use antchfx/xmlquery for easier XML parsing. No more regexp wrangling, and the result is much more reliable across a variety of ebooks. Much good.
- Fix chapter selection: look for `<?xml[...]`, which is much more reliable
- Add option `-x` to dump the XML ebook source for debugging
This commit is contained in:
T.v.Dein
2025-10-16 18:57:05 +02:00
committed by GitHub
parent 90d30cb3e1
commit b50c6acff0
13 changed files with 143 additions and 71 deletions

View File

@@ -16,7 +16,7 @@ import (
)
const (
Version string = `v0.0.2`
Version string = `v0.0.3`
Usage string = `Usage epuppy [options] <epub file>
Options:
@@ -25,6 +25,7 @@ Options:
-n --line-numbers add line numbers
-c --config <file> use config <file>
-t --txt dump readable content to STDOUT
-x --xml dump source xml to STDOUT
-d --debug enable debugging
-h --help show help message
-v --version show program version`
@@ -37,6 +38,7 @@ type Config struct {
Darkmode bool `koanf:"dark"` // -D
LineNumbers bool `koanf:"line-numbers"` // -n
Dump bool `koanf:"txt"` // -t
XML bool `koanf:"xml"` // -x
Config string `koanf:"config"` // -c
ColorDark ColorSetting `koanf:"colordark"` // comes from config file only
ColorLight ColorSetting `koanf:"colorlight"` // comes from config file only
@@ -66,6 +68,7 @@ func InitConfig(output io.Writer) (*Config, error) {
flagset.BoolP("store-progress", "s", false, "store reading progress")
flagset.BoolP("line-numbers", "n", false, "add line numbers")
flagset.BoolP("txt", "t", false, "dump readable content to STDOUT")
flagset.BoolP("xml", "x", false, "dump xml to STDOUT")
flagset.StringP("config", "c", "", "read config from file")
if err := flagset.Parse(os.Args[1:]); err != nil {