5 Commits

Author SHA1 Message Date
T.v.Dein
f524083210 Fix more parser failures (#10)
* stabilize section parsing, now seems to read all ebooks I tested with
* refactored Open() into smaller funcs
* bump version
2025-10-20 18:54:49 +00:00
T.v.Dein
cb671b7401 Fix crash and add support for content in <div> (#9)
* fix #8: better regex to remove html entities
* add opf and ncx debug
* make epub content retrieval more flexible
* fix epub content retrieval: also support html files with <div>
2025-10-19 22:30:13 +02:00
T.v.Dein
2c6e81a2c8 add cover image support (#7) 2025-10-19 20:39:04 +02:00
c2abc4ba4d gofmt 2025-10-17 14:22:03 +02:00
b9b0ad8603 add badges 2025-10-17 14:19:16 +02:00
11 changed files with 307 additions and 70 deletions

View File

@@ -1,3 +1,8 @@
[![Actions](https://github.com/tlinden/epuppy/actions/workflows/ci.yaml/badge.svg)](https://github.com/tlinden/epuppy/actions)
[![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://github.com/tlinden/epuppy/blob/master/LICENSE)
[![Go Report Card](https://goreportcard.com/badge/github.com/tlinden/epuppy)](https://goreportcard.com/report/github.com/tlinden/epuppy)
# epuppy - terminal epub reader # epuppy - terminal epub reader
This is a little TUI epub ebook reader. This is a work in progress and This is a little TUI epub ebook reader. This is a work in progress and

View File

@@ -32,7 +32,7 @@ import (
) )
const ( const (
Version string = `v0.0.4` Version string = `v0.0.6`
Usage string = `This is epuppy, a terminal ui ebook viewer. Usage string = `This is epuppy, a terminal ui ebook viewer.
Usage: epuppy [options] <epub file> Usage: epuppy [options] <epub file>
@@ -42,6 +42,7 @@ Options:
-s --store-progress remember reading position -s --store-progress remember reading position
-n --line-numbers add line numbers -n --line-numbers add line numbers
-c --config <file> use config <file> -c --config <file> use config <file>
-i --cover-image display cover image
-t --txt dump readable content to STDOUT -t --txt dump readable content to STDOUT
-x --xml dump source xml to STDOUT -x --xml dump source xml to STDOUT
-N --no-color disable colors (or use $NO_COLOR env var) -N --no-color disable colors (or use $NO_COLOR env var)
@@ -63,6 +64,7 @@ type Config struct {
ColorDark ColorSetting `koanf:"colordark"` // comes from config file only ColorDark ColorSetting `koanf:"colordark"` // comes from config file only
ColorLight ColorSetting `koanf:"colorlight"` // comes from config file only ColorLight ColorSetting `koanf:"colorlight"` // comes from config file only
ShowHelp bool `koanf:"help"` ShowHelp bool `koanf:"help"`
ShowCover bool `koanf:"cover-image"` // -i
Colors Colors // generated from user config file or internal defaults, respects dark mode Colors Colors // generated from user config file or internal defaults, respects dark mode
Document string Document string
InitialProgress int // lines InitialProgress int // lines
@@ -89,6 +91,7 @@ func InitConfig(output io.Writer) (*Config, error) {
flagset.BoolP("txt", "t", false, "dump readable content to STDOUT") flagset.BoolP("txt", "t", false, "dump readable content to STDOUT")
flagset.BoolP("xml", "x", false, "dump xml to STDOUT") flagset.BoolP("xml", "x", false, "dump xml to STDOUT")
flagset.BoolP("no-color", "N", false, "disable colors") flagset.BoolP("no-color", "N", false, "disable colors")
flagset.BoolP("cover-image", "i", false, "show cover image")
flagset.BoolP("help", "h", false, "show help") flagset.BoolP("help", "h", false, "show help")
flagset.StringP("config", "c", "", "read config from file") flagset.StringP("config", "c", "", "read config from file")

View File

@@ -21,10 +21,13 @@ package cmd
// https://github.com/charmbracelet/bubbletea/tree/main/examples/pager // https://github.com/charmbracelet/bubbletea/tree/main/examples/pager
import ( import (
"bytes"
"fmt" "fmt"
"image"
"os" "os"
"strings" "strings"
"github.com/blacktop/go-termimg"
"github.com/charmbracelet/bubbles/help" "github.com/charmbracelet/bubbles/help"
"github.com/charmbracelet/bubbles/key" "github.com/charmbracelet/bubbles/key"
"github.com/charmbracelet/bubbles/viewport" "github.com/charmbracelet/bubbles/viewport"
@@ -132,6 +135,7 @@ type Doc struct {
hideUi bool hideUi bool
meta *Meta meta *Meta
config *Config config *Config
Cover *termimg.ImageWidget
keys keyMap keys keyMap
help help.Model help help.Model
@@ -220,7 +224,12 @@ func (m *Doc) Rewrap() {
// wrapping has changed, update viewport and line count // wrapping has changed, update viewport and line count
if content != "" { if content != "" {
m.viewport.SetContent(content) if m.Cover != nil {
cover, _ := m.Cover.Render()
m.viewport.SetContent(cover + "\n\n" + content)
} else {
m.viewport.SetContent(content)
}
m.meta.lines = len(strings.Split(content, "\n")) m.meta.lines = len(strings.Split(content, "\n"))
} }
@@ -270,7 +279,15 @@ func max(a, b int) int {
return b return b
} }
func Pager(conf *Config, title, message string) (int, error) { type Ebook struct {
Config *Config
Title string
Body string
Cover []byte
MediaType string
}
func Pager(book *Ebook) (int, error) {
width := 80 width := 80
scrollto := 0 scrollto := 0
@@ -281,32 +298,44 @@ func Pager(conf *Config, title, message string) (int, error) {
} }
} }
if conf.StoreProgress { if book.Config.StoreProgress {
scrollto = conf.InitialProgress scrollto = book.Config.InitialProgress
} }
if conf.LineNumbers { if book.Config.LineNumbers {
catn := "" catn := ""
for idx, line := range strings.Split(message, "\n") { for idx, line := range strings.Split(book.Body, "\n") {
catn += fmt.Sprintf("%4d: %s\n", idx, line) catn += fmt.Sprintf("%4d: %s\n", idx, line)
} }
message = catn book.Body = catn
} }
meta := Meta{ meta := Meta{
initialprogress: scrollto, initialprogress: scrollto,
lines: len(strings.Split(message, "\n")), lines: len(strings.Split(book.Body, "\n")),
}
doc := Doc{
content: book.Body,
title: book.Title,
initialwidth: width,
meta: &meta,
config: book.Config,
keys: keys,
}
if book.MediaType != "" && book.Config.ShowCover {
img, _, err := image.Decode(bytes.NewReader(book.Cover))
if err != nil {
return 0, fmt.Errorf("failed to load cover image: %w", err)
}
doc.Cover = termimg.NewImageWidgetFromImage(img)
doc.Cover.SetSize(width, width).SetProtocol(termimg.Auto)
} }
p := tea.NewProgram( p := tea.NewProgram(
Doc{ doc,
content: message,
title: title,
initialwidth: width,
meta: &meta,
config: conf,
keys: keys,
},
tea.WithAltScreen(), // use the full size of the terminal in its "alternate screen buffer" tea.WithAltScreen(), // use the full size of the terminal in its "alternate screen buffer"
tea.WithMouseCellMotion(), // turn on mouse support so we can track the mouse wheel tea.WithMouseCellMotion(), // turn on mouse support so we can track the mouse wheel
) )
@@ -315,7 +344,7 @@ func Pager(conf *Config, title, message string) (int, error) {
return 0, fmt.Errorf("could not run pager: %w", err) return 0, fmt.Errorf("could not run pager: %w", err)
} }
if conf.Debug { if book.Config.Debug {
fmt.Printf("scrollto: %d, last: %d, diff: %d\n", fmt.Printf("scrollto: %d, last: %d, diff: %d\n",
scrollto, meta.currentline, scrollto-meta.currentline) scrollto, meta.currentline, scrollto-meta.currentline)
} }

View File

@@ -22,6 +22,7 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/alecthomas/repr"
"github.com/tlinden/epuppy/pkg/epub" "github.com/tlinden/epuppy/pkg/epub"
) )
@@ -44,7 +45,11 @@ func ViewText(conf *Config) (int, error) {
return fmt.Println(string(data)) return fmt.Println(string(data))
} }
return Pager(conf, conf.Document, string(data)) return Pager(&Ebook{
Config: conf,
Title: conf.Document,
Body: string(data),
})
} }
func ViewEpub(conf *Config) (int, error) { func ViewEpub(conf *Config) (int, error) {
@@ -53,6 +58,14 @@ func ViewEpub(conf *Config) (int, error) {
return 0, err return 0, err
} }
if conf.Debug {
repr.Println("book.Files()")
repr.Println(book.Files())
repr.Println(book.Ncx)
repr.Println(book.Sections)
repr.Println(book.Opf.Manifest)
}
buf := strings.Builder{} buf := strings.Builder{}
head := strings.Builder{} head := strings.Builder{}
@@ -74,11 +87,19 @@ func ViewEpub(conf *Config) (int, error) {
return fmt.Println(buf.String()) return fmt.Println(buf.String())
} }
return Pager(conf, head.String(), buf.String()) if conf.Debug || conf.XML {
return 0, nil
}
return Pager(&Ebook{
Config: conf,
Title: head.String(),
Body: buf.String(),
Cover: book.CoverImage,
MediaType: book.CoverMediaType,
})
} }
// FIXME: since the switch to book.Files() in epub.Open() this
// returns invalid chapter numbering
func fetchByContent(conf *Config, buf *strings.Builder, book *epub.Book) bool { func fetchByContent(conf *Config, buf *strings.Builder, book *epub.Book) bool {
chapter := 1 chapter := 1
var gotbody bool var gotbody bool

7
go.mod
View File

@@ -41,14 +41,21 @@ require (
require ( require (
github.com/antchfx/xpath v1.3.5 // indirect github.com/antchfx/xpath v1.3.5 // indirect
github.com/blacktop/go-termimg v0.1.20 // indirect
github.com/charmbracelet/x/mosaic v0.0.0-20250702191427-5bdfc8f2e4ff // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/knadh/koanf/maps v0.1.2 // indirect github.com/knadh/koanf/maps v0.1.2 // indirect
github.com/makeworld-the-better-one/dither/v2 v2.4.0 // indirect
github.com/mattn/go-sixel v0.0.5 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml v1.9.5 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/soniakeys/quant v1.0.0 // indirect
golang.org/x/image v0.25.0 // indirect
golang.org/x/net v0.44.0 // indirect golang.org/x/net v0.44.0 // indirect
golang.org/x/tools v0.37.0 // indirect golang.org/x/tools v0.37.0 // indirect
) )

19
go.sum
View File

@@ -8,6 +8,8 @@ github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiE
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/aymanbagabas/go-udiff v0.2.0 h1:TK0fH4MteXUDspT88n8CKzvK0X9O2xu9yQjWpi6yML8= github.com/aymanbagabas/go-udiff v0.2.0 h1:TK0fH4MteXUDspT88n8CKzvK0X9O2xu9yQjWpi6yML8=
github.com/aymanbagabas/go-udiff v0.2.0/go.mod h1:RE4Ex0qsGkTAJoQdQQCA0uG+nAzJO/pI/QwceO5fgrA= github.com/aymanbagabas/go-udiff v0.2.0/go.mod h1:RE4Ex0qsGkTAJoQdQQCA0uG+nAzJO/pI/QwceO5fgrA=
github.com/blacktop/go-termimg v0.1.20 h1:+EAUc3c9hwE/fUYaqRV1BSLvAlOuLySgLTEBzxGbYK4=
github.com/blacktop/go-termimg v0.1.20/go.mod h1:nwxrOjfFcBjtS358oIGBLfscSLnCpNdRlMVRxsnZwMU=
github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs= github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs=
github.com/charmbracelet/bubbles v0.21.0/go.mod h1:HF+v6QUR4HkEpz62dx7ym2xc71/KBHg+zKwJtMw+qtg= github.com/charmbracelet/bubbles v0.21.0/go.mod h1:HF+v6QUR4HkEpz62dx7ym2xc71/KBHg+zKwJtMw+qtg=
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw= github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
@@ -22,8 +24,11 @@ github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd h1:vy0G
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs= github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs=
github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ= github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ=
github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U= github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U=
github.com/charmbracelet/x/mosaic v0.0.0-20250702191427-5bdfc8f2e4ff h1:OVBKPzoa0k5ZVMoor27BReRZxER1IEDtLHXkRjaHElg=
github.com/charmbracelet/x/mosaic v0.0.0-20250702191427-5bdfc8f2e4ff/go.mod h1:5qLP4S++M5quSc/xbvWWW8vKkgKwOqOT/IVhAas26XI=
github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ= github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ=
github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg= github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
@@ -47,6 +52,8 @@ github.com/knadh/koanf/v2 v2.3.0 h1:Qg076dDRFHvqnKG97ZEsi9TAg2/nFTa9hCdcSa1lvlM=
github.com/knadh/koanf/v2 v2.3.0/go.mod h1:gRb40VRAbd4iJMYYD5IxZ6hfuopFcXBpc9bbQpZwo28= github.com/knadh/koanf/v2 v2.3.0/go.mod h1:gRb40VRAbd4iJMYYD5IxZ6hfuopFcXBpc9bbQpZwo28=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/makeworld-the-better-one/dither/v2 v2.4.0 h1:Az/dYXiTcwcRSe59Hzw4RI1rSnAZns+1msaCXetrMFE=
github.com/makeworld-the-better-one/dither/v2 v2.4.0/go.mod h1:VBtN8DXO7SNtyGmLiGA7IsFeKrBkQPze1/iAeM95arc=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
@@ -54,6 +61,8 @@ github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+Ei
github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-sixel v0.0.5 h1:55w2FR5ncuhKhXrM5ly1eiqMQfZsnAHIpYNGZX03Cv8=
github.com/mattn/go-sixel v0.0.5/go.mod h1:h2Sss+DiUEHy0pUqcIB6PFXo5Cy8sTQEFr3a9/5ZLNw=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
@@ -66,6 +75,8 @@ github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ=
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -76,8 +87,12 @@ github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/soniakeys/quant v1.0.0 h1:N1um9ktjbkZVcywBVAAYpZYSHxEfJGzshHCxx/DaI0Y=
github.com/soniakeys/quant v1.0.0/go.mod h1:HI1k023QuVbD4H8i9YdfZP2munIHU4QpjsImz6Y6zds=
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
@@ -91,6 +106,8 @@ golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E= golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E=
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
golang.org/x/image v0.25.0 h1:Y6uW6rH1y5y/LK1J8BPWZtr6yZ7hrsy6hFrXjgsc2fQ=
golang.org/x/image v0.25.0/go.mod h1:tCAmOEGthTtkalusGp1g3xa2gke8J6c2N565dTyl9Rs=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
@@ -161,5 +178,7 @@ golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxb
golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE=
golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -9,14 +9,24 @@ import (
"path" "path"
) )
// a section in the book
type Section struct {
File, Title, MediaType string
}
// Book epub book // Book epub book
type Book struct { type Book struct {
Ncx Ncx `json:"ncx"` Ncx Ncx `json:"ncx"`
Opf Opf `json:"opf"` Opf Opf `json:"opf"`
Container Container `json:"-"` Container Container `json:"-"`
Mimetype string `json:"-"` Mimetype string `json:"-"`
Content []Content Content []Content
fd *zip.ReadCloser fd *zip.ReadCloser
CoverImage []byte
CoverFile string
CoverMediaType string
Sections []Section
dumpxml bool
} }
// Open open resource file // Open open resource file

View File

@@ -1,11 +1,11 @@
package epub package epub
//Container META-INF/container.xml file // Container META-INF/container.xml file
type Container struct { type Container struct {
Rootfile Rootfile `xml:"rootfiles>rootfile" json:"rootfile"` Rootfile Rootfile `xml:"rootfiles>rootfile" json:"rootfile"`
} }
//Rootfile root file // Rootfile root file
type Rootfile struct { type Rootfile struct {
Path string `xml:"full-path,attr" json:"path"` Path string `xml:"full-path,attr" json:"path"`
Type string `xml:"media-type,attr" json:"type"` Type string `xml:"media-type,attr" json:"type"`

View File

@@ -8,9 +8,10 @@ import (
) )
var ( var (
cleanentitles = regexp.MustCompile(`&.+;`) cleanentitles = regexp.MustCompile(`&[a-z]+;`)
empty = regexp.MustCompile(`(?s)^[\s ]*$`) empty = regexp.MustCompile(`(?s)^[\s ]*$`)
newlines = regexp.MustCompile(`[\r\n]+`) newlines = regexp.MustCompile(`[\r\n]+`)
cleanmarkup = regexp.MustCompile(`<[^<>]+>`)
) )
// Content nav-point content // Content nav-point content
@@ -22,30 +23,45 @@ type Content struct {
XML []byte XML []byte
} }
// parse XML, look for title and <p>.*</p> stuff
func (c *Content) String(content []byte) error { func (c *Content) String(content []byte) error {
// parse XML, look for title and <p>.*</p> stuff
doc, err := xmlquery.Parse( doc, err := xmlquery.Parse(
strings.NewReader( strings.NewReader(
cleanentitles.ReplaceAllString(string(content), " "))) cleanentitles.ReplaceAllString(string(content), " ")))
if err != nil { if err != nil {
panic(err) return err
} }
// extract the title if c.Title == "" {
for _, item := range xmlquery.Find(doc, "//title") { // extract the title
c.Title = strings.TrimSpace(item.InnerText()) for _, item := range xmlquery.Find(doc, "//title") {
c.Title = strings.TrimSpace(item.InnerText())
}
} }
// extract all paragraphs, ignore any formatting and re-fill the // extract all paragraphs, ignore any formatting and re-fill the
// paragraph, that is, we replaces all newlines inside with one // paragraph, that is, we replace all newlines inside with one
// space. // space.
txt := strings.Builder{} txt := strings.Builder{}
var have_p bool
for _, item := range xmlquery.Find(doc, "//p") { for _, item := range xmlquery.Find(doc, "//p") {
if !empty.MatchString(item.InnerText()) { if !empty.MatchString(item.InnerText()) {
have_p = true
txt.WriteString(newlines.ReplaceAllString(item.InnerText(), " ") + "\n\n") txt.WriteString(newlines.ReplaceAllString(item.InnerText(), " ") + "\n\n")
} }
} }
if !have_p {
// try <div></div>, which some ebooks use, so get all divs,
// remove markup and paragraphify the parts
for _, item := range xmlquery.Find(doc, "//div") {
if !empty.MatchString(item.InnerText()) {
cleaned := cleanmarkup.ReplaceAllString(item.InnerText(), "")
txt.WriteString(newlines.ReplaceAllString(cleaned, " ") + "\n\n")
}
}
}
c.Body = strings.TrimSpace(txt.String()) c.Body = strings.TrimSpace(txt.String())
c.XML = content c.XML = content

View File

@@ -4,75 +4,202 @@ import (
"archive/zip" "archive/zip"
"fmt" "fmt"
"log" "log"
"os" "path/filepath"
"regexp"
"strings" "strings"
) )
// Open open a epub file var (
// to find content
types = regexp.MustCompile(`application/(xml|html|xhtml|htm)`)
// cleanup regexes
deanchor = regexp.MustCompile(`#.*$`)
cleanext = regexp.MustCompile(`^\.`)
)
// Open open a epub file and return the filled Book structure
func Open(fn string, dumpxml bool) (*Book, error) { func Open(fn string, dumpxml bool) (*Book, error) {
bk, err := openFile(fn, dumpxml)
if err != nil {
return bk, err
}
defer func() {
if err := bk.fd.Close(); err != nil {
log.Fatal(err)
}
}()
if err := bk.getManifest(); err != nil {
return bk, err
}
if err := bk.getSections(); err != nil {
return bk, err
}
if err := bk.readSectionContent(); err != nil {
return bk, err
}
return bk, nil
}
// load the epub zip file
func openFile(fn string, dumpxml bool) (*Book, error) {
fd, err := zip.OpenReader(fn) fd, err := zip.OpenReader(fn)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer func() { bk := &Book{fd: fd, dumpxml: dumpxml}
if err := fd.Close(); err != nil {
log.Fatal(err)
}
}()
bk := Book{fd: fd} return bk, nil
}
// load the manifest
func (bk *Book) getManifest() error {
mt, err := bk.readBytes("mimetype") mt, err := bk.readBytes("mimetype")
if err != nil { if err != nil {
return &bk, err return err
} }
bk.Mimetype = string(mt) bk.Mimetype = string(mt)
// contains the root path
err = bk.readXML("META-INF/container.xml", &bk.Container) err = bk.readXML("META-INF/container.xml", &bk.Container)
if err != nil { if err != nil {
return &bk, err return err
} }
// contains the OPF data
err = bk.readXML(bk.Container.Rootfile.Path, &bk.Opf) err = bk.readXML(bk.Container.Rootfile.Path, &bk.Opf)
if err != nil { if err != nil {
return &bk, err return err
} }
// look for TOC (might be incomplete, see below!)
for _, mf := range bk.Opf.Manifest { for _, mf := range bk.Opf.Manifest {
if mf.ID == bk.Opf.Spine.Toc { if mf.ID == bk.Opf.Spine.Toc {
err = bk.readXML(bk.filename(mf.Href), &bk.Ncx) err = bk.readXML(bk.filename(mf.Href), &bk.Ncx)
if err != nil { if err != nil {
return &bk, err return err
} }
}
break if mf.ID == "cover-image" {
bk.CoverFile = mf.Href
bk.CoverMediaType = mf.MediaType
} }
} }
for _, file := range bk.Files() { return nil
content, err := bk.readBytes(file) }
if err != nil {
return &bk, err // extract the readable sections of the epub
func (bk *Book) getSections() error {
// to store our final content sections
sections := []Section{}
// count the content items in the raw manifest
var manifestcount int
for _, item := range bk.Opf.Manifest {
if types.MatchString(item.MediaType) {
manifestcount++
}
}
// we have ncx points from the TOC, try those
if len(bk.Ncx.Points) > 0 {
for _, block := range bk.Ncx.Points {
sect := Section{
File: "OEBPS/" + block.Content.Src,
Title: block.Text,
}
srcfile := deanchor.ReplaceAllString(block.Content.Src, "")
for _, file := range bk.Files() {
if strings.Contains(file, srcfile) {
sect.File = file
sect.MediaType = "application/" + cleanext.ReplaceAllString(filepath.Ext(file), "")
break
}
}
sections = append(sections, sect)
} }
ct := Content{Src: file} if len(sections) < manifestcount {
if strings.Contains(string(content), "<?xml") { // TOC was incomplete, restart from scratch but use the
if err := ct.String(content); err != nil { // OPF Manifest directly
return &bk, err
sections = []Section{}
for _, item := range bk.Opf.Manifest {
if types.MatchString(item.MediaType) {
sect := Section{
File: "OEBPS/" + item.Href,
MediaType: item.MediaType,
}
srcfile := deanchor.ReplaceAllString(item.Href, "")
for _, file := range bk.Files() {
if strings.Contains(file, srcfile) {
sect.File = file
break
}
}
sections = append(sections, sect)
}
} }
} }
} else {
// no TOC, just pull in the files directly
for _, file := range bk.Files() {
sections = append(sections,
Section{
File: file,
MediaType: "application/" + cleanext.ReplaceAllString(filepath.Ext(file), ""),
})
}
}
// final sections to keep
bk.Sections = sections
return nil
}
func (bk *Book) readSectionContent() error {
// now read in the actual xml contents
for _, section := range bk.Sections {
content, err := bk.readBytes(section.File)
if err != nil {
return err
}
if strings.Contains(section.File, bk.CoverFile) {
bk.CoverImage = content
}
ct := Content{Src: section.File, Title: section.Title}
if types.MatchString(section.MediaType) {
if err := ct.String(content); err != nil {
return err
}
}
if bk.dumpxml {
fmt.Println(string(ct.XML))
}
bk.Content = append(bk.Content, ct) bk.Content = append(bk.Content, ct)
if dumpxml {
fmt.Println(string(ct.XML))
}
} }
if dumpxml { return nil
os.Exit(0)
}
return &bk, nil
} }

View File

@@ -1,13 +1,13 @@
package epub package epub
//Opf content.opf // Opf content.opf
type Opf struct { type Opf struct {
Metadata Metadata `xml:"metadata" json:"metadata"` Metadata Metadata `xml:"metadata" json:"metadata"`
Manifest []Manifest `xml:"manifest>item" json:"manifest"` Manifest []Manifest `xml:"manifest>item" json:"manifest"`
Spine Spine `xml:"spine" json:"spine"` Spine Spine `xml:"spine" json:"spine"`
} }
//Metadata metadata // Metadata metadata
type Metadata struct { type Metadata struct {
Title []string `xml:"title" json:"title"` Title []string `xml:"title" json:"title"`
Language []string `xml:"language" json:"language"` Language []string `xml:"language" json:"language"`
@@ -53,7 +53,7 @@ type Metafield struct {
Content string `xml:"content,attr" json:"content"` Content string `xml:"content,attr" json:"content"`
} }
//Manifest manifest // Manifest manifest
type Manifest struct { type Manifest struct {
ID string `xml:"id,attr" json:"id"` ID string `xml:"id,attr" json:"id"`
Href string `xml:"href,attr" json:"href"` Href string `xml:"href,attr" json:"href"`