6 Commits

Author SHA1 Message Date
T.v.Dein
238972f11f Parserfixes std (#11)
* clean svg and cdata
* refactored ebook preparation, separated from calling the pager
* added better unit tests
* add free ebooks for testing
2025-10-21 21:57:12 +02:00
T.v.Dein
f524083210 Fix more parser failures (#10)
* stabilize section parsing, now seems to read all ebooks I tested with
* refactored Open() into smaller funcs
* bump version
2025-10-20 18:54:49 +00:00
T.v.Dein
cb671b7401 Fix crash and add support for content in <div> (#9)
* fix #8: better regex to remove html entities
* add opf and ncx debug
* make epub content retrieval more flexible
* fix epub content retrieval: also support html files with <div>
2025-10-19 22:30:13 +02:00
T.v.Dein
2c6e81a2c8 add cover image support (#7) 2025-10-19 20:39:04 +02:00
c2abc4ba4d gofmt 2025-10-17 14:22:03 +02:00
b9b0ad8603 add badges 2025-10-17 14:19:16 +02:00
28 changed files with 453 additions and 88 deletions

View File

@@ -1,3 +1,8 @@
[![Actions](https://github.com/tlinden/epuppy/actions/workflows/ci.yaml/badge.svg)](https://github.com/tlinden/epuppy/actions)
[![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://github.com/tlinden/epuppy/blob/master/LICENSE)
[![Go Report Card](https://goreportcard.com/badge/github.com/tlinden/epuppy)](https://goreportcard.com/report/github.com/tlinden/epuppy)
# epuppy - terminal epub reader # epuppy - terminal epub reader
This is a little TUI epub ebook reader. This is a work in progress and This is a little TUI epub ebook reader. This is a work in progress and

View File

@@ -32,7 +32,7 @@ import (
) )
const ( const (
Version string = `v0.0.4` Version string = `v0.0.7`
Usage string = `This is epuppy, a terminal ui ebook viewer. Usage string = `This is epuppy, a terminal ui ebook viewer.
Usage: epuppy [options] <epub file> Usage: epuppy [options] <epub file>
@@ -42,6 +42,7 @@ Options:
-s --store-progress remember reading position -s --store-progress remember reading position
-n --line-numbers add line numbers -n --line-numbers add line numbers
-c --config <file> use config <file> -c --config <file> use config <file>
-i --cover-image display cover image
-t --txt dump readable content to STDOUT -t --txt dump readable content to STDOUT
-x --xml dump source xml to STDOUT -x --xml dump source xml to STDOUT
-N --no-color disable colors (or use $NO_COLOR env var) -N --no-color disable colors (or use $NO_COLOR env var)
@@ -63,6 +64,7 @@ type Config struct {
ColorDark ColorSetting `koanf:"colordark"` // comes from config file only ColorDark ColorSetting `koanf:"colordark"` // comes from config file only
ColorLight ColorSetting `koanf:"colorlight"` // comes from config file only ColorLight ColorSetting `koanf:"colorlight"` // comes from config file only
ShowHelp bool `koanf:"help"` ShowHelp bool `koanf:"help"`
ShowCover bool `koanf:"cover-image"` // -i
Colors Colors // generated from user config file or internal defaults, respects dark mode Colors Colors // generated from user config file or internal defaults, respects dark mode
Document string Document string
InitialProgress int // lines InitialProgress int // lines
@@ -89,6 +91,7 @@ func InitConfig(output io.Writer) (*Config, error) {
flagset.BoolP("txt", "t", false, "dump readable content to STDOUT") flagset.BoolP("txt", "t", false, "dump readable content to STDOUT")
flagset.BoolP("xml", "x", false, "dump xml to STDOUT") flagset.BoolP("xml", "x", false, "dump xml to STDOUT")
flagset.BoolP("no-color", "N", false, "disable colors") flagset.BoolP("no-color", "N", false, "disable colors")
flagset.BoolP("cover-image", "i", false, "show cover image")
flagset.BoolP("help", "h", false, "show help") flagset.BoolP("help", "h", false, "show help")
flagset.StringP("config", "c", "", "read config from file") flagset.StringP("config", "c", "", "read config from file")

View File

@@ -21,10 +21,13 @@ package cmd
// https://github.com/charmbracelet/bubbletea/tree/main/examples/pager // https://github.com/charmbracelet/bubbletea/tree/main/examples/pager
import ( import (
"bytes"
"fmt" "fmt"
"image"
"os" "os"
"strings" "strings"
"github.com/blacktop/go-termimg"
"github.com/charmbracelet/bubbles/help" "github.com/charmbracelet/bubbles/help"
"github.com/charmbracelet/bubbles/key" "github.com/charmbracelet/bubbles/key"
"github.com/charmbracelet/bubbles/viewport" "github.com/charmbracelet/bubbles/viewport"
@@ -132,6 +135,7 @@ type Doc struct {
hideUi bool hideUi bool
meta *Meta meta *Meta
config *Config config *Config
Cover *termimg.ImageWidget
keys keyMap keys keyMap
help help.Model help help.Model
@@ -220,7 +224,12 @@ func (m *Doc) Rewrap() {
// wrapping has changed, update viewport and line count // wrapping has changed, update viewport and line count
if content != "" { if content != "" {
m.viewport.SetContent(content) if m.Cover != nil {
cover, _ := m.Cover.Render()
m.viewport.SetContent(cover + "\n\n" + content)
} else {
m.viewport.SetContent(content)
}
m.meta.lines = len(strings.Split(content, "\n")) m.meta.lines = len(strings.Split(content, "\n"))
} }
@@ -270,7 +279,15 @@ func max(a, b int) int {
return b return b
} }
func Pager(conf *Config, title, message string) (int, error) { type Ebook struct {
Config *Config
Title string
Body string
Cover []byte
MediaType string
}
func Pager(book *Ebook) (int, error) {
width := 80 width := 80
scrollto := 0 scrollto := 0
@@ -281,32 +298,44 @@ func Pager(conf *Config, title, message string) (int, error) {
} }
} }
if conf.StoreProgress { if book.Config.StoreProgress {
scrollto = conf.InitialProgress scrollto = book.Config.InitialProgress
} }
if conf.LineNumbers { if book.Config.LineNumbers {
catn := "" catn := ""
for idx, line := range strings.Split(message, "\n") { for idx, line := range strings.Split(book.Body, "\n") {
catn += fmt.Sprintf("%4d: %s\n", idx, line) catn += fmt.Sprintf("%4d: %s\n", idx, line)
} }
message = catn book.Body = catn
} }
meta := Meta{ meta := Meta{
initialprogress: scrollto, initialprogress: scrollto,
lines: len(strings.Split(message, "\n")), lines: len(strings.Split(book.Body, "\n")),
}
doc := Doc{
content: book.Body,
title: book.Title,
initialwidth: width,
meta: &meta,
config: book.Config,
keys: keys,
}
if book.MediaType != "" && book.Config.ShowCover {
img, _, err := image.Decode(bytes.NewReader(book.Cover))
if err != nil {
return 0, fmt.Errorf("failed to load cover image: %w", err)
}
doc.Cover = termimg.NewImageWidgetFromImage(img)
doc.Cover.SetSize(width, width).SetProtocol(termimg.Auto)
} }
p := tea.NewProgram( p := tea.NewProgram(
Doc{ doc,
content: message,
title: title,
initialwidth: width,
meta: &meta,
config: conf,
keys: keys,
},
tea.WithAltScreen(), // use the full size of the terminal in its "alternate screen buffer" tea.WithAltScreen(), // use the full size of the terminal in its "alternate screen buffer"
tea.WithMouseCellMotion(), // turn on mouse support so we can track the mouse wheel tea.WithMouseCellMotion(), // turn on mouse support so we can track the mouse wheel
) )
@@ -315,7 +344,7 @@ func Pager(conf *Config, title, message string) (int, error) {
return 0, fmt.Errorf("could not run pager: %w", err) return 0, fmt.Errorf("could not run pager: %w", err)
} }
if conf.Debug { if book.Config.Debug {
fmt.Printf("scrollto: %d, last: %d, diff: %d\n", fmt.Printf("scrollto: %d, last: %d, diff: %d\n",
scrollto, meta.currentline, scrollto-meta.currentline) scrollto, meta.currentline, scrollto-meta.currentline)
} }

View File

@@ -22,35 +22,44 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/alecthomas/repr"
"github.com/tlinden/epuppy/pkg/epub" "github.com/tlinden/epuppy/pkg/epub"
) )
func View(conf *Config) (int, error) { func Prepare(conf *Config) (*Ebook, error) {
switch filepath.Ext(conf.Document) { switch filepath.Ext(conf.Document) {
case ".epub": case ".epub":
return ViewEpub(conf) return PrepareEpub(conf)
default: default:
return ViewText(conf) return PrepareText(conf)
} }
} }
func ViewText(conf *Config) (int, error) { func PrepareText(conf *Config) (*Ebook, error) {
data, err := os.ReadFile(conf.Document) data, err := os.ReadFile(conf.Document)
if err != nil { if err != nil {
return 0, err return nil, err
} }
if conf.Dump { return &Ebook{
return fmt.Println(string(data)) Config: conf,
} Title: conf.Document,
Body: string(data),
return Pager(conf, conf.Document, string(data)) }, nil
} }
func ViewEpub(conf *Config) (int, error) { func PrepareEpub(conf *Config) (*Ebook, error) {
book, err := epub.Open(conf.Document, conf.XML) book, err := epub.Open(conf.Document, conf.XML)
if err != nil { if err != nil {
return 0, err return nil, err
}
if conf.Debug {
repr.Println("book.Files()")
repr.Println(book.Files())
repr.Println(book.Ncx)
repr.Println(book.Sections)
repr.Println(book.Opf.Manifest)
} }
buf := strings.Builder{} buf := strings.Builder{}
@@ -70,15 +79,15 @@ func ViewEpub(conf *Config) (int, error) {
fetchByContent(conf, &buf, book) fetchByContent(conf, &buf, book)
if conf.Dump { return &Ebook{
return fmt.Println(buf.String()) Config: conf,
} Title: head.String(),
Body: buf.String(),
return Pager(conf, head.String(), buf.String()) Cover: book.CoverImage,
MediaType: book.CoverMediaType,
}, nil
} }
// FIXME: since the switch to book.Files() in epub.Open() this
// returns invalid chapter numbering
func fetchByContent(conf *Config, buf *strings.Builder, book *epub.Book) bool { func fetchByContent(conf *Config, buf *strings.Builder, book *epub.Book) bool {
chapter := 1 chapter := 1
var gotbody bool var gotbody bool

View File

@@ -59,7 +59,21 @@ func Execute(output io.Writer) int {
} }
} }
progress, err := View(conf) ebook, err := Prepare(conf)
if err != nil {
return Die(err)
}
if conf.Dump {
fmt.Println(ebook.Body)
return 0
}
if conf.Debug || conf.XML {
return 0
}
progress, err := Pager(ebook)
if err != nil { if err != nil {
return Die(err) return Die(err)
} }

118
cmd/view_test.go Normal file
View File

@@ -0,0 +1,118 @@
package cmd
import (
"fmt"
"testing"
"github.com/stretchr/testify/assert"
)
func TestPrepare(t *testing.T) {
var tests = []struct {
file string
body string
}{
{
"t/epub/basic-v3plus2.epub",
`shirt court, an whinny retched a cordage offer groin-murder, picked inner windrow,`,
},
{
"t/epub/childrens-literature.epub",
`The child's natural literature. The world has lost certain secrets as the price of an advancing civilization. It is a commonplace of observation that no one can duplicate the success of Mother Goose, whether she be thought of as the maker of jingles or the teller of tales. The conditions of modern life preclude the generally naïve attitude that produced the folk rhymes, ballads, tales, proverbs, fables, and myths. The folk saw things simply and directly. The complex, analytic, questioning mind is not yet, either in or out of stories. The motives from which people act are to them plain and not mixed. Characters are good or bad. They feel no need of elaborately explaining their joys and sorrows. Such experiences come with the day's work. "To-morrow to fresh woods, and pastures new." The zest of life with them is emphatic. Their humor is fresh, unbounded, sincere; there is no trace of cynicism. In folk literature we do not feel the presence of a "writer" who is mightily concerned about maintaining his reputation for wisdom, originality, or style. Hence the freedom from any note of straining after effect, of artificiality. In the midst of a life limited to fundamental needs, their literature deals with fundamentals. On the whole, it was a literature for entertainment. A more learned upper class may have concerned itself then about "problems" and "purposes," as the whole world does now, but the literature of the folk had no such interests.`,
},
{
"t/epub/cole-voyage-of-life.epub",
`Thomas Cole is regarded as the founder of the Hudson River School, an American art movement that flourished in the mid-19th century and was concerned with the realistic and detailed portrayal of nature but with a strong influence from Romanticism. This group of American landscape painters worked between about 1825 and 1870 and shared a sense of national pride as well as an interest in celebrating the unique natural beauty found in the United States. The wild, untamed nature found in America was viewed as its special character; Europe had ancient ruins, but America had the uncharted wilderness. As Cole's friend William Cullen Bryant sermonized in verse, so Cole sermonized in paint. Both men saw nature as God's work and as a refuge from the ugly materialism of cities. Cole clearly intended the Voyage of Life to be a didactic, moralizing series of paintings using the landscape as an allegory for religious faith.`,
},
{
"t/epub/epub30-spec.epub",
`IDPF Members
Invited Experts/Observers
Version 2.0.1 of this specification was prepared by the International Digital Publishing Forums EPUB Maintenance Working Group under the leadership of:
Active members of the working group at the time of publication of revision 2.0.1 were:
Version 1.0 of this specification was prepared by the International Digital Publishing Forums Unified OEBPS Container Format Working Group under the leadership of:
Active members of the working group at the time of publication of revision 1.0 were:`,
},
{
"t/epub/epub_sample_file_50KB.epub",
`magna aliqua. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna`,
},
{
"t/epub/Fundamental-Accessibility-Tests-Basic-Functionality-v2.0.0.epub",
`Open the Fundamental Accessibility Test book in the reading system.
If the test book is not available in the bookshelf, then open any other book that is available.
If the reading system also supports side loading, then please provide notes about the accessibility of the side loading feature.
Indicate Pass or Fail.`,
},
{
"t/epub/georgia-cfi.epub",
`The Great Valley Region consists of folded sedimentary rocks, extensive erosion having removed the soft layers to form valleys, leaving the hard layers as ridges, both layers running in a N.E.-S.W. direction. In the extreme north-west corner of the state is a small part of the Cumberland Plateau, represented by Lookout and Sand Mts.
On the Blue Ridge escarpment near the N.E. corner of the state is a water-parting separating the waters which find their way respectively N.W. to the Tennessee river, S.W. to the Gulf of Mexico and S.E. to the Atlantic Ocean; indeed, according to B.M. and M.R. Hall (Water Resources of Georgia, p. 2), "there are three springs in north-east Georgia within a stone's throw of each other that send out their waters to Savannah, Ga., to Apalachicola, Fla., and to New Orleans, La." The water-parting between the waters flowing into the`,
},
{
"t/epub/israelsailing.epub",
` במשלוח דואר, מה שלפעמים היה נחמד כי גם חשבונות לתשלום לא היו מגיעים. 'טוב, מי`,
},
{
"t/epub/jlreq-in-japanese.epub",
` 2.5.1 基本版面からはみ出す例 2.5.2 基本版面で設定した行位置の適用 2.5.3 `,
},
{
"t/epub/minimal-v2.epub",
`This is a paragraph.`,
},
{
"t/epub/minimal-v3.epub",
`This is a paragraph.`,
},
{
"t/epub/minimal-v3plus2.epub",
`This is a paragraph.`,
},
{
"t/epub/moby-dick.epub",
`Call me Ishmael. Some years ago—never mind how long precisely—having little or no money in my purse, and nothing particular to interest me on shore, I thought I would sail about a little and see the watery part of the world. It is a way I have of driving off the spleen and regulating the circulation. Whenever I find myself growing grim about the mouth; whenever it is a damp, drizzly November in my soul; whenever I find myself involuntarily pausing before coffin warehouses, and bringing up the rear of every funeral I meet; and especially whenever my hypos get such an upper hand of me, that it requires a strong moral principle to prevent me from deliberately stepping into the street, and methodically knocking peoples hats off—then, I account it high time to get to sea as soon as I can. This is my substitute for pistol and ball. With a philosophical flourish Cato throws himself upon his sword; I quietly take to the ship. There is nothing surprising in this. If they but knew it, almost all men in their degree, some time or other, cherish very nearly the same feelings towards the ocean with me.`,
},
{
"t/epub/sous-le-vent.epub",
`SOUS LE VENT`,
},
{
"t/epub/wasteland-otf.epub",
`Line 20. Cf. Ezekiel 2:1.
23. Cf. Ecclesiastes 12:5.
31. V. Tristan und Isolde, i, verses 5-8.
42. Id. iii, verse 24.`,
},
{
"pkg/epub/test.epub",
`This EPUB file contains 10 hard coded page breaks. Note that these page breaks are different from the reflowed page numbers. If the total number of pages of this book in the reading app is not exactly 10, then you are looking at the reflowed pages. The app may be having a feature to switch between print page and reflowed pages for navigation. Or the print page list may be appearing in the TOC. If print page navigation feature does not exist or does not work then this test should be marked 'Fail'.`,
},
}
for _, tt := range tests {
testname := fmt.Sprintf("prepare/%s", tt.file)
t.Run(testname, func(t *testing.T) {
conf := Config{
Document: "../" + tt.file,
}
ebook, err := Prepare(&conf)
assert.NoError(t, err)
assert.Contains(t, ebook.Body, tt.body, "expected text not found")
})
}
}

11
go.mod
View File

@@ -41,14 +41,25 @@ require (
require ( require (
github.com/antchfx/xpath v1.3.5 // indirect github.com/antchfx/xpath v1.3.5 // indirect
github.com/blacktop/go-termimg v0.1.20 // indirect
github.com/charmbracelet/x/mosaic v0.0.0-20250702191427-5bdfc8f2e4ff // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/knadh/koanf/maps v0.1.2 // indirect github.com/knadh/koanf/maps v0.1.2 // indirect
github.com/makeworld-the-better-one/dither/v2 v2.4.0 // indirect
github.com/mattn/go-sixel v0.0.5 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 // indirect
github.com/pelletier/go-toml v1.9.5 // indirect github.com/pelletier/go-toml v1.9.5 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/soniakeys/quant v1.0.0 // indirect
github.com/stretchr/testify v1.11.1 // indirect
golang.org/x/image v0.25.0 // indirect
golang.org/x/net v0.44.0 // indirect golang.org/x/net v0.44.0 // indirect
golang.org/x/tools v0.37.0 // indirect golang.org/x/tools v0.37.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
) )

21
go.sum
View File

@@ -8,6 +8,8 @@ github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiE
github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8=
github.com/aymanbagabas/go-udiff v0.2.0 h1:TK0fH4MteXUDspT88n8CKzvK0X9O2xu9yQjWpi6yML8= github.com/aymanbagabas/go-udiff v0.2.0 h1:TK0fH4MteXUDspT88n8CKzvK0X9O2xu9yQjWpi6yML8=
github.com/aymanbagabas/go-udiff v0.2.0/go.mod h1:RE4Ex0qsGkTAJoQdQQCA0uG+nAzJO/pI/QwceO5fgrA= github.com/aymanbagabas/go-udiff v0.2.0/go.mod h1:RE4Ex0qsGkTAJoQdQQCA0uG+nAzJO/pI/QwceO5fgrA=
github.com/blacktop/go-termimg v0.1.20 h1:+EAUc3c9hwE/fUYaqRV1BSLvAlOuLySgLTEBzxGbYK4=
github.com/blacktop/go-termimg v0.1.20/go.mod h1:nwxrOjfFcBjtS358oIGBLfscSLnCpNdRlMVRxsnZwMU=
github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs= github.com/charmbracelet/bubbles v0.21.0 h1:9TdC97SdRVg/1aaXNVWfFH3nnLAwOXr8Fn6u6mfQdFs=
github.com/charmbracelet/bubbles v0.21.0/go.mod h1:HF+v6QUR4HkEpz62dx7ym2xc71/KBHg+zKwJtMw+qtg= github.com/charmbracelet/bubbles v0.21.0/go.mod h1:HF+v6QUR4HkEpz62dx7ym2xc71/KBHg+zKwJtMw+qtg=
github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw= github.com/charmbracelet/bubbletea v1.3.10 h1:otUDHWMMzQSB0Pkc87rm691KZ3SWa4KUlvF9nRvCICw=
@@ -22,8 +24,11 @@ github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd h1:vy0G
github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs= github.com/charmbracelet/x/cellbuf v0.0.13-0.20250311204145-2c3ea96c31dd/go.mod h1:xe0nKWGd3eJgtqZRaN9RjMtK7xUYchjzPr7q6kcvCCs=
github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ= github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91 h1:payRxjMjKgx2PaCWLZ4p3ro9y97+TVLZNaRZgJwSVDQ=
github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U= github.com/charmbracelet/x/exp/golden v0.0.0-20241011142426-46044092ad91/go.mod h1:wDlXFlCrmJ8J+swcL/MnGUuYnqgQdW9rhSD61oNMb6U=
github.com/charmbracelet/x/mosaic v0.0.0-20250702191427-5bdfc8f2e4ff h1:OVBKPzoa0k5ZVMoor27BReRZxER1IEDtLHXkRjaHElg=
github.com/charmbracelet/x/mosaic v0.0.0-20250702191427-5bdfc8f2e4ff/go.mod h1:5qLP4S++M5quSc/xbvWWW8vKkgKwOqOT/IVhAas26XI=
github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ= github.com/charmbracelet/x/term v0.2.1 h1:AQeHeLZ1OqSXhrAWpYUtZyX1T3zVxfpZuEQMIQaGIAQ=
github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg= github.com/charmbracelet/x/term v0.2.1/go.mod h1:oQ4enTYFV7QN4m0i9mzHrViD7TQKvNEEkHUMCmsxdUg=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4= github.com/erikgeiser/coninput v0.0.0-20211004153227-1c3628e74d0f h1:Y/CXytFA4m6baUTXGLOoWe4PQhGxaX0KpnayAqC48p4=
@@ -47,6 +52,8 @@ github.com/knadh/koanf/v2 v2.3.0 h1:Qg076dDRFHvqnKG97ZEsi9TAg2/nFTa9hCdcSa1lvlM=
github.com/knadh/koanf/v2 v2.3.0/go.mod h1:gRb40VRAbd4iJMYYD5IxZ6hfuopFcXBpc9bbQpZwo28= github.com/knadh/koanf/v2 v2.3.0/go.mod h1:gRb40VRAbd4iJMYYD5IxZ6hfuopFcXBpc9bbQpZwo28=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/makeworld-the-better-one/dither/v2 v2.4.0 h1:Az/dYXiTcwcRSe59Hzw4RI1rSnAZns+1msaCXetrMFE=
github.com/makeworld-the-better-one/dither/v2 v2.4.0/go.mod h1:VBtN8DXO7SNtyGmLiGA7IsFeKrBkQPze1/iAeM95arc=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4= github.com/mattn/go-localereader v0.0.1 h1:ygSAOl7ZXTx4RdPYinUpg6W99U8jWvWi9Ye2JC/oIi4=
@@ -54,6 +61,8 @@ github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+Ei
github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc= github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mattn/go-sixel v0.0.5 h1:55w2FR5ncuhKhXrM5ly1eiqMQfZsnAHIpYNGZX03Cv8=
github.com/mattn/go-sixel v0.0.5/go.mod h1:h2Sss+DiUEHy0pUqcIB6PFXo5Cy8sTQEFr3a9/5ZLNw=
github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s=
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
@@ -66,6 +75,8 @@ github.com/muesli/reflow v0.3.0 h1:IFsN6K9NfGtjeggFP+68I4chLZV2yIKsXJFNZ+eWh6s=
github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8= github.com/muesli/reflow v0.3.0/go.mod h1:pbwTDkVPibjO2kyvBQRBxTWEEGDGq0FlB1BIKtnHY/8=
github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc= github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc=
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646 h1:zYyBkD/k9seD2A7fsi6Oo2LfFZAehjjQMERAvZLEDnQ=
github.com/nfnt/resize v0.0.0-20180221191011-83c6a9932646/go.mod h1:jpp1/29i3P1S/RLdc7JQKbRpFeM1dOBd8T9ki5s+AY8=
github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8=
github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c= github.com/pelletier/go-toml v1.9.5/go.mod h1:u1nR/EPcESfeI/szUZKdtJ0xRNbUoANCkoOuaOx1Y+c=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
@@ -76,10 +87,16 @@ github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
github.com/soniakeys/quant v1.0.0 h1:N1um9ktjbkZVcywBVAAYpZYSHxEfJGzshHCxx/DaI0Y=
github.com/soniakeys/quant v1.0.0/go.mod h1:HI1k023QuVbD4H8i9YdfZP2munIHU4QpjsImz6Y6zds=
github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk=
github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no=
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
@@ -91,6 +108,8 @@ golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v
golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E= golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561 h1:MDc5xs78ZrZr3HMQugiXOAkSZtfTpbJLDr/lwfgO53E=
golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= golang.org/x/exp v0.0.0-20220909182711-5c715a9e8561/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE=
golang.org/x/image v0.25.0 h1:Y6uW6rH1y5y/LK1J8BPWZtr6yZ7hrsy6hFrXjgsc2fQ=
golang.org/x/image v0.25.0/go.mod h1:tCAmOEGthTtkalusGp1g3xa2gke8J6c2N565dTyl9Rs=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
@@ -161,5 +180,7 @@ golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxb
golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE=
golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -9,14 +9,24 @@ import (
"path" "path"
) )
// a section in the book
type Section struct {
File, Title, MediaType string
}
// Book epub book // Book epub book
type Book struct { type Book struct {
Ncx Ncx `json:"ncx"` Ncx Ncx `json:"ncx"`
Opf Opf `json:"opf"` Opf Opf `json:"opf"`
Container Container `json:"-"` Container Container `json:"-"`
Mimetype string `json:"-"` Mimetype string `json:"-"`
Content []Content Content []Content
fd *zip.ReadCloser fd *zip.ReadCloser
CoverImage []byte
CoverFile string
CoverMediaType string
Sections []Section
dumpxml bool
} }
// Open open resource file // Open open resource file

View File

@@ -1,11 +1,11 @@
package epub package epub
//Container META-INF/container.xml file // Container META-INF/container.xml file
type Container struct { type Container struct {
Rootfile Rootfile `xml:"rootfiles>rootfile" json:"rootfile"` Rootfile Rootfile `xml:"rootfiles>rootfile" json:"rootfile"`
} }
//Rootfile root file // Rootfile root file
type Rootfile struct { type Rootfile struct {
Path string `xml:"full-path,attr" json:"path"` Path string `xml:"full-path,attr" json:"path"`
Type string `xml:"media-type,attr" json:"type"` Type string `xml:"media-type,attr" json:"type"`

View File

@@ -8,9 +8,11 @@ import (
) )
var ( var (
cleanentitles = regexp.MustCompile(`&.+;`) cleanentitles = regexp.MustCompile(`&[a-z]+;`)
empty = regexp.MustCompile(`(?s)^[\s ]*$`) empty = regexp.MustCompile(`(?s)^[\s ]*$`)
newlines = regexp.MustCompile(`[\r\n]+`) newlines = regexp.MustCompile(`[\r\n\s]+`)
cleansvg = regexp.MustCompile(`(<svg.+</svg>|<!\[CDATA\[.+\]\]>)`)
cleanmarkup = regexp.MustCompile(`<[^<>]+>`)
) )
// Content nav-point content // Content nav-point content
@@ -22,30 +24,46 @@ type Content struct {
XML []byte XML []byte
} }
// parse XML, look for title and <p>.*</p> stuff
func (c *Content) String(content []byte) error { func (c *Content) String(content []byte) error {
// parse XML, look for title and <p>.*</p> stuff
doc, err := xmlquery.Parse( doc, err := xmlquery.Parse(
strings.NewReader( strings.NewReader(
cleanentitles.ReplaceAllString(string(content), " "))) cleansvg.ReplaceAllString(
cleanentitles.ReplaceAllString(string(content), " "), "")))
if err != nil { if err != nil {
panic(err) return err
} }
// extract the title if c.Title == "" {
for _, item := range xmlquery.Find(doc, "//title") { // extract the title
c.Title = strings.TrimSpace(item.InnerText()) for _, item := range xmlquery.Find(doc, "//title") {
c.Title = strings.TrimSpace(item.InnerText())
}
} }
// extract all paragraphs, ignore any formatting and re-fill the // extract all paragraphs, ignore any formatting and re-fill the
// paragraph, that is, we replaces all newlines inside with one // paragraph, that is, we replace all newlines inside with one
// space. // space.
txt := strings.Builder{} txt := strings.Builder{}
var have_p bool
for _, item := range xmlquery.Find(doc, "//p") { for _, item := range xmlquery.Find(doc, "//p") {
if !empty.MatchString(item.InnerText()) { if !empty.MatchString(item.InnerText()) {
have_p = true
txt.WriteString(newlines.ReplaceAllString(item.InnerText(), " ") + "\n\n") txt.WriteString(newlines.ReplaceAllString(item.InnerText(), " ") + "\n\n")
} }
} }
if !have_p {
// try <div></div>, which some ebooks use, so get all divs,
// remove markup and paragraphify the parts
for _, item := range xmlquery.Find(doc, "//div") {
if !empty.MatchString(item.InnerText()) {
cleaned := cleanmarkup.ReplaceAllString(item.InnerText(), "")
txt.WriteString(newlines.ReplaceAllString(cleaned, " ") + "\n\n")
}
}
}
c.Body = strings.TrimSpace(txt.String()) c.Body = strings.TrimSpace(txt.String())
c.XML = content c.XML = content

View File

@@ -4,75 +4,202 @@ import (
"archive/zip" "archive/zip"
"fmt" "fmt"
"log" "log"
"os" "path/filepath"
"regexp"
"strings" "strings"
) )
// Open open a epub file var (
// to find content
types = regexp.MustCompile(`application/(xml|html|xhtml|htm)`)
// cleanup regexes
deanchor = regexp.MustCompile(`#.*$`)
cleanext = regexp.MustCompile(`^\.`)
)
// Open open a epub file and return the filled Book structure
func Open(fn string, dumpxml bool) (*Book, error) { func Open(fn string, dumpxml bool) (*Book, error) {
bk, err := openFile(fn, dumpxml)
if err != nil {
return bk, err
}
defer func() {
if err := bk.fd.Close(); err != nil {
log.Fatal(err)
}
}()
if err := bk.getManifest(); err != nil {
return bk, err
}
if err := bk.getSections(); err != nil {
return bk, err
}
if err := bk.readSectionContent(); err != nil {
return bk, err
}
return bk, nil
}
// load the epub zip file
func openFile(fn string, dumpxml bool) (*Book, error) {
fd, err := zip.OpenReader(fn) fd, err := zip.OpenReader(fn)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer func() { bk := &Book{fd: fd, dumpxml: dumpxml}
if err := fd.Close(); err != nil {
log.Fatal(err)
}
}()
bk := Book{fd: fd} return bk, nil
}
// load the manifest
func (bk *Book) getManifest() error {
mt, err := bk.readBytes("mimetype") mt, err := bk.readBytes("mimetype")
if err != nil { if err != nil {
return &bk, err return err
} }
bk.Mimetype = string(mt) bk.Mimetype = string(mt)
// contains the root path
err = bk.readXML("META-INF/container.xml", &bk.Container) err = bk.readXML("META-INF/container.xml", &bk.Container)
if err != nil { if err != nil {
return &bk, err return err
} }
// contains the OPF data
err = bk.readXML(bk.Container.Rootfile.Path, &bk.Opf) err = bk.readXML(bk.Container.Rootfile.Path, &bk.Opf)
if err != nil { if err != nil {
return &bk, err return err
} }
// look for TOC (might be incomplete, see below!)
for _, mf := range bk.Opf.Manifest { for _, mf := range bk.Opf.Manifest {
if mf.ID == bk.Opf.Spine.Toc { if mf.ID == bk.Opf.Spine.Toc {
err = bk.readXML(bk.filename(mf.Href), &bk.Ncx) err = bk.readXML(bk.filename(mf.Href), &bk.Ncx)
if err != nil { if err != nil {
return &bk, err return err
} }
}
break if mf.ID == "cover-image" {
bk.CoverFile = mf.Href
bk.CoverMediaType = mf.MediaType
} }
} }
for _, file := range bk.Files() { return nil
content, err := bk.readBytes(file) }
if err != nil {
return &bk, err // extract the readable sections of the epub
func (bk *Book) getSections() error {
// to store our final content sections
sections := []Section{}
// count the content items in the raw manifest
var manifestcount int
for _, item := range bk.Opf.Manifest {
if types.MatchString(item.MediaType) {
manifestcount++
}
}
// we have ncx points from the TOC, try those
if len(bk.Ncx.Points) > 0 {
for _, block := range bk.Ncx.Points {
sect := Section{
File: "OEBPS/" + block.Content.Src,
Title: block.Text,
}
srcfile := deanchor.ReplaceAllString(block.Content.Src, "")
for _, file := range bk.Files() {
if strings.Contains(file, srcfile) {
sect.File = file
sect.MediaType = "application/" + cleanext.ReplaceAllString(filepath.Ext(file), "")
break
}
}
sections = append(sections, sect)
} }
ct := Content{Src: file} if len(sections) < manifestcount {
if strings.Contains(string(content), "<?xml") { // TOC was incomplete, restart from scratch but use the
if err := ct.String(content); err != nil { // OPF Manifest directly
return &bk, err
sections = []Section{}
for _, item := range bk.Opf.Manifest {
if types.MatchString(item.MediaType) {
sect := Section{
File: "OEBPS/" + item.Href,
MediaType: item.MediaType,
}
srcfile := deanchor.ReplaceAllString(item.Href, "")
for _, file := range bk.Files() {
if strings.Contains(file, srcfile) {
sect.File = file
break
}
}
sections = append(sections, sect)
}
} }
} }
} else {
// no TOC, just pull in the files directly
for _, file := range bk.Files() {
sections = append(sections,
Section{
File: file,
MediaType: "application/" + cleanext.ReplaceAllString(filepath.Ext(file), ""),
})
}
}
// final sections to keep
bk.Sections = sections
return nil
}
func (bk *Book) readSectionContent() error {
// now read in the actual xml contents
for _, section := range bk.Sections {
content, err := bk.readBytes(section.File)
if err != nil {
return err
}
if strings.Contains(section.File, bk.CoverFile) {
bk.CoverImage = content
}
ct := Content{Src: section.File, Title: section.Title}
if types.MatchString(section.MediaType) {
if err := ct.String(content); err != nil {
return err
}
}
if bk.dumpxml {
fmt.Println(string(ct.XML))
}
bk.Content = append(bk.Content, ct) bk.Content = append(bk.Content, ct)
if dumpxml {
fmt.Println(string(ct.XML))
}
} }
if dumpxml { return nil
os.Exit(0)
}
return &bk, nil
} }

View File

@@ -1,13 +1,13 @@
package epub package epub
//Opf content.opf // Opf content.opf
type Opf struct { type Opf struct {
Metadata Metadata `xml:"metadata" json:"metadata"` Metadata Metadata `xml:"metadata" json:"metadata"`
Manifest []Manifest `xml:"manifest>item" json:"manifest"` Manifest []Manifest `xml:"manifest>item" json:"manifest"`
Spine Spine `xml:"spine" json:"spine"` Spine Spine `xml:"spine" json:"spine"`
} }
//Metadata metadata // Metadata metadata
type Metadata struct { type Metadata struct {
Title []string `xml:"title" json:"title"` Title []string `xml:"title" json:"title"`
Language []string `xml:"language" json:"language"` Language []string `xml:"language" json:"language"`
@@ -53,7 +53,7 @@ type Metafield struct {
Content string `xml:"content,attr" json:"content"` Content string `xml:"content,attr" json:"content"`
} }
//Manifest manifest // Manifest manifest
type Manifest struct { type Manifest struct {
ID string `xml:"id,attr" json:"id"` ID string `xml:"id,attr" json:"id"`
Href string `xml:"href,attr" json:"href"` Href string `xml:"href,attr" json:"href"`

BIN
t/epub/basic-v3plus2.epub Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
t/epub/epub30-spec.epub Normal file

Binary file not shown.

Binary file not shown.

BIN
t/epub/georgia-cfi.epub Normal file

Binary file not shown.

BIN
t/epub/israelsailing.epub Normal file

Binary file not shown.

Binary file not shown.

BIN
t/epub/minimal-v2.epub Normal file

Binary file not shown.

BIN
t/epub/minimal-v3.epub Normal file

Binary file not shown.

BIN
t/epub/minimal-v3plus2.epub Normal file

Binary file not shown.

BIN
t/epub/moby-dick.epub Normal file

Binary file not shown.

BIN
t/epub/sous-le-vent.epub Normal file

Binary file not shown.

BIN
t/epub/wasteland-otf.epub Normal file

Binary file not shown.