initial commit

This commit is contained in:
2025-10-15 00:54:19 +02:00
parent 97c7383cf1
commit 0d4c44ee11
14 changed files with 1242 additions and 0 deletions

73
pkg/epub/book.go Normal file
View File

@@ -0,0 +1,73 @@
package epub
import (
"archive/zip"
"encoding/xml"
"fmt"
"io"
"path"
)
// Book is an opened EPUB archive together with the documents parsed from it.
type Book struct {
// Ncx is decoded from the manifest item whose ID equals the spine's toc attribute.
Ncx Ncx `json:"ncx"`
// Opf is decoded from the package document found at Container.Rootfile.Path.
Opf Opf `json:"opf"`
// Container is decoded from META-INF/container.xml; it is omitted from JSON.
Container Container `json:"-"`
// Mimetype is the raw content of the archive's "mimetype" entry; it is omitted from JSON.
Mimetype string `json:"-"`
// fd is the underlying zip reader used to list and open archive entries.
fd *zip.ReadCloser
}
// Open returns a reader for the resource n, where n is a path relative to the
// directory that contains the OPF package document.
func (p *Book) Open(n string) (io.ReadCloser, error) {
	resolved := p.filename(n)
	return p.open(resolved)
}
// Files returns the name of every entry in the archive, in archive order.
// The result is nil when the archive has no entries.
func (p *Book) Files() []string {
	var names []string
	for i := range p.fd.File {
		names = append(names, p.fd.File[i].Name)
	}
	return names
}
// Close releases the underlying zip reader. Any error reported by the reader
// is discarded.
func (p *Book) Close() {
	_ = p.fd.Close()
}
// -----------------------------------------------------------------------------
// filename resolves n against the directory of the OPF package document and
// returns the resulting slash-separated archive path.
func (p *Book) filename(n string) string {
	base := path.Dir(p.Container.Rootfile.Path)
	return path.Join(base, n)
}
// readXML decodes the archive entry named n as XML into v.
//
// n is compared against the full entry name inside the zip; it is not
// resolved relative to the OPF directory. An error is returned when the entry
// cannot be opened or when decoding fails.
func (p *Book) readXML(n string, v interface{}) error {
	fd, err := p.open(n)
	if err != nil {
		// Report the failure instead of pretending the document was read.
		return err
	}
	defer fd.Close()
	return xml.NewDecoder(fd).Decode(v)
}
// readBytes returns the complete content of the archive entry named n.
//
// n is compared against the full entry name inside the zip; it is not
// resolved relative to the OPF directory. An error is returned when the entry
// cannot be opened or when reading it fails.
func (p *Book) readBytes(n string) ([]byte, error) {
	fd, err := p.open(n)
	if err != nil {
		// Report the failure instead of returning (nil, nil), which callers
		// cannot tell apart from an empty entry.
		return nil, err
	}
	defer fd.Close()
	return io.ReadAll(fd)
}
// open returns a reader for the archive entry whose name is exactly n, or an
// error when the archive has no such entry.
func (p *Book) open(n string) (io.ReadCloser, error) {
	for i := range p.fd.File {
		if entry := p.fd.File[i]; entry.Name == n {
			return entry.Open()
		}
	}
	return nil, fmt.Errorf("file %s not exist", n)
}

12
pkg/epub/container.go Normal file
View File

@@ -0,0 +1,12 @@
package epub
// Container mirrors the META-INF/container.xml document of an EPUB archive.
type Container struct {
// Rootfile is decoded from the rootfiles>rootfile element.
Rootfile Rootfile `xml:"rootfiles>rootfile" json:"rootfile"`
}
// Rootfile describes a rootfile element of container.xml.
type Rootfile struct {
// Path is the full-path attribute. Open reads the OPF document from this
// archive path, and Book.filename resolves resource names against its directory.
Path string `xml:"full-path,attr" json:"path"`
// Type is the media-type attribute.
Type string `xml:"media-type,attr" json:"type"`
}

28
pkg/epub/epub_test.go Normal file
View File

@@ -0,0 +1,28 @@
package epub_test
import (
"testing"
"github.com/kapmahc/epub"
)
// TestEpub checks that the test.epub fixture can be opened and closed.
func TestEpub(t *testing.T) {
	book, err := open(t, "test.epub")
	if err != nil {
		t.Fatal(err)
	}
	defer book.Close()
}
// open loads the EPUB at path f, logs its entry names and parsed contents,
// and returns the book.
//
// The returned book is handed to the caller still usable: closing it is the
// caller's responsibility, so this helper must not close it itself.
func open(t *testing.T, f string) (*epub.Book, error) {
	bk, err := epub.Open(f)
	if err != nil {
		return nil, err
	}
	t.Logf("files: %+v", bk.Files())
	t.Logf("book: %+v", bk)
	return bk, nil
}

71
pkg/epub/ncx.go Normal file
View File

@@ -0,0 +1,71 @@
package epub
import (
"encoding/xml"
"regexp"
"strings"
)
// Patterns used by Content.String to reduce a content document to plain text.
var (
	// cleantitle matches the whole <head> element, including across lines.
	cleantitle = regexp.MustCompile(`(?s)<head>.*</head>`)
	// cleanmarkup matches a single tag.
	cleanmarkup = regexp.MustCompile(`<[^<>]+>`)
	// cleanentities matches one character reference such as &amp; or &#160;.
	// It must not be greedy: `&.+;` would swallow all text between the first
	// '&' and the last ';' on a line.
	cleanentities = regexp.MustCompile(`&#?\w+;`)
	// cleancomments matches one /* ... */ comment, possibly spanning lines.
	// The asterisks are escaped; unescaped, `/*.*/` means "any slashes, then
	// anything up to the last slash" and deletes ordinary text such as "and/".
	cleancomments = regexp.MustCompile(`(?s)/\*.*?\*/`)
	// cleanspace matches whitespace at the very start of the text.
	cleanspace = regexp.MustCompile(`^\s*`)
	// cleanh1 matches one h1-h6 heading element with its content. The match is
	// lazy so that text between two headings is kept.
	cleanh1 = regexp.MustCompile(`(?s)<h[1-6].*?</h[1-6]>`)
)
// Ncx holds the navigation map decoded from the NCX document (for example OPS/toc.ncx).
type Ncx struct {
// Points are the navPoint elements found directly under navMap.
Points []*NavPoint `xml:"navMap>navPoint" json:"points"`
}
// NavPoint is one navPoint entry of the NCX navigation map.
type NavPoint struct {
// Text is the value of the navLabel>text element.
Text string `xml:"navLabel>text" json:"text"`
// Content is decoded from the content element of the nav point.
Content Content `xml:"content" json:"content"`
// Points are the navPoint elements nested directly inside this one.
Points []NavPoint `xml:"navPoint" json:"points"`
}
// Title captures the head>title text of a content document. Content.String
// decodes into it to obtain the document title.
type Title struct {
// Content is the text of the head>title element.
Content string `xml:"head>title"`
}
// Content is the content element of a nav point, plus data derived from the
// document it refers to once String has been called.
type Content struct {
// Src is the src attribute of the content element.
Src string `xml:"src,attr" json:"src"`
// Empty is set by String when the extracted Body is empty.
Empty bool
// Body is set by String to the document text with head, headings, tags,
// entities and comments removed.
Body string
// Title is set by String to the document's head>title text.
Title string
// XML is set by String to the raw bytes it was given.
XML []byte
}
// String fills c from the raw bytes of the document that c.Src refers to.
//
// It stores the head>title text in c.Title, a plain-text rendering of the
// document in c.Body, the unmodified input in c.XML, and sets c.Empty to
// whether c.Body ended up empty.
//
// XML syntax errors are tolerated, because content documents frequently are
// not well-formed XML; whatever title was decoded before the error is kept.
// Any other decoding error (including io.EOF for empty input) is returned and
// leaves c unchanged.
func (c *Content) String(content []byte) error {
	var title Title
	if err := xml.Unmarshal(content, &title); err != nil {
		// Identify syntax errors by type rather than by message text.
		if _, ok := err.(*xml.SyntaxError); !ok {
			return err
		}
	}
	c.Title = title.Content

	// Order matters: <head> and headings are dropped together with their text
	// before the generic tag pattern strips the remaining markup.
	txt := cleantitle.ReplaceAllString(string(content), "")
	txt = cleanh1.ReplaceAllString(txt, "")
	txt = cleanmarkup.ReplaceAllString(txt, "")
	txt = cleanentities.ReplaceAllString(txt, " ")
	txt = cleancomments.ReplaceAllString(txt, "")
	txt = strings.TrimSpace(txt)

	c.Body = cleanspace.ReplaceAllString(txt, "")
	c.XML = content
	// Assign unconditionally so a reused Content does not keep a stale flag.
	c.Empty = len(c.Body) == 0
	return nil
}

53
pkg/epub/open.go Normal file
View File

@@ -0,0 +1,53 @@
package epub
import (
"archive/zip"
)
// Open opens the EPUB archive at fn and loads, in order: the "mimetype"
// entry, META-INF/container.xml, the OPF package document named by the
// container, the NCX document referenced by the spine's toc attribute, and
// the content document of every top-level NCX nav point.
//
// On success the returned Book keeps the archive open so that Book.Open can
// read further entries; the caller must call Book.Close when done.
//
// On failure the archive is closed before returning. If the zip itself cannot
// be opened the returned Book is nil; for later failures the partially
// populated Book is still returned next to the error, as before, but its
// entries can no longer be read.
func Open(fn string) (*Book, error) {
	fd, err := zip.OpenReader(fn)
	if err != nil {
		return nil, err
	}
	bk := &Book{fd: fd}

	// fail releases the archive and reports err. The close error is dropped on
	// purpose: the load error is the one worth reporting.
	fail := func(err error) (*Book, error) {
		fd.Close()
		return bk, err
	}

	mt, err := bk.readBytes("mimetype")
	if err != nil {
		return fail(err)
	}
	bk.Mimetype = string(mt)

	if err := bk.readXML("META-INF/container.xml", &bk.Container); err != nil {
		return fail(err)
	}
	if err := bk.readXML(bk.Container.Rootfile.Path, &bk.Opf); err != nil {
		return fail(err)
	}

	// The NCX is the manifest item whose id matches the spine's toc attribute.
	for _, mf := range bk.Opf.Manifest {
		if mf.ID != bk.Opf.Spine.Toc {
			continue
		}
		if err := bk.readXML(bk.filename(mf.Href), &bk.Ncx); err != nil {
			return fail(err)
		}
		break
	}

	// Points holds pointers, so String updates the nav points stored in bk.
	for _, pt := range bk.Ncx.Points {
		content, err := bk.readBytes(bk.filename(pt.Content.Src))
		if err != nil {
			return fail(err)
		}
		if err := pt.Content.String(content); err != nil {
			return fail(err)
		}
	}
	return bk, nil
}

80
pkg/epub/opf.go Normal file
View File

@@ -0,0 +1,80 @@
package epub
// Opf mirrors the OPF package document (content.opf).
type Opf struct {
// Metadata is decoded from the metadata element.
Metadata Metadata `xml:"metadata" json:"metadata"`
// Manifest holds one entry per manifest>item element.
Manifest []Manifest `xml:"manifest>item" json:"manifest"`
// Spine is decoded from the spine element.
Spine Spine `xml:"spine" json:"spine"`
}
// Metadata collects the child elements of the OPF metadata element. Every
// field is a slice, so repeated elements of the same name are all kept.
type Metadata struct {
Title []string `xml:"title" json:"title"`
Language []string `xml:"language" json:"language"`
Identifier []Identifier `xml:"identifier" json:"identifier"`
// Creator and Contributor share the Author shape (text plus file-as and role attributes).
Creator []Author `xml:"creator" json:"creator"`
Subject []string `xml:"subject" json:"subject"`
Description []string `xml:"description" json:"description"`
Publisher []string `xml:"publisher" json:"publisher"`
Contributor []Author `xml:"contributor" json:"contributor"`
Date []Date `xml:"date" json:"date"`
Type []string `xml:"type" json:"type"`
Format []string `xml:"format" json:"format"`
Source []string `xml:"source" json:"source"`
Relation []string `xml:"relation" json:"relation"`
Coverage []string `xml:"coverage" json:"coverage"`
Rights []string `xml:"rights" json:"rights"`
// Meta holds the name/content pairs of the meta elements.
Meta []Metafield `xml:"meta" json:"meta"`
}
// Identifier is one identifier element of the OPF metadata.
type Identifier struct {
// Data is the element's character data.
Data string `xml:",chardata" json:"data"`
// ID is the id attribute.
ID string `xml:"id,attr" json:"id"`
// Scheme is the scheme attribute.
Scheme string `xml:"scheme,attr" json:"scheme"`
}
// Author is the shape used for the creator and contributor metadata elements.
type Author struct {
// Data is the element's character data; it is emitted under the JSON key "author".
Data string `xml:",chardata" json:"author"`
// FileAs is the file-as attribute.
FileAs string `xml:"file-as,attr" json:"file_as"`
// Role is the role attribute.
Role string `xml:"role,attr" json:"role"`
}
// Date is one date element of the OPF metadata.
type Date struct {
// Data is the element's character data, kept as an unparsed string.
Data string `xml:",chardata" json:"data"`
// Event is the event attribute.
Event string `xml:"event,attr" json:"event"`
}
// Metafield is one meta element of the OPF metadata, read as a name/content pair.
type Metafield struct {
// Name is the name attribute.
Name string `xml:"name,attr" json:"name"`
// Content is the content attribute.
Content string `xml:"content,attr" json:"content"`
}
// Manifest is one item element of the OPF manifest.
//
// Each field is read from the attribute of the same purpose: id, href,
// media-type, fallback, properties and media-overlay. Fallback is read from
// the "fallback" attribute, which is the name the OPF item element uses; the
// JSON keys are unchanged.
type Manifest struct {
	ID           string `xml:"id,attr" json:"id"`
	Href         string `xml:"href,attr" json:"href"`
	MediaType    string `xml:"media-type,attr" json:"type"`
	Fallback     string `xml:"fallback,attr" json:"fallback"`
	Properties   string `xml:"properties,attr" json:"properties"`
	MediaOverlay string `xml:"media-overlay,attr" json:"overlay"`
}
// Spine mirrors the spine element of the OPF package document.
type Spine struct {
// ID is the id attribute.
ID string `xml:"id,attr" json:"id"`
// Toc is the toc attribute; Open looks for the manifest item with this ID
// and decodes it as the NCX document.
Toc string `xml:"toc,attr" json:"toc"`
// PageProgression is the page-progression-direction attribute.
PageProgression string `xml:"page-progression-direction,attr" json:"progression"`
// Items holds one entry per itemref child element.
Items []SpineItem `xml:"itemref" json:"items"`
}
// SpineItem is one itemref element of the spine.
type SpineItem struct {
// IDref is the idref attribute.
IDref string `xml:"idref,attr" json:"id_ref"`
// Linear is the linear attribute, kept as the raw string.
Linear string `xml:"linear,attr" json:"linear"`
// ID is the id attribute.
ID string `xml:"id,attr" json:"id"`
// Properties is the properties attribute.
Properties string `xml:"properties,attr" json:"properties"`
}