mirror of
https://codeberg.org/scip/epuppy.git
synced 2025-12-16 20:11:00 +01:00
initial commit
This commit is contained in:
73
pkg/epub/book.go
Normal file
73
pkg/epub/book.go
Normal file
@@ -0,0 +1,73 @@
|
||||
package epub
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"path"
|
||||
)
|
||||
|
||||
// Book epub book
|
||||
type Book struct {
|
||||
Ncx Ncx `json:"ncx"`
|
||||
Opf Opf `json:"opf"`
|
||||
Container Container `json:"-"`
|
||||
Mimetype string `json:"-"`
|
||||
|
||||
fd *zip.ReadCloser
|
||||
}
|
||||
|
||||
// Open open resource file
|
||||
func (p *Book) Open(n string) (io.ReadCloser, error) {
|
||||
return p.open(p.filename(n))
|
||||
}
|
||||
|
||||
// Files list resource files
|
||||
func (p *Book) Files() []string {
|
||||
var fns []string
|
||||
for _, f := range p.fd.File {
|
||||
fns = append(fns, f.Name)
|
||||
}
|
||||
return fns
|
||||
}
|
||||
|
||||
// Close close file reader
|
||||
func (p *Book) Close() {
|
||||
p.fd.Close()
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
func (p *Book) filename(n string) string {
|
||||
return path.Join(path.Dir(p.Container.Rootfile.Path), n)
|
||||
}
|
||||
|
||||
func (p *Book) readXML(n string, v interface{}) error {
|
||||
fd, err := p.open(n)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
defer fd.Close()
|
||||
dec := xml.NewDecoder(fd)
|
||||
return dec.Decode(v)
|
||||
}
|
||||
|
||||
func (p *Book) readBytes(n string) ([]byte, error) {
|
||||
fd, err := p.open(n)
|
||||
if err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
defer fd.Close()
|
||||
|
||||
return io.ReadAll(fd)
|
||||
|
||||
}
|
||||
|
||||
func (p *Book) open(n string) (io.ReadCloser, error) {
|
||||
for _, f := range p.fd.File {
|
||||
if f.Name == n {
|
||||
return f.Open()
|
||||
}
|
||||
}
|
||||
return nil, fmt.Errorf("file %s not exist", n)
|
||||
}
|
||||
12
pkg/epub/container.go
Normal file
12
pkg/epub/container.go
Normal file
@@ -0,0 +1,12 @@
|
||||
package epub
|
||||
|
||||
//Container META-INF/container.xml file
|
||||
type Container struct {
|
||||
Rootfile Rootfile `xml:"rootfiles>rootfile" json:"rootfile"`
|
||||
}
|
||||
|
||||
// Rootfile is a rootfile entry of the container document.
type Rootfile struct {
	// Path is taken from the full-path attribute.
	Path string `xml:"full-path,attr" json:"path"`

	// Type is taken from the media-type attribute.
	Type string `xml:"media-type,attr" json:"type"`
}
|
||||
28
pkg/epub/epub_test.go
Normal file
28
pkg/epub/epub_test.go
Normal file
@@ -0,0 +1,28 @@
|
||||
package epub_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/kapmahc/epub"
|
||||
)
|
||||
|
||||
func TestEpub(t *testing.T) {
|
||||
bk, err := open(t, "test.epub")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer bk.Close()
|
||||
}
|
||||
|
||||
func open(t *testing.T, f string) (*epub.Book, error) {
|
||||
bk, err := epub.Open(f)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer bk.Close()
|
||||
|
||||
t.Logf("files: %+v", bk.Files())
|
||||
t.Logf("book: %+v", bk)
|
||||
|
||||
return bk, nil
|
||||
}
|
||||
71
pkg/epub/ncx.go
Normal file
71
pkg/epub/ncx.go
Normal file
@@ -0,0 +1,71 @@
|
||||
package epub
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Patterns used by Content.String to reduce a content document to plain text.
var (
	// cleantitle matches the whole <head>...</head> section, across lines.
	cleantitle = regexp.MustCompile(`(?s)<head>.*</head>`)

	// cleanmarkup matches a single tag: '<', one or more characters other
	// than angle brackets, then '>'.
	cleanmarkup = regexp.MustCompile(`<[^<>]+>`)

	// cleanentities matches one named or numeric character reference such as
	// &amp; or &#160;. Each reference is matched on its own so that text
	// between two references on the same line is kept.
	cleanentities = regexp.MustCompile(`&#?\w+;`)

	// cleancomments matches one /* ... */ comment, across lines. The
	// delimiters are escaped so that a plain '/' in running text (as in
	// "1/2" or "and/or") is not treated as part of a comment.
	cleancomments = regexp.MustCompile(`(?s)/\*.*?\*/`)

	// cleanspace matches whitespace at the very start of the text.
	cleanspace = regexp.MustCompile(`^\s*`)

	// cleanh1 matches one heading element (h1 to h6) on a single line. The
	// match is non-greedy so that text between two headings on the same line
	// is kept.
	cleanh1 = regexp.MustCompile(`<h[1-6].*?</h[1-6]>`)
)
|
||||
|
||||
// Ncx OPS/toc.ncx
|
||||
type Ncx struct {
|
||||
Points []*NavPoint `xml:"navMap>navPoint" json:"points"`
|
||||
}
|
||||
|
||||
// NavPoint nav point
|
||||
type NavPoint struct {
|
||||
Text string `xml:"navLabel>text" json:"text"`
|
||||
Content Content `xml:"content" json:"content"`
|
||||
Points []NavPoint `xml:"navPoint" json:"points"`
|
||||
}
|
||||
|
||||
// Title is the decode target used to pull the head>title text out of a
// content document.
type Title struct {
	// Content is the character data of head>title.
	Content string `xml:"head>title"`
}
|
||||
|
||||
// Content is the content element of a nav point, extended with the data that
// Content.String derives from the referenced document.
type Content struct {
	// Src is taken from the src attribute.
	Src string `xml:"src,attr" json:"src"`

	// Empty is set by String when the extracted body has zero length.
	Empty bool

	// Body is the plain text extracted by String.
	Body string

	// Title is the head>title text extracted by String.
	Title string

	// XML is the raw document passed to String.
	XML []byte
}
|
||||
|
||||
func (c *Content) String(content []byte) error {
|
||||
title := Title{}
|
||||
|
||||
err := xml.Unmarshal(content, &title)
|
||||
if err != nil {
|
||||
if !strings.HasPrefix(err.Error(), "XML syntax error") {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
c.Title = title.Content
|
||||
|
||||
txt := cleantitle.ReplaceAllString(string(content), "")
|
||||
txt = cleanh1.ReplaceAllString(txt, "")
|
||||
txt = cleanmarkup.ReplaceAllString(txt, "")
|
||||
txt = cleanentities.ReplaceAllString(txt, " ")
|
||||
txt = cleancomments.ReplaceAllString(txt, "")
|
||||
|
||||
txt = strings.TrimSpace(txt)
|
||||
|
||||
c.Body = cleanspace.ReplaceAllString(txt, "")
|
||||
c.XML = content
|
||||
|
||||
if len(c.Body) == 0 {
|
||||
c.Empty = true
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
53
pkg/epub/open.go
Normal file
53
pkg/epub/open.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package epub
|
||||
|
||||
import (
|
||||
"archive/zip"
|
||||
)
|
||||
|
||||
// Open open a epub file
|
||||
func Open(fn string) (*Book, error) {
|
||||
fd, err := zip.OpenReader(fn)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
defer fd.Close()
|
||||
|
||||
bk := Book{fd: fd}
|
||||
mt, err := bk.readBytes("mimetype")
|
||||
if err != nil {
|
||||
return &bk, err
|
||||
}
|
||||
|
||||
bk.Mimetype = string(mt)
|
||||
|
||||
err = bk.readXML("META-INF/container.xml", &bk.Container)
|
||||
if err != nil {
|
||||
return &bk, err
|
||||
}
|
||||
|
||||
err = bk.readXML(bk.Container.Rootfile.Path, &bk.Opf)
|
||||
if err != nil {
|
||||
return &bk, err
|
||||
}
|
||||
|
||||
for _, mf := range bk.Opf.Manifest {
|
||||
if mf.ID == bk.Opf.Spine.Toc {
|
||||
err = bk.readXML(bk.filename(mf.Href), &bk.Ncx)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
for _, ncx := range bk.Ncx.Points {
|
||||
content, err := bk.readBytes(bk.filename(ncx.Content.Src))
|
||||
if err != nil {
|
||||
return &bk, err
|
||||
}
|
||||
|
||||
if err := ncx.Content.String(content); err != nil {
|
||||
return &bk, err
|
||||
}
|
||||
}
|
||||
|
||||
return &bk, nil
|
||||
}
|
||||
80
pkg/epub/opf.go
Normal file
80
pkg/epub/opf.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package epub
|
||||
|
||||
//Opf content.opf
|
||||
type Opf struct {
|
||||
Metadata Metadata `xml:"metadata" json:"metadata"`
|
||||
Manifest []Manifest `xml:"manifest>item" json:"manifest"`
|
||||
Spine Spine `xml:"spine" json:"spine"`
|
||||
}
|
||||
|
||||
//Metadata metadata
|
||||
type Metadata struct {
|
||||
Title []string `xml:"title" json:"title"`
|
||||
Language []string `xml:"language" json:"language"`
|
||||
Identifier []Identifier `xml:"identifier" json:"identifier"`
|
||||
Creator []Author `xml:"creator" json:"creator"`
|
||||
Subject []string `xml:"subject" json:"subject"`
|
||||
Description []string `xml:"description" json:"description"`
|
||||
Publisher []string `xml:"publisher" json:"publisher"`
|
||||
Contributor []Author `xml:"contributor" json:"contributor"`
|
||||
Date []Date `xml:"date" json:"date"`
|
||||
Type []string `xml:"type" json:"type"`
|
||||
Format []string `xml:"format" json:"format"`
|
||||
Source []string `xml:"source" json:"source"`
|
||||
Relation []string `xml:"relation" json:"relation"`
|
||||
Coverage []string `xml:"coverage" json:"coverage"`
|
||||
Rights []string `xml:"rights" json:"rights"`
|
||||
Meta []Metafield `xml:"meta" json:"meta"`
|
||||
}
|
||||
|
||||
// Identifier is one identifier element of the OPF metadata.
type Identifier struct {
	// Data is the element's character data.
	Data string `xml:",chardata" json:"data"`

	// ID is taken from the id attribute.
	ID string `xml:"id,attr" json:"id"`

	// Scheme is taken from the scheme attribute.
	Scheme string `xml:"scheme,attr" json:"scheme"`
}
|
||||
|
||||
// Author is one creator or contributor element of the OPF metadata.
type Author struct {
	// Data is the element's character data.
	Data string `xml:",chardata" json:"author"`

	// FileAs is taken from the file-as attribute.
	FileAs string `xml:"file-as,attr" json:"file_as"`

	// Role is taken from the role attribute.
	Role string `xml:"role,attr" json:"role"`
}
|
||||
|
||||
// Date is one date element of the OPF metadata.
type Date struct {
	// Data is the element's character data.
	Data string `xml:",chardata" json:"data"`

	// Event is taken from the event attribute.
	Event string `xml:"event,attr" json:"event"`
}
|
||||
|
||||
// Metafield is one meta element of the OPF metadata.
type Metafield struct {
	// Name is taken from the name attribute.
	Name string `xml:"name,attr" json:"name"`

	// Content is taken from the content attribute.
	Content string `xml:"content,attr" json:"content"`
}
|
||||
|
||||
// Manifest is one item element of the OPF manifest.
type Manifest struct {
	// ID is taken from the id attribute.
	ID string `xml:"id,attr" json:"id"`

	// Href is taken from the href attribute.
	Href string `xml:"href,attr" json:"href"`

	// MediaType is taken from the media-type attribute.
	MediaType string `xml:"media-type,attr" json:"type"`

	// Fallback is taken from the media-fallback attribute.
	Fallback string `xml:"media-fallback,attr" json:"fallback"`

	// Properties is taken from the properties attribute.
	Properties string `xml:"properties,attr" json:"properties"`

	// MediaOverlay is taken from the media-overlay attribute.
	MediaOverlay string `xml:"media-overlay,attr" json:"overlay"`
}
|
||||
|
||||
// Spine spine
|
||||
type Spine struct {
|
||||
ID string `xml:"id,attr" json:"id"`
|
||||
Toc string `xml:"toc,attr" json:"toc"`
|
||||
PageProgression string `xml:"page-progression-direction,attr" json:"progression"`
|
||||
Items []SpineItem `xml:"itemref" json:"items"`
|
||||
}
|
||||
|
||||
// SpineItem is one itemref element of the spine.
type SpineItem struct {
	// IDref is taken from the idref attribute.
	IDref string `xml:"idref,attr" json:"id_ref"`

	// Linear is taken from the linear attribute.
	Linear string `xml:"linear,attr" json:"linear"`

	// ID is taken from the id attribute.
	ID string `xml:"id,attr" json:"id"`

	// Properties is taken from the properties attribute.
	Properties string `xml:"properties,attr" json:"properties"`
}
|
||||
Reference in New Issue
Block a user