bump version

fix #3 :
- clean mobi page breaks - reorganize the code a little - avoid for duplicate sections
2026-07-10 11:44:31 +02:00 · 2026-01-05 09:15:31 +01:00 · 2026-01-05 08:32:09 +01:00 · 2026-01-03 23:37:47 +01:00 · 2025-11-05 09:01:42 +01:00 · 2025-11-01 21:03:50 +01:00
4 changed files with 65 additions and 27 deletions
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
-[![status-badge](https://ci.codeberg.org/api/badges/15473/status.svg?branch=woodpecker)](https://ci.codeberg.org/repos/15473)
+[![status-badge](https://ci.codeberg.org/api/badges/15473/status.svg?branch=main)](https://ci.codeberg.org/repos/15473)
-[![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://codeberg.org/scip/epuppy/raw/branch/master/LICENSE)
+[![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://codeberg.org/scip/epuppy/raw/branch/main/LICENSE)
 [![Go Report Card](https://goreportcard.com/badge/codeberg.org/scip/epuppy)](https://goreportcard.com/report/codeberg.org/scip/epuppy)
@@ -20,16 +20,20 @@ long run.
 ## Screenshots
- Viewing an ebook in dark mode
+### Viewing an ebook in dark mode
 ![Screenshot](https://codeberg.org/scip/epuppy/raw/branch/main/.codeberg/assets/darkmode.png)
- Viewing an ebook in light mode
+### Viewing an ebook in light mode
 ![Screenshot](https://codeberg.org/scip/epuppy/raw/branch/main/.codeberg/assets/light.png)
- You can interactively adjust text width
+### You can interactively adjust text width
 ![Screenshot](https://codeberg.org/scip/epuppy/raw/branch/main/.codeberg/assets/margin.png)
- Showing the help
+### Showing the help
 ![Screenshot](https://codeberg.org/scip/epuppy/raw/branch/main/.codeberg/assets/help.png)
 ## Usage
@@ -42,7 +46,7 @@ progress.
 Sometimes you may be unhappy with the colors. Depending on your
 terminal style you can enable dark mode with `-D`, light mode is the
 default. You can also configure custom colors in a config file in
-`$HOME/.config/epuppy/confit.toml`:
+`$HOME/.config/epuppy/config.toml`:
 ```toml
 # color setting for dark mode
@@ -92,6 +96,16 @@ Options:
 -v --version             show program version
 ```
 ## Reading mobi files
 `epuppy`   doesn't   support  mobi   files,   but   you  can   install
 [mobitool](https://github.com/bfabiszewski/libmobi/)  and  use  it  to
 convert mobi files to epub. The Ubuntu package is `libmobi-tools`. To convert, execute:
 ```default
 mobitool -e somebook.mobi
 ```
 ## Installation
 The tool does not have any dependencies.  Just download the binary for
@@ -104,7 +118,7 @@ You can use [stew](https://github.com/marwanhawari/stew) to install epuppy:
 stew install https://codeberg.org/scip/epuppy
 ```
-Or go to the [latest release page](https://codeberg.org/scip/epuppy/releases/latest)
+Or go to the [latest release page](https://codeberg.org/scip/epuppy/releases/)
 and look for your OS and platform. There are two options to install the binary:
 Directly     download     the     binary    for     your     platform,
--- a/cmd/config.go
+++ b/cmd/config.go
@@ -32,7 +32,7 @@ import (
 )
 const (
-	Version string = `v0.0.7`
+	Version string = `v0.0.8`
 	Usage   string = `This is epuppy, a terminal ui ebook viewer.
 Usage: epuppy [options] <epub file>
--- a/pkg/epub/content.go
+++ b/pkg/epub/content.go
@@ -8,11 +8,12 @@ import (
 )
 var (
-	cleanentitles = regexp.MustCompile(`&[a-z]+;`)
+	cleanenTitles       = regexp.MustCompile(`&[a-z]+;`)
-	empty         = regexp.MustCompile(`(?s)^[\s ]*$`)
+	isEmpty             = regexp.MustCompile(`(?s)^[\s ]*$`)
-	newlines      = regexp.MustCompile(`[\r\n\s]+`)
+	cleanNewlines       = regexp.MustCompile(`[\r\n\s]+`)
-	cleansvg      = regexp.MustCompile(`(<svg.+</svg>|<!\[CDATA\[.+\]\]>)`)
+	cleanSVG            = regexp.MustCompile(`(<svg.+</svg>|<!\[CDATA\[.+\]\]>)`)
-	cleanmarkup   = regexp.MustCompile(`<[^<>]+>`)
+	cleanMarkup         = regexp.MustCompile(`<[^<>]+>`)
 	cleanMobiPageBreaks = regexp.MustCompile(`<mbp:pagebreak/>`)
 )
 // Content nav-point content
@@ -25,15 +26,30 @@ type Content struct {
 }
 // parse XML, look for title and <p>.*</p> stuff
-func (c *Content) String(content []byte) error {
+func (c *Content) Extract(content []byte) error {
-	doc, err := xmlquery.Parse(
+	rawXML := cleanSVG.ReplaceAllString(
-		strings.NewReader(
+		cleanenTitles.ReplaceAllString(string(content), " "), "")
-			cleansvg.ReplaceAllString(
+
-				cleanentitles.ReplaceAllString(string(content), " "), "")))
+	var doc *xmlquery.Node
 	var err error
 	doc, err = xmlquery.Parse(strings.NewReader(rawXML))
 	if err != nil {
 		if strings.Contains(err.Error(), `namespace mbp is missing`) {
 			fixedmbp := strings.NewReader(
 				cleanMobiPageBreaks.ReplaceAllString(
 					rawXML, `<span style="page-break-after: always" />`))
 			doc, err = xmlquery.Parse(fixedmbp)
 			if err != nil {
 				return err
 			}
 		} else {
 			return err
 		}
 	}
 	if c.Title == "" {
 		// extract the title
 		for _, item := range xmlquery.Find(doc, "//title") {
@@ -47,9 +63,9 @@ func (c *Content) String(content []byte) error {
 	txt := strings.Builder{}
 	var have_p bool
 	for _, item := range xmlquery.Find(doc, "//p") {
-		if !empty.MatchString(item.InnerText()) {
+		if !isEmpty.MatchString(item.InnerText()) {
 			have_p = true
-			txt.WriteString(newlines.ReplaceAllString(item.InnerText(), " ") + "\n\n")
+			txt.WriteString(cleanNewlines.ReplaceAllString(item.InnerText(), " ") + "\n\n")
 		}
 	}
@@ -57,9 +73,9 @@ func (c *Content) String(content []byte) error {
 		// try  <div></div>, which some  ebooks use, so get  all divs,
 		// remove markup and paragraphify the parts
 		for _, item := range xmlquery.Find(doc, "//div") {
-			if !empty.MatchString(item.InnerText()) {
+			if !isEmpty.MatchString(item.InnerText()) {
-				cleaned := cleanmarkup.ReplaceAllString(item.InnerText(), "")
+				cleaned := cleanMarkup.ReplaceAllString(item.InnerText(), "")
-				txt.WriteString(newlines.ReplaceAllString(cleaned, " ") + "\n\n")
+				txt.WriteString(cleanNewlines.ReplaceAllString(cleaned, " ") + "\n\n")
 			}
 		}
 	}
--- a/pkg/epub/open.go
+++ b/pkg/epub/open.go
@@ -112,6 +112,8 @@ func (bk *Book) getSections() error {
 	// we have ncx points from the TOC, try those
 	if len(bk.Ncx.Points) > 0 {
 		known := map[string]int{}
 		for _, block := range bk.Ncx.Points {
 			sect := Section{
 				File:  "OEBPS/" + block.Content.Src,
@@ -128,7 +130,13 @@ func (bk *Book) getSections() error {
 				}
 			}
 			if _, haveFile := known[sect.File]; !haveFile {
 				// sometimes  epubs have  many sections but  they all
 				// point to the same  file. To avoid duplicate content
 				// we ignore sections (thus files) we have already seen.
 				sections = append(sections, sect)
 				known[sect.File] = 1
 			}
 		}
 		if len(sections) < manifestcount {
@@ -189,7 +197,7 @@ func (bk *Book) readSectionContent() error {
 		ct := Content{Src: section.File, Title: section.Title}
 		if types.MatchString(section.MediaType) {
-			if err := ct.String(content); err != nil {
+			if err := ct.Extract(content); err != nil {
 				return err
 			}
 		}
Author	SHA1	Message	Date
Thomas von Dein	ecbfba8809	bump version	2026-01-05 09:15:31 +01:00
Thomas von Dein	372a7b1b00	fix #3 : - clean mobi page breaks - reorganize the code a little - avoid for duplicate sections	2026-01-05 08:32:09 +01:00
Thomas von Dein	02c99da8e9	fix typo	2026-01-03 23:37:47 +01:00
Thomas von Dein	4ca12b907b	fix links	2025-11-05 09:01:42 +01:00
Thomas von Dein	807a2712e5	fix latest release link	2025-11-01 21:03:50 +01:00
Thomas von Dein	0d80f0ef42	fix badge	2025-10-31 23:21:05 +01:00
Thomas von Dein	120b88803c	fix badge	2025-10-31 23:20:06 +01:00
Thomas von Dein	fc9ff4a23f	fix screenshot page format	2025-10-31 00:06:04 +01:00