Release v1.0.17 (#9)

* add shortcut -H to --no-headers, it's too cumbersome to type
* added fuzzy search support
* Added basic lisp plugin facilities
* Lisp plugin Additions:
- added process hook facilities
- added working example lisp plugin for filter hook
- load-path can now be a file as well
- added a couple of lisp helper functions (atoi, split), more may
  follow, see lisplib.go
* linting fixes
This commit is contained in:
T.v.Dein
2023-10-02 18:15:41 +02:00
committed by GitHub
parent 93800f81c1
commit 9eadb941da
15 changed files with 615 additions and 37 deletions

293
lib/lisp.go Normal file
View File

@@ -0,0 +1,293 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"errors"
"fmt"
"log"
"os"
"strings"
"github.com/glycerine/zygomys/zygo"
"github.com/tlinden/tablizer/cfg"
)
/*
Hooks maps a hook name to the lisp function symbols which have been
registered for it via AddHook(). It is (re-)initialized by SetupLisp().

It needs to be global because we can't feed a cfg object to AddHook(),
which is being called from user lisp code.
*/
var Hooks map[string][]*zygo.SexpSymbol
/*
AddHook (exposed to lisp code as addhook) lets user code attach one of
its own functions to a hook offered by tablizer.

It expects at least two arguments, both of them symbols: the hook name,
which must be accepted by HookExists(), and the function to register.
The function symbol is appended to the list kept in Hooks for that hook
name.

The returned Sexp is always zygo.SexpNull. An error is returned when
arguments are missing, are not symbols or name an unknown hook.
*/
func AddHook(env *zygo.Zlisp, name string, args []zygo.Sexp) (zygo.Sexp, error) {
	if len(args) < 2 {
		return zygo.SexpNull, errors.New("argument of %add-hook should be: %hook-name %your-function")
	}

	hookSym, isSymbol := args[0].(*zygo.SexpSymbol)
	if !isSymbol {
		return zygo.SexpNull, errors.New("hook name must be a symbol!")
	}

	hookname := hookSym.Name()
	if !HookExists(hookname) {
		return zygo.SexpNull, errors.New("Unknown hook " + hookname)
	}

	funcSym, isSymbol := args[1].(*zygo.SexpSymbol)
	if !isSymbol {
		return zygo.SexpNull, errors.New("hook function must be a symbol!")
	}

	// appending to a not yet existing map entry starts a fresh list
	Hooks[hookname] = append(Hooks[hookname], funcSym)

	return zygo.SexpNull, nil
}
/*
HookExists reports whether key is one of the hook names listed in
cfg.ValidHooks.
*/
func HookExists(key string) bool {
	known := false

	for _, candidate := range cfg.ValidHooks {
		if candidate == key {
			known = true
			break
		}
	}

	return known
}
/*
LoadFile evaluates the lisp file at path inside the interpreter env.

Only paths ending in ".zy" are considered, anything else is silently
skipped and nil is returned. A failure to read the file is returned to
the caller. If evaluating the code fails, the interpreter's stack trace
is logged and the program terminates (log.Fatal).
*/
func LoadFile(env *zygo.Zlisp, path string) error {
	if !strings.HasSuffix(path, `.zy`) {
		return nil
	}

	code, err := os.ReadFile(path)
	if err != nil {
		return err
	}

	// FIXME: check what res (_ here) could be and mean
	if _, err = env.EvalString(string(code)); err != nil {
		// The stack trace is data, not a format string: it may well
		// contain '%' characters, so it must not go through Fatalf.
		log.Fatal(env.GetStackTrace(err))
	}

	return nil
}
/*
SetupLisp creates the lisp interpreter environment and stores it in
c.Lisp.

It resets the global Hooks registry, creates a sandboxed interpreter,
makes AddHook() available to lisp code as "addhook" and then loads user
code from c.LispLoadPath. The load path may be a single file or a
directory; in the latter case every non-directory entry is handed to
LoadFile() (which itself only evaluates *.zy files). A load path which
does not exist is ignored, that's the default anyway. Finally the
helper functions of RegisterLib() are added.

Returns an error if the load path cannot be inspected or read or if
LoadFile() fails.
*/
func SetupLisp(c *cfg.Config) error {
	Hooks = make(map[string][]*zygo.SexpSymbol)

	env := zygo.NewZlispSandbox()
	env.AddFunction("addhook", AddHook)

	info, err := os.Stat(c.LispLoadPath)

	switch {
	case err == nil:
		if !info.IsDir() {
			// load-path is a single file
			if err := LoadFile(env, c.LispLoadPath); err != nil {
				return err
			}

			break
		}

		// iterate over load-path and evaluate all *.zy files there, if any
		dir, err := os.ReadDir(c.LispLoadPath)
		if err != nil {
			return err
		}

		for _, entry := range dir {
			if entry.IsDir() {
				continue
			}

			if err := LoadFile(env, c.LispLoadPath+"/"+entry.Name()); err != nil {
				return err
			}
		}
	case !os.IsNotExist(err):
		// Stat failed for another reason (e.g. permission denied). There
		// is no FileInfo to look at in this case, so report the problem
		// instead of dereferencing a nil value.
		return err
	}

	RegisterLib(env)

	c.Lisp = env

	return nil
}
/*
RunFilterHooks executes every user lisp function registered as filter
hook.

Each function is given the current line as argument and is expected to
return a boolean. True indicates to keep the line, false to skip it.

If there are multiple such functions registered, then the first one
returning false wins, that is if each function returns true the line
will be kept, if at least one of them returns false, it will be
skipped. Without any registered filter hook the line is always kept.

The line is handed to the interpreter as the global lisp variable
"line" (the same way RunProcessHooks() passes "data") instead of being
pasted into the evaluated source code. That way a line may contain any
character, including backticks and quotes, without breaking or altering
the evaluated expression.

An error is returned if the evaluation fails or if a hook returns
something other than a boolean.
*/
func RunFilterHooks(c cfg.Config, line string) (bool, error) {
	for _, hook := range Hooks["filter"] {
		// (re-)publish the line for every hook, so that a hook
		// cannot tamper with the input of its successors
		c.Lisp.AddGlobal("line", &zygo.SexpStr{S: line})

		c.Lisp.Clear()

		res, err := c.Lisp.EvalString(fmt.Sprintf("(%s line)", hook.Name()))
		if err != nil {
			return false, err
		}

		verdict, isBool := res.(*zygo.SexpBool)
		if !isBool {
			return false, errors.New("filter hook shall return BOOL!")
		}

		if !verdict.Val {
			// the first hook which returns false leads to complete false
			return false, nil
		}
	}

	// if no hook returned false, we succeed and accept the given line
	return true, nil
}
/*
RunProcessHooks executes the user lisp function registered as process
hook, if any.

The hook gets the data (Tabdata) readily processed by tablizer as
argument, converted to a lisp array of hashes (one hash per row, keyed
by header name) and published as the global lisp variable "data". It
is expected to return a SexpPair containing a boolean denoting if the
data has been modified and the actual modified data. Columns must be
the same, rows may differ. Cells may also have been modified.

Return values:
  - the Tabdata built from the hook's return value; only meaningful if
    the second return value is true
  - true if the hook reported a modification, false otherwise (also
    false if no process hook is registered)
  - an error if the conversion, the evaluation or the validation of
    the hook's return value fails

Only one process hook function is supported. If more have been
registered, a warning is printed to stderr and only the first one is
executed.

The somewhat complicated code is being caused by the fact, that we
need to convert our internal structure to a lisp variable and vice
versa afterwards.

NOTE(review): only entries and headers are set on the returned Tabdata,
any other field stays at its zero value - confirm callers don't rely
on them.
*/
func RunProcessHooks(c cfg.Config, data Tabdata) (Tabdata, bool, error) {
	var userdata Tabdata

	lisplist := []zygo.Sexp{}

	if len(Hooks["process"]) == 0 {
		return userdata, false, nil
	}

	if len(Hooks["process"]) > 1 {
		// warnings go to stderr so they don't get mixed into the table output
		fmt.Fprintln(os.Stderr, "Warning: only one process hook is allowed!")
	}

	// there are hook[s] installed, convert the go data structure 'data to lisp
	for _, row := range data.entries {
		var entry zygo.SexpHash

		for idx, cell := range row {
			if idx >= len(data.headers) {
				// a cell without a header cannot be used as hash key
				return userdata, false, errors.New("row contains more cells than there are headers")
			}

			err := entry.HashSet(&zygo.SexpStr{S: data.headers[idx]}, &zygo.SexpStr{S: cell})
			if err != nil {
				return userdata, false, err
			}
		}

		lisplist = append(lisplist, &entry)
	}

	// we need to add it to the env so that the function can use the struct directly
	c.Lisp.AddGlobal("data", &zygo.SexpArray{Val: lisplist, Env: c.Lisp})

	// execute the actual hook
	hook := Hooks["process"][0]

	c.Lisp.Clear()

	res, err := c.Lisp.EvalString(fmt.Sprintf("(%s data)", hook.Name()))
	if err != nil {
		return userdata, false, err
	}

	// we expect (bool, array(hash)) as return from the function
	pair, isPair := res.(*zygo.SexpPair)
	if !isPair {
		return userdata, false, errors.New("Expect (bool, array(hash)) as return value!")
	}

	changed, isBool := pair.Head.(*zygo.SexpBool)
	if !isBool {
		return userdata, false, errors.New("Expect (bool, array(hash)) as return value!")
	}

	rows, isArray := pair.Tail.(*zygo.SexpArray)
	if !isArray {
		return userdata, false, errors.New("Expect (bool, array(hash)) as return value!")
	}

	result := changed.Val
	lisplist = rows.Val

	if !result {
		// no further processing required
		return userdata, result, nil
	}

	// finally convert lispdata back to Tabdata
	for _, item := range lisplist {
		hash, isHash := item.(*zygo.SexpHash)
		if !isHash {
			return userdata, false, errors.New("Returned array should contain hashes!")
		}

		row := []string{}

		// walk the original headers, so that the column order is kept;
		// a key missing in the hash yields an empty cell
		for _, header := range data.headers {
			entry, err := hash.HashGetDefault(c.Lisp, &zygo.SexpStr{S: header}, &zygo.SexpStr{S: ""})
			if err != nil {
				return userdata, false, err
			}

			cell, isString := entry.(*zygo.SexpStr)
			if !isString {
				return userdata, false, errors.New("Hash values should be string!")
			}

			row = append(row, cell.S)
		}

		userdata.entries = append(userdata.entries, row)
	}

	userdata.headers = data.headers

	return userdata, result, nil
}

84
lib/lisplib.go Normal file
View File

@@ -0,0 +1,84 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"errors"
"regexp"
"strconv"
"github.com/glycerine/zygomys/zygo"
)
// Splice2SexpList wraps every string of list into a zygo.SexpStr and
// returns the result of zygo.MakeList() on them, keeping the order.
func Splice2SexpList(list []string) zygo.Sexp {
	items := make([]zygo.Sexp, len(list))

	for idx := range list {
		items[idx] = &zygo.SexpStr{S: list[idx]}
	}

	return zygo.MakeList(items)
}
// StringReSplit splits a string by a regular expression. It is
// registered as "resplit" for lisp code by RegisterLib().
//
// It expects two string arguments: the input string first, the
// separator regular expression second. The pieces are returned as a lisp
// list of strings (see Splice2SexpList).
//
// An error is returned if less than two arguments are given, if one of
// them is not a string or if the separator is not a valid regular
// expression. The latter is reported to the calling lisp code instead of
// panicking, since the pattern is user supplied.
func StringReSplit(env *zygo.Zlisp, name string, args []zygo.Sexp) (zygo.Sexp, error) {
	if len(args) < 2 {
		return zygo.SexpNull, errors.New("expecting 2 arguments!")
	}

	input, isString := args[0].(*zygo.SexpStr)
	if !isString {
		return zygo.SexpNull, errors.New("first argument must be a string!")
	}

	separator, isString := args[1].(*zygo.SexpStr)
	if !isString {
		return zygo.SexpNull, errors.New("second argument must be a string!")
	}

	sep, err := regexp.Compile(separator.S)
	if err != nil {
		return zygo.SexpNull, err
	}

	return Splice2SexpList(sep.Split(input.S, -1)), nil
}
// String2Int converts a string into an integer. It is registered as
// "atoi" for lisp code by RegisterLib().
//
// It expects one string argument, which is parsed using strconv.Atoi()
// and returned as zygo.SexpInt.
//
// An error is returned if no argument is given, if the argument is not a
// string or if it cannot be parsed as a number.
func String2Int(env *zygo.Zlisp, name string, args []zygo.Sexp) (zygo.Sexp, error) {
	if len(args) < 1 {
		// without this check a call without arguments would panic on args[0]
		return zygo.SexpNull, errors.New("expecting 1 argument!")
	}

	str, isString := args[0].(*zygo.SexpStr)
	if !isString {
		return zygo.SexpNull, errors.New("argument must be a string!")
	}

	number, err := strconv.Atoi(str.S)
	if err != nil {
		return zygo.SexpNull, err
	}

	return &zygo.SexpInt{Val: int64(number)}, nil
}
// RegisterLib makes the lisp helper functions of this file available in
// env: "resplit" is backed by StringReSplit and "atoi" by String2Int.
func RegisterLib(env *zygo.Zlisp) {
	helpers := []struct {
		lispname string
		impl     func(*zygo.Zlisp, string, []zygo.Sexp) (zygo.Sexp, error)
	}{
		{"resplit", StringReSplit},
		{"atoi", String2Int},
	}

	for _, helper := range helpers {
		env.AddFunction(helper.lispname, helper.impl)
	}
}

View File

@@ -22,15 +22,33 @@ import (
"encoding/csv"
"errors"
"fmt"
"github.com/alecthomas/repr"
"github.com/tlinden/tablizer/cfg"
"io"
"regexp"
"strings"
"github.com/alecthomas/repr"
"github.com/lithammer/fuzzysearch/fuzzy"
"github.com/tlinden/tablizer/cfg"
)
/*
Parser switch
* [!]Match a line, use fuzzy search for normal pattern strings and
* regexp otherwise.
*/
// matchPattern reports whether line matches the configured pattern. An
// empty pattern matches everything. Otherwise the pattern is applied as
// case folding fuzzy search if c.UseFuzzySearch is set and as regular
// expression (c.PatternR) if not.
func matchPattern(c cfg.Config, line string) bool {
	switch {
	case len(c.Pattern) == 0:
		return true
	case c.UseFuzzySearch:
		return fuzzy.MatchFold(c.Pattern, line)
	default:
		return c.PatternR.MatchString(line)
	}
}
/*
Parser switch
*/
func Parse(c cfg.Config, input io.Reader) (Tabdata, error) {
if len(c.Separator) == 1 {
@@ -41,7 +59,7 @@ func Parse(c cfg.Config, input io.Reader) (Tabdata, error) {
}
/*
Parse CSV input.
Parse CSV input.
*/
func parseCSV(c cfg.Config, input io.Reader) (Tabdata, error) {
var content io.Reader = input
@@ -55,13 +73,25 @@ func parseCSV(c cfg.Config, input io.Reader) (Tabdata, error) {
line := strings.TrimSpace(scanner.Text())
if hadFirst {
// don't match 1st line, it's the header
if c.PatternR.MatchString(line) == c.InvertMatch {
if matchPattern(c, line) == c.InvertMatch {
// by default -v is false, so if a line does NOT
// match the pattern, we will ignore it. However,
// if the user specified -v, the matching is inverted,
// so we ignore all lines, which DO match.
continue
}
// apply user defined lisp filters, if any
accept, err := RunFilterHooks(c, line)
if err != nil {
return data, errors.Unwrap(fmt.Errorf("Failed to apply filter hook: %w", err))
}
if !accept {
// IF there are filter hook[s] and IF one of them
// returns false on the current line, reject it
continue
}
}
lines = append(lines, line)
hadFirst = true
@@ -94,11 +124,20 @@ func parseCSV(c cfg.Config, input io.Reader) (Tabdata, error) {
}
}
// apply user defined lisp process hooks, if any
userdata, changed, err := RunProcessHooks(c, data)
if err != nil {
return data, errors.Unwrap(fmt.Errorf("Failed to apply filter hook: %w", err))
}
if changed {
data = userdata
}
return data, nil
}
/*
Parse tabular input.
Parse tabular input.
*/
func parseTabular(c cfg.Config, input io.Reader) (Tabdata, error) {
data := Tabdata{}
@@ -141,14 +180,24 @@ func parseTabular(c cfg.Config, input io.Reader) (Tabdata, error) {
}
} else {
// data processing
if len(c.Pattern) > 0 {
if c.PatternR.MatchString(line) == c.InvertMatch {
// by default -v is false, so if a line does NOT
// match the pattern, we will ignore it. However,
// if the user specified -v, the matching is inverted,
// so we ignore all lines, which DO match.
continue
}
if matchPattern(c, line) == c.InvertMatch {
// by default -v is false, so if a line does NOT
// match the pattern, we will ignore it. However,
// if the user specified -v, the matching is inverted,
// so we ignore all lines, which DO match.
continue
}
// apply user defined lisp filters, if any
accept, err := RunFilterHooks(c, line)
if err != nil {
return data, errors.Unwrap(fmt.Errorf("Failed to apply filter hook: %w", err))
}
if !accept {
// IF there are filter hook[s] and IF one of them
// returns false on the current line, reject it
continue
}
idx := 0 // we cannot use the header index, because we could exclude columns
@@ -175,6 +224,15 @@ func parseTabular(c cfg.Config, input io.Reader) (Tabdata, error) {
return data, errors.Unwrap(fmt.Errorf("Failed to read from io.Reader: %w", scanner.Err()))
}
// apply user defined lisp process hooks, if any
userdata, changed, err := RunProcessHooks(c, data)
if err != nil {
return data, errors.Unwrap(fmt.Errorf("Failed to apply filter hook: %w", err))
}
if changed {
data = userdata
}
if c.Debug {
repr.Print(data)
}

View File

@@ -19,10 +19,11 @@ package lib
import (
"fmt"
"github.com/tlinden/tablizer/cfg"
"reflect"
"strings"
"testing"
"github.com/tlinden/tablizer/cfg"
)
var input = []struct {