added tests, reorganized Parse() by dismantling parsing and processing

This commit is contained in:
2025-01-13 18:45:53 +01:00
committed by T.v.Dein
parent 4d894a728b
commit cf1a555b9b
9 changed files with 217 additions and 39 deletions

View File

@@ -301,7 +301,7 @@ func (conf *Config) PrepareTransposers() error {
}
for _, transposer := range conf.Transposers {
parts := strings.Split(transposer, "/")
parts := strings.Split(transposer, string(transposer[0]))
if len(parts) != 4 {
return fmt.Errorf("transposer function must have the format /regexp/replace-string/")
}

View File

@@ -188,6 +188,21 @@ DESCRIPTION
where "C" is our regexp which matches CMD.
TRANSPOSE FIELDS USING REGEXPS
You can manipulate field contents using regular expressions. You have to
tell tablizer which field[s] to operate on using the option "-T" and the
search/replace pattern using "-R". The number of columns and patterns
must match.
A search/replace pattern consists of the following elements:
/search-regexp/replace-string/
The separator can be any valid character. Especially if you want to use
a regexp containing the "/" character, eg:
|search-regexp|replace-string|
OUTPUT MODES
There might be cases when the tabular output of a program is way too
large for your current terminal but you still need to see every column.

View File

@@ -65,7 +65,9 @@ func ValidateConsistency(data *Tabdata) error {
}
// parse columns list given with -c, modifies config.UseColumns based
// on eventually given regex
// on eventually given regex.
// This is an output filter, because -cN,N,... is being applied AFTER
// processing of the input data.
func PrepareColumns(conf *cfg.Config, data *Tabdata) error {
// -c columns
usecolumns, err := PrepareColumnVars(conf.Columns, data)
@@ -78,6 +80,8 @@ func PrepareColumns(conf *cfg.Config, data *Tabdata) error {
return nil
}
// Same thing as above but for -T option, which is an input option,
// because transposers are being applied before output.
func PrepareTransposerColumns(conf *cfg.Config, data *Tabdata) error {
// -T columns
usetransposecolumns, err := PrepareColumnVars(conf.TransposeColumns, data)

View File

@@ -90,6 +90,86 @@ func TestPrepareColumns(t *testing.T) {
}
}
func TestPrepareTransposerColumns(t *testing.T) {
data := Tabdata{
maxwidthHeader: 5,
columns: 3,
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{
"2", "3", "4",
},
},
}
var tests = []struct {
input string
transp []string
exp int
wanterror bool // expect error
}{
{
"1",
[]string{`/\d/x/`},
1,
false,
},
{
"T", // will match [T]WO and [T]HREE
[]string{`/\d/x/`, `/.//`},
2,
false,
},
{
"T,2",
[]string{`/\d/x/`, `/.//`},
2,
false,
},
{
"1",
[]string{},
1,
true,
},
{
"",
[]string{`|.|N|`},
0,
true,
},
{
"1",
[]string{`|.|N|`},
1,
false,
},
}
for _, testdata := range tests {
testname := fmt.Sprintf("PrepareTransposerColumns-%s-%t", testdata.input, testdata.wanterror)
t.Run(testname, func(t *testing.T) {
conf := cfg.Config{TransposeColumns: testdata.input, Transposers: testdata.transp}
err := PrepareTransposerColumns(&conf, &data)
if err != nil {
if !testdata.wanterror {
t.Errorf("got error: %v", err)
}
} else {
if len(conf.UseTransposeColumns) != testdata.exp {
t.Errorf("got %d, want %d", conf.UseTransposeColumns, testdata.exp)
}
if len(conf.Transposers) != len(conf.UseTransposeColumns) {
t.Errorf("got %d, want %d", conf.UseTransposeColumns, testdata.exp)
}
}
})
}
}
func TestReduceColumns(t *testing.T) {
var tests = []struct {
expect [][]string

View File

@@ -217,13 +217,13 @@ The somewhat complicated code is being caused by the fact, that we
need to convert our internal structure to a lisp variable and vice
versa afterwards.
*/
func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
func RunProcessHooks(conf cfg.Config, data *Tabdata) (*Tabdata, bool, error) {
var userdata Tabdata
lisplist := []zygo.Sexp{}
if len(Hooks["process"]) == 0 {
return userdata, false, nil
return data, false, nil
}
if len(Hooks["process"]) > 1 {
@@ -237,7 +237,7 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
for idx, cell := range row {
err := entry.HashSet(&zygo.SexpStr{S: data.headers[idx]}, &zygo.SexpStr{S: cell})
if err != nil {
return userdata, false, fmt.Errorf("failed to convert to lisp data: %w", err)
return data, false, fmt.Errorf("failed to convert to lisp data: %w", err)
}
}
@@ -256,7 +256,7 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
res, err := conf.Lisp.EvalString(fmt.Sprintf("(%s data)", hook.Name()))
if err != nil {
return userdata, false, fmt.Errorf("failed to eval lisp loader: %w", err)
return data, false, fmt.Errorf("failed to eval lisp loader: %w", err)
}
// we expect (bool, array(hash)) as return from the function
@@ -266,22 +266,22 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
case *zygo.SexpBool:
result = th.Val
default:
return userdata, false, errors.New("expect (bool, array(hash)) as return value")
return data, false, errors.New("expect (bool, array(hash)) as return value")
}
switch sexptailtype := sexptype.Tail.(type) {
case *zygo.SexpArray:
lisplist = sexptailtype.Val
default:
return userdata, false, errors.New("expect (bool, array(hash)) as return value ")
return data, false, errors.New("expect (bool, array(hash)) as return value ")
}
default:
return userdata, false, errors.New("process hook shall return array of hashes ")
return data, false, errors.New("process hook shall return array of hashes ")
}
if !result {
// no further processing required
return userdata, result, nil
return data, result, nil
}
// finally convert lispdata back to Tabdata
@@ -296,18 +296,18 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
&zygo.SexpStr{S: header},
&zygo.SexpStr{S: ""})
if err != nil {
return userdata, false, fmt.Errorf("failed to get lisp hash entry: %w", err)
return data, false, fmt.Errorf("failed to get lisp hash entry: %w", err)
}
switch sexptype := entry.(type) {
case *zygo.SexpStr:
row = append(row, sexptype.S)
default:
return userdata, false, errors.New("hash values should be string ")
return data, false, errors.New("hash values should be string ")
}
}
default:
return userdata, false, errors.New("returned array should contain hashes ")
return data, false, errors.New("returned array should contain hashes ")
}
userdata.entries = append(userdata.entries, row)
@@ -315,5 +315,5 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
userdata.headers = data.headers
return userdata, result, nil
return &userdata, result, nil
}

View File

@@ -33,11 +33,31 @@ import (
Parser switch
*/
func Parse(conf cfg.Config, input io.Reader) (Tabdata, error) {
var data Tabdata
var err error
// first step, parse the data
if len(conf.Separator) == 1 {
return parseCSV(conf, input)
data, err = parseCSV(conf, input)
} else {
data, err = parseTabular(conf, input)
}
return parseTabular(conf, input)
if err != nil {
return data, err
}
// 2nd step, apply filters, code or transposers, if any
postdata, changed, err := PostProcess(conf, &data)
if err != nil {
return data, err
}
if changed {
return *postdata, nil
}
return data, err
}
/*
@@ -77,16 +97,6 @@ func parseCSV(conf cfg.Config, input io.Reader) (Tabdata, error) {
}
}
// apply user defined lisp process hooks, if any
userdata, changed, err := RunProcessHooks(conf, data)
if err != nil {
return data, fmt.Errorf("failed to apply filter hook: %w", err)
}
if changed {
data = userdata
}
return data, nil
}
@@ -174,43 +184,53 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
return data, fmt.Errorf("failed to read from io.Reader: %w", scanner.Err())
}
return data, nil
}
func PostProcess(conf cfg.Config, data *Tabdata) (*Tabdata, bool, error) {
var modified bool
// filter by field filters, if any
filtereddata, changed, err := FilterByFields(conf, &data)
filtereddata, changed, err := FilterByFields(conf, data)
if err != nil {
return data, fmt.Errorf("failed to filter fields: %w", err)
return data, false, fmt.Errorf("failed to filter fields: %w", err)
}
if changed {
data = *filtereddata
data = filtereddata
modified = true
}
// check if transposers are valid and turn into Transposer structs
if err := PrepareTransposerColumns(&conf, data); err != nil {
return data, false, err
}
// transpose if demanded
if err := PrepareTransposerColumns(&conf, &data); err != nil {
return data, err
}
modifieddata, changed, err := TransposeFields(conf, &data)
modifieddata, changed, err := TransposeFields(conf, data)
if err != nil {
return data, fmt.Errorf("failed to transpose fields: %w", err)
return data, false, fmt.Errorf("failed to transpose fields: %w", err)
}
if changed {
data = *modifieddata
data = modifieddata
modified = true
}
// apply user defined lisp process hooks, if any
userdata, changed, err := RunProcessHooks(conf, data)
if err != nil {
return data, fmt.Errorf("failed to apply filter hook: %w", err)
return data, false, fmt.Errorf("failed to apply filter hook: %w", err)
}
if changed {
data = userdata
modified = true
}
if conf.Debug {
repr.Print(data)
}
return data, nil
return data, modified, nil
}

6
t/testtable.csv Normal file
View File

@@ -0,0 +1,6 @@
NAME,DURATION
x,10
a,100
z,0
u,4
k,6
1 NAME DURATION
2 x 10
3 a 100
4 z 0
5 u 4
6 k 6

View File

@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "TABLIZER 1"
.TH TABLIZER 1 "2025-01-12" "1" "User Commands"
.TH TABLIZER 1 "2025-01-13" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@@ -351,6 +351,25 @@ We want to see only the \s-1CMD\s0 column and use a regex for this:
.Ve
.PP
where \*(L"C\*(R" is our regexp which matches \s-1CMD.\s0
.SS "\s-1TRANSPOSE FIELDS USING REGEXPS\s0"
.IX Subsection "TRANSPOSE FIELDS USING REGEXPS"
You can manipulate field contents using regular expressions. You have
to tell tablizer which field[s] to operate on using the option \f(CW\*(C`\-T\*(C'\fR
and the search/replace pattern using \f(CW\*(C`\-R\*(C'\fR. The number of columns and
patterns must match.
.PP
A search/replace pattern consists of the following elements:
.PP
.Vb 1
\& /search\-regexp/replace\-string/
.Ve
.PP
The separator can be any valid character. Especially if you want to
use a regexp containing the \f(CW\*(C`/\*(C'\fR character, eg:
.PP
.Vb 1
\& |search\-regexp|replace\-string|
.Ve
.SS "\s-1OUTPUT MODES\s0"
.IX Subsection "OUTPUT MODES"
There might be cases when the tabular output of a program is way too

View File

@@ -206,6 +206,40 @@ We want to see only the CMD column and use a regex for this:
where "C" is our regexp which matches CMD.
=head2 TRANSPOSE FIELDS USING REGEXPS
You can manipulate field contents using regular expressions. You have
to tell tablizer which field[s] to operate on using the option C<-T>
and the search/replace pattern using C<-R>. The number of columns and
patterns must match.
A search/replace pattern consists of the following elements:
/search-regexp/replace-string/
The separator can be any valid character. Especially if you want to
use a regexp containing the C</> character, eg:
|search-regexp|replace-string|
Example:
cat t/testtable2
NAME DURATION
x 10
a 100
z 0
u 4
k 6
cat t/testtable2 | tablizer -T2 -R '/^\d/4/' -n
NAME DURATION
x 40
a 400
z 4
u 4
k 4
=head2 OUTPUT MODES
There might be cases when the tabular output of a program is way too