added tests, reorganized Parse() by dismantling parsing and processing

This commit is contained in:
2025-01-13 18:45:53 +01:00
committed by T.v.Dein
parent 4d894a728b
commit cf1a555b9b
9 changed files with 217 additions and 39 deletions

View File

@@ -301,7 +301,7 @@ func (conf *Config) PrepareTransposers() error {
} }
for _, transposer := range conf.Transposers { for _, transposer := range conf.Transposers {
parts := strings.Split(transposer, "/") parts := strings.Split(transposer, string(transposer[0]))
if len(parts) != 4 { if len(parts) != 4 {
return fmt.Errorf("transposer function must have the format /regexp/replace-string/") return fmt.Errorf("transposer function must have the format /regexp/replace-string/")
} }

View File

@@ -188,6 +188,21 @@ DESCRIPTION
where "C" is our regexp which matches CMD. where "C" is our regexp which matches CMD.
TRANSPOSE FIELDS USING REGEXPS
You can manipulate field contents using regular expressions. You have to
tell tablizer which field[s] to operate on using the option "-T" and the
search/replace pattern using "-R". The number of columns and patterns
must match.
A search/replace pattern consists of the following elements:
/search-regexp/replace-string/
The separator can be any valid character. This is especially useful if
you want to use a regexp containing the "/" character, e.g.:
|search-regexp|replace-string|
OUTPUT MODES OUTPUT MODES
There might be cases when the tabular output of a program is way too There might be cases when the tabular output of a program is way too
large for your current terminal but you still need to see every column. large for your current terminal but you still need to see every column.

View File

@@ -65,7 +65,9 @@ func ValidateConsistency(data *Tabdata) error {
} }
// parse columns list given with -c, modifies config.UseColumns based // parse columns list given with -c, modifies config.UseColumns based
// on eventually given regex // on eventually given regex.
// This is an output filter, because -cN,N,... is being applied AFTER
// processing of the input data.
func PrepareColumns(conf *cfg.Config, data *Tabdata) error { func PrepareColumns(conf *cfg.Config, data *Tabdata) error {
// -c columns // -c columns
usecolumns, err := PrepareColumnVars(conf.Columns, data) usecolumns, err := PrepareColumnVars(conf.Columns, data)
@@ -78,6 +80,8 @@ func PrepareColumns(conf *cfg.Config, data *Tabdata) error {
return nil return nil
} }
// Same thing as above but for -T option, which is an input option,
// because transposers are being applied before output.
func PrepareTransposerColumns(conf *cfg.Config, data *Tabdata) error { func PrepareTransposerColumns(conf *cfg.Config, data *Tabdata) error {
// -T columns // -T columns
usetransposecolumns, err := PrepareColumnVars(conf.TransposeColumns, data) usetransposecolumns, err := PrepareColumnVars(conf.TransposeColumns, data)

View File

@@ -90,6 +90,86 @@ func TestPrepareColumns(t *testing.T) {
} }
} }
// TestPrepareTransposerColumns verifies that -T column specs (numeric or
// header-regexp) are resolved against the table headers and that the
// number of resolved columns matches the number of -R transposer patterns.
func TestPrepareTransposerColumns(t *testing.T) {
	// Shared fixture; PrepareTransposerColumns only reads it, so it is
	// safe to reuse across sub-tests.
	data := Tabdata{
		maxwidthHeader: 5,
		columns:        3,
		headers: []string{
			"ONE", "TWO", "THREE",
		},
		entries: [][]string{
			{
				"2", "3", "4",
			},
		},
	}

	var tests = []struct {
		input     string   // -T column spec (number or header regexp)
		transp    []string // -R search/replace patterns
		exp       int      // expected number of resolved columns
		wanterror bool     // expect error
	}{
		{
			"1",
			[]string{`/\d/x/`},
			1,
			false,
		},
		{
			"T", // will match [T]WO and [T]HREE
			[]string{`/\d/x/`, `/.//`},
			2,
			false,
		},
		{
			"T,2",
			[]string{`/\d/x/`, `/.//`},
			2,
			false,
		},
		{
			"1",
			[]string{},
			1,
			true,
		},
		{
			"",
			[]string{`|.|N|`},
			0,
			true,
		},
		{
			"1",
			[]string{`|.|N|`},
			1,
			false,
		},
	}

	for _, testdata := range tests {
		testname := fmt.Sprintf("PrepareTransposerColumns-%s-%t", testdata.input, testdata.wanterror)
		t.Run(testname, func(t *testing.T) {
			conf := cfg.Config{TransposeColumns: testdata.input, Transposers: testdata.transp}

			err := PrepareTransposerColumns(&conf, &data)
			if err != nil {
				if !testdata.wanterror {
					t.Errorf("got error: %v", err)
				}

				return
			}

			// BUGFIX: the original passed the []int slice itself to %d and
			// compared the wrong quantities in the messages; report lengths.
			if len(conf.UseTransposeColumns) != testdata.exp {
				t.Errorf("got %d columns, want %d", len(conf.UseTransposeColumns), testdata.exp)
			}

			if len(conf.Transposers) != len(conf.UseTransposeColumns) {
				t.Errorf("transposer count %d does not match resolved column count %d",
					len(conf.Transposers), len(conf.UseTransposeColumns))
			}
		})
	}
}
func TestReduceColumns(t *testing.T) { func TestReduceColumns(t *testing.T) {
var tests = []struct { var tests = []struct {
expect [][]string expect [][]string

View File

@@ -217,13 +217,13 @@ The somewhat complicated code is being caused by the fact, that we
need to convert our internal structure to a lisp variable and vice need to convert our internal structure to a lisp variable and vice
versa afterwards. versa afterwards.
*/ */
func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) { func RunProcessHooks(conf cfg.Config, data *Tabdata) (*Tabdata, bool, error) {
var userdata Tabdata var userdata Tabdata
lisplist := []zygo.Sexp{} lisplist := []zygo.Sexp{}
if len(Hooks["process"]) == 0 { if len(Hooks["process"]) == 0 {
return userdata, false, nil return data, false, nil
} }
if len(Hooks["process"]) > 1 { if len(Hooks["process"]) > 1 {
@@ -237,7 +237,7 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
for idx, cell := range row { for idx, cell := range row {
err := entry.HashSet(&zygo.SexpStr{S: data.headers[idx]}, &zygo.SexpStr{S: cell}) err := entry.HashSet(&zygo.SexpStr{S: data.headers[idx]}, &zygo.SexpStr{S: cell})
if err != nil { if err != nil {
return userdata, false, fmt.Errorf("failed to convert to lisp data: %w", err) return data, false, fmt.Errorf("failed to convert to lisp data: %w", err)
} }
} }
@@ -256,7 +256,7 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
res, err := conf.Lisp.EvalString(fmt.Sprintf("(%s data)", hook.Name())) res, err := conf.Lisp.EvalString(fmt.Sprintf("(%s data)", hook.Name()))
if err != nil { if err != nil {
return userdata, false, fmt.Errorf("failed to eval lisp loader: %w", err) return data, false, fmt.Errorf("failed to eval lisp loader: %w", err)
} }
// we expect (bool, array(hash)) as return from the function // we expect (bool, array(hash)) as return from the function
@@ -266,22 +266,22 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
case *zygo.SexpBool: case *zygo.SexpBool:
result = th.Val result = th.Val
default: default:
return userdata, false, errors.New("expect (bool, array(hash)) as return value") return data, false, errors.New("expect (bool, array(hash)) as return value")
} }
switch sexptailtype := sexptype.Tail.(type) { switch sexptailtype := sexptype.Tail.(type) {
case *zygo.SexpArray: case *zygo.SexpArray:
lisplist = sexptailtype.Val lisplist = sexptailtype.Val
default: default:
return userdata, false, errors.New("expect (bool, array(hash)) as return value ") return data, false, errors.New("expect (bool, array(hash)) as return value ")
} }
default: default:
return userdata, false, errors.New("process hook shall return array of hashes ") return data, false, errors.New("process hook shall return array of hashes ")
} }
if !result { if !result {
// no further processing required // no further processing required
return userdata, result, nil return data, result, nil
} }
// finally convert lispdata back to Tabdata // finally convert lispdata back to Tabdata
@@ -296,18 +296,18 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
&zygo.SexpStr{S: header}, &zygo.SexpStr{S: header},
&zygo.SexpStr{S: ""}) &zygo.SexpStr{S: ""})
if err != nil { if err != nil {
return userdata, false, fmt.Errorf("failed to get lisp hash entry: %w", err) return data, false, fmt.Errorf("failed to get lisp hash entry: %w", err)
} }
switch sexptype := entry.(type) { switch sexptype := entry.(type) {
case *zygo.SexpStr: case *zygo.SexpStr:
row = append(row, sexptype.S) row = append(row, sexptype.S)
default: default:
return userdata, false, errors.New("hash values should be string ") return data, false, errors.New("hash values should be string ")
} }
} }
default: default:
return userdata, false, errors.New("returned array should contain hashes ") return data, false, errors.New("returned array should contain hashes ")
} }
userdata.entries = append(userdata.entries, row) userdata.entries = append(userdata.entries, row)
@@ -315,5 +315,5 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
userdata.headers = data.headers userdata.headers = data.headers
return userdata, result, nil return &userdata, result, nil
} }

View File

@@ -33,11 +33,31 @@ import (
Parser switch Parser switch
*/ */
func Parse(conf cfg.Config, input io.Reader) (Tabdata, error) { func Parse(conf cfg.Config, input io.Reader) (Tabdata, error) {
var data Tabdata
var err error
// first step, parse the data
if len(conf.Separator) == 1 { if len(conf.Separator) == 1 {
return parseCSV(conf, input) data, err = parseCSV(conf, input)
} else {
data, err = parseTabular(conf, input)
} }
return parseTabular(conf, input) if err != nil {
return data, err
}
// 2nd step, apply filters, code or transposers, if any
postdata, changed, err := PostProcess(conf, &data)
if err != nil {
return data, err
}
if changed {
return *postdata, nil
}
return data, err
} }
/* /*
@@ -77,16 +97,6 @@ func parseCSV(conf cfg.Config, input io.Reader) (Tabdata, error) {
} }
} }
// apply user defined lisp process hooks, if any
userdata, changed, err := RunProcessHooks(conf, data)
if err != nil {
return data, fmt.Errorf("failed to apply filter hook: %w", err)
}
if changed {
data = userdata
}
return data, nil return data, nil
} }
@@ -174,43 +184,53 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
return data, fmt.Errorf("failed to read from io.Reader: %w", scanner.Err()) return data, fmt.Errorf("failed to read from io.Reader: %w", scanner.Err())
} }
return data, nil
}
func PostProcess(conf cfg.Config, data *Tabdata) (*Tabdata, bool, error) {
var modified bool
// filter by field filters, if any // filter by field filters, if any
filtereddata, changed, err := FilterByFields(conf, &data) filtereddata, changed, err := FilterByFields(conf, data)
if err != nil { if err != nil {
return data, fmt.Errorf("failed to filter fields: %w", err) return data, false, fmt.Errorf("failed to filter fields: %w", err)
} }
if changed { if changed {
data = *filtereddata data = filtereddata
modified = true
}
// check if transposers are valid and turn into Transposer structs
if err := PrepareTransposerColumns(&conf, data); err != nil {
return data, false, err
} }
// transpose if demanded // transpose if demanded
if err := PrepareTransposerColumns(&conf, &data); err != nil { modifieddata, changed, err := TransposeFields(conf, data)
return data, err
}
modifieddata, changed, err := TransposeFields(conf, &data)
if err != nil { if err != nil {
return data, fmt.Errorf("failed to transpose fields: %w", err) return data, false, fmt.Errorf("failed to transpose fields: %w", err)
} }
if changed { if changed {
data = *modifieddata data = modifieddata
modified = true
} }
// apply user defined lisp process hooks, if any // apply user defined lisp process hooks, if any
userdata, changed, err := RunProcessHooks(conf, data) userdata, changed, err := RunProcessHooks(conf, data)
if err != nil { if err != nil {
return data, fmt.Errorf("failed to apply filter hook: %w", err) return data, false, fmt.Errorf("failed to apply filter hook: %w", err)
} }
if changed { if changed {
data = userdata data = userdata
modified = true
} }
if conf.Debug { if conf.Debug {
repr.Print(data) repr.Print(data)
} }
return data, nil return data, modified, nil
} }

6
t/testtable.csv Normal file
View File

@@ -0,0 +1,6 @@
NAME,DURATION
x,10
a,100
z,0
u,4
k,6
1 NAME DURATION
2 x 10
3 a 100
4 z 0
5 u 4
6 k 6

View File

@@ -133,7 +133,7 @@
.\" ======================================================================== .\" ========================================================================
.\" .\"
.IX Title "TABLIZER 1" .IX Title "TABLIZER 1"
.TH TABLIZER 1 "2025-01-12" "1" "User Commands" .TH TABLIZER 1 "2025-01-13" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents. .\" way too many mistakes in technical documents.
.if n .ad l .if n .ad l
@@ -351,6 +351,25 @@ We want to see only the \s-1CMD\s0 column and use a regex for this:
.Ve .Ve
.PP .PP
where \*(L"C\*(R" is our regexp which matches \s-1CMD.\s0 where \*(L"C\*(R" is our regexp which matches \s-1CMD.\s0
.SS "\s-1TRANSPOSE FIELDS USING REGEXPS\s0"
.IX Subsection "TRANSPOSE FIELDS USING REGEXPS"
You can manipulate field contents using regular expressions. You have
to tell tablizer which field[s] to operate on using the option \f(CW\*(C`\-T\*(C'\fR
and the search/replace pattern using \f(CW\*(C`\-R\*(C'\fR. The number of columns and
patterns must match.
.PP
A search/replace pattern consists of the following elements:
.PP
.Vb 1
\& /search\-regexp/replace\-string/
.Ve
.PP
The separator can be any valid character. Especially if you want to
use a regexp containing the \f(CW\*(C`/\*(C'\fR character, eg:
.PP
.Vb 1
\& |search\-regexp|replace\-string|
.Ve
.SS "\s-1OUTPUT MODES\s0" .SS "\s-1OUTPUT MODES\s0"
.IX Subsection "OUTPUT MODES" .IX Subsection "OUTPUT MODES"
There might be cases when the tabular output of a program is way too There might be cases when the tabular output of a program is way too

View File

@@ -206,6 +206,40 @@ We want to see only the CMD column and use a regex for this:
where "C" is our regexp which matches CMD. where "C" is our regexp which matches CMD.
=head2 TRANSPOSE FIELDS USING REGEXPS
You can manipulate field contents using regular expressions. You have
to tell tablizer which field[s] to operate on using the option C<-T>
and the search/replace pattern using C<-R>. The number of columns and
patterns must match.
A search/replace pattern consists of the following elements:
/search-regexp/replace-string/
The separator can be any valid character. This is especially useful if
you want to use a regexp containing the C</> character, e.g.:
|search-regexp|replace-string|
Example:
cat t/testtable2
NAME DURATION
x 10
a 100
z 0
u 4
k 6
cat t/testtable2 | tablizer -T2 -R '/^\d/4/' -n
NAME DURATION
x 40
a 400
z 4
u 4
k 4
=head2 OUTPUT MODES =head2 OUTPUT MODES
There might be cases when the tabular output of a program is way too There might be cases when the tabular output of a program is way too