added tests, reorganized Parse() by dismantling parsing and processing

This commit is contained in:
2025-01-13 18:45:53 +01:00
committed by T.v.Dein
parent 4d894a728b
commit cf1a555b9b
9 changed files with 217 additions and 39 deletions

View File

@@ -301,7 +301,7 @@ func (conf *Config) PrepareTransposers() error {
} }
for _, transposer := range conf.Transposers { for _, transposer := range conf.Transposers {
parts := strings.Split(transposer, "/") parts := strings.Split(transposer, string(transposer[0]))
if len(parts) != 4 { if len(parts) != 4 {
return fmt.Errorf("transposer function must have the format /regexp/replace-string/") return fmt.Errorf("transposer function must have the format /regexp/replace-string/")
} }

View File

@@ -188,6 +188,21 @@ DESCRIPTION
where "C" is our regexp which matches CMD. where "C" is our regexp which matches CMD.
TRANSPOSE FIELDS USING REGEXPS
You can manipulate field contents using regular expressions. You have to
tell tablizer which field[s] to operate on using the option "-T" and the
search/replace pattern using "-R". The number of columns and patterns
must match.
A search/replace pattern consists of the following elements:
/search-regexp/replace-string/
The separator can be any valid character. This is especially useful if
you want to use a regexp containing the "/" character, e.g.:
|search-regexp|replace-string|
OUTPUT MODES OUTPUT MODES
There might be cases when the tabular output of a program is way too There might be cases when the tabular output of a program is way too
large for your current terminal but you still need to see every column. large for your current terminal but you still need to see every column.

View File

@@ -65,7 +65,9 @@ func ValidateConsistency(data *Tabdata) error {
} }
// parse columns list given with -c, modifies config.UseColumns based // parse columns list given with -c, modifies config.UseColumns based
// on eventually given regex // on eventually given regex.
// This is an output filter, because -cN,N,... is being applied AFTER
// processing of the input data.
func PrepareColumns(conf *cfg.Config, data *Tabdata) error { func PrepareColumns(conf *cfg.Config, data *Tabdata) error {
// -c columns // -c columns
usecolumns, err := PrepareColumnVars(conf.Columns, data) usecolumns, err := PrepareColumnVars(conf.Columns, data)
@@ -78,6 +80,8 @@ func PrepareColumns(conf *cfg.Config, data *Tabdata) error {
return nil return nil
} }
// Same thing as above but for -T option, which is an input option,
// because transposers are being applied before output.
func PrepareTransposerColumns(conf *cfg.Config, data *Tabdata) error { func PrepareTransposerColumns(conf *cfg.Config, data *Tabdata) error {
// -T columns // -T columns
usetransposecolumns, err := PrepareColumnVars(conf.TransposeColumns, data) usetransposecolumns, err := PrepareColumnVars(conf.TransposeColumns, data)

View File

@@ -90,6 +90,86 @@ func TestPrepareColumns(t *testing.T) {
} }
} }
// TestPrepareTransposerColumns verifies that -T column specs (numeric or
// header-regexp) are resolved against the table headers and that the
// number of resolved columns matches the number of -R transposer patterns.
func TestPrepareTransposerColumns(t *testing.T) {
	// Shared fixture; PrepareTransposerColumns only reads it, so it is
	// safe to reuse across sub-tests.
	data := Tabdata{
		maxwidthHeader: 5,
		columns:        3,
		headers: []string{
			"ONE", "TWO", "THREE",
		},
		entries: [][]string{
			{
				"2", "3", "4",
			},
		},
	}

	var tests = []struct {
		input     string   // -T column spec (number or header regexp)
		transp    []string // -R search/replace patterns
		exp       int      // expected number of resolved columns
		wanterror bool     // expect error
	}{
		{
			"1",
			[]string{`/\d/x/`},
			1,
			false,
		},
		{
			"T", // will match [T]WO and [T]HREE
			[]string{`/\d/x/`, `/.//`},
			2,
			false,
		},
		{
			"T,2",
			[]string{`/\d/x/`, `/.//`},
			2,
			false,
		},
		{
			"1",
			[]string{},
			1,
			true,
		},
		{
			"",
			[]string{`|.|N|`},
			0,
			true,
		},
		{
			"1",
			[]string{`|.|N|`},
			1,
			false,
		},
	}

	for _, testdata := range tests {
		testname := fmt.Sprintf("PrepareTransposerColumns-%s-%t", testdata.input, testdata.wanterror)
		t.Run(testname, func(t *testing.T) {
			conf := cfg.Config{TransposeColumns: testdata.input, Transposers: testdata.transp}

			err := PrepareTransposerColumns(&conf, &data)
			if err != nil {
				if !testdata.wanterror {
					t.Errorf("got error: %v", err)
				}

				return
			}

			// BUGFIX: the original passed the []int slice itself to %d and
			// compared the wrong quantities in the messages; report lengths.
			if len(conf.UseTransposeColumns) != testdata.exp {
				t.Errorf("got %d columns, want %d", len(conf.UseTransposeColumns), testdata.exp)
			}

			if len(conf.Transposers) != len(conf.UseTransposeColumns) {
				t.Errorf("transposer count %d does not match resolved column count %d",
					len(conf.Transposers), len(conf.UseTransposeColumns))
			}
		})
	}
}
func TestReduceColumns(t *testing.T) { func TestReduceColumns(t *testing.T) {
var tests = []struct { var tests = []struct {
expect [][]string expect [][]string

View File

@@ -217,13 +217,13 @@ The somewhat complicated code is being caused by the fact, that we
need to convert our internal structure to a lisp variable and vice need to convert our internal structure to a lisp variable and vice
versa afterwards. versa afterwards.
*/ */
func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) { func RunProcessHooks(conf cfg.Config, data *Tabdata) (*Tabdata, bool, error) {
var userdata Tabdata var userdata Tabdata
lisplist := []zygo.Sexp{} lisplist := []zygo.Sexp{}
if len(Hooks["process"]) == 0 { if len(Hooks["process"]) == 0 {
return userdata, false, nil return data, false, nil
} }
if len(Hooks["process"]) > 1 { if len(Hooks["process"]) > 1 {
@@ -237,7 +237,7 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
for idx, cell := range row { for idx, cell := range row {
err := entry.HashSet(&zygo.SexpStr{S: data.headers[idx]}, &zygo.SexpStr{S: cell}) err := entry.HashSet(&zygo.SexpStr{S: data.headers[idx]}, &zygo.SexpStr{S: cell})
if err != nil { if err != nil {
return userdata, false, fmt.Errorf("failed to convert to lisp data: %w", err) return data, false, fmt.Errorf("failed to convert to lisp data: %w", err)
} }
} }
@@ -256,7 +256,7 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
res, err := conf.Lisp.EvalString(fmt.Sprintf("(%s data)", hook.Name())) res, err := conf.Lisp.EvalString(fmt.Sprintf("(%s data)", hook.Name()))
if err != nil { if err != nil {
return userdata, false, fmt.Errorf("failed to eval lisp loader: %w", err) return data, false, fmt.Errorf("failed to eval lisp loader: %w", err)
} }
// we expect (bool, array(hash)) as return from the function // we expect (bool, array(hash)) as return from the function
@@ -266,22 +266,22 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
case *zygo.SexpBool: case *zygo.SexpBool:
result = th.Val result = th.Val
default: default:
return userdata, false, errors.New("expect (bool, array(hash)) as return value") return data, false, errors.New("expect (bool, array(hash)) as return value")
} }
switch sexptailtype := sexptype.Tail.(type) { switch sexptailtype := sexptype.Tail.(type) {
case *zygo.SexpArray: case *zygo.SexpArray:
lisplist = sexptailtype.Val lisplist = sexptailtype.Val
default: default:
return userdata, false, errors.New("expect (bool, array(hash)) as return value ") return data, false, errors.New("expect (bool, array(hash)) as return value ")
} }
default: default:
return userdata, false, errors.New("process hook shall return array of hashes ") return data, false, errors.New("process hook shall return array of hashes ")
} }
if !result { if !result {
// no further processing required // no further processing required
return userdata, result, nil return data, result, nil
} }
// finally convert lispdata back to Tabdata // finally convert lispdata back to Tabdata
@@ -296,18 +296,18 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
&zygo.SexpStr{S: header}, &zygo.SexpStr{S: header},
&zygo.SexpStr{S: ""}) &zygo.SexpStr{S: ""})
if err != nil { if err != nil {
return userdata, false, fmt.Errorf("failed to get lisp hash entry: %w", err) return data, false, fmt.Errorf("failed to get lisp hash entry: %w", err)
} }
switch sexptype := entry.(type) { switch sexptype := entry.(type) {
case *zygo.SexpStr: case *zygo.SexpStr:
row = append(row, sexptype.S) row = append(row, sexptype.S)
default: default:
return userdata, false, errors.New("hash values should be string ") return data, false, errors.New("hash values should be string ")
} }
} }
default: default:
return userdata, false, errors.New("returned array should contain hashes ") return data, false, errors.New("returned array should contain hashes ")
} }
userdata.entries = append(userdata.entries, row) userdata.entries = append(userdata.entries, row)
@@ -315,5 +315,5 @@ func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
userdata.headers = data.headers userdata.headers = data.headers
return userdata, result, nil return &userdata, result, nil
} }

View File

@@ -33,11 +33,31 @@ import (
Parser switch Parser switch
*/ */
func Parse(conf cfg.Config, input io.Reader) (Tabdata, error) { func Parse(conf cfg.Config, input io.Reader) (Tabdata, error) {
var data Tabdata
var err error
// first step, parse the data
if len(conf.Separator) == 1 { if len(conf.Separator) == 1 {
return parseCSV(conf, input) data, err = parseCSV(conf, input)
} else {
data, err = parseTabular(conf, input)
} }
return parseTabular(conf, input) if err != nil {
return data, err
}
// 2nd step, apply filters, code or transposers, if any
postdata, changed, err := PostProcess(conf, &data)
if err != nil {
return data, err
}
if changed {
return *postdata, nil
}
return data, err
} }
/* /*
@@ -77,16 +97,6 @@ func parseCSV(conf cfg.Config, input io.Reader) (Tabdata, error) {
} }
} }
// apply user defined lisp process hooks, if any
userdata, changed, err := RunProcessHooks(conf, data)
if err != nil {
return data, fmt.Errorf("failed to apply filter hook: %w", err)
}
if changed {
data = userdata
}
return data, nil return data, nil
} }
@@ -174,43 +184,53 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
return data, fmt.Errorf("failed to read from io.Reader: %w", scanner.Err()) return data, fmt.Errorf("failed to read from io.Reader: %w", scanner.Err())
} }
return data, nil
}
func PostProcess(conf cfg.Config, data *Tabdata) (*Tabdata, bool, error) {
var modified bool
// filter by field filters, if any // filter by field filters, if any
filtereddata, changed, err := FilterByFields(conf, &data) filtereddata, changed, err := FilterByFields(conf, data)
if err != nil { if err != nil {
return data, fmt.Errorf("failed to filter fields: %w", err) return data, false, fmt.Errorf("failed to filter fields: %w", err)
} }
if changed { if changed {
data = *filtereddata data = filtereddata
modified = true
}
// check if transposers are valid and turn into Transposer structs
if err := PrepareTransposerColumns(&conf, data); err != nil {
return data, false, err
} }
// transpose if demanded // transpose if demanded
if err := PrepareTransposerColumns(&conf, &data); err != nil { modifieddata, changed, err := TransposeFields(conf, data)
return data, err
}
modifieddata, changed, err := TransposeFields(conf, &data)
if err != nil { if err != nil {
return data, fmt.Errorf("failed to transpose fields: %w", err) return data, false, fmt.Errorf("failed to transpose fields: %w", err)
} }
if changed { if changed {
data = *modifieddata data = modifieddata
modified = true
} }
// apply user defined lisp process hooks, if any // apply user defined lisp process hooks, if any
userdata, changed, err := RunProcessHooks(conf, data) userdata, changed, err := RunProcessHooks(conf, data)
if err != nil { if err != nil {
return data, fmt.Errorf("failed to apply filter hook: %w", err) return data, false, fmt.Errorf("failed to apply filter hook: %w", err)
} }
if changed { if changed {
data = userdata data = userdata
modified = true
} }
if conf.Debug { if conf.Debug {
repr.Print(data) repr.Print(data)
} }
return data, nil return data, modified, nil
} }

6
t/testtable.csv Normal file
View File

@@ -0,0 +1,6 @@
NAME,DURATION
x,10
a,100
z,0
u,4
k,6
1 NAME DURATION
2 x 10
3 a 100
4 z 0
5 u 4
6 k 6

View File

@@ -133,7 +133,7 @@
.\" ======================================================================== .\" ========================================================================
.\" .\"
.IX Title "TABLIZER 1" .IX Title "TABLIZER 1"
.TH TABLIZER 1 "2025-01-12" "1" "User Commands" .TH TABLIZER 1 "2025-01-13" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents. .\" way too many mistakes in technical documents.
.if n .ad l .if n .ad l
@@ -351,6 +351,25 @@ We want to see only the \s-1CMD\s0 column and use a regex for this:
.Ve .Ve
.PP .PP
where \*(L"C\*(R" is our regexp which matches \s-1CMD.\s0 where \*(L"C\*(R" is our regexp which matches \s-1CMD.\s0
.SS "\s-1TRANSPOSE FIELDS USING REGEXPS\s0"
.IX Subsection "TRANSPOSE FIELDS USING REGEXPS"
You can manipulate field contents using regular expressions. You have
to tell tablizer which field[s] to operate on using the option \f(CW\*(C`\-T\*(C'\fR
and the search/replace pattern using \f(CW\*(C`\-R\*(C'\fR. The number of columns and
patterns must match.
.PP
A search/replace pattern consists of the following elements:
.PP
.Vb 1
\& /search\-regexp/replace\-string/
.Ve
.PP
The separator can be any valid character. Especially if you want to
use a regexp containing the \f(CW\*(C`/\*(C'\fR character, eg:
.PP
.Vb 1
\& |search\-regexp|replace\-string|
.Ve
.SS "\s-1OUTPUT MODES\s0" .SS "\s-1OUTPUT MODES\s0"
.IX Subsection "OUTPUT MODES" .IX Subsection "OUTPUT MODES"
There might be cases when the tabular output of a program is way too There might be cases when the tabular output of a program is way too

View File

@@ -206,6 +206,40 @@ We want to see only the CMD column and use a regex for this:
where "C" is our regexp which matches CMD. where "C" is our regexp which matches CMD.
=head2 TRANSPOSE FIELDS USING REGEXPS
You can manipulate field contents using regular expressions. You have
to tell tablizer which field[s] to operate on using the option C<-T>
and the search/replace pattern using C<-R>. The number of columns and
patterns must match.
A search/replace pattern consists of the following elements:
/search-regexp/replace-string/
The separator can be any valid character. This is especially useful if
you want to use a regexp containing the C</> character, e.g.:
|search-regexp|replace-string|
Example:
cat t/testtable2
NAME DURATION
x 10
a 100
z 0
u 4
k 6
cat t/testtable2 | tablizer -T2 -R '/^\d/4/' -n
NAME DURATION
x 40
a 400
z 4
u 4
k 4
=head2 OUTPUT MODES =head2 OUTPUT MODES
There might be cases when the tabular output of a program is way too There might be cases when the tabular output of a program is way too