From 85277bbf5ea1cc60bd8159e8e0f09a89b3e83b50 Mon Sep 17 00:00:00 2001
From: Thomas von Dein <tom@vondein.org>
Date: Wed, 5 Oct 2022 14:31:01 +0200
Subject: [PATCH] more refactoring, fixed bug in shell mode output, fixed
 default Separator and fixed #3

---
 cmd/root.go         |  2 +-
 lib/common.go       | 29 +++++++++++-----------
 lib/helpers.go      | 41 +++++++++++++++++++++++++++++++
 lib/helpers_test.go | 44 +++++++++++++++++++++++++++++++++
 lib/parser.go       |  1 -
 lib/parser_test.go  | 25 ++++++-------------
 lib/printer.go      | 42 +++++--------------------------
 lib/printer_test.go | 60 +++++++++++++++++++++++++++++----------------
 tablizer.pod        | 38 ++++++++++++++++++++++++----
 9 files changed, 186 insertions(+), 96 deletions(-)

diff --git a/cmd/root.go b/cmd/root.go
index c8db5ec..441287b 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -87,7 +87,7 @@ func init() {
 	rootCmd.PersistentFlags().BoolVarP(&lib.ShowVersion, "version", "V", false, "Print program version")
 	rootCmd.PersistentFlags().BoolVarP(&lib.InvertMatch, "invert-match", "v", false, "select non-matching rows")
 	rootCmd.PersistentFlags().BoolVarP(&ShowManual, "man", "m", false, "Display manual page")
-	rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", "", "Custom field separator")
+	rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", lib.DefaultSeparator, "Custom field separator")
 	rootCmd.PersistentFlags().StringVarP(&lib.Columns, "columns", "c", "", "Only show the speficied columns (separated by ,)")
 
 	// output flags, only 1 allowed, hidden, since just short cuts
diff --git a/lib/common.go b/lib/common.go
index ee0fb79..8ea392f 100644
--- a/lib/common.go
+++ b/lib/common.go
@@ -19,25 +19,26 @@ package lib
 
 var (
 	// command line flags
-	Debug           bool
-	XtendedOut      bool
-	NoNumbering     bool
-	ShowVersion     bool
-	Columns         string
-	UseColumns      []int
-	Separator       string = `(\s\s+|\t)`
-	OutflagExtended bool
-	OutflagMarkdown bool
-	OutflagOrgtable bool
-	OutflagShell    bool
-	OutputMode      string
-	InvertMatch     bool
+	Debug            bool
+	XtendedOut       bool
+	NoNumbering      bool
+	ShowVersion      bool
+	Columns          string
+	UseColumns       []int
+	DefaultSeparator string = `(\s\s+|\t)`
+	Separator        string = `(\s\s+|\t)`
+	OutflagExtended  bool
+	OutflagMarkdown  bool
+	OutflagOrgtable  bool
+	OutflagShell     bool
+	OutputMode       string
+	InvertMatch      bool
 
 	// used for validation
 	validOutputmodes = "(orgtbl|markdown|extended|ascii)"
 
 	// main program version
-	Version = "v1.0.5"
+	Version = "v1.0.6"
 
 	// generated  version string, used  by -v contains  lib.Version on
 	//  main  branch,   and  lib.Version-$branch-$lastcommit-$date  on
diff --git a/lib/helpers.go b/lib/helpers.go
index 6d047a0..f644da2 100644
--- a/lib/helpers.go
+++ b/lib/helpers.go
@@ -48,8 +48,49 @@ func PrepareColumns() error {
 	return nil
 }
 
+func numberizeHeaders(data *Tabdata) {
+	// replace data.headers: drop unselected columns and, unless NoNumbering is set, append "(N)" where N is the original 1-based position
+	numberedHeaders := []string{}
+	for i, head := range data.headers {
+		if len(Columns) > 0 {
+			// -c specified: UseColumns is matched against the 1-based column position
+			if !contains(UseColumns, i+1) {
+				// not a selected column, leave its header out
+				continue
+			}
+		}
+		if NoNumbering {
+			numberedHeaders = append(numberedHeaders, head)
+		} else {
+			numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
+		}
+	}
+	data.headers = numberedHeaders
+}
+
+func reduceColumns(data *Tabdata) {
+	// when -c is in effect (Columns non-empty), replace data.entries with rows holding only the cells whose 1-based position is in UseColumns
+	if len(Columns) > 0 {
+		reducedEntries := [][]string{}
+		reducedEntry := []string{}
+		for _, entry := range data.entries {
+			reducedEntry = nil // start a fresh row; a row without any selected cell stays nil
+			for i, value := range entry {
+				if !contains(UseColumns, i+1) {
+					continue
+				}
+
+				reducedEntry = append(reducedEntry, value)
+			}
+			reducedEntries = append(reducedEntries, reducedEntry)
+		}
+		data.entries = reducedEntries
+	}
+}
+
 func PrepareModeFlags() error {
 	if len(OutputMode) == 0 {
+		// associate short flags like -X with mode selector
 		switch {
 		case OutflagExtended:
 			OutputMode = "extended"
diff --git a/lib/helpers_test.go b/lib/helpers_test.go
index b02bb9b..cf0ae53 100644
--- a/lib/helpers_test.go
+++ b/lib/helpers_test.go
@@ -52,6 +52,7 @@ func TestPrepareColumns(t *testing.T) {
 	}{
 		{"1,2,3", []int{1, 2, 3}, false},
 		{"1,2,", []int{}, true},
+		{"a,b", []int{}, true},
 	}
 
 	for _, tt := range tests {
@@ -71,3 +72,46 @@ func TestPrepareColumns(t *testing.T) {
 		})
 	}
 }
+
+func TestReduceColumns(t *testing.T) { // table-driven check of reduceColumns against one three-cell row
+	var tests = []struct {
+		expect  [][]string
+		columns []int
+	}{
+		{
+			expect:  [][]string{[]string{"a", "b"}},
+			columns: []int{1, 2},
+		},
+		{
+			expect:  [][]string{[]string{"a", "c"}},
+			columns: []int{1, 3},
+		},
+		{
+			expect:  [][]string{[]string{"a"}},
+			columns: []int{1},
+		},
+		{
+			expect:  [][]string{nil}, // column 4 does not exist in the input, so the row stays nil
+			columns: []int{4},
+		},
+	}
+
+	input := [][]string{[]string{"a", "b", "c"}}
+
+	Columns = "y" // any non-empty value works: reduceColumns only checks len(Columns) > 0
+
+	for _, tt := range tests {
+		testname := fmt.Sprintf("reduce-columns-by-%+v", tt.columns)
+		t.Run(testname, func(t *testing.T) {
+			UseColumns = tt.columns
+			data := Tabdata{entries: input}
+			reduceColumns(&data)
+			if !reflect.DeepEqual(data.entries, tt.expect) {
+				t.Errorf("reduceColumns returned invalid data:\ngot: %+v\nexp: %+v", data.entries, tt.expect)
+			}
+		})
+	}
+
+	Columns = "" // restore the package-level flags so later tests are not affected
+	UseColumns = nil
+}
diff --git a/lib/parser.go b/lib/parser.go
index e1b4c89..981ceda 100644
--- a/lib/parser.go
+++ b/lib/parser.go
@@ -46,7 +46,6 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
 
 	hadFirst := false
 	separate := regexp.MustCompile(Separator)
-
 	patternR, err := regexp.Compile(pattern)
 	if err != nil {
 		return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
diff --git a/lib/parser_test.go b/lib/parser_test.go
index 53efe1d..97312d1 100644
--- a/lib/parser_test.go
+++ b/lib/parser_test.go
@@ -28,26 +28,18 @@ func TestParser(t *testing.T) {
 	data := Tabdata{
 		maxwidthHeader: 5,
 		maxwidthPerCol: []int{
-			5,
-			5,
-			8,
+			5, 5, 8,
 		},
 		columns: 3,
 		headers: []string{
-			"ONE",
-			"TWO",
-			"THREE",
+			"ONE", "TWO", "THREE",
 		},
 		entries: [][]string{
 			[]string{
-				"asd",
-				"igig",
-				"cxxxncnc",
+				"asd", "igig", "cxxxncnc",
 			},
 			[]string{
-				"19191",
-				"EDD 1",
-				"X",
+				"19191", "EDD 1", "X",
 			},
 		},
 	}
@@ -58,6 +50,7 @@ asd    igig   cxxxncnc
 
 	readFd := strings.NewReader(table)
 	gotdata, err := parseFile(readFd, "")
+	Separator = DefaultSeparator
 
 	if err != nil {
 		t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, gotdata)
@@ -77,9 +70,7 @@ func TestParserPatternmatching(t *testing.T) {
 		{
 			entries: [][]string{
 				[]string{
-					"asd",
-					"igig",
-					"cxxxncnc",
+					"asd", "igig", "cxxxncnc",
 				},
 			},
 			pattern: "ig",
@@ -88,9 +79,7 @@ func TestParserPatternmatching(t *testing.T) {
 		{
 			entries: [][]string{
 				[]string{
-					"19191",
-					"EDD 1",
-					"X",
+					"19191", "EDD 1", "X",
 				},
 			},
 			pattern: "ig",
diff --git a/lib/printer.go b/lib/printer.go
index 937757d..db0844d 100644
--- a/lib/printer.go
+++ b/lib/printer.go
@@ -26,41 +26,10 @@ import (
 )
 
 func printData(data *Tabdata) {
-	// prepare headers: add numbers to headers
-	numberedHeaders := []string{}
-	for i, head := range data.headers {
-		if len(Columns) > 0 {
-			// -c specified
-			if !contains(UseColumns, i+1) {
-				// ignore this one
-				continue
-			}
-		}
-		if NoNumbering {
-			numberedHeaders = append(numberedHeaders, head)
-		} else {
-			numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
-		}
-	}
-	data.headers = numberedHeaders
-
-	// prepare data
-	if len(Columns) > 0 {
-		reducedEntries := [][]string{}
-		reducedEntry := []string{}
-		for _, entry := range data.entries {
-			reducedEntry = nil
-			for i, value := range entry {
-				if !contains(UseColumns, i+1) {
-					continue
-				}
-
-				reducedEntry = append(reducedEntry, value)
-			}
-			reducedEntries = append(reducedEntries, reducedEntry)
-		}
-		data.entries = reducedEntries
+	if OutputMode != "shell" {
+		numberizeHeaders(data)
 	}
+	reduceColumns(data)
 
 	switch OutputMode {
 	case "extended":
@@ -190,6 +159,7 @@ func printShellData(data *Tabdata) {
 		var idx int
 		for _, entry := range data.entries {
 			idx = 0
+			shentries := []string{}
 			for i, value := range entry {
 				if len(Columns) > 0 {
 					if !contains(UseColumns, i+1) {
@@ -197,10 +167,10 @@ func printShellData(data *Tabdata) {
 					}
 				}
 
-				fmt.Printf("%s=\"%s\" ", data.headers[idx], value)
+				shentries = append(shentries, fmt.Sprintf("%s=\"%s\"", data.headers[idx], value))
 				idx++
 			}
-			fmt.Println()
+			fmt.Println(strings.Join(shentries, " "))
 		}
 	}
 }
diff --git a/lib/printer_test.go b/lib/printer_test.go
index 9530574..5ced090 100644
--- a/lib/printer_test.go
+++ b/lib/printer_test.go
@@ -18,15 +18,33 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 package lib
 
 import (
+	"fmt"
 	"os"
 	"strings"
 	"testing"
 )
 
 func TestPrinter(t *testing.T) {
-	table := `ONE    TWO    THREE  
-asd    igig   cxxxncnc  
-19191  EDD 1  X`
+	startdata := Tabdata{
+		maxwidthHeader: 5,
+		maxwidthPerCol: []int{
+			5,
+			5,
+			8,
+		},
+		columns: 3,
+		headers: []string{
+			"ONE", "TWO", "THREE",
+		},
+		entries: [][]string{
+			[]string{
+				"asd", "igig", "cxxxncnc",
+			},
+			[]string{
+				"19191", "EDD 1", "X",
+			},
+		},
+	}
 
 	expects := map[string]string{
 		"ascii": `ONE(1)	TWO(2)	THREE(3) 
@@ -42,6 +60,8 @@ asd   	igig  	cxxxncnc
 |--------|--------|----------|
 | asd    | igig   | cxxxncnc |
 |  19191 | EDD 1  | X        |`,
+		"shell": `ONE="asd" TWO="igig" THREE="cxxxncnc"
+ONE="19191" TWO="EDD 1" THREE="X"`,
 	}
 
 	r, w, err := os.Pipe()
@@ -52,27 +72,25 @@ asd   	igig  	cxxxncnc
 	os.Stdout = w
 
 	for mode, expect := range expects {
-		OutputMode = mode
-		fd := strings.NewReader(table)
-		data, err := parseFile(fd, "")
+		testname := fmt.Sprintf("print-%s", mode)
+		t.Run(testname, func(t *testing.T) {
 
-		if err != nil {
-			t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, data)
-		}
+			OutputMode = mode
+			data := startdata // we need to reset our mock data, since it's being modified in printData()
+			printData(&data)
 
-		printData(&data)
+			buf := make([]byte, 1024)
+			n, err := r.Read(buf)
+			if err != nil {
+				t.Fatal(err)
+			}
+			buf = buf[:n]
+			output := strings.TrimSpace(string(buf))
 
-		buf := make([]byte, 1024)
-		n, err := r.Read(buf)
-		if err != nil {
-			t.Fatal(err)
-		}
-		buf = buf[:n]
-		output := strings.TrimSpace(string(buf))
-
-		if output != expect {
-			t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
-		}
+			if output != expect {
+				t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
+			}
+		})
 	}
 
 	// Restore
diff --git a/tablizer.pod b/tablizer.pod
index f3137e4..37c31ad 100644
--- a/tablizer.pod
+++ b/tablizer.pod
@@ -24,12 +24,12 @@ tablizer - Manipulate tabular output of other programs
 
 =head1 DESCRIPTION
 
-Many  programs generate  tabular  output.  But  sometimes  you need  to
+Many  programs generate  tabular output.   But sometimes  you need  to
 post-process these tables, you may need  to remove one or more columns
-or you may want to filter for  some pattern or you may need the output
-in another program and need to  parse it somehow.  Standard unix tools
-such as  awk(1), grep(1) or column(1)  may help, but sometimes  it's a
-tedious business.
+or you  may want to filter  for some pattern (See  L<PATTERNS>) or you
+may need the  output in another program and need  to parse it somehow.
+Standard unix tools such as awk(1), grep(1) or column(1) may help, but
+sometimes it's a tedious business.
 
 Let's take  the output of  the tool  kubectl.  It contains  cells with
 withespace and they do not separate columns by TAB characters. This is
@@ -77,6 +77,34 @@ The numbering can be suppressed by using the B<-n> option.
 Finally the  B<-d> option  enables debugging  output which  is mostly
 usefull for the developer.
 
+=head2 PATTERNS
+
+You can reduce the rows being displayed by using a regular expression
+pattern.  The regexp uses RE2 syntax, which is mostly but not fully PCRE
+compatible; see the cheat sheet: L<https://github.com/google/re2/wiki/Syntax>.
+If you want to read more comprehensive documentation about the topic and
+have perl installed you can read it with:
+
+    perldoc perlre
+
+Or read it online: L<https://perldoc.perl.org/perlre>.
+
+A note on  modifiers: the regexp engine used in  tablizer uses another
+modifier syntax:
+
+    (?MODIFIER)
+
+The most important modifiers are:
+
+    i  ignore case
+    m  multiline mode
+    s  single line mode
+
+Example for a case insensitive search:
+
+    kubectl get pods -A | tablizer "(?i)account"
+
+
 =head2 OUTPUT MODES
 
 There might be cases  when the tabular output of a  program is way too