Merge branch 'development'

2025-12-16 20:20:57 +01:00 · 2022-10-05 19:17:34 +02:00
parent 5be18e27c9 85277bbf5e
commit 196833ed3c
9 changed files with 204 additions and 163 deletions
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -87,7 +87,7 @@ func init() {
 	rootCmd.PersistentFlags().BoolVarP(&lib.ShowVersion, "version", "V", false, "Print program version")
 	rootCmd.PersistentFlags().BoolVarP(&lib.InvertMatch, "invert-match", "v", false, "select non-matching rows")
 	rootCmd.PersistentFlags().BoolVarP(&ShowManual, "man", "m", false, "Display manual page")
-	rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", "", "Custom field separator")
+	rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", lib.DefaultSeparator, "Custom field separator")
 	rootCmd.PersistentFlags().StringVarP(&lib.Columns, "columns", "c", "", "Only show the speficied columns (separated by ,)")

 	// output flags, only 1 allowed, hidden, since just short cuts
--- a/lib/common.go
+++ b/lib/common.go
@@ -19,25 +19,26 @@ package lib

 var (
 	// command line flags
-	Debug           bool
-	XtendedOut      bool
-	NoNumbering     bool
-	ShowVersion     bool
-	Columns         string
-	UseColumns      []int
-	Separator       string
-	OutflagExtended bool
-	OutflagMarkdown bool
-	OutflagOrgtable bool
-	OutflagShell    bool
-	OutputMode      string
-	InvertMatch     bool
+	Debug            bool
+	XtendedOut       bool
+	NoNumbering      bool
+	ShowVersion      bool
+	Columns          string
+	UseColumns       []int
+	DefaultSeparator string = `(\s\s+|\t)`
+	Separator        string = `(\s\s+|\t)`
+	OutflagExtended  bool
+	OutflagMarkdown  bool
+	OutflagOrgtable  bool
+	OutflagShell     bool
+	OutputMode       string
+	InvertMatch      bool

 	// used for validation
 	validOutputmodes = "(orgtbl|markdown|extended|ascii)"

 	// main program version
-	Version = "v1.0.5"
+	Version = "v1.0.6"

 	// generated  version string, used  by -v contains  lib.Version on
 	//  main  branch,   and  lib.Version-$branch-$lastcommit-$date  on
--- a/lib/helpers.go
+++ b/lib/helpers.go
@@ -48,8 +48,49 @@ func PrepareColumns() error {
 	return nil
 }

+func numberizeHeaders(data *Tabdata) {
+	// prepare headers: add numbers to headers
+	numberedHeaders := []string{}
+	for i, head := range data.headers {
+		if len(Columns) > 0 {
+			// -c specified
+			if !contains(UseColumns, i+1) {
+				// ignore this one
+				continue
+			}
+		}
+		if NoNumbering {
+			numberedHeaders = append(numberedHeaders, head)
+		} else {
+			numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
+		}
+	}
+	data.headers = numberedHeaders
+}
+
+func reduceColumns(data *Tabdata) {
+	// exclude columns, if any
+	if len(Columns) > 0 {
+		reducedEntries := [][]string{}
+		reducedEntry := []string{}
+		for _, entry := range data.entries {
+			reducedEntry = nil
+			for i, value := range entry {
+				if !contains(UseColumns, i+1) {
+					continue
+				}
+
+				reducedEntry = append(reducedEntry, value)
+			}
+			reducedEntries = append(reducedEntries, reducedEntry)
+		}
+		data.entries = reducedEntries
+	}
+}
+
 func PrepareModeFlags() error {
 	if len(OutputMode) == 0 {
+		// associate short flags like -X with mode selector
 		switch {
 		case OutflagExtended:
 			OutputMode = "extended"
--- a/lib/helpers_test.go
+++ b/lib/helpers_test.go
@@ -52,6 +52,7 @@ func TestPrepareColumns(t *testing.T) {
 	}{
 		{"1,2,3", []int{1, 2, 3}, false},
 		{"1,2,", []int{}, true},
+		{"a,b", []int{}, true},
 	}

 	for _, tt := range tests {
@@ -71,3 +72,46 @@ func TestPrepareColumns(t *testing.T) {
 		})
 	}
 }
+
+func TestReduceColumns(t *testing.T) {
+	var tests = []struct {
+		expect  [][]string
+		columns []int
+	}{
+		{
+			expect:  [][]string{[]string{"a", "b"}},
+			columns: []int{1, 2},
+		},
+		{
+			expect:  [][]string{[]string{"a", "c"}},
+			columns: []int{1, 3},
+		},
+		{
+			expect:  [][]string{[]string{"a"}},
+			columns: []int{1},
+		},
+		{
+			expect:  [][]string{nil},
+			columns: []int{4},
+		},
+	}
+
+	input := [][]string{[]string{"a", "b", "c"}}
+
+	Columns = "y" // used as a flag with len(Columns)...
+
+	for _, tt := range tests {
+		testname := fmt.Sprintf("reduce-columns-by-%+v", tt.columns)
+		t.Run(testname, func(t *testing.T) {
+			UseColumns = tt.columns
+			data := Tabdata{entries: input}
+			reduceColumns(&data)
+			if !reflect.DeepEqual(data.entries, tt.expect) {
+				t.Errorf("reduceColumns returned invalid data:\ngot: %+v\nexp: %+v", data.entries, tt.expect)
+			}
+		})
+	}
+
+	Columns = "" // reset for other tests
+	UseColumns = nil
+}
--- a/lib/parser.go
+++ b/lib/parser.go
@@ -29,49 +29,36 @@ import (

 // contains a whole parsed table
 type Tabdata struct {
-	maxwidthHeader int   // longest header
-	maxwidthPerCol []int // max width per column
-	columns        int
-	headerIndices  []map[string]int // [ {beg=>0, end=>17}, ... ]
-	headers        []string         // [ "ID", "NAME", ...]
+	maxwidthHeader int      // longest header
+	maxwidthPerCol []int    // max width per column
+	columns        int      // count
+	headers        []string // [ "ID", "NAME", ...]
 	entries        [][]string
 }

 /*
-   Parse tabular input. We split the  header (first line) by 2 or more
-   spaces, remember the positions of  the header fields. We then split
-   the data (everything after the first line) by those positions. That
-   way we can turn "tabular data" (with fields containing whitespaces)
-   into real tabular data. We re-tabulate our input if you will.
+   Parse tabular input.
 */
 func parseFile(input io.Reader, pattern string) (Tabdata, error) {
 	data := Tabdata{}

 	var scanner *bufio.Scanner
-	var spaces = `\s\s+|$`
-
-	if len(Separator) > 0 {
-		spaces = Separator
-	}

 	hadFirst := false
-	spacefinder := regexp.MustCompile(spaces)
-	beg := 0
+	separate := regexp.MustCompile(Separator)
+	patternR, err := regexp.Compile(pattern)
+	if err != nil {
+		return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
+	}

 	scanner = bufio.NewScanner(input)

 	for scanner.Scan() {
 		line := strings.TrimSpace(scanner.Text())
-		values := []string{}
-
-		patternR, err := regexp.Compile(pattern)
-		if err != nil {
-			return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
-		}
+		parts := separate.Split(line, -1)

 		if !hadFirst {
 			// header processing
-			parts := spacefinder.FindAllStringIndex(line, -1)
 			data.columns = len(parts)
 			// if Debug {
 			// 	fmt.Println(parts)
@@ -83,30 +70,14 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
 				// 	fmt.Printf("Part: <%s>\n", string(line[beg:part[0]]))
 				//}

-				// current field
-				head := string(line[beg:part[0]])
-
-				// register begin and end of field within line
-				indices := make(map[string]int)
-				indices["beg"] = beg
-				if part[0] == part[1] {
-					indices["end"] = 0
-				} else {
-					indices["end"] = part[1] - 1
-				}
-
 				// register widest header field
-				headerlen := len(head)
+				headerlen := len(part)
 				if headerlen > data.maxwidthHeader {
 					data.maxwidthHeader = headerlen
 				}

 				// register fields data
-				data.headerIndices = append(data.headerIndices, indices)
-				data.headers = append(data.headers, head)
-
-				// end of current field == begin of next one
-				beg = part[1]
+				data.headers = append(data.headers, strings.TrimSpace(part))

 				// done
 				hadFirst = true
@@ -124,16 +95,9 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
 			}

 			idx := 0 // we cannot use the header index, because we could exclude columns
-			for _, index := range data.headerIndices {
-				value := ""
-
-				if index["end"] == 0 {
-					value = string(line[index["beg"]:])
-				} else {
-					value = string(line[index["beg"]:index["end"]])
-				}
-
-				width := len(strings.TrimSpace(value))
+			values := []string{}
+			for _, part := range parts {
+				width := len(strings.TrimSpace(part))

 				if len(data.maxwidthPerCol)-1 < idx {
 					data.maxwidthPerCol = append(data.maxwidthPerCol, width)
@@ -146,7 +110,7 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
 				// if Debug {
 				// 	fmt.Printf("<%s> ", value)
 				// }
-				values = append(values, strings.TrimSpace(value))
+				values = append(values, strings.TrimSpace(part))

 				idx++
 			}
--- a/lib/parser_test.go
+++ b/lib/parser_test.go
@@ -28,40 +28,18 @@ func TestParser(t *testing.T) {
 	data := Tabdata{
 		maxwidthHeader: 5,
 		maxwidthPerCol: []int{
-			5,
-			5,
-			8,
+			5, 5, 8,
 		},
 		columns: 3,
-		headerIndices: []map[string]int{
-			map[string]int{
-				"beg": 0,
-				"end": 6,
-			},
-			map[string]int{
-				"end": 13,
-				"beg": 7,
-			},
-			map[string]int{
-				"beg": 14,
-				"end": 0,
-			},
-		},
 		headers: []string{
-			"ONE",
-			"TWO",
-			"THREE",
+			"ONE", "TWO", "THREE",
 		},
 		entries: [][]string{
 			[]string{
-				"asd",
-				"igig",
-				"cxxxncnc",
+				"asd", "igig", "cxxxncnc",
 			},
 			[]string{
-				"19191",
-				"EDD 1",
-				"X",
+				"19191", "EDD 1", "X",
 			},
 		},
 	}
@@ -72,13 +50,14 @@ asd    igig   cxxxncnc

 	readFd := strings.NewReader(table)
 	gotdata, err := parseFile(readFd, "")
+	Separator = DefaultSeparator

 	if err != nil {
 		t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, gotdata)
 	}

 	if !reflect.DeepEqual(data, gotdata) {
-		t.Errorf("Parser returned invalid data\nExp: %+v\nGot: %+v\n", data, gotdata)
+		t.Errorf("Parser returned invalid data, Regex: %s\nExp: %+v\nGot: %+v\n", Separator, data, gotdata)
 	}
 }

@@ -91,9 +70,7 @@ func TestParserPatternmatching(t *testing.T) {
 		{
 			entries: [][]string{
 				[]string{
-					"asd",
-					"igig",
-					"cxxxncnc",
+					"asd", "igig", "cxxxncnc",
 				},
 			},
 			pattern: "ig",
@@ -102,9 +79,7 @@ func TestParserPatternmatching(t *testing.T) {
 		{
 			entries: [][]string{
 				[]string{
-					"19191",
-					"EDD 1",
-					"X",
+					"19191", "EDD 1", "X",
 				},
 			},
 			pattern: "ig",
--- a/lib/printer.go
+++ b/lib/printer.go
@@ -26,41 +26,10 @@ import (
 )

 func printData(data *Tabdata) {
-	// prepare headers: add numbers to headers
-	numberedHeaders := []string{}
-	for i, head := range data.headers {
-		if len(Columns) > 0 {
-			// -c specified
-			if !contains(UseColumns, i+1) {
-				// ignore this one
-				continue
-			}
-		}
-		if NoNumbering {
-			numberedHeaders = append(numberedHeaders, head)
-		} else {
-			numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
-		}
-	}
-	data.headers = numberedHeaders
-
-	// prepare data
-	if len(Columns) > 0 {
-		reducedEntries := [][]string{}
-		reducedEntry := []string{}
-		for _, entry := range data.entries {
-			reducedEntry = nil
-			for i, value := range entry {
-				if !contains(UseColumns, i+1) {
-					continue
-				}
-
-				reducedEntry = append(reducedEntry, value)
-			}
-			reducedEntries = append(reducedEntries, reducedEntry)
-		}
-		data.entries = reducedEntries
+	if OutputMode != "shell" {
+		numberizeHeaders(data)
 	}
+	reduceColumns(data)

 	switch OutputMode {
 	case "extended":
@@ -190,6 +159,7 @@ func printShellData(data *Tabdata) {
 		var idx int
 		for _, entry := range data.entries {
 			idx = 0
+			shentries := []string{}
 			for i, value := range entry {
 				if len(Columns) > 0 {
 					if !contains(UseColumns, i+1) {
@@ -197,10 +167,10 @@ func printShellData(data *Tabdata) {
 					}
 				}

-				fmt.Printf("%s=\"%s\" ", data.headers[idx], value)
+				shentries = append(shentries, fmt.Sprintf("%s=\"%s\"", data.headers[idx], value))
 				idx++
 			}
-			fmt.Println()
+			fmt.Println(strings.Join(shentries, " "))
 		}
 	}
 }
--- a/lib/printer_test.go
+++ b/lib/printer_test.go
@@ -18,15 +18,33 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 package lib

 import (
+	"fmt"
 	"os"
 	"strings"
 	"testing"
 )

 func TestPrinter(t *testing.T) {
-	table := `ONE    TWO    THREE  
-asd    igig   cxxxncnc  
-19191  EDD 1  X`
+	startdata := Tabdata{
+		maxwidthHeader: 5,
+		maxwidthPerCol: []int{
+			5,
+			5,
+			8,
+		},
+		columns: 3,
+		headers: []string{
+			"ONE", "TWO", "THREE",
+		},
+		entries: [][]string{
+			[]string{
+				"asd", "igig", "cxxxncnc",
+			},
+			[]string{
+				"19191", "EDD 1", "X",
+			},
+		},
+	}

 	expects := map[string]string{
 		"ascii": `ONE(1)	TWO(2)	THREE(3) 
@@ -42,6 +60,8 @@ asd   	igig  	cxxxncnc
 |--------|--------|----------|
 | asd    | igig   | cxxxncnc |
 |  19191 | EDD 1  | X        |`,
+		"shell": `ONE="asd" TWO="igig" THREE="cxxxncnc"
+ONE="19191" TWO="EDD 1" THREE="X"`,
 	}

 	r, w, err := os.Pipe()
@@ -52,27 +72,25 @@ asd   	igig  	cxxxncnc
 	os.Stdout = w

 	for mode, expect := range expects {
-		OutputMode = mode
-		fd := strings.NewReader(table)
-		data, err := parseFile(fd, "")
+		testname := fmt.Sprintf("print-%s", mode)
+		t.Run(testname, func(t *testing.T) {

-		if err != nil {
-			t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, data)
-		}
+			OutputMode = mode
+			data := startdata // we need to reset our mock data, since it's being modified in printData()
+			printData(&data)

-		printData(&data)
+			buf := make([]byte, 1024)
+			n, err := r.Read(buf)
+			if err != nil {
+				t.Fatal(err)
+			}
+			buf = buf[:n]
+			output := strings.TrimSpace(string(buf))

-		buf := make([]byte, 1024)
-		n, err := r.Read(buf)
-		if err != nil {
-			t.Fatal(err)
-		}
-		buf = buf[:n]
-		output := strings.TrimSpace(string(buf))
-
-		if output != expect {
-			t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
-		}
+			if output != expect {
+				t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
+			}
+		})
 	}

 	// Restore
--- a/tablizer.pod
+++ b/tablizer.pod
@@ -24,12 +24,12 @@ tablizer - Manipulate tabular output of other programs

 =head1 DESCRIPTION

-Many  programs generate  tabular  output.  But  sometimes  you need  to
+Many  programs generate  tabular output.   But sometimes  you need  to
 post-process these tables, you may need  to remove one or more columns
-or you may want to filter for  some pattern or you may need the output
-in another program and need to  parse it somehow.  Standard unix tools
-such as  awk(1), grep(1) or column(1)  may help, but sometimes  it's a
-tedious business.
+or you  may want to filter  for some pattern (See  L<PATTERNS>) or you
+may need the  output in another program and need  to parse it somehow.
+Standard unix tools such as awk(1), grep(1) or column(1) may help, but
+sometimes it's a tedious business.

 Let's take  the output of  the tool  kubectl.  It contains  cells with
 withespace and they do not separate columns by TAB characters. This is
@@ -77,6 +77,34 @@ The numbering can be suppressed by using the B<-n> option.
 Finally the  B<-d> option  enables debugging  output which  is mostly
 usefull for the developer.

+=head2 PATTERNS
+
+You can reduce the rows being  displayed by using a regular expression
+pattern.  The regexp syntax is RE2 (mostly PCRE compatible), refer to the syntax cheat
+sheet here: L<https://github.com/google/re2/wiki/Syntax>.  If you want
+to read  a more comprehensive  documentation about the topic  and have
+perl installed you can read it with:
+
+    perldoc perlre
+
+Or read it online: L<https://perldoc.perl.org/perlre>.
+
+A note on  modifiers: the regexp engine used in  tablizer uses another
+modifier syntax:
+
+    (?MODIFIER)
+
+The most important modifiers are:
+
+    i  ignore case
+    m  multiline mode
+    s  single line mode
+
+Example for a case insensitive search:
+
+    kubectl get pods -A | tablizer "(?i)account"
+
+
 =head2 OUTPUT MODES

 There might be cases  when the tabular output of a  program is way too