added support for regexp in -c parameter, added deduplication as well

2026-02-04 02:20:56 +01:00 · 2022-10-15 14:03:30 +02:00
parent aef545d51e
commit 4ec6ccd0fd
9 changed files with 163 additions and 20 deletions
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -60,12 +60,7 @@ var rootCmd = &cobra.Command{
 			return nil
 		}

-		err := lib.PrepareColumns()
-		if err != nil {
-			return err
-		}
-
-		err = lib.PrepareModeFlags()
+		err := lib.PrepareModeFlags()
 		if err != nil {
 			return err
 		}
--- a/cmd/tablizer.go
+++ b/cmd/tablizer.go
@@ -64,7 +64,7 @@ DESCRIPTION
       NAME(1) READY(2) STATUS(3) RESTARTS(4) AGE(5)

    These numbers denote the column and you can use them to specify which
-    columns you want to have in your output:
+    columns you want to have in your output (see COLUMNS):

       kubectl get pods | tablizer -c1,3

@@ -107,6 +107,32 @@ DESCRIPTION

        kubectl get pods -A | tablizer "(?i)account"

+  COLUMNS
+    The parameter -c can be used to specify which columns to display. By
+    default tablizer numbers the header names and these numbers can be
+    used to specify which header to display, see example above.
+
+    However, besides numbers, you can also use regular expressions with -c,
+    also separated by comma. And you can mix column numbers with regexps.
+
+    Let's take this table:
+
+            PID TTY          TIME CMD
+          14001 pts/0    00:00:00 bash
+          42871 pts/0    00:00:00 ps
+          42872 pts/0    00:00:00 sed
+
+    We want to see only the CMD column and use a regex for this:
+
+        ps | tablizer -s '\s+' -c C
+        CMD(4)
+        bash
+        ps
+        tablizer
+        sed
+
+    where "C" is our regexp which matches CMD.
+
  OUTPUT MODES
    There might be cases when the tabular output of a program is way too
    large for your current terminal but you still need to see every column.
--- a/go.mod
+++ b/go.mod
@@ -7,12 +7,13 @@ require (
 	github.com/gookit/color v1.5.2
 	github.com/olekukonko/tablewriter v0.0.5
 	github.com/spf13/cobra v1.5.0
-	github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778
+	github.com/xhit/go-str2duration v1.2.0
 )

 require (
 	github.com/inconshreveable/mousetrap v1.0.0 // indirect
 	github.com/mattn/go-runewidth v0.0.9 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
+	github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect
 	golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -24,6 +24,8 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
+github.com/xhit/go-str2duration v1.2.0 h1:BcV5u025cITWxEQKGWr1URRzrcXtu7uk8+luz3Yuhwc=
+github.com/xhit/go-str2duration v1.2.0/go.mod h1:3cPSlfZlUHVlneIVfePFWcJZsuwf+P1v2SRTV4cUmp4=
 github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 h1:QldyIu/L63oPpyvQmHgvgickp1Yw510KJOqX7H24mg8=
 github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs=
 golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44 h1:Bli41pIlzTzf3KEY06n+xnzK/BESIg2ze4Pgfh/aI8c=
--- a/lib/helpers.go
+++ b/lib/helpers.go
@@ -36,22 +36,56 @@ func contains(s []int, e int) bool {
 	return false
 }

-func PrepareColumns() error {
+// PrepareColumns parses the -c list (column numbers and/or regexps) into UseColumns.
+func PrepareColumns(data *Tabdata) error {
+	UseColumns = nil
 	if len(Columns) > 0 {
 		for _, use := range strings.Split(Columns, ",") {
-			usenum, err := strconv.Atoi(use)
-			if err != nil {
-				msg := fmt.Sprintf("Could not parse columns list %s: %v", Columns, err)
+			if len(use) == 0 {
+				msg := fmt.Sprintf("Could not parse columns list %s: empty column", Columns)
 				return errors.New(msg)
 			}
-			UseColumns = append(UseColumns, usenum)
+
+			usenum, err := strconv.Atoi(use)
+			if err != nil {
+				// not a number, so treat the spec as a regexp
+				colPattern, err := regexp.Compile(use)
+				if err != nil {
+					msg := fmt.Sprintf("Could not parse columns list %s: %v", Columns, err)
+					return errors.New(msg)
+				}
+
+				// select every header matching the pattern (columns are 1-based)
+				for i, head := range data.headers {
+					if colPattern.MatchString(head) {
+						UseColumns = append(UseColumns, i+1)
+					}
+
+				}
+			} else {
+				// we digress from Go best practices here, because if
+				// a column spec is not a number, we process it above
+				// inside the err handler for Atoi(). So only add the
+				// number if it's really just a number.
+				UseColumns = append(UseColumns, usenum)
+			}
+		}
+
+		// deduplicate, keeping first-seen order (map iteration order is random)
+		all, seen := UseColumns, make(map[int]bool, len(UseColumns))
+		UseColumns = nil
+		for _, col := range all {
+			if !seen[col] {
+				seen[col] = true
+				UseColumns = append(UseColumns, col)
+			}
 		}
 	}
 	return nil
 }

+// prepare headers: add numbers to headers
 func numberizeHeaders(data *Tabdata) {
-	// prepare headers: add numbers to headers
 	numberedHeaders := []string{}
 	maxwidth := 0 // start from scratch, so we only look at displayed column widths

@@ -83,8 +117,8 @@ func numberizeHeaders(data *Tabdata) {
 	}
 }

+// exclude columns, if any
 func reduceColumns(data *Tabdata) {
-	// exclude columns, if any
 	if len(Columns) > 0 {
 		reducedEntries := [][]string{}
 		var reducedEntry []string
--- a/lib/helpers_test.go
+++ b/lib/helpers_test.go
@@ -45,6 +45,23 @@ func Testcontains(t *testing.T) {
 }

 func TestPrepareColumns(t *testing.T) {
+	data := Tabdata{
+		maxwidthHeader: 5,
+		maxwidthPerCol: []int{
+			5,
+			5,
+			8,
+		},
+		columns: 3,
+		headers: []string{
+			"ONE", "TWO", "THREE",
+		},
+		entries: [][]string{
+			{
+				"2", "3", "4",
+			},
+		},
+	}
 	var tests = []struct {
 		input     string
 		exp       []int
@@ -52,14 +69,15 @@ func TestPrepareColumns(t *testing.T) {
 	}{
 		{"1,2,3", []int{1, 2, 3}, false},
 		{"1,2,", []int{}, true},
-		{"a,b", []int{}, true},
+		{"T", []int{2, 3}, false},
+		{"T,2,3", []int{2, 3}, false},
 	}

 	for _, tt := range tests {
 		testname := fmt.Sprintf("PrepareColumns-%s-%t", tt.input, tt.wanterror)
 		t.Run(testname, func(t *testing.T) {
 			Columns = tt.input
-			err := PrepareColumns()
+			err := PrepareColumns(&data)
 			if err != nil {
 				if !tt.wanterror {
 					t.Errorf("got error: %v", err)
--- a/lib/io.go
+++ b/lib/io.go
@@ -44,6 +44,12 @@ func ProcessFiles(args []string) error {
 		if err != nil {
 			return err
 		}
+
+		err = PrepareColumns(&data)
+		if err != nil {
+			return err
+		}
+
 		printData(&data)
 	}

--- a/tablizer.1
+++ b/tablizer.1
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "TABLIZER 1"
-.TH TABLIZER 1 "2022-10-14" "1" "User Commands"
+.TH TABLIZER 1 "2022-10-15" "1" "User Commands"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -209,7 +209,7 @@ have a numer associated with it, e.g.:
 .Ve
 .PP
 These numbers denote the column and  you can use them to specify which
-columns you want to have in your output:
+columns you want to have in your output (see \s-1COLUMNS\s0):
 .PP
 .Vb 1
 \&   kubectl get pods | tablizer \-c1,3
@@ -261,6 +261,38 @@ Example for a case insensitive search:
 .Vb 1
 \&    kubectl get pods \-A | tablizer "(?i)account"
 .Ve
+.SS "\s-1COLUMNS\s0"
+.IX Subsection "COLUMNS"
+The  parameter  \fB\-c\fR  can  be  used  to  specify  which  columns  to
+display.  By default  tablizer numbers  the header  names and  these
+numbers can  be used to specify  which header to display,  see example
+above.
+.PP
+However, besides  numbers, you  can also  use regular  expressions with
+\&\fB\-c\fR, also  separated by comma. And  you can mix column  numbers with
+regexps.
+.PP
+Let's take this table:
+.PP
+.Vb 4
+\&        PID TTY          TIME CMD
+\&      14001 pts/0    00:00:00 bash
+\&      42871 pts/0    00:00:00 ps
+\&      42872 pts/0    00:00:00 sed
+.Ve
+.PP
+We want to see only the \s-1CMD\s0 column and use a regex for this:
+.PP
+.Vb 6
+\&    ps | tablizer \-s \*(Aq\es+\*(Aq \-c C
+\&    CMD(4)
+\&    bash
+\&    ps
+\&    tablizer
+\&    sed
+.Ve
+.PP
+where \*(L"C\*(R" is our regexp which matches \s-1CMD.\s0
 .SS "\s-1OUTPUT MODES\s0"
 .IX Subsection "OUTPUT MODES"
 There might be cases  when the tabular output of a  program is way too
--- a/tablizer.pod
+++ b/tablizer.pod
@@ -67,7 +67,7 @@ have a numer associated with it, e.g.:
   NAME(1) READY(2) STATUS(3) RESTARTS(4) AGE(5)

 These numbers denote the column and  you can use them to specify which
-columns you want to have in your output:
+columns you want to have in your output (see L<COLUMNS>):

   kubectl get pods | tablizer -c1,3

@@ -114,6 +114,35 @@ Example for a case insensitive search:
    kubectl get pods -A | tablizer "(?i)account"


+=head2 COLUMNS
+
+The  parameter  B<-c>  can  be  used  to  specify  which  columns  to
+display.  By default  tablizer numbers  the header  names and  these
+numbers can  be used to specify  which header to display,  see example
+above.
+
+However, besides  numbers, you  can also  use regular  expressions with
+B<-c>, also  separated by comma. And  you can mix column  numbers with
+regexps.
+
+Let's take this table:
+
+        PID TTY          TIME CMD
+      14001 pts/0    00:00:00 bash
+      42871 pts/0    00:00:00 ps
+      42872 pts/0    00:00:00 sed
+
+We want to see only the CMD column and use a regex for this:
+
+    ps | tablizer -s '\s+' -c C
+    CMD(4)
+    bash
+    ps
+    tablizer
+    sed
+
+where "C" is our regexp which matches CMD.
+
 =head2 OUTPUT MODES

 There might be cases  when the tabular output of a  program is way too