added support for regexp in -c parameter, added deduplication as well

2026-02-04 10:20:59 +01:00 · 2022-10-15 14:03:30 +02:00
parent aef545d51e
commit 4ec6ccd0fd
9 changed files with 163 additions and 20 deletions
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -60,12 +60,7 @@ var rootCmd = &cobra.Command{
 			return nil
 		}
-		err := lib.PrepareColumns()
+		err := lib.PrepareModeFlags()
 		if err != nil {
 			return err
 		}
 		err = lib.PrepareModeFlags()
 		if err != nil {
 			return err
 		}
--- a/cmd/tablizer.go
+++ b/cmd/tablizer.go
@@ -64,7 +64,7 @@ DESCRIPTION
       NAME(1) READY(2) STATUS(3) RESTARTS(4) AGE(5)
    These numbers denote the column and you can use them to specify which
-    columns you want to have in your output:
+    columns you want to have in your output (see COLUMNS):
       kubectl get pods | tablizer -c1,3
@@ -107,6 +107,32 @@ DESCRIPTION
        kubectl get pods -A | tablizer "(?i)account"
  COLUMNS
    The parameter -c can be used to specify which columns to display. By
    default tablizer numerizes the header names and these numbers can be
    used to specify which header to display, see example above.
    However, besides numbers, you can also use regular expressions with -c,
    also separated by comma. And you can mix column numbers with regexps.
    Let's take this table:
            PID TTY          TIME CMD
          14001 pts/0    00:00:00 bash
          42871 pts/0    00:00:00 ps
          42872 pts/0    00:00:00 sed
    We want to see only the CMD column and use a regex for this:
        ps | tablizer -s '\s+' -c C
        CMD(4)
        bash
        ps
        tablizer
        sed
    where "C" is our regexp which matches CMD.
  OUTPUT MODES
    There might be cases when the tabular output of a program is way too
    large for your current terminal but you still need to see every column.
--- a/go.mod
+++ b/go.mod
@@ -7,12 +7,13 @@ require (
 	github.com/gookit/color v1.5.2
 	github.com/olekukonko/tablewriter v0.0.5
 	github.com/spf13/cobra v1.5.0
-	github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778
+	github.com/xhit/go-str2duration v1.2.0
 )
 require (
 	github.com/inconshreveable/mousetrap v1.0.0 // indirect
 	github.com/mattn/go-runewidth v0.0.9 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
 	github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect
 	golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44 // indirect
 )
--- a/go.sum
+++ b/go.sum
@@ -24,6 +24,8 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS
 github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
 github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
 github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
 github.com/xhit/go-str2duration v1.2.0 h1:BcV5u025cITWxEQKGWr1URRzrcXtu7uk8+luz3Yuhwc=
 github.com/xhit/go-str2duration v1.2.0/go.mod h1:3cPSlfZlUHVlneIVfePFWcJZsuwf+P1v2SRTV4cUmp4=
 github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 h1:QldyIu/L63oPpyvQmHgvgickp1Yw510KJOqX7H24mg8=
 github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs=
 golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44 h1:Bli41pIlzTzf3KEY06n+xnzK/BESIg2ze4Pgfh/aI8c=
--- a/lib/helpers.go
+++ b/lib/helpers.go
@@ -36,22 +36,56 @@ func contains(s []int, e int) bool {
 	return false
 }
-func PrepareColumns() error {
+// parse columns list given with -c
 func PrepareColumns(data *Tabdata) error {
 	UseColumns = nil
 	if len(Columns) > 0 {
 		for _, use := range strings.Split(Columns, ",") {
-			usenum, err := strconv.Atoi(use)
+			if len(use) == 0 {
-			if err != nil {
+				msg := fmt.Sprintf("Could not parse columns list %s: empty column", Columns)
 				msg := fmt.Sprintf("Could not parse columns list %s: %v", Columns, err)
 				return errors.New(msg)
 			}
-			UseColumns = append(UseColumns, usenum)
+
 			usenum, err := strconv.Atoi(use)
 			if err != nil {
 				// might be a regexp
 				colPattern, err := regexp.Compile(use)
 				if err != nil {
 					msg := fmt.Sprintf("Could not parse columns list %s: %v", Columns, err)
 					return errors.New(msg)
 				}
 				// find matching header fields
 				for i, head := range data.headers {
 					if colPattern.MatchString(head) {
 						UseColumns = append(UseColumns, i+1)
 					}
 				}
 			} else {
 				// we digress from go  best practises here, because if
 				// a colum spec is not a number, we process them above
 				// inside the err handler  for atoi(). so only add the
 				// number, if it's really just a number.
 				UseColumns = append(UseColumns, usenum)
 			}
 		}
 		// deduplicate
 		imap := make(map[int]int)
 		for _, i := range UseColumns {
 			imap[i] = 0
 		}
 		UseColumns = nil
 		for k := range imap {
 			UseColumns = append(UseColumns, k)
 		}
 	}
 	return nil
 }
 // prepare headers: add numbers to headers
 func numberizeHeaders(data *Tabdata) {
 	// prepare headers: add numbers to headers
 	numberedHeaders := []string{}
 	maxwidth := 0 // start from scratch, so we only look at displayed column widths
@@ -83,8 +117,8 @@ func numberizeHeaders(data *Tabdata) {
 	}
 }
 // exclude columns, if any
 func reduceColumns(data *Tabdata) {
 	// exclude columns, if any
 	if len(Columns) > 0 {
 		reducedEntries := [][]string{}
 		var reducedEntry []string
--- a/lib/helpers_test.go
+++ b/lib/helpers_test.go
@@ -45,6 +45,23 @@ func Testcontains(t *testing.T) {
 }
 func TestPrepareColumns(t *testing.T) {
 	data := Tabdata{
 		maxwidthHeader: 5,
 		maxwidthPerCol: []int{
 			5,
 			5,
 			8,
 		},
 		columns: 3,
 		headers: []string{
 			"ONE", "TWO", "THREE",
 		},
 		entries: [][]string{
 			{
 				"2", "3", "4",
 			},
 		},
 	}
 	var tests = []struct {
 		input     string
 		exp       []int
@@ -52,14 +69,15 @@ func TestPrepareColumns(t *testing.T) {
 	}{
 		{"1,2,3", []int{1, 2, 3}, false},
 		{"1,2,", []int{}, true},
-		{"a,b", []int{}, true},
+		{"T", []int{2, 3}, false},
 		{"T,2,3", []int{2, 3}, false},
 	}
 	for _, tt := range tests {
 		testname := fmt.Sprintf("PrepareColumns-%s-%t", tt.input, tt.wanterror)
 		t.Run(testname, func(t *testing.T) {
 			Columns = tt.input
-			err := PrepareColumns()
+			err := PrepareColumns(&data)
 			if err != nil {
 				if !tt.wanterror {
 					t.Errorf("got error: %v", err)
--- a/lib/io.go
+++ b/lib/io.go
@@ -44,6 +44,12 @@ func ProcessFiles(args []string) error {
 		if err != nil {
 			return err
 		}
 		err = PrepareColumns(&data)
 		if err != nil {
 			return err
 		}
 		printData(&data)
 	}
--- a/tablizer.1
+++ b/tablizer.1
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "TABLIZER 1"
-.TH TABLIZER 1 "2022-10-14" "1" "User Commands"
+.TH TABLIZER 1 "2022-10-15" "1" "User Commands"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -209,7 +209,7 @@ have a numer associated with it, e.g.:
 .Ve
 .PP
 These numbers denote the column and  you can use them to specify which
-columns you want to have in your output:
+columns you want to have in your output (see \s-1COLUMNS\s0):
 .PP
 .Vb 1
 \&   kubectl get pods | tablizer \-c1,3
@@ -261,6 +261,38 @@ Example for a case insensitive search:
 .Vb 1
 \&    kubectl get pods \-A | tablizer "(?i)account"
 .Ve
 .SS "\s-1COLUMNS\s0"
 .IX Subsection "COLUMNS"
 The  parameter  \fB\-c\fR  can  be  used  to  specify  which  columns  to
 display.  By default  tablizer numerizes  the header  names and  these
 numbers can  be used to specify  which header to display,  see example
 above.
 .PP
 However, besides numbers, you  can also  use regular  expressions with
 \&\fB\-c\fR, also  separated by comma. And  you can mix column  numbers with
 regexps.
 .PP
 Let's take this table:
 .PP
 .Vb 4
 \&        PID TTY          TIME CMD
 \&      14001 pts/0    00:00:00 bash
 \&      42871 pts/0    00:00:00 ps
 \&      42872 pts/0    00:00:00 sed
 .Ve
 .PP
 We want to see only the \s-1CMD\s0 column and use a regex for this:
 .PP
 .Vb 6
 \&    ps | tablizer \-s \*(Aq\es+\*(Aq \-c C
 \&    CMD(4)
 \&    bash
 \&    ps
 \&    tablizer
 \&    sed
 .Ve
 .PP
 where \*(L"C\*(R" is our regexp which matches \s-1CMD.\s0
 .SS "\s-1OUTPUT MODES\s0"
 .IX Subsection "OUTPUT MODES"
 There might be cases  when the tabular output of a  program is way too
--- a/tablizer.pod
+++ b/tablizer.pod
@@ -67,7 +67,7 @@ have a numer associated with it, e.g.:
   NAME(1) READY(2) STATUS(3) RESTARTS(4) AGE(5)
 These numbers denote the column and  you can use them to specify which
-columns you want to have in your output:
+columns you want to have in your output (see L<COLUMNS>):
   kubectl get pods | tablizer -c1,3
@@ -114,6 +114,35 @@ Example for a case insensitive search:
    kubectl get pods -A | tablizer "(?i)account"
 =head2 COLUMNS
 The  parameter  B<-c>  can  be  used  to  specify  which  columns  to
 display.  By default  tablizer numerizes  the header  names and  these
 numbers can  be used to specify  which header to display,  see example
 above.
 However, besides numbers, you  can also  use regular  expressions with
 B<-c>, also  separated by comma. And  you can mix column  numbers with
 regexps.
 Let's take this table:
        PID TTY          TIME CMD
      14001 pts/0    00:00:00 bash
      42871 pts/0    00:00:00 ps
      42872 pts/0    00:00:00 sed
 We want to see only the CMD column and use a regex for this:
    ps | tablizer -s '\s+' -c C
    CMD(4)
    bash
    ps
    tablizer
    sed
 where "C" is our regexp which matches CMD.
 =head2 OUTPUT MODES
 There might be cases  when the tabular output of a  program is way too