Merge branch 'development'

more refactoring, fixed bug in shell mode output, fixed default
Separator and fixed #3
2025-12-18 13:01:11 +01:00 · 2022-10-05 19:17:34 +02:00 · 2022-10-05 16:43:51 +02:00 · 2022-10-05 12:55:33 +02:00 · 2022-10-05 09:20:02 +02:00 · 2022-10-05 09:12:46 +02:00
17 changed files with 766 additions and 213 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -1,38 +0,0 @@
---
-version: 2.1
-
-jobs:
-  compile:
-    docker:
-    - image: cimg/go:1.18
-    steps:
-    - checkout
-    - run: make
-
-  test:
-    parameters:
-      go_version:
-        type: string
-      run_test:
-        type: boolean
-        default: true
-    docker:
-    - image: cimg/go:<< parameters.go_version >>
-    steps:
-    - checkout
-    - run: make test
-
-workflows:
-  version: 2
-  unit-test:
-    jobs:
-    - compile
-    - test:
-        name: testing
-        matrix:
-          parameters:
-            go_version:
-            - "1.16"
-            - "1.17"
-            - "1.18"
-            - "1.19"
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -0,0 +1,25 @@
+name: build-and-test-tablizer
+on: [push, pull_request]
+jobs:
+  build:
+    strategy:
+      matrix:
+        version: [1.17, 1.18, 1.19]
+        os: [ubuntu-latest, windows-latest, macos-latest]
+    name: Build
+    runs-on: ${{ matrix.os }}
+    steps:
+    - name: Set up Go ${{ matrix.version }}
+      uses: actions/setup-go@v3
+      with:
+        go-version: ${{ matrix.version }}
+      id: go
+
+    - name: checkout
+      uses: actions/checkout@v3
+
+    - name: build
+      run: make
+
+    - name: test
+      run: make test
--- a/25
+++ b/25
@@ -18,19 +18,30 @@
 #
 # no need to modify anything below
 tool    = tablizer
-version = $(shell egrep "^var Version = " lib/common.go | cut -d'=' -f2 | cut -d'"' -f 2)
+version = $(shell egrep "= .v" lib/common.go | cut -d'=' -f2 | cut -d'"' -f 2)
 archs   = android darwin freebsd linux netbsd openbsd windows
-PREFIX = /usr/local
-UID    = root
-GID    = 0
+PREFIX  = /usr/local
+UID     = root
+GID     = 0
+BRANCH  = $(shell git describe --all | cut -d/ -f2)
+COMMIT  = $(shell git rev-parse --short=8 HEAD)
+BUILD   = $(shell date +%Y.%m.%d.%H%M%S)
+VERSION:= $(if $(filter $(BRANCH), development),$(version)-$(BRANCH)-$(COMMIT)-$(BUILD),$(version))

-all: buildlocal $(tool).1
+
+all: $(tool).1 cmd/$(tool).go buildlocal

 %.1: %.pod
 	pod2man -c "User Commands" -r 1 -s 1 $*.pod > $*.1

+cmd/%.go: %.pod
+	echo "package cmd" > cmd/$*.go
+	echo "var manpage = \`" >> cmd/$*.go
+	pod2text $*.pod >> cmd/$*.go
+	echo "\`" >> cmd/$*.go
+
 buildlocal:
-	go build
+	go build -ldflags "-X 'github.com/tlinden/tablizer/lib.VERSION=$(VERSION)'"

 release:
 	./mkrel.sh $(tool) $(version)
@@ -42,7 +53,7 @@ install: buildlocal
 	install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/

 clean:
-	rm -rf $(tool) $(tool).1 releases
+	rm -rf $(tool) releases

 test:
 	go test -v ./...
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
-[![<ORG_NAME>](https://circleci.com/gh/TLINDEN/tablizer.svg?style=svg)](https://app.circleci.com/pipelines/github/TLINDEN/tablizer)
+[![Actions](https://github.com/tlinden/tablizer/actions/workflows/ci.yaml/badge.svg)](https://github.com/tlinden/tablizer/actions)
+[![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://github.com/tlinden/tablizer/blob/master/LICENSE)

 ## tablizer - Manipulate tabular output of other programs

--- a/cmd/root.go
+++ b/cmd/root.go
@@ -17,19 +17,46 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 package cmd

 import (
-	"daemon.de/tablizer/lib"
+	"bytes"
 	"fmt"
 	"github.com/spf13/cobra"
+	"github.com/tlinden/tablizer/lib"
+	"log"
 	"os"
+	"os/exec"
 )

+var ShowManual = false
+
+func man() {
+	man := exec.Command("less", "-")
+
+	var b bytes.Buffer
+	b.Write([]byte(manpage))
+
+	man.Stdout = os.Stdout
+	man.Stdin = &b
+	man.Stderr = os.Stderr
+
+	err := man.Run()
+
+	if err != nil {
+		log.Fatal(err)
+	}
+}
+
 var rootCmd = &cobra.Command{
 	Use:   "tablizer [regex] [file, ...]",
 	Short: "[Re-]tabularize tabular data",
 	Long:  `Manipulate tabular output of other programs`,
 	RunE: func(cmd *cobra.Command, args []string) error {
 		if lib.ShowVersion {
-			fmt.Printf("This is tablizer version %s\n", lib.Version)
+			fmt.Printf("This is tablizer version %s\n", lib.VERSION)
+			return nil
+		}
+
+		if ShowManual {
+			man()
 			return nil
 		}

@@ -57,8 +84,10 @@ func Execute() {
 func init() {
 	rootCmd.PersistentFlags().BoolVarP(&lib.Debug, "debug", "d", false, "Enable debugging")
 	rootCmd.PersistentFlags().BoolVarP(&lib.NoNumbering, "no-numbering", "n", false, "Disable header numbering")
-	rootCmd.PersistentFlags().BoolVarP(&lib.ShowVersion, "version", "v", false, "Print program version")
-	rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", "", "Custom field separator")
+	rootCmd.PersistentFlags().BoolVarP(&lib.ShowVersion, "version", "V", false, "Print program version")
+	rootCmd.PersistentFlags().BoolVarP(&lib.InvertMatch, "invert-match", "v", false, "select non-matching rows")
+	rootCmd.PersistentFlags().BoolVarP(&ShowManual, "man", "m", false, "Display manual page")
+	rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", lib.DefaultSeparator, "Custom field separator")
 	rootCmd.PersistentFlags().StringVarP(&lib.Columns, "columns", "c", "", "Only show the speficied columns (separated by ,)")

 	// output flags, only 1 allowed, hidden, since just short cuts
--- a/cmd/tablizer.go
+++ b/cmd/tablizer.go
@@ -0,0 +1,131 @@
+package cmd
+var manpage = `
+NAME
+    tablizer - Manipulate tabular output of other programs
+
+SYNOPSIS
+        Usage:
+          tablizer [regex] [file, ...] [flags]
+    
+        Flags:
+          -c, --columns string     Only show the speficied columns (separated by ,)
+          -d, --debug              Enable debugging
+          -h, --help               help for tablizer
+          -v, --invert-match       select non-matching rows
+          -m, --man                Display manual page
+          -n, --no-numbering       Disable header numbering
+          -o, --output string      Output mode - one of: orgtbl, markdown, extended, ascii(default)
+          -X, --extended           Enable extended output
+          -M, --markdown           Enable markdown table output
+          -O, --orgtbl             Enable org-mode table output
+          -s, --separator string   Custom field separator
+          -V, --version            Print program version
+
+DESCRIPTION
+    Many programs generate tabular output. But sometimes you need to
+    post-process these tables, you may need to remove one or more columns or
+    you may want to filter for some pattern or you may need the output in
+    another program and need to parse it somehow. Standard unix tools such
+    as awk(1), grep(1) or column(1) may help, but sometimes it's a tedious
+    business.
+
+    Let's take the output of the tool kubectl. It contains cells with
+    whitespace and they do not separate columns by TAB characters. This is
+    not easy to process.
+
+    You can use tablizer to do these and more things.
+
+    tablizer analyses the header fields of a table, registers the column
+    positions of each header field and separates columns by those positions.
+
+    Without any options it reads its input from "STDIN", but you can also
+    specify a file as a parameter. If you want to reduce the output by some
+    regular expression, just specify it as its first parameter. You may also
+    use the -v option to exclude all rows which match the pattern. Hence:
+
+       # read from STDIN
+       kubectl get pods | tablizer
+
+       # read a file
+       tablizer filename
+
+       # search for pattern in a file (works like grep)
+       tablizer regex filename
+
+       # search for pattern in STDIN
+       kubectl get pods | tablizer regex
+
+    The output looks like the original one but every header field will have
+    a number associated with it, e.g.:
+
+       NAME(1) READY(2) STATUS(3) RESTARTS(4) AGE(5)
+
+    These numbers denote the column and you can use them to specify which
+    columns you want to have in your output:
+
+       kubectl get pods | tablizer -c1,3
+
+    You can specify the numbers in any order but output will always follow
+    the original order.
+
+    The numbering can be suppressed by using the -n option.
+
+    Finally the -d option enables debugging output which is mostly useful
+    for the developer.
+
+  OUTPUT MODES
+    There might be cases when the tabular output of a program is way too
+    large for your current terminal but you still need to see every column.
+    In such cases the -o extended or -X option can be useful which enables
+    *extended mode*. In this mode, each row will be printed vertically,
+    header left, value right, aligned by the field widths. Here's an
+    example:
+
+        kubectl get pods | ./tablizer -o extended
+            NAME: repldepl-7bcd8d5b64-7zq4l  
+           READY: 1/1    
+          STATUS: Running  
+        RESTARTS: 1 (71m ago)  
+             AGE: 5h28m
+
+    You can of course still use a regex to reduce the number of rows
+    displayed.
+
+    The option -o shell can be used if the output has to be processed by the
+    shell, it prints variable assignments for each cell, one line per row:
+
+        kubectl get pods | ./tablizer -o shell
+        NAME="repldepl-7bcd8d5b64-7zq4l" READY="1/1" STATUS="Running" RESTARTS="9 (47m ago)" AGE="4d23h"
+        NAME="repldepl-7bcd8d5b64-m48n8" READY="1/1" STATUS="Running" RESTARTS="9 (47m ago)" AGE="4d23h"
+        NAME="repldepl-7bcd8d5b64-q2bf4" READY="1/1" STATUS="Running" RESTARTS="9 (47m ago)" AGE="4d23h"
+
+    You can use this in an eval loop.
+
+    Beside normal ascii mode (the default) and extended mode there are more
+    output modes available: orgtbl which prints an Emacs org-mode table and
+    markdown which prints a Markdown table.
+
+BUGS
+    In order to report a bug, unexpected behavior, feature requests or to
+    submit a patch, please open an issue on github:
+    <https://github.com/TLINDEN/tablizer/issues>.
+
+LICENSE
+    This software is licensed under the GNU GENERAL PUBLIC LICENSE version
+    3.
+
+    Copyright (c) 2022 by Thomas von Dein
+
+    This software uses the following GO libraries:
+
+    repr (https://github.com/alecthomas/repr)
+        Released under the MIT License, Copyright (c) 2016 Alec Thomas
+
+    cobra (https://github.com/spf13/cobra)
+        Released under the Apache 2.0 license, Copyright 2013-2022 The Cobra
+        Authors
+
+AUTHORS
+    Thomas von Dein tom AT vondein DOT org
+
+`
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module daemon.de/tablizer
+module github.com/tlinden/tablizer

 go 1.18

--- a/lib/common.go
+++ b/lib/common.go
@@ -17,19 +17,31 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.

 package lib

-// command line flags
-var Debug bool
-var XtendedOut bool
-var NoNumbering bool
-var ShowVersion bool
-var Columns string
-var UseColumns []int
-var Separator string
-var OutflagExtended bool
-var OutflagMarkdown bool
-var OutflagOrgtable bool
-var OutflagShell bool
-var OutputMode string
+var (
+	// command line flags
+	Debug            bool
+	XtendedOut       bool
+	NoNumbering      bool
+	ShowVersion      bool
+	Columns          string
+	UseColumns       []int
+	DefaultSeparator string = `(\s\s+|\t)`
+	Separator        string = `(\s\s+|\t)`
+	OutflagExtended  bool
+	OutflagMarkdown  bool
+	OutflagOrgtable  bool
+	OutflagShell     bool
+	OutputMode       string
+	InvertMatch      bool

-var Version = "v1.0.3"
-var validOutputmodes = "(orgtbl|markdown|extended|ascii)"
+	// used for validation
+	validOutputmodes = "(orgtbl|markdown|extended|ascii)"
+
+	// main program version
+	Version = "v1.0.6"
+
+	// generated version string, printed by -V; contains lib.Version on
+	// the main branch, and lib.Version-$branch-$lastcommit-$date on
+	// the development branch
+	VERSION string
+)
--- a/lib/helpers.go
+++ b/lib/helpers.go
@@ -48,8 +48,49 @@ func PrepareColumns() error {
 	return nil
 }

+func numberizeHeaders(data *Tabdata) {
+	// prepare headers: add numbers to headers
+	numberedHeaders := []string{}
+	for i, head := range data.headers {
+		if len(Columns) > 0 {
+			// -c specified
+			if !contains(UseColumns, i+1) {
+				// ignore this one
+				continue
+			}
+		}
+		if NoNumbering {
+			numberedHeaders = append(numberedHeaders, head)
+		} else {
+			numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
+		}
+	}
+	data.headers = numberedHeaders
+}
+
+func reduceColumns(data *Tabdata) {
+	// exclude columns, if any
+	if len(Columns) > 0 {
+		reducedEntries := [][]string{}
+		reducedEntry := []string{}
+		for _, entry := range data.entries {
+			reducedEntry = nil
+			for i, value := range entry {
+				if !contains(UseColumns, i+1) {
+					continue
+				}
+
+				reducedEntry = append(reducedEntry, value)
+			}
+			reducedEntries = append(reducedEntries, reducedEntry)
+		}
+		data.entries = reducedEntries
+	}
+}
+
 func PrepareModeFlags() error {
 	if len(OutputMode) == 0 {
+		// associate short flags like -X with mode selector
 		switch {
 		case OutflagExtended:
 			OutputMode = "extended"
--- a/lib/helpers_test.go
+++ b/lib/helpers_test.go
@@ -52,6 +52,7 @@ func TestPrepareColumns(t *testing.T) {
 	}{
 		{"1,2,3", []int{1, 2, 3}, false},
 		{"1,2,", []int{}, true},
+		{"a,b", []int{}, true},
 	}

 	for _, tt := range tests {
@@ -71,3 +72,46 @@ func TestPrepareColumns(t *testing.T) {
 		})
 	}
 }
+
+func TestReduceColumns(t *testing.T) {
+	var tests = []struct {
+		expect  [][]string
+		columns []int
+	}{
+		{
+			expect:  [][]string{[]string{"a", "b"}},
+			columns: []int{1, 2},
+		},
+		{
+			expect:  [][]string{[]string{"a", "c"}},
+			columns: []int{1, 3},
+		},
+		{
+			expect:  [][]string{[]string{"a"}},
+			columns: []int{1},
+		},
+		{
+			expect:  [][]string{nil},
+			columns: []int{4},
+		},
+	}
+
+	input := [][]string{[]string{"a", "b", "c"}}
+
+	Columns = "y" // used as a flag with len(Columns)...
+
+	for _, tt := range tests {
+		testname := fmt.Sprintf("reduce-columns-by-%+v", tt.columns)
+		t.Run(testname, func(t *testing.T) {
+			UseColumns = tt.columns
+			data := Tabdata{entries: input}
+			reduceColumns(&data)
+			if !reflect.DeepEqual(data.entries, tt.expect) {
+				t.Errorf("reduceColumns returned invalid data:\ngot: %+v\nexp: %+v", data.entries, tt.expect)
+			}
+		})
+	}
+
+	Columns = "" // reset for other tests
+	UseColumns = nil
+}
--- a/lib/parser.go
+++ b/lib/parser.go
@@ -29,49 +29,36 @@ import (

 // contains a whole parsed table
 type Tabdata struct {
-	maxwidthHeader int   // longest header
-	maxwidthPerCol []int // max width per column
-	columns        int
-	headerIndices  []map[string]int // [ {beg=>0, end=>17}, ... ]
-	headers        []string         // [ "ID", "NAME", ...]
+	maxwidthHeader int      // longest header
+	maxwidthPerCol []int    // max width per column
+	columns        int      // count
+	headers        []string // [ "ID", "NAME", ...]
 	entries        [][]string
 }

 /*
-   Parse tabular input. We split the  header (first line) by 2 or more
-   spaces, remember the positions of  the header fields. We then split
-   the data (everything after the first line) by those positions. That
-   way we can turn "tabular data" (with fields containing whitespaces)
-   into real tabular data. We re-tabulate our input if you will.
+   Parse tabular input.
 */
 func parseFile(input io.Reader, pattern string) (Tabdata, error) {
 	data := Tabdata{}

 	var scanner *bufio.Scanner
-	var spaces = `\s\s+|$`
-
-	if len(Separator) > 0 {
-		spaces = Separator
-	}

 	hadFirst := false
-	spacefinder := regexp.MustCompile(spaces)
-	beg := 0
+	separate := regexp.MustCompile(Separator)
+	patternR, err := regexp.Compile(pattern)
+	if err != nil {
+		return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
+	}

 	scanner = bufio.NewScanner(input)

 	for scanner.Scan() {
 		line := strings.TrimSpace(scanner.Text())
-		values := []string{}
-
-		patternR, err := regexp.Compile(pattern)
-		if err != nil {
-			return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
-		}
+		parts := separate.Split(line, -1)

 		if !hadFirst {
 			// header processing
-			parts := spacefinder.FindAllStringIndex(line, -1)
 			data.columns = len(parts)
 			// if Debug {
 			// 	fmt.Println(parts)
@@ -83,30 +70,14 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
 				// 	fmt.Printf("Part: <%s>\n", string(line[beg:part[0]]))
 				//}

-				// current field
-				head := string(line[beg:part[0]])
-
-				// register begin and end of field within line
-				indices := make(map[string]int)
-				indices["beg"] = beg
-				if part[0] == part[1] {
-					indices["end"] = 0
-				} else {
-					indices["end"] = part[1] - 1
-				}
-
 				// register widest header field
-				headerlen := len(head)
+				headerlen := len(part)
 				if headerlen > data.maxwidthHeader {
 					data.maxwidthHeader = headerlen
 				}

 				// register fields data
-				data.headerIndices = append(data.headerIndices, indices)
-				data.headers = append(data.headers, head)
-
-				// end of current field == begin of next one
-				beg = part[1]
+				data.headers = append(data.headers, strings.TrimSpace(part))

 				// done
 				hadFirst = true
@@ -114,22 +85,19 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
 		} else {
 			// data processing
 			if len(pattern) > 0 {
-				if !patternR.MatchString(line) {
+				if patternR.MatchString(line) == InvertMatch {
+					// by default  -v is false, so if a  line does NOT
+					// match the pattern, we will ignore it. However,
+					// if the user specified -v, the matching is inverted,
+					// so we ignore all lines, which DO match.
 					continue
 				}
 			}

 			idx := 0 // we cannot use the header index, because we could exclude columns
-			for _, index := range data.headerIndices {
-				value := ""
-
-				if index["end"] == 0 {
-					value = string(line[index["beg"]:])
-				} else {
-					value = string(line[index["beg"]:index["end"]])
-				}
-
-				width := len(strings.TrimSpace(value))
+			values := []string{}
+			for _, part := range parts {
+				width := len(strings.TrimSpace(part))

 				if len(data.maxwidthPerCol)-1 < idx {
 					data.maxwidthPerCol = append(data.maxwidthPerCol, width)
@@ -142,7 +110,7 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
 				// if Debug {
 				// 	fmt.Printf("<%s> ", value)
 				// }
-				values = append(values, strings.TrimSpace(value))
+				values = append(values, strings.TrimSpace(part))

 				idx++
 			}
@@ -151,7 +119,7 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
 	}

 	if scanner.Err() != nil {
-		return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, scanner.Err()))
+		return data, errors.Unwrap(fmt.Errorf("Failed to read from io.Reader: %w", scanner.Err()))
 	}

 	if Debug {
--- a/lib/parser_test.go
+++ b/lib/parser_test.go
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 package lib

 import (
+	"fmt"
 	"reflect"
 	"strings"
 	"testing"
@@ -27,40 +28,18 @@ func TestParser(t *testing.T) {
 	data := Tabdata{
 		maxwidthHeader: 5,
 		maxwidthPerCol: []int{
-			5,
-			5,
-			8,
+			5, 5, 8,
 		},
 		columns: 3,
-		headerIndices: []map[string]int{
-			map[string]int{
-				"beg": 0,
-				"end": 6,
-			},
-			map[string]int{
-				"end": 13,
-				"beg": 7,
-			},
-			map[string]int{
-				"beg": 14,
-				"end": 0,
-			},
-		},
 		headers: []string{
-			"ONE",
-			"TWO",
-			"THREE",
+			"ONE", "TWO", "THREE",
 		},
 		entries: [][]string{
 			[]string{
-				"asd",
-				"igig",
-				"cxxxncnc",
+				"asd", "igig", "cxxxncnc",
 			},
 			[]string{
-				"19191",
-				"EDD 1",
-				"X",
+				"19191", "EDD 1", "X",
 			},
 		},
 	}
@@ -71,12 +50,63 @@ asd    igig   cxxxncnc

 	readFd := strings.NewReader(table)
 	gotdata, err := parseFile(readFd, "")
+	Separator = DefaultSeparator

 	if err != nil {
 		t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, gotdata)
 	}

 	if !reflect.DeepEqual(data, gotdata) {
-		t.Errorf("Parser returned invalid data\nExp: %+v\nGot: %+v\n", data, gotdata)
+		t.Errorf("Parser returned invalid data, Regex: %s\nExp: %+v\nGot: %+v\n", Separator, data, gotdata)
+	}
+}
+
+func TestParserPatternmatching(t *testing.T) {
+	var tests = []struct {
+		entries [][]string
+		pattern string
+		invert  bool
+	}{
+		{
+			entries: [][]string{
+				[]string{
+					"asd", "igig", "cxxxncnc",
+				},
+			},
+			pattern: "ig",
+			invert:  false,
+		},
+		{
+			entries: [][]string{
+				[]string{
+					"19191", "EDD 1", "X",
+				},
+			},
+			pattern: "ig",
+			invert:  true,
+		},
+	}
+
+	table := `ONE    TWO    THREE  
+asd    igig   cxxxncnc  
+19191  EDD 1  X`
+
+	for _, tt := range tests {
+		testname := fmt.Sprintf("parse-with-inverted-pattern-%t", tt.invert)
+		t.Run(testname, func(t *testing.T) {
+			InvertMatch = tt.invert
+
+			readFd := strings.NewReader(table)
+			gotdata, err := parseFile(readFd, tt.pattern)
+
+			if err != nil {
+				t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, gotdata)
+			}
+
+			if !reflect.DeepEqual(tt.entries, gotdata.entries) {
+				t.Errorf("Parser returned invalid data (pattern: %s, invert: %t)\nExp: %+v\nGot: %+v\n",
+					tt.pattern, tt.invert, tt.entries, gotdata.entries)
+			}
+		})
 	}
 }
--- a/lib/printer.go
+++ b/lib/printer.go
@@ -26,41 +26,10 @@ import (
 )

 func printData(data *Tabdata) {
-	// prepare headers: add numbers to headers
-	numberedHeaders := []string{}
-	for i, head := range data.headers {
-		if len(Columns) > 0 {
-			// -c specified
-			if !contains(UseColumns, i+1) {
-				// ignore this one
-				continue
-			}
-		}
-		if NoNumbering {
-			numberedHeaders = append(numberedHeaders, head)
-		} else {
-			numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
-		}
-	}
-	data.headers = numberedHeaders
-
-	// prepare data
-	if len(Columns) > 0 {
-		reducedEntries := [][]string{}
-		reducedEntry := []string{}
-		for _, entry := range data.entries {
-			reducedEntry = nil
-			for i, value := range entry {
-				if !contains(UseColumns, i+1) {
-					continue
-				}
-
-				reducedEntry = append(reducedEntry, value)
-			}
-			reducedEntries = append(reducedEntries, reducedEntry)
-		}
-		data.entries = reducedEntries
+	if OutputMode != "shell" {
+		numberizeHeaders(data)
 	}
+	reduceColumns(data)

 	switch OutputMode {
 	case "extended":
@@ -190,6 +159,7 @@ func printShellData(data *Tabdata) {
 		var idx int
 		for _, entry := range data.entries {
 			idx = 0
+			shentries := []string{}
 			for i, value := range entry {
 				if len(Columns) > 0 {
 					if !contains(UseColumns, i+1) {
@@ -197,10 +167,10 @@ func printShellData(data *Tabdata) {
 					}
 				}

-				fmt.Printf("%s=\"%s\" ", data.headers[idx], value)
+				shentries = append(shentries, fmt.Sprintf("%s=\"%s\"", data.headers[idx], value))
 				idx++
 			}
-			fmt.Println()
+			fmt.Println(strings.Join(shentries, " "))
 		}
 	}
 }
--- a/lib/printer_test.go
+++ b/lib/printer_test.go
@@ -18,15 +18,33 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 package lib

 import (
+	"fmt"
 	"os"
 	"strings"
 	"testing"
 )

 func TestPrinter(t *testing.T) {
-	table := `ONE    TWO    THREE  
-asd    igig   cxxxncnc  
-19191  EDD 1  X`
+	startdata := Tabdata{
+		maxwidthHeader: 5,
+		maxwidthPerCol: []int{
+			5,
+			5,
+			8,
+		},
+		columns: 3,
+		headers: []string{
+			"ONE", "TWO", "THREE",
+		},
+		entries: [][]string{
+			[]string{
+				"asd", "igig", "cxxxncnc",
+			},
+			[]string{
+				"19191", "EDD 1", "X",
+			},
+		},
+	}

 	expects := map[string]string{
 		"ascii": `ONE(1)	TWO(2)	THREE(3) 
@@ -42,6 +60,8 @@ asd   	igig  	cxxxncnc
 |--------|--------|----------|
 | asd    | igig   | cxxxncnc |
 |  19191 | EDD 1  | X        |`,
+		"shell": `ONE="asd" TWO="igig" THREE="cxxxncnc"
+ONE="19191" TWO="EDD 1" THREE="X"`,
 	}

 	r, w, err := os.Pipe()
@@ -52,27 +72,25 @@ asd   	igig  	cxxxncnc
 	os.Stdout = w

 	for mode, expect := range expects {
-		OutputMode = mode
-		fd := strings.NewReader(table)
-		data, err := parseFile(fd, "")
+		testname := fmt.Sprintf("print-%s", mode)
+		t.Run(testname, func(t *testing.T) {

-		if err != nil {
-			t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, data)
-		}
+			OutputMode = mode
+			data := startdata // we need to reset our mock data, since it's being modified in printData()
+			printData(&data)

-		printData(&data)
+			buf := make([]byte, 1024)
+			n, err := r.Read(buf)
+			if err != nil {
+				t.Fatal(err)
+			}
+			buf = buf[:n]
+			output := strings.TrimSpace(string(buf))

-		buf := make([]byte, 1024)
-		n, err := r.Read(buf)
-		if err != nil {
-			t.Fatal(err)
-		}
-		buf = buf[:n]
-		output := strings.TrimSpace(string(buf))
-
-		if output != expect {
-			t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
-		}
+			if output != expect {
+				t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
+			}
+		})
 	}

 	// Restore
--- a/main.go
+++ b/main.go
@@ -18,7 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
 package main

 import (
-	"daemon.de/tablizer/cmd"
+	"github.com/tlinden/tablizer/cmd"
 )

 func main() {
--- a/tablizer.1
+++ b/tablizer.1
@@ -0,0 +1,280 @@
+.\" Automatically generated by Pod::Man 4.14 (Pod::Simple 3.42)
+.\"
+.\" Standard preamble:
+.\" ========================================================================
+.de Sp \" Vertical space (when we can't use .PP)
+.if t .sp .5v
+.if n .sp
+..
+.de Vb \" Begin verbatim text
+.ft CW
+.nf
+.ne \\$1
+..
+.de Ve \" End verbatim text
+.ft R
+.fi
+..
+.\" Set up some character translations and predefined strings.  \*(-- will
+.\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
+.\" double quote, and \*(R" will give a right double quote.  \*(C+ will
+.\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
+.\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
+.\" nothing in troff, for use with C<>.
+.tr \(*W-
+.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
+.ie n \{\
+.    ds -- \(*W-
+.    ds PI pi
+.    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
+.    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
+.    ds L" ""
+.    ds R" ""
+.    ds C` ""
+.    ds C' ""
+'br\}
+.el\{\
+.    ds -- \|\(em\|
+.    ds PI \(*p
+.    ds L" ``
+.    ds R" ''
+.    ds C`
+.    ds C'
+'br\}
+.\"
+.\" Escape single quotes in literal strings from groff's Unicode transform.
+.ie \n(.g .ds Aq \(aq
+.el       .ds Aq '
+.\"
+.\" If the F register is >0, we'll generate index entries on stderr for
+.\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
+.\" entries marked with X<> in POD.  Of course, you'll have to process the
+.\" output yourself in some meaningful fashion.
+.\"
+.\" Avoid warning from groff about undefined register 'F'.
+.de IX
+..
+.nr rF 0
+.if \n(.g .if rF .nr rF 1
+.if (\n(rF:(\n(.g==0)) \{\
+.    if \nF \{\
+.        de IX
+.        tm Index:\\$1\t\\n%\t"\\$2"
+..
+.        if !\nF==2 \{\
+.            nr % 0
+.            nr F 2
+.        \}
+.    \}
+.\}
+.rr rF
+.\"
+.\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2).
+.\" Fear.  Run.  Save yourself.  No user-serviceable parts.
+.    \" fudge factors for nroff and troff
+.if n \{\
+.    ds #H 0
+.    ds #V .8m
+.    ds #F .3m
+.    ds #[ \f1
+.    ds #] \fP
+.\}
+.if t \{\
+.    ds #H ((1u-(\\\\n(.fu%2u))*.13m)
+.    ds #V .6m
+.    ds #F 0
+.    ds #[ \&
+.    ds #] \&
+.\}
+.    \" simple accents for nroff and troff
+.if n \{\
+.    ds ' \&
+.    ds ` \&
+.    ds ^ \&
+.    ds , \&
+.    ds ~ ~
+.    ds /
+.\}
+.if t \{\
+.    ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
+.    ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
+.    ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
+.    ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
+.    ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
+.    ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
+.\}
+.    \" troff and (daisy-wheel) nroff accents
+.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
+.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
+.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
+.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
+.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
+.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
+.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
+.ds ae a\h'-(\w'a'u*4/10)'e
+.ds Ae A\h'-(\w'A'u*4/10)'E
+.    \" corrections for vroff
+.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
+.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
+.    \" for low resolution devices (crt and lpr)
+.if \n(.H>23 .if \n(.V>19 \
+\{\
+.    ds : e
+.    ds 8 ss
+.    ds o a
+.    ds d- d\h'-1'\(ga
+.    ds D- D\h'-1'\(hy
+.    ds th \o'bp'
+.    ds Th \o'LP'
+.    ds ae ae
+.    ds Ae AE
+.\}
+.rm #[ #] #H #V #F C
+.\" ========================================================================
+.\"
+.IX Title "TABLIZER 1"
+.TH TABLIZER 1 "2022-10-05" "1" "User Commands"
+.\" For nroff, turn off justification.  Always turn off hyphenation; it makes
+.\" way too many mistakes in technical documents.
+.if n .ad l
+.nh
+.SH "NAME"
+tablizer \- Manipulate tabular output of other programs
+.SH "SYNOPSIS"
+.IX Header "SYNOPSIS"
+.Vb 2
+\&    Usage:
+\&      tablizer [regex] [file, ...] [flags]
+\&    
+\&    Flags:
+\&      \-c, \-\-columns string     Only show the speficied columns (separated by ,)
+\&      \-d, \-\-debug              Enable debugging
+\&      \-h, \-\-help               help for tablizer
+\&      \-v, \-\-invert\-match       select non\-matching rows
+\&      \-m, \-\-man                Display manual page
+\&      \-n, \-\-no\-numbering       Disable header numbering
+\&      \-o, \-\-output string      Output mode \- one of: orgtbl, markdown, extended, ascii(default)
+\&      \-X, \-\-extended           Enable extended output
+\&      \-M, \-\-markdown           Enable markdown table output
+\&      \-O, \-\-orgtbl             Enable org\-mode table output
+\&      \-s, \-\-separator string   Custom field separator
+\&      \-V, \-\-version            Print program version
+.Ve
+.SH "DESCRIPTION"
+.IX Header "DESCRIPTION"
+Many  programs generate  tabular  output.  But  sometimes  you need  to
+post-process these tables, you may need  to remove one or more columns
+or you may want to filter for  some pattern or you may need the output
+in another program and need to  parse it somehow.  Standard unix tools
+such as  \fBawk\fR\|(1), \fBgrep\fR\|(1) or \fBcolumn\fR\|(1)  may help, but sometimes  it's a
+tedious business.
+.PP
+Let's take  the output of  the tool  kubectl.  It contains  cells with
+whitespace and they do not separate columns by \s-1TAB\s0 characters. This is
+not easy to process.
+.PP
+You can use \fBtablizer\fR to do these and more things.
+.PP
+\&\fBtablizer\fR analyses the header fields of a table, registers the column
+positions  of  each  header  field  and  separates  columns  by  those
+positions.
+.PP
+Without any options it reads its input from \f(CW\*(C`STDIN\*(C'\fR, but you can also
+specify a  file as a  parameter. If you want  to reduce the  output by
+some regular expression,  just specify it as its  first parameter. You
+may also  use the  \fB\-v\fR option  to exclude all  rows which  match the
+pattern. Hence:
+.PP
+.Vb 2
+\&   # read from STDIN
+\&   kubectl get pods | tablizer
+\&
+\&   # read a file
+\&   tablizer filename
+\&
+\&   # search for pattern in a file (works like grep)
+\&   tablizer regex filename
+\&
+\&   # search for pattern in STDIN
+\&   kubectl get pods | tablizer regex
+.Ve
+.PP
+The output  looks like the  original one  but every header  field will
+have a number associated with it, e.g.:
+.PP
+.Vb 1
+\&   NAME(1) READY(2) STATUS(3) RESTARTS(4) AGE(5)
+.Ve
+.PP
+These numbers denote the column and  you can use them to specify which
+columns you want to have in your output:
+.PP
+.Vb 1
+\&   kubectl get pods | tablizer \-c1,3
+.Ve
+.PP
+You can specify the numbers in any order but output will always follow
+the original order.
+.PP
+The numbering can be suppressed by using the \fB\-n\fR option.
+.PP
+Finally the  \fB\-d\fR option  enables debugging  output which  is mostly
+useful for the developer.
+.SS "\s-1OUTPUT MODES\s0"
+.IX Subsection "OUTPUT MODES"
+There might be cases  when the tabular output of a  program is way too
+large  for your  current  terminal but  you still  need  to see  every
+column.   In such  cases the  \fB\-o extended\fR  or \fB\-X\fR  option can  be
+useful which enables \fIextended mode\fR. In this mode, each row will be
+printed vertically,  header left,  value right,  aligned by  the field
+widths. Here's an example:
+.PP
+.Vb 6
+\&    kubectl get pods | ./tablizer \-o extended
+\&        NAME: repldepl\-7bcd8d5b64\-7zq4l  
+\&       READY: 1/1    
+\&      STATUS: Running  
+\&    RESTARTS: 1 (71m ago)  
+\&         AGE: 5h28m
+.Ve
+.PP
+You can  of course  still use  a regex  to reduce  the number  of rows
+displayed.
+.PP
+The option \fB\-o shell\fR  can be used if the output  has to be processed
+by the shell,  it prints variable assignments for each  cell, one line
+per row:
+.PP
+.Vb 4
+\&    kubectl get pods | ./tablizer \-o shell
+\&    NAME="repldepl\-7bcd8d5b64\-7zq4l" READY="1/1" STATUS="Running" RESTARTS="9 (47m ago)" AGE="4d23h"
+\&    NAME="repldepl\-7bcd8d5b64\-m48n8" READY="1/1" STATUS="Running" RESTARTS="9 (47m ago)" AGE="4d23h"
+\&    NAME="repldepl\-7bcd8d5b64\-q2bf4" READY="1/1" STATUS="Running" RESTARTS="9 (47m ago)" AGE="4d23h"
+.Ve
+.PP
+You can use this in an eval loop.
+.PP
+Beside normal  ascii mode  (the default) and  extended mode  there are
+more output modes available: \fBorgtbl\fR  which prints an Emacs org-mode
+table and \fBmarkdown\fR which prints a Markdown table.
+.SH "BUGS"
+.IX Header "BUGS"
+In order to report a bug, unexpected behavior, feature requests
+or to submit a patch, please open an issue on github:
+<https://github.com/TLINDEN/tablizer/issues>.
+.SH "LICENSE"
+.IX Header "LICENSE"
+This software is licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3.
+.PP
+Copyright (c) 2022 by Thomas von Dein
+.PP
+This software uses the following \s-1GO\s0 libraries:
+.IP "repr (https://github.com/alecthomas/repr)" 4
+.IX Item "repr (https://github.com/alecthomas/repr)"
+Released under the \s-1MIT\s0 License, Copyright (c) 2016 Alec Thomas
+.IP "cobra (https://github.com/spf13/cobra)" 4
+.IX Item "cobra (https://github.com/spf13/cobra)"
+Released under the Apache 2.0 license, Copyright 2013\-2022 The Cobra Authors
+.SH "AUTHORS"
+.IX Header "AUTHORS"
+Thomas von Dein \fBtom \s-1AT\s0 vondein \s-1DOT\s0 org\fR
--- a/tablizer.pod
+++ b/tablizer.pod
@@ -11,6 +11,8 @@ tablizer - Manipulate tabular output of other programs
      -c, --columns string     Only show the specified columns (separated by ,)
      -d, --debug              Enable debugging
      -h, --help               help for tablizer
+      -v, --invert-match       select non-matching rows
+      -m, --man                Display manual page
      -n, --no-numbering       Disable header numbering
      -o, --output string      Output mode - one of: orgtbl, markdown, extended, ascii(default)
      -X, --extended           Enable extended output
@@ -22,12 +24,12 @@ tablizer - Manipulate tabular output of other programs

 =head1 DESCRIPTION

-Many  programs generate  tabular  output.  But  sometimes  you need  to
+Many  programs generate  tabular output.   But sometimes  you need  to
 post-process these tables, you may need  to remove one or more columns
-or you may want to filter for  some pattern or you may need the output
-in another program and need to  parse it somehow.  Standard unix tools
-such as  awk(1), grep(1) or column(1)  may help, but sometimes  it's a
-tedious business.
+or you  may want to filter  for some pattern (See  L<PATTERNS>) or you
+may need the  output in another program and need  to parse it somehow.
+Standard unix tools such as awk(1), grep(1) or column(1) may help, but
+sometimes it's a tedious business.

 Let's take  the output of  the tool  kubectl.  It contains  cells with
 whitespace and they do not separate columns by TAB characters. This is
@@ -41,8 +43,9 @@ positions.

 Without any options it reads its input from C<STDIN>, but you can also
 specify a  file as a  parameter. If you want  to reduce the  output by
-some   regular   expression,   just    specify   it   as   its   first
-parameters. Hence:
+some regular expression,  just specify it as its  first parameter. You
+may also  use the  B<-v> option  to exclude all  rows which  match the
+pattern. Hence:

   # read from STDIN
   kubectl get pods | tablizer
@@ -74,6 +77,34 @@ The numbering can be suppressed by using the B<-n> option.
 Finally the  B<-d> option  enables debugging  output which  is mostly
 useful for the developer.

+=head2 PATTERNS
+
+You can reduce the rows being  displayed by using a regular expression
+pattern.  The regexp is mostly PCRE compatible (RE2 syntax), refer to the syntax cheat
+sheet here: L<https://github.com/google/re2/wiki/Syntax>.  If you want
+to read  a more comprehensive  documentation about the topic  and have
+perl installed you can read it with:
+
+    perldoc perlre
+
+Or read it online: L<https://perldoc.perl.org/perlre>.
+
+A note on  modifiers: the regexp engine used in  tablizer uses another
+modifier syntax:
+
+    (?MODIFIER)
+
+The most important modifiers are:
+
+C<i> ignore case
+C<m> multiline mode
+C<s> single line mode
+
+Example for a case insensitive search:
+
+    kubectl get pods -A | tablizer "(?i)account"
+
+
 =head2 OUTPUT MODES

 There might be cases  when the tabular output of a  program is way too
Author	SHA1	Message	Date
Thomas von Dein	196833ed3c	Merge branch 'development'	2022-10-05 19:17:34 +02:00
Thomas von Dein	85277bbf5e	more refactoring, fixed bug in shell mode output, fixed default Separator and fixed #3	2022-10-05 16:43:51 +02:00
Thomas von Dein	26e50cf908	fix #1 : use scanner.Split() instead of splitting by header position boundaries, since this splitting cuts utf-8 chars which causes distorted output.	2022-10-05 12:55:33 +02:00
Thomas von Dein	5be18e27c9	Merge branch 'development'	2022-10-05 09:20:02 +02:00
Thomas von Dein	2c410e1cb3	added -v flag, replace 'help' subcommand wich -m, more tests	2022-10-05 09:12:46 +02:00
Thomas von Dein	1b622284a1	Merge branch 'development'	2022-10-04 16:14:04 +02:00
Thomas von Dein	404481c3dc	fixed badge uri and workflow name	2022-10-04 16:12:12 +02:00
Thomas von Dein	15f437314a	Merge branch 'main' of github.com:TLINDEN/tablizer	2022-10-04 16:08:34 +02:00
Thomas von Dein	3746c7f326	Merge branch 'development'	2022-10-04 16:06:59 +02:00
Thomas von Dein	b7b638636d	removed circleci, added badge	2022-10-04 16:05:02 +02:00
Thomas von Dein	dd13300c8b	add manpage to source (regen @ make)	2022-10-04 15:25:22 +02:00
Thomas von Dein	a59a6cb7d8	fixed actions	2022-10-04 15:15:14 +02:00
Thomas von Dein	d7ea0017b7	moved again	2022-10-04 15:14:21 +02:00
Thomas von Dein	09dc1f3e60	mv	2022-10-04 15:12:55 +02:00
Thomas von Dein	43dc4ff031	added dev version, changed go namespace, added inline manpage	2022-10-04 15:09:13 +02:00
Thomas von Dein	4596d9d589	added gh workflow	2022-10-04 15:08:49 +02:00