Compare commits

...

5 Commits

Author SHA1 Message Date
196833ed3c Merge branch 'development' 2022-10-05 19:17:34 +02:00
85277bbf5e more refactoring, fixed bug in shell mode output, fixed default
Separator and fixed #3
2022-10-05 16:43:51 +02:00
26e50cf908 fix #1: use scanner.Split() instead of splitting by header position
boundaries, since this splitting cuts utf-8 chars which causes
distorted output.
2022-10-05 12:55:33 +02:00
5be18e27c9 Merge branch 'development' 2022-10-05 09:20:02 +02:00
2c410e1cb3 added -v flag, replace 'help' subcommand wich -m, more tests 2022-10-05 09:12:46 +02:00
11 changed files with 296 additions and 188 deletions

View File

@@ -18,33 +18,31 @@ package cmd
import (
"bytes"
"github.com/tlinden/tablizer/lib"
"fmt"
"github.com/spf13/cobra"
"github.com/tlinden/tablizer/lib"
"log"
"os"
"os/exec"
)
var helpCmd = &cobra.Command{
Use: "help",
Short: "Show documentation",
Run: func(cmd *cobra.Command, args []string) {
man := exec.Command("less", "-")
var ShowManual = false
var b bytes.Buffer
b.Write([]byte(manpage))
func man() {
man := exec.Command("less", "-")
man.Stdout = os.Stdout
man.Stdin = &b
man.Stderr = os.Stderr
var b bytes.Buffer
b.Write([]byte(manpage))
err := man.Run()
man.Stdout = os.Stdout
man.Stdin = &b
man.Stderr = os.Stderr
if err != nil {
log.Fatal(err)
}
},
err := man.Run()
if err != nil {
log.Fatal(err)
}
}
var rootCmd = &cobra.Command{
@@ -57,6 +55,11 @@ var rootCmd = &cobra.Command{
return nil
}
if ShowManual {
man()
return nil
}
err := lib.PrepareColumns()
if err != nil {
return err
@@ -81,8 +84,10 @@ func Execute() {
func init() {
rootCmd.PersistentFlags().BoolVarP(&lib.Debug, "debug", "d", false, "Enable debugging")
rootCmd.PersistentFlags().BoolVarP(&lib.NoNumbering, "no-numbering", "n", false, "Disable header numbering")
rootCmd.PersistentFlags().BoolVarP(&lib.ShowVersion, "version", "v", false, "Print program version")
rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", "", "Custom field separator")
rootCmd.PersistentFlags().BoolVarP(&lib.ShowVersion, "version", "V", false, "Print program version")
rootCmd.PersistentFlags().BoolVarP(&lib.InvertMatch, "invert-match", "v", false, "select non-matching rows")
rootCmd.PersistentFlags().BoolVarP(&ShowManual, "man", "m", false, "Display manual page")
rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", lib.DefaultSeparator, "Custom field separator")
rootCmd.PersistentFlags().StringVarP(&lib.Columns, "columns", "c", "", "Only show the speficied columns (separated by ,)")
// output flags, only 1 allowed, hidden, since just short cuts
@@ -98,11 +103,4 @@ func init() {
// same thing but more common, takes precedence over above group
rootCmd.PersistentFlags().StringVarP(&lib.OutputMode, "output", "o", "", "Output mode - one of: orgtbl, markdown, extended, shell, ascii(default)")
rootCmd.AddCommand(helpCmd)
rootCmd.SetHelpCommand(&cobra.Command{
Use: "no-help",
Hidden: true,
})
}

View File

@@ -11,6 +11,8 @@ SYNOPSIS
-c, --columns string Only show the speficied columns (separated by ,)
-d, --debug Enable debugging
-h, --help help for tablizer
-v, --invert-match select non-matching rows
-m, --man Display manual page
-n, --no-numbering Disable header numbering
-o, --output string Output mode - one of: orgtbl, markdown, extended, ascii(default)
-X, --extended Enable extended output
@@ -38,7 +40,8 @@ DESCRIPTION
Without any options it reads its input from "STDIN", but you can also
specify a file as a parameter. If you want to reduce the output by some
regular expression, just specify it as its first parameters. Hence:
regular expression, just specify it as its first parameter. You may also
use the -v option to exclude all rows which match the pattern. Hence:
# read from STDIN
kubectl get pods | tablizer

View File

@@ -19,24 +19,26 @@ package lib
var (
// command line flags
Debug bool
XtendedOut bool
NoNumbering bool
ShowVersion bool
Columns string
UseColumns []int
Separator string
OutflagExtended bool
OutflagMarkdown bool
OutflagOrgtable bool
OutflagShell bool
OutputMode string
Debug bool
XtendedOut bool
NoNumbering bool
ShowVersion bool
Columns string
UseColumns []int
DefaultSeparator string = `(\s\s+|\t)`
Separator string = `(\s\s+|\t)`
OutflagExtended bool
OutflagMarkdown bool
OutflagOrgtable bool
OutflagShell bool
OutputMode string
InvertMatch bool
// used for validation
validOutputmodes = "(orgtbl|markdown|extended|ascii)"
// main program version
Version = "v1.0.4"
Version = "v1.0.6"
// generated version string, used by -v contains lib.Version on
// main branch, and lib.Version-$branch-$lastcommit-$date on

View File

@@ -48,8 +48,49 @@ func PrepareColumns() error {
return nil
}
func numberizeHeaders(data *Tabdata) {
// prepare headers: add numbers to headers
numberedHeaders := []string{}
for i, head := range data.headers {
if len(Columns) > 0 {
// -c specified
if !contains(UseColumns, i+1) {
// ignore this one
continue
}
}
if NoNumbering {
numberedHeaders = append(numberedHeaders, head)
} else {
numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
}
}
data.headers = numberedHeaders
}
func reduceColumns(data *Tabdata) {
// exclude columns, if any
if len(Columns) > 0 {
reducedEntries := [][]string{}
reducedEntry := []string{}
for _, entry := range data.entries {
reducedEntry = nil
for i, value := range entry {
if !contains(UseColumns, i+1) {
continue
}
reducedEntry = append(reducedEntry, value)
}
reducedEntries = append(reducedEntries, reducedEntry)
}
data.entries = reducedEntries
}
}
func PrepareModeFlags() error {
if len(OutputMode) == 0 {
// associate short flags like -X with mode selector
switch {
case OutflagExtended:
OutputMode = "extended"

View File

@@ -52,6 +52,7 @@ func TestPrepareColumns(t *testing.T) {
}{
{"1,2,3", []int{1, 2, 3}, false},
{"1,2,", []int{}, true},
{"a,b", []int{}, true},
}
for _, tt := range tests {
@@ -71,3 +72,46 @@ func TestPrepareColumns(t *testing.T) {
})
}
}
func TestReduceColumns(t *testing.T) {
var tests = []struct {
expect [][]string
columns []int
}{
{
expect: [][]string{[]string{"a", "b"}},
columns: []int{1, 2},
},
{
expect: [][]string{[]string{"a", "c"}},
columns: []int{1, 3},
},
{
expect: [][]string{[]string{"a"}},
columns: []int{1},
},
{
expect: [][]string{nil},
columns: []int{4},
},
}
input := [][]string{[]string{"a", "b", "c"}}
Columns = "y" // used as a flag with len(Columns)...
for _, tt := range tests {
testname := fmt.Sprintf("reduce-columns-by-%+v", tt.columns)
t.Run(testname, func(t *testing.T) {
UseColumns = tt.columns
data := Tabdata{entries: input}
reduceColumns(&data)
if !reflect.DeepEqual(data.entries, tt.expect) {
t.Errorf("reduceColumns returned invalid data:\ngot: %+v\nexp: %+v", data.entries, tt.expect)
}
})
}
Columns = "" // reset for other tests
UseColumns = nil
}

View File

@@ -29,49 +29,36 @@ import (
// contains a whole parsed table
type Tabdata struct {
maxwidthHeader int // longest header
maxwidthPerCol []int // max width per column
columns int
headerIndices []map[string]int // [ {beg=>0, end=>17}, ... ]
headers []string // [ "ID", "NAME", ...]
maxwidthHeader int // longest header
maxwidthPerCol []int // max width per column
columns int // count
headers []string // [ "ID", "NAME", ...]
entries [][]string
}
/*
Parse tabular input. We split the header (first line) by 2 or more
spaces, remember the positions of the header fields. We then split
the data (everything after the first line) by those positions. That
way we can turn "tabular data" (with fields containing whitespaces)
into real tabular data. We re-tabulate our input if you will.
Parse tabular input.
*/
func parseFile(input io.Reader, pattern string) (Tabdata, error) {
data := Tabdata{}
var scanner *bufio.Scanner
var spaces = `\s\s+|$`
if len(Separator) > 0 {
spaces = Separator
}
hadFirst := false
spacefinder := regexp.MustCompile(spaces)
beg := 0
separate := regexp.MustCompile(Separator)
patternR, err := regexp.Compile(pattern)
if err != nil {
return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
}
scanner = bufio.NewScanner(input)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
values := []string{}
patternR, err := regexp.Compile(pattern)
if err != nil {
return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
}
parts := separate.Split(line, -1)
if !hadFirst {
// header processing
parts := spacefinder.FindAllStringIndex(line, -1)
data.columns = len(parts)
// if Debug {
// fmt.Println(parts)
@@ -83,30 +70,14 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
// fmt.Printf("Part: <%s>\n", string(line[beg:part[0]]))
//}
// current field
head := string(line[beg:part[0]])
// register begin and end of field within line
indices := make(map[string]int)
indices["beg"] = beg
if part[0] == part[1] {
indices["end"] = 0
} else {
indices["end"] = part[1] - 1
}
// register widest header field
headerlen := len(head)
headerlen := len(part)
if headerlen > data.maxwidthHeader {
data.maxwidthHeader = headerlen
}
// register fields data
data.headerIndices = append(data.headerIndices, indices)
data.headers = append(data.headers, head)
// end of current field == begin of next one
beg = part[1]
data.headers = append(data.headers, strings.TrimSpace(part))
// done
hadFirst = true
@@ -114,22 +85,19 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
} else {
// data processing
if len(pattern) > 0 {
if !patternR.MatchString(line) {
if patternR.MatchString(line) == InvertMatch {
// by default -v is false, so if a line does NOT
// match the pattern, we will ignore it. However,
// if the user specified -v, the matching is inverted,
// so we ignore all lines, which DO match.
continue
}
}
idx := 0 // we cannot use the header index, because we could exclude columns
for _, index := range data.headerIndices {
value := ""
if index["end"] == 0 {
value = string(line[index["beg"]:])
} else {
value = string(line[index["beg"]:index["end"]])
}
width := len(strings.TrimSpace(value))
values := []string{}
for _, part := range parts {
width := len(strings.TrimSpace(part))
if len(data.maxwidthPerCol)-1 < idx {
data.maxwidthPerCol = append(data.maxwidthPerCol, width)
@@ -142,7 +110,7 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
// if Debug {
// fmt.Printf("<%s> ", value)
// }
values = append(values, strings.TrimSpace(value))
values = append(values, strings.TrimSpace(part))
idx++
}
@@ -151,7 +119,7 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
}
if scanner.Err() != nil {
return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, scanner.Err()))
return data, errors.Unwrap(fmt.Errorf("Failed to read from io.Reader: %w", scanner.Err()))
}
if Debug {

View File

@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package lib
import (
"fmt"
"reflect"
"strings"
"testing"
@@ -27,40 +28,18 @@ func TestParser(t *testing.T) {
data := Tabdata{
maxwidthHeader: 5,
maxwidthPerCol: []int{
5,
5,
8,
5, 5, 8,
},
columns: 3,
headerIndices: []map[string]int{
map[string]int{
"beg": 0,
"end": 6,
},
map[string]int{
"end": 13,
"beg": 7,
},
map[string]int{
"beg": 14,
"end": 0,
},
},
headers: []string{
"ONE",
"TWO",
"THREE",
"ONE", "TWO", "THREE",
},
entries: [][]string{
[]string{
"asd",
"igig",
"cxxxncnc",
"asd", "igig", "cxxxncnc",
},
[]string{
"19191",
"EDD 1",
"X",
"19191", "EDD 1", "X",
},
},
}
@@ -71,12 +50,63 @@ asd igig cxxxncnc
readFd := strings.NewReader(table)
gotdata, err := parseFile(readFd, "")
Separator = DefaultSeparator
if err != nil {
t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, gotdata)
}
if !reflect.DeepEqual(data, gotdata) {
t.Errorf("Parser returned invalid data\nExp: %+v\nGot: %+v\n", data, gotdata)
t.Errorf("Parser returned invalid data, Regex: %s\nExp: %+v\nGot: %+v\n", Separator, data, gotdata)
}
}
func TestParserPatternmatching(t *testing.T) {
var tests = []struct {
entries [][]string
pattern string
invert bool
}{
{
entries: [][]string{
[]string{
"asd", "igig", "cxxxncnc",
},
},
pattern: "ig",
invert: false,
},
{
entries: [][]string{
[]string{
"19191", "EDD 1", "X",
},
},
pattern: "ig",
invert: true,
},
}
table := `ONE TWO THREE
asd igig cxxxncnc
19191 EDD 1 X`
for _, tt := range tests {
testname := fmt.Sprintf("parse-with-inverted-pattern-%t", tt.invert)
t.Run(testname, func(t *testing.T) {
InvertMatch = tt.invert
readFd := strings.NewReader(table)
gotdata, err := parseFile(readFd, tt.pattern)
if err != nil {
t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, gotdata)
}
if !reflect.DeepEqual(tt.entries, gotdata.entries) {
t.Errorf("Parser returned invalid data (pattern: %s, invert: %t)\nExp: %+v\nGot: %+v\n",
tt.pattern, tt.invert, tt.entries, gotdata.entries)
}
})
}
}

View File

@@ -26,41 +26,10 @@ import (
)
func printData(data *Tabdata) {
// prepare headers: add numbers to headers
numberedHeaders := []string{}
for i, head := range data.headers {
if len(Columns) > 0 {
// -c specified
if !contains(UseColumns, i+1) {
// ignore this one
continue
}
}
if NoNumbering {
numberedHeaders = append(numberedHeaders, head)
} else {
numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
}
}
data.headers = numberedHeaders
// prepare data
if len(Columns) > 0 {
reducedEntries := [][]string{}
reducedEntry := []string{}
for _, entry := range data.entries {
reducedEntry = nil
for i, value := range entry {
if !contains(UseColumns, i+1) {
continue
}
reducedEntry = append(reducedEntry, value)
}
reducedEntries = append(reducedEntries, reducedEntry)
}
data.entries = reducedEntries
if OutputMode != "shell" {
numberizeHeaders(data)
}
reduceColumns(data)
switch OutputMode {
case "extended":
@@ -190,6 +159,7 @@ func printShellData(data *Tabdata) {
var idx int
for _, entry := range data.entries {
idx = 0
shentries := []string{}
for i, value := range entry {
if len(Columns) > 0 {
if !contains(UseColumns, i+1) {
@@ -197,10 +167,10 @@ func printShellData(data *Tabdata) {
}
}
fmt.Printf("%s=\"%s\" ", data.headers[idx], value)
shentries = append(shentries, fmt.Sprintf("%s=\"%s\"", data.headers[idx], value))
idx++
}
fmt.Println()
fmt.Println(strings.Join(shentries, " "))
}
}
}

View File

@@ -18,15 +18,33 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package lib
import (
"fmt"
"os"
"strings"
"testing"
)
func TestPrinter(t *testing.T) {
table := `ONE TWO THREE
asd igig cxxxncnc
19191 EDD 1 X`
startdata := Tabdata{
maxwidthHeader: 5,
maxwidthPerCol: []int{
5,
5,
8,
},
columns: 3,
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
[]string{
"asd", "igig", "cxxxncnc",
},
[]string{
"19191", "EDD 1", "X",
},
},
}
expects := map[string]string{
"ascii": `ONE(1) TWO(2) THREE(3)
@@ -42,6 +60,8 @@ asd igig cxxxncnc
|--------|--------|----------|
| asd | igig | cxxxncnc |
| 19191 | EDD 1 | X |`,
"shell": `ONE="asd" TWO="igig" THREE="cxxxncnc"
ONE="19191" TWO="EDD 1" THREE="X"`,
}
r, w, err := os.Pipe()
@@ -52,27 +72,25 @@ asd igig cxxxncnc
os.Stdout = w
for mode, expect := range expects {
OutputMode = mode
fd := strings.NewReader(table)
data, err := parseFile(fd, "")
testname := fmt.Sprintf("print-%s", mode)
t.Run(testname, func(t *testing.T) {
if err != nil {
t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, data)
}
OutputMode = mode
data := startdata // we need to reset our mock data, since it's being modified in printData()
printData(&data)
printData(&data)
buf := make([]byte, 1024)
n, err := r.Read(buf)
if err != nil {
t.Fatal(err)
}
buf = buf[:n]
output := strings.TrimSpace(string(buf))
buf := make([]byte, 1024)
n, err := r.Read(buf)
if err != nil {
t.Fatal(err)
}
buf = buf[:n]
output := strings.TrimSpace(string(buf))
if output != expect {
t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
}
if output != expect {
t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
}
})
}
// Restore

View File

@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "TABLIZER 1"
.TH TABLIZER 1 "2022-10-04" "1" "User Commands"
.TH TABLIZER 1 "2022-10-05" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@@ -150,6 +150,8 @@ tablizer \- Manipulate tabular output of other programs
\& \-c, \-\-columns string Only show the speficied columns (separated by ,)
\& \-d, \-\-debug Enable debugging
\& \-h, \-\-help help for tablizer
\& \-v, \-\-invert\-match select non\-matching rows
\& \-m, \-\-man Display manual page
\& \-n, \-\-no\-numbering Disable header numbering
\& \-o, \-\-output string Output mode \- one of: orgtbl, markdown, extended, ascii(default)
\& \-X, \-\-extended Enable extended output
@@ -179,8 +181,9 @@ positions.
.PP
Without any options it reads its input from \f(CW\*(C`STDIN\*(C'\fR, but you can also
specify a file as a parameter. If you want to reduce the output by
some regular expression, just specify it as its first
parameters. Hence:
some regular expression, just specify it as its first parameter. You
may also use the \fB\-v\fR option to exclude all rows which match the
pattern. Hence:
.PP
.Vb 2
\& # read from STDIN

View File

@@ -11,6 +11,8 @@ tablizer - Manipulate tabular output of other programs
-c, --columns string Only show the speficied columns (separated by ,)
-d, --debug Enable debugging
-h, --help help for tablizer
-v, --invert-match select non-matching rows
-m, --man Display manual page
-n, --no-numbering Disable header numbering
-o, --output string Output mode - one of: orgtbl, markdown, extended, ascii(default)
-X, --extended Enable extended output
@@ -22,12 +24,12 @@ tablizer - Manipulate tabular output of other programs
=head1 DESCRIPTION
Many programs generate tabular output. But sometimes you need to
Many programs generate tabular output. But sometimes you need to
post-process these tables, you may need to remove one or more columns
or you may want to filter for some pattern or you may need the output
in another program and need to parse it somehow. Standard unix tools
such as awk(1), grep(1) or column(1) may help, but sometimes it's a
tedious business.
or you may want to filter for some pattern (See L<PATTERNS>) or you
may need the output in another program and need to parse it somehow.
Standard unix tools such as awk(1), grep(1) or column(1) may help, but
sometimes it's a tedious business.
Let's take the output of the tool kubectl. It contains cells with
withespace and they do not separate columns by TAB characters. This is
@@ -41,8 +43,9 @@ positions.
Without any options it reads its input from C<STDIN>, but you can also
specify a file as a parameter. If you want to reduce the output by
some regular expression, just specify it as its first
parameters. Hence:
some regular expression, just specify it as its first parameter. You
may also use the B<-v> option to exclude all rows which match the
pattern. Hence:
# read from STDIN
kubectl get pods | tablizer
@@ -74,6 +77,34 @@ The numbering can be suppressed by using the B<-n> option.
Finally the B<-d> option enables debugging output which is mostly
usefull for the developer.
=head2 PATTERNS
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is PCRE compatible, refer to the syntax cheat
sheet here: L<https://github.com/google/re2/wiki/Syntax>. If you want
to read a more comprehensive documentation about the topic and have
perl installed you can read it with:
perldoc perlre
Or read it online: L<https://perldoc.perl.org/perlre>.
A note on modifiers: the regexp engine used in tablizer uses another
modifier syntax:
(?MODIFIER)
The most important modifiers are:
C<i> ignore case
C<m> multiline mode
C<s> single line mode
Example for a case insensitve search:
kubectl get pods -A | tablizer "(?i)account"
=head2 OUTPUT MODES
There might be cases when the tabular output of a program is way too