Merge branch 'development'

This commit is contained in:
2022-10-05 19:17:34 +02:00
9 changed files with 204 additions and 163 deletions

View File

@@ -87,7 +87,7 @@ func init() {
rootCmd.PersistentFlags().BoolVarP(&lib.ShowVersion, "version", "V", false, "Print program version")
rootCmd.PersistentFlags().BoolVarP(&lib.InvertMatch, "invert-match", "v", false, "select non-matching rows")
rootCmd.PersistentFlags().BoolVarP(&ShowManual, "man", "m", false, "Display manual page")
rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", "", "Custom field separator")
rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", lib.DefaultSeparator, "Custom field separator")
rootCmd.PersistentFlags().StringVarP(&lib.Columns, "columns", "c", "", "Only show the speficied columns (separated by ,)")
// output flags, only 1 allowed, hidden, since just short cuts

View File

@@ -19,25 +19,26 @@ package lib
var (
// command line flags
Debug bool
XtendedOut bool
NoNumbering bool
ShowVersion bool
Columns string
UseColumns []int
Separator string
OutflagExtended bool
OutflagMarkdown bool
OutflagOrgtable bool
OutflagShell bool
OutputMode string
InvertMatch bool
Debug bool
XtendedOut bool
NoNumbering bool
ShowVersion bool
Columns string
UseColumns []int
DefaultSeparator string = `(\s\s+|\t)`
Separator string = `(\s\s+|\t)`
OutflagExtended bool
OutflagMarkdown bool
OutflagOrgtable bool
OutflagShell bool
OutputMode string
InvertMatch bool
// used for validation
validOutputmodes = "(orgtbl|markdown|extended|ascii)"
// main program version
Version = "v1.0.5"
Version = "v1.0.6"
// generated version string, used by -v contains lib.Version on
// main branch, and lib.Version-$branch-$lastcommit-$date on

View File

@@ -48,8 +48,49 @@ func PrepareColumns() error {
return nil
}
func numberizeHeaders(data *Tabdata) {
// prepare headers: add numbers to headers
numberedHeaders := []string{}
for i, head := range data.headers {
if len(Columns) > 0 {
// -c specified
if !contains(UseColumns, i+1) {
// ignore this one
continue
}
}
if NoNumbering {
numberedHeaders = append(numberedHeaders, head)
} else {
numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
}
}
data.headers = numberedHeaders
}
func reduceColumns(data *Tabdata) {
// exclude columns, if any
if len(Columns) > 0 {
reducedEntries := [][]string{}
reducedEntry := []string{}
for _, entry := range data.entries {
reducedEntry = nil
for i, value := range entry {
if !contains(UseColumns, i+1) {
continue
}
reducedEntry = append(reducedEntry, value)
}
reducedEntries = append(reducedEntries, reducedEntry)
}
data.entries = reducedEntries
}
}
func PrepareModeFlags() error {
if len(OutputMode) == 0 {
// associate short flags like -X with mode selector
switch {
case OutflagExtended:
OutputMode = "extended"

View File

@@ -52,6 +52,7 @@ func TestPrepareColumns(t *testing.T) {
}{
{"1,2,3", []int{1, 2, 3}, false},
{"1,2,", []int{}, true},
{"a,b", []int{}, true},
}
for _, tt := range tests {
@@ -71,3 +72,46 @@ func TestPrepareColumns(t *testing.T) {
})
}
}
func TestReduceColumns(t *testing.T) {
var tests = []struct {
expect [][]string
columns []int
}{
{
expect: [][]string{[]string{"a", "b"}},
columns: []int{1, 2},
},
{
expect: [][]string{[]string{"a", "c"}},
columns: []int{1, 3},
},
{
expect: [][]string{[]string{"a"}},
columns: []int{1},
},
{
expect: [][]string{nil},
columns: []int{4},
},
}
input := [][]string{[]string{"a", "b", "c"}}
Columns = "y" // used as a flag with len(Columns)...
for _, tt := range tests {
testname := fmt.Sprintf("reduce-columns-by-%+v", tt.columns)
t.Run(testname, func(t *testing.T) {
UseColumns = tt.columns
data := Tabdata{entries: input}
reduceColumns(&data)
if !reflect.DeepEqual(data.entries, tt.expect) {
t.Errorf("reduceColumns returned invalid data:\ngot: %+v\nexp: %+v", data.entries, tt.expect)
}
})
}
Columns = "" // reset for other tests
UseColumns = nil
}

View File

@@ -29,49 +29,36 @@ import (
// contains a whole parsed table
type Tabdata struct {
maxwidthHeader int // longest header
maxwidthPerCol []int // max width per column
columns int
headerIndices []map[string]int // [ {beg=>0, end=>17}, ... ]
headers []string // [ "ID", "NAME", ...]
maxwidthHeader int // longest header
maxwidthPerCol []int // max width per column
columns int // count
headers []string // [ "ID", "NAME", ...]
entries [][]string
}
/*
Parse tabular input. We split the header (first line) by 2 or more
spaces, remember the positions of the header fields. We then split
the data (everything after the first line) by those positions. That
way we can turn "tabular data" (with fields containing whitespaces)
into real tabular data. We re-tabulate our input if you will.
Parse tabular input.
*/
func parseFile(input io.Reader, pattern string) (Tabdata, error) {
data := Tabdata{}
var scanner *bufio.Scanner
var spaces = `\s\s+|$`
if len(Separator) > 0 {
spaces = Separator
}
hadFirst := false
spacefinder := regexp.MustCompile(spaces)
beg := 0
separate := regexp.MustCompile(Separator)
patternR, err := regexp.Compile(pattern)
if err != nil {
return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
}
scanner = bufio.NewScanner(input)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
values := []string{}
patternR, err := regexp.Compile(pattern)
if err != nil {
return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
}
parts := separate.Split(line, -1)
if !hadFirst {
// header processing
parts := spacefinder.FindAllStringIndex(line, -1)
data.columns = len(parts)
// if Debug {
// fmt.Println(parts)
@@ -83,30 +70,14 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
// fmt.Printf("Part: <%s>\n", string(line[beg:part[0]]))
//}
// current field
head := string(line[beg:part[0]])
// register begin and end of field within line
indices := make(map[string]int)
indices["beg"] = beg
if part[0] == part[1] {
indices["end"] = 0
} else {
indices["end"] = part[1] - 1
}
// register widest header field
headerlen := len(head)
headerlen := len(part)
if headerlen > data.maxwidthHeader {
data.maxwidthHeader = headerlen
}
// register fields data
data.headerIndices = append(data.headerIndices, indices)
data.headers = append(data.headers, head)
// end of current field == begin of next one
beg = part[1]
data.headers = append(data.headers, strings.TrimSpace(part))
// done
hadFirst = true
@@ -124,16 +95,9 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
}
idx := 0 // we cannot use the header index, because we could exclude columns
for _, index := range data.headerIndices {
value := ""
if index["end"] == 0 {
value = string(line[index["beg"]:])
} else {
value = string(line[index["beg"]:index["end"]])
}
width := len(strings.TrimSpace(value))
values := []string{}
for _, part := range parts {
width := len(strings.TrimSpace(part))
if len(data.maxwidthPerCol)-1 < idx {
data.maxwidthPerCol = append(data.maxwidthPerCol, width)
@@ -146,7 +110,7 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
// if Debug {
// fmt.Printf("<%s> ", value)
// }
values = append(values, strings.TrimSpace(value))
values = append(values, strings.TrimSpace(part))
idx++
}

View File

@@ -28,40 +28,18 @@ func TestParser(t *testing.T) {
data := Tabdata{
maxwidthHeader: 5,
maxwidthPerCol: []int{
5,
5,
8,
5, 5, 8,
},
columns: 3,
headerIndices: []map[string]int{
map[string]int{
"beg": 0,
"end": 6,
},
map[string]int{
"end": 13,
"beg": 7,
},
map[string]int{
"beg": 14,
"end": 0,
},
},
headers: []string{
"ONE",
"TWO",
"THREE",
"ONE", "TWO", "THREE",
},
entries: [][]string{
[]string{
"asd",
"igig",
"cxxxncnc",
"asd", "igig", "cxxxncnc",
},
[]string{
"19191",
"EDD 1",
"X",
"19191", "EDD 1", "X",
},
},
}
@@ -72,13 +50,14 @@ asd igig cxxxncnc
readFd := strings.NewReader(table)
gotdata, err := parseFile(readFd, "")
Separator = DefaultSeparator
if err != nil {
t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, gotdata)
}
if !reflect.DeepEqual(data, gotdata) {
t.Errorf("Parser returned invalid data\nExp: %+v\nGot: %+v\n", data, gotdata)
t.Errorf("Parser returned invalid data, Regex: %s\nExp: %+v\nGot: %+v\n", Separator, data, gotdata)
}
}
@@ -91,9 +70,7 @@ func TestParserPatternmatching(t *testing.T) {
{
entries: [][]string{
[]string{
"asd",
"igig",
"cxxxncnc",
"asd", "igig", "cxxxncnc",
},
},
pattern: "ig",
@@ -102,9 +79,7 @@ func TestParserPatternmatching(t *testing.T) {
{
entries: [][]string{
[]string{
"19191",
"EDD 1",
"X",
"19191", "EDD 1", "X",
},
},
pattern: "ig",

View File

@@ -26,41 +26,10 @@ import (
)
func printData(data *Tabdata) {
// prepare headers: add numbers to headers
numberedHeaders := []string{}
for i, head := range data.headers {
if len(Columns) > 0 {
// -c specified
if !contains(UseColumns, i+1) {
// ignore this one
continue
}
}
if NoNumbering {
numberedHeaders = append(numberedHeaders, head)
} else {
numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
}
}
data.headers = numberedHeaders
// prepare data
if len(Columns) > 0 {
reducedEntries := [][]string{}
reducedEntry := []string{}
for _, entry := range data.entries {
reducedEntry = nil
for i, value := range entry {
if !contains(UseColumns, i+1) {
continue
}
reducedEntry = append(reducedEntry, value)
}
reducedEntries = append(reducedEntries, reducedEntry)
}
data.entries = reducedEntries
if OutputMode != "shell" {
numberizeHeaders(data)
}
reduceColumns(data)
switch OutputMode {
case "extended":
@@ -190,6 +159,7 @@ func printShellData(data *Tabdata) {
var idx int
for _, entry := range data.entries {
idx = 0
shentries := []string{}
for i, value := range entry {
if len(Columns) > 0 {
if !contains(UseColumns, i+1) {
@@ -197,10 +167,10 @@ func printShellData(data *Tabdata) {
}
}
fmt.Printf("%s=\"%s\" ", data.headers[idx], value)
shentries = append(shentries, fmt.Sprintf("%s=\"%s\"", data.headers[idx], value))
idx++
}
fmt.Println()
fmt.Println(strings.Join(shentries, " "))
}
}
}

View File

@@ -18,15 +18,33 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package lib
import (
"fmt"
"os"
"strings"
"testing"
)
func TestPrinter(t *testing.T) {
table := `ONE TWO THREE
asd igig cxxxncnc
19191 EDD 1 X`
startdata := Tabdata{
maxwidthHeader: 5,
maxwidthPerCol: []int{
5,
5,
8,
},
columns: 3,
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
[]string{
"asd", "igig", "cxxxncnc",
},
[]string{
"19191", "EDD 1", "X",
},
},
}
expects := map[string]string{
"ascii": `ONE(1) TWO(2) THREE(3)
@@ -42,6 +60,8 @@ asd igig cxxxncnc
|--------|--------|----------|
| asd | igig | cxxxncnc |
| 19191 | EDD 1 | X |`,
"shell": `ONE="asd" TWO="igig" THREE="cxxxncnc"
ONE="19191" TWO="EDD 1" THREE="X"`,
}
r, w, err := os.Pipe()
@@ -52,27 +72,25 @@ asd igig cxxxncnc
os.Stdout = w
for mode, expect := range expects {
OutputMode = mode
fd := strings.NewReader(table)
data, err := parseFile(fd, "")
testname := fmt.Sprintf("print-%s", mode)
t.Run(testname, func(t *testing.T) {
if err != nil {
t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, data)
}
OutputMode = mode
data := startdata // we need to reset our mock data, since it's being modified in printData()
printData(&data)
printData(&data)
buf := make([]byte, 1024)
n, err := r.Read(buf)
if err != nil {
t.Fatal(err)
}
buf = buf[:n]
output := strings.TrimSpace(string(buf))
buf := make([]byte, 1024)
n, err := r.Read(buf)
if err != nil {
t.Fatal(err)
}
buf = buf[:n]
output := strings.TrimSpace(string(buf))
if output != expect {
t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
}
if output != expect {
t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
}
})
}
// Restore

View File

@@ -24,12 +24,12 @@ tablizer - Manipulate tabular output of other programs
=head1 DESCRIPTION
Many programs generate tabular output. But sometimes you need to
Many programs generate tabular output. But sometimes you need to
post-process these tables, you may need to remove one or more columns
or you may want to filter for some pattern or you may need the output
in another program and need to parse it somehow. Standard unix tools
such as awk(1), grep(1) or column(1) may help, but sometimes it's a
tedious business.
or you may want to filter for some pattern (See L<PATTERNS>) or you
may need the output in another program and need to parse it somehow.
Standard unix tools such as awk(1), grep(1) or column(1) may help, but
sometimes it's a tedious business.
Let's take the output of the tool kubectl. It contains cells with
withespace and they do not separate columns by TAB characters. This is
@@ -77,6 +77,34 @@ The numbering can be suppressed by using the B<-n> option.
Finally the B<-d> option enables debugging output which is mostly
usefull for the developer.
=head2 PATTERNS
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is PCRE compatible, refer to the syntax cheat
sheet here: L<https://github.com/google/re2/wiki/Syntax>. If you want
to read a more comprehensive documentation about the topic and have
perl installed you can read it with:
perldoc perlre
Or read it online: L<https://perldoc.perl.org/perlre>.
A note on modifiers: the regexp engine used in tablizer uses another
modifier syntax:
(?MODIFIER)
The most important modifiers are:
C<i> ignore case
C<m> multiline mode
C<s> single line mode
Example for a case insensitve search:
kubectl get pods -A | tablizer "(?i)account"
=head2 OUTPUT MODES
There might be cases when the tabular output of a program is way too