Compare commits

..

3 Commits

Author SHA1 Message Date
T.v.Dein
8bdb3db105 fix #85: add --auto-headers and --custom-headers (#86) 2025-10-10 13:08:16 +02:00
4ce6c30f54 fix short usage formatting 2025-10-09 23:16:07 +02:00
T.v.Dein
ec0b210167 add some handy builtin character classes as split separators (#84) 2025-10-09 23:03:57 +02:00
9 changed files with 382 additions and 19 deletions

View File

@@ -27,13 +27,26 @@ import (
"github.com/hashicorp/hcl/v2/hclsimple"
)
const DefaultSeparator string = `(\s\s+|\t)`
const Version string = "v1.5.8"
const MAXPARTS = 2
const (
Version = "v1.5.10"
MAXPARTS = 2
)
var DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config"
var (
DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config"
VERSION string // maintained by -x
var VERSION string // maintained by -x
SeparatorTemplates = map[string]string{
":tab:": `\s*\t\s*`, // tab but eats spaces around
":spaces:": `\s{2,}`, // 2 or more spaces
":pipe:": `\s*\|\s*`, // one pipe eating spaces around
":default:": `(\s\s+|\t)`, // 2 or more spaces or tab
":nonword:": `\W`, // word boundary
":nondigit:": `\D`, // same for numbers
":special:": `[\*\+\-_\(\)\[\]\{\}?\\/<>=&$§"':,\^]+`, // match any special char
":nonprint:": `[[:^print:]]+`, // non printables
}
)
// public config, set via config file or using defaults
type Settings struct {
@@ -80,6 +93,8 @@ type Config struct {
UseHighlight bool
Interactive bool
InputJSON bool
AutoHeaders bool
CustomHeaders []string
SortMode string
SortDescending bool
@@ -356,6 +371,13 @@ func (conf *Config) ApplyDefaults() {
if conf.OutputMode == Yaml || conf.OutputMode == CSV {
conf.Numbering = false
}
if conf.Separator[0] == ':' && conf.Separator[len(conf.Separator)-1] == ':' {
separator, ok := SeparatorTemplates[conf.Separator]
if ok {
conf.Separator = separator
}
}
}
func (conf *Config) PreparePattern(patterns []*Pattern) error {
@@ -393,6 +415,12 @@ func (conf *Config) PreparePattern(patterns []*Pattern) error {
return nil
}
func (conf *Config) PrepareCustomHeaders(custom string) {
if len(custom) > 0 {
conf.CustomHeaders = strings.Split(custom, ",")
}
}
// Parse config file. Ignore if the file doesn't exist but return an
// error if it exists but fails to read or parse
func (conf *Config) ParseConfigfile() error {

View File

@@ -59,6 +59,7 @@ func Execute() {
ShowCompletion string
modeflag cfg.Modeflag
sortmode cfg.Sortmode
headers string
)
var rootCmd = &cobra.Command{
@@ -91,6 +92,7 @@ func Execute() {
conf.CheckEnv()
conf.PrepareModeFlags(modeflag)
conf.PrepareSortFlags(sortmode)
conf.PrepareCustomHeaders(headers)
wrapE(conf.PrepareFilters())
@@ -123,7 +125,7 @@ func Execute() {
"Use alternating background colors")
rootCmd.PersistentFlags().StringVarP(&ShowCompletion, "completion", "", "",
"Display completion code")
rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.DefaultSeparator,
rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.SeparatorTemplates[":default:"],
"Custom field separator")
rootCmd.PersistentFlags().StringVarP(&conf.Columns, "columns", "c", "",
"Only show the speficied columns (separated by ,)")
@@ -137,6 +139,10 @@ func Execute() {
"Output field separator (' ' for ascii table, ',' for CSV)")
rootCmd.PersistentFlags().BoolVarP(&conf.InputJSON, "json", "j", false,
"JSON input mode")
rootCmd.PersistentFlags().BoolVarP(&conf.AutoHeaders, "auto-headers", "", false,
"Generate headers automatically")
rootCmd.PersistentFlags().StringVarP(&headers, "custom-headers", "", "",
"Custom headers")
// sort options
rootCmd.PersistentFlags().StringVarP(&conf.SortByColumn, "sort-by", "k", "",

View File

@@ -7,7 +7,7 @@ const shortusage = `tablizer [regex,...] [-r file] [flags]
-T col,... transpose specified columns -n numberize columns
-R /from/to/ apply replacement to columns in -T -N do not use colors
-y col,... yank columns to clipboard -H do not show headers
--ofs char output field separator -s specify field separator
--ofs char output field separator -s specify field separator
-r file read input from file -z use fuzzy search
-f file read config from file -I interactive filter mode
-d debug

View File

@@ -14,7 +14,7 @@ SYNOPSIS
-n, --numbering Enable header numbering
-N, --no-color Disable pattern highlighting
-H, --no-headers Disable headers display
-s, --separator <string> Custom field separator
-s, --separator <string> Custom field separator (maybe char, string or :class:)
-k, --sort-by <int|name> Sort by column (default: 1)
-z, --fuzzy Use fuzzy search [experimental]
-F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times
@@ -22,6 +22,8 @@ SYNOPSIS
-R, --regex-transposer </from/to/> Apply /search/replace/ regexp to fields given in -T
-j, --json Read JSON input (must be array of hashes)
-I, --interactive Interactively filter and select rows
--auto-headers Generate headers if there are none present in input
--custom-headers a,b,... Use custom headers, separated by comma
Output Flags (mutually exclusive):
-X, --extended Enable extended output
@@ -141,6 +143,57 @@ DESCRIPTION
Finally the -d option enables debugging output which is mostly useful
for the developer.
SEPARATOR
The option -s can be a single character, in which case the CSV parser
will be invoked. You can also specify a string as separator. The string
will be interpreted as literal string unless it is a valid go regular
expression. For example:
-s '\t{2,}\'
is being used as a regexp and will match two or more consecutive tabs.
-s 'foo'
on the other hand is no regular expression and will be used literally.
To make live easier, there are a couple of predefined regular
expressions, which you can specify as classes:
* :tab:
Matches a tab and eats spaces around it.
* :spaces:
Matches 2 or more spaces.
* :pipe:
Matches a pipe character and eats spaces around it.
* :default:
Matches 2 or more spaces or tab. This is the default separator if
none is specified.
* :nonword:
Matches a non-word character.
* :nondigit:
Matches a non-digit character.
* :special:
Matches one or more special chars like brackets, dollar sign,
slashes etc.
* :nonprint:
Matches one or more non-printable characters.
PATTERNS AND FILTERING
You can reduce the rows being displayed by using one or more regular
expression patterns. The regexp language being used is the one of
@@ -458,7 +511,7 @@ Operational Flags:
-n, --numbering Enable header numbering
-N, --no-color Disable pattern highlighting
-H, --no-headers Disable headers display
-s, --separator <string> Custom field separator
-s, --separator <string> Custom field separator (maybe char, string or :class:)
-k, --sort-by <int|name> Sort by column (default: 1)
-z, --fuzzy Use fuzzy search [experimental]
-F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times
@@ -466,6 +519,8 @@ Operational Flags:
-R, --regex-transposer </from/to/> Apply /search/replace/ regexp to fields given in -T
-j, --json Read JSON input (must be array of hashes)
-I, --interactive Interactively filter and select rows
--auto-headers Generate headers if there are none present in input
--custom-headers a,b,... Use custom headers, separated by comma
Output Flags (mutually exclusive):
-X, --extended Enable extended output

View File

@@ -66,6 +66,43 @@ func Parse(conf cfg.Config, input io.Reader) (Tabdata, error) {
return data, err
}
/*
* Setup headers, given headers might be usable headers or just the
* first row, which we use to determine how many headers to generate,
* if enabled.
*/
func SetHeaders(conf cfg.Config, headers []string) []string {
if !conf.AutoHeaders && len(conf.CustomHeaders) == 0 {
return headers
}
if conf.AutoHeaders {
heads := make([]string, len(headers))
for idx := range headers {
heads[idx] = fmt.Sprintf("%d", idx+1)
}
return heads
}
if len(conf.CustomHeaders) == len(headers) {
return conf.CustomHeaders
}
// use as much custom ones we have, generate the remainder
heads := make([]string, len(headers))
for idx := range headers {
if idx < len(conf.CustomHeaders) {
heads[idx] = conf.CustomHeaders[idx]
} else {
heads[idx] = fmt.Sprintf("%d", idx+1)
}
}
return heads
}
/*
Parse CSV input.
*/
@@ -87,7 +124,7 @@ func parseCSV(conf cfg.Config, input io.Reader) (Tabdata, error) {
}
if len(records) >= 1 {
data.headers = records[0]
data.headers = SetHeaders(conf, records[0])
data.columns = len(records)
for _, head := range data.headers {
@@ -98,9 +135,14 @@ func parseCSV(conf cfg.Config, input io.Reader) (Tabdata, error) {
}
}
if len(records) > 1 {
data.entries = records[1:]
if len(records) >= 1 {
if conf.AutoHeaders || len(conf.CustomHeaders) > 0 {
data.entries = records
} else {
data.entries = records[1:]
}
}
}
return data, nil
@@ -128,7 +170,9 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
data.columns = len(parts)
// process all header fields
for _, part := range parts {
firstrow := make([]string, len(parts))
for idx, part := range parts {
// register widest header field
headerlen := len(part)
if headerlen > data.maxwidthHeader {
@@ -136,11 +180,22 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
}
// register fields data
data.headers = append(data.headers, strings.TrimSpace(part))
firstrow[idx] = strings.TrimSpace(part)
// done
hadFirst = true
}
data.headers = SetHeaders(conf, firstrow)
if conf.AutoHeaders || len(conf.CustomHeaders) > 0 {
// we do not use generated headers, consider as row
if matchPattern(conf, line) == conf.InvertMatch {
continue
}
data.entries = append(data.entries, firstrow)
}
} else {
// data processing
if matchPattern(conf, line) == conf.InvertMatch {

View File

@@ -34,7 +34,7 @@ var input = []struct {
}{
{
name: "tabular-data",
separator: cfg.DefaultSeparator,
separator: cfg.SeparatorTemplates[":default:"],
text: `
ONE TWO THREE
asd igig cxxxncnc
@@ -148,7 +148,7 @@ asd igig
19191 EDD 1 X`
readFd := strings.NewReader(strings.TrimSpace(table))
conf := cfg.Config{Separator: cfg.DefaultSeparator}
conf := cfg.Config{Separator: cfg.SeparatorTemplates[":default:"]}
gotdata, err := wrapValidateParser(conf, readFd)
assert.NoError(t, err)
@@ -314,6 +314,108 @@ func TestParserJSONInput(t *testing.T) {
}
}
func TestParserSeparators(t *testing.T) {
list := []string{"alpha", "beta", "delta"}
tests := []struct {
input string
sep string
}{
{
input: `🎲`,
sep: ":nonprint:",
},
{
input: `|`,
sep: ":pipe:",
},
{
input: ` `,
sep: ":spaces:",
},
{
input: " \t ",
sep: ":tab:",
},
{
input: `-`,
sep: ":nonword:",
},
{
input: `//$`,
sep: ":special:",
},
}
for _, testdata := range tests {
testname := fmt.Sprintf("parse-%s", testdata.sep)
t.Run(testname, func(t *testing.T) {
header := strings.Join(list, testdata.input)
row := header
content := header + "\n" + row
readFd := strings.NewReader(strings.TrimSpace(content))
conf := cfg.Config{Separator: testdata.sep}
conf.ApplyDefaults()
gotdata, err := wrapValidateParser(conf, readFd)
assert.NoError(t, err)
assert.EqualValues(t, [][]string{list}, gotdata.entries)
})
}
}
func TestParserSetHeaders(t *testing.T) {
row := []string{"c", "b", "c", "d", "e"}
tests := []struct {
name string
custom []string
expect []string
auto bool
}{
{
name: "default",
expect: row,
},
{
name: "auto",
expect: strings.Split("1 2 3 4 5", " "),
auto: true,
},
{
name: "custom-complete",
custom: strings.Split("A B C D E", " "),
expect: strings.Split("A B C D E", " "),
},
{
name: "custom-too-short",
custom: strings.Split("A B", " "),
expect: strings.Split("A B 3 4 5", " "),
},
{
name: "custom-too-long",
custom: strings.Split("A B C D E F G", " "),
expect: strings.Split("A B C D E", " "),
},
}
for _, testdata := range tests {
testname := fmt.Sprintf("parse-%s", testdata.name)
t.Run(testname, func(t *testing.T) {
conf := cfg.Config{
AutoHeaders: testdata.auto,
CustomHeaders: testdata.custom,
}
headers := SetHeaders(conf, row)
assert.NotNil(t, headers)
assert.EqualValues(t, testdata.expect, headers)
})
}
}
func wrapValidateParser(conf cfg.Config, input io.Reader) (Tabdata, error) {
data, err := Parse(conf, input)

View File

@@ -292,6 +292,7 @@ func TestPrinter(t *testing.T) {
conf.UseSortByColumn = []int{testdata.column}
}
conf.Separator = cfg.SeparatorTemplates[":default:"]
conf.ApplyDefaults()
// the test checks the len!

View File

@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "TABLIZER 1"
.TH TABLIZER 1 "2025-10-01" "1" "User Commands"
.TH TABLIZER 1 "2025-10-10" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@@ -152,7 +152,7 @@ tablizer \- Manipulate tabular output of other programs
\& \-n, \-\-numbering Enable header numbering
\& \-N, \-\-no\-color Disable pattern highlighting
\& \-H, \-\-no\-headers Disable headers display
\& \-s, \-\-separator <string> Custom field separator
\& \-s, \-\-separator <string> Custom field separator (maybe char, string or :class:)
\& \-k, \-\-sort\-by <int|name> Sort by column (default: 1)
\& \-z, \-\-fuzzy Use fuzzy search [experimental]
\& \-F, \-\-filter <field[!]=reg> Filter given field with regex, can be used multiple times
@@ -160,6 +160,8 @@ tablizer \- Manipulate tabular output of other programs
\& \-R, \-\-regex\-transposer </from/to/> Apply /search/replace/ regexp to fields given in \-T
\& \-j, \-\-json Read JSON input (must be array of hashes)
\& \-I, \-\-interactive Interactively filter and select rows
\& \-\-auto\-headers Generate headers if there are none present in input
\& \-\-custom\-headers a,b,... Use custom headers, separated by comma
\&
\& Output Flags (mutually exclusive):
\& \-X, \-\-extended Enable extended output
@@ -293,6 +295,62 @@ Sorts timestamps.
.PP
Finally the \fB\-d\fR option enables debugging output which is mostly
useful for the developer.
.SS "\s-1SEPARATOR\s0"
.IX Subsection "SEPARATOR"
The option \fB\-s\fR can be a single character, in which case the \s-1CSV\s0
parser will be invoked. You can also specify a string as
separator. The string will be interpreted as literal string unless it
is a valid go regular expression. For example:
.PP
.Vb 1
\& \-s \*(Aq\et{2,}\e\*(Aq
.Ve
.PP
is being used as a regexp and will match two or more consecutive tabs.
.PP
.Vb 1
\& \-s \*(Aqfoo\*(Aq
.Ve
.PP
on the other hand is no regular expression and will be used literally.
.PP
To make live easier, there are a couple of predefined regular
expressions, which you can specify as classes:
.Sp
.RS 4
* :tab:
.Sp
Matches a tab and eats spaces around it.
.Sp
* :spaces:
.Sp
Matches 2 or more spaces.
.Sp
* :pipe:
.Sp
Matches a pipe character and eats spaces around it.
.Sp
* :default:
.Sp
Matches 2 or more spaces or tab. This is the default separator if none
is specified.
.Sp
* :nonword:
.Sp
Matches a non-word character.
.Sp
* :nondigit:
.Sp
Matches a non-digit character.
.Sp
* :special:
.Sp
Matches one or more special chars like brackets, dollar sign, slashes etc.
.Sp
* :nonprint:
.Sp
Matches one or more non-printable characters.
.RE
.SS "\s-1PATTERNS AND FILTERING\s0"
.IX Subsection "PATTERNS AND FILTERING"
You can reduce the rows being displayed by using one or more regular

View File

@@ -13,7 +13,7 @@ tablizer - Manipulate tabular output of other programs
-n, --numbering Enable header numbering
-N, --no-color Disable pattern highlighting
-H, --no-headers Disable headers display
-s, --separator <string> Custom field separator
-s, --separator <string> Custom field separator (maybe char, string or :class:)
-k, --sort-by <int|name> Sort by column (default: 1)
-z, --fuzzy Use fuzzy search [experimental]
-F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times
@@ -21,6 +21,8 @@ tablizer - Manipulate tabular output of other programs
-R, --regex-transposer </from/to/> Apply /search/replace/ regexp to fields given in -T
-j, --json Read JSON input (must be array of hashes)
-I, --interactive Interactively filter and select rows
--auto-headers Generate headers if there are none present in input
--custom-headers a,b,... Use custom headers, separated by comma
Output Flags (mutually exclusive):
-X, --extended Enable extended output
@@ -153,6 +155,62 @@ Sorts timestamps.
Finally the B<-d> option enables debugging output which is mostly
useful for the developer.
=head2 SEPARATOR
The option B<-s> can be a single character, in which case the CSV
parser will be invoked. You can also specify a string as
separator. The string will be interpreted as literal string unless it
is a valid go regular expression. For example:
-s '\t{2,}\'
is being used as a regexp and will match two or more consecutive tabs.
-s 'foo'
on the other hand is no regular expression and will be used literally.
To make live easier, there are a couple of predefined regular
expressions, which you can specify as classes:
=over
* :tab:
Matches a tab and eats spaces around it.
* :spaces:
Matches 2 or more spaces.
* :pipe:
Matches a pipe character and eats spaces around it.
* :default:
Matches 2 or more spaces or tab. This is the default separator if none
is specified.
* :nonword:
Matches a non-word character.
* :nondigit:
Matches a non-digit character.
* :special:
Matches one or more special chars like brackets, dollar sign, slashes etc.
* :nonprint:
Matches one or more non-printable characters.
=back
=head2 PATTERNS AND FILTERING
You can reduce the rows being displayed by using one or more regular