add some handy builtin character classes as split separators (#84)

This commit is contained in:
T.v.Dein
2025-10-09 23:03:57 +02:00
committed by GitHub
parent 253ef8262e
commit ec0b210167
7 changed files with 249 additions and 13 deletions

View File

@@ -27,13 +27,26 @@ import (
"github.com/hashicorp/hcl/v2/hclsimple" "github.com/hashicorp/hcl/v2/hclsimple"
) )
const DefaultSeparator string = `(\s\s+|\t)` const (
const Version string = "v1.5.8" Version = "v1.5.9"
const MAXPARTS = 2 MAXPARTS = 2
)
var DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config" var (
DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config"
VERSION string // maintained by -x
var VERSION string // maintained by -x SeparatorTemplates = map[string]string{
":tab:": `\s*\t\s*`, // tab but eats spaces around
":spaces:": `\s{2,}`, // 2 or more spaces
":pipe:": `\s*\|\s*`, // one pipe eating spaces around
":default:": `(\s\s+|\t)`, // 2 or more spaces or tab
":nonword:": `\W`, // word boundary
":nondigit:": `\D`, // same for numbers
":special:": `[\*\+\-_\(\)\[\]\{\}?\\/<>=&$§"':,\^]+`, // match any special char
":nonprint:": `[[:^print:]]+`, // non printables
}
)
// public config, set via config file or using defaults // public config, set via config file or using defaults
type Settings struct { type Settings struct {
@@ -356,6 +369,13 @@ func (conf *Config) ApplyDefaults() {
if conf.OutputMode == Yaml || conf.OutputMode == CSV { if conf.OutputMode == Yaml || conf.OutputMode == CSV {
conf.Numbering = false conf.Numbering = false
} }
if conf.Separator[0] == ':' && conf.Separator[len(conf.Separator)-1] == ':' {
separator, ok := SeparatorTemplates[conf.Separator]
if ok {
conf.Separator = separator
}
}
} }
func (conf *Config) PreparePattern(patterns []*Pattern) error { func (conf *Config) PreparePattern(patterns []*Pattern) error {

View File

@@ -123,7 +123,7 @@ func Execute() {
"Use alternating background colors") "Use alternating background colors")
rootCmd.PersistentFlags().StringVarP(&ShowCompletion, "completion", "", "", rootCmd.PersistentFlags().StringVarP(&ShowCompletion, "completion", "", "",
"Display completion code") "Display completion code")
rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.DefaultSeparator, rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.SeparatorTemplates[":default:"],
"Custom field separator") "Custom field separator")
rootCmd.PersistentFlags().StringVarP(&conf.Columns, "columns", "c", "", rootCmd.PersistentFlags().StringVarP(&conf.Columns, "columns", "c", "",
"Only show the speficied columns (separated by ,)") "Only show the speficied columns (separated by ,)")

View File

@@ -14,7 +14,7 @@ SYNOPSIS
-n, --numbering Enable header numbering -n, --numbering Enable header numbering
-N, --no-color Disable pattern highlighting -N, --no-color Disable pattern highlighting
-H, --no-headers Disable headers display -H, --no-headers Disable headers display
-s, --separator <string> Custom field separator -s, --separator <string> Custom field separator (maybe char, string or :class:)
-k, --sort-by <int|name> Sort by column (default: 1) -k, --sort-by <int|name> Sort by column (default: 1)
-z, --fuzzy Use fuzzy search [experimental] -z, --fuzzy Use fuzzy search [experimental]
-F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times -F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times
@@ -141,6 +141,57 @@ DESCRIPTION
Finally the -d option enables debugging output which is mostly useful Finally the -d option enables debugging output which is mostly useful
for the developer. for the developer.
SEPARATOR
The option -s can be a single character, in which case the CSV parser
will be invoked. You can also specify a string as separator. The string
will be interpreted as literal string unless it is a valid go regular
expression. For example:
-s '\t{2,}\'
is being used as a regexp and will match two or more consecutive tabs.
-s 'foo'
on the other hand is no regular expression and will be used literally.
To make live easier, there are a couple of predefined regular
expressions, which you can specify as classes:
* :tab:
Matches a tab and eats spaces around it.
* :spaces:
Matches 2 or more spaces.
* :pipe:
Matches a pipe character and eats spaces around it.
* :default:
Matches 2 or more spaces or tab. This is the default separator if
none is specified.
* :nonword:
Matches a non-word character.
* :nondigit:
Matches a non-digit character.
* :special:
Matches one or more special chars like brackets, dollar sign,
slashes etc.
* :nonprint:
Matches one or more non-printable characters.
PATTERNS AND FILTERING PATTERNS AND FILTERING
You can reduce the rows being displayed by using one or more regular You can reduce the rows being displayed by using one or more regular
expression patterns. The regexp language being used is the one of expression patterns. The regexp language being used is the one of
@@ -458,7 +509,7 @@ Operational Flags:
-n, --numbering Enable header numbering -n, --numbering Enable header numbering
-N, --no-color Disable pattern highlighting -N, --no-color Disable pattern highlighting
-H, --no-headers Disable headers display -H, --no-headers Disable headers display
-s, --separator <string> Custom field separator -s, --separator <string> Custom field separator (maybe char, string or :class:)
-k, --sort-by <int|name> Sort by column (default: 1) -k, --sort-by <int|name> Sort by column (default: 1)
-z, --fuzzy Use fuzzy search [experimental] -z, --fuzzy Use fuzzy search [experimental]
-F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times -F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times

View File

@@ -34,7 +34,7 @@ var input = []struct {
}{ }{
{ {
name: "tabular-data", name: "tabular-data",
separator: cfg.DefaultSeparator, separator: cfg.SeparatorTemplates[":default:"],
text: ` text: `
ONE TWO THREE ONE TWO THREE
asd igig cxxxncnc asd igig cxxxncnc
@@ -148,7 +148,7 @@ asd igig
19191 EDD 1 X` 19191 EDD 1 X`
readFd := strings.NewReader(strings.TrimSpace(table)) readFd := strings.NewReader(strings.TrimSpace(table))
conf := cfg.Config{Separator: cfg.DefaultSeparator} conf := cfg.Config{Separator: cfg.SeparatorTemplates[":default:"]}
gotdata, err := wrapValidateParser(conf, readFd) gotdata, err := wrapValidateParser(conf, readFd)
assert.NoError(t, err) assert.NoError(t, err)
@@ -314,6 +314,58 @@ func TestParserJSONInput(t *testing.T) {
} }
} }
func TestParserSeparators(t *testing.T) {
list := []string{"alpha", "beta", "delta"}
tests := []struct {
input string
sep string
}{
{
input: `🎲`,
sep: ":nonprint:",
},
{
input: `|`,
sep: ":pipe:",
},
{
input: ` `,
sep: ":spaces:",
},
{
input: " \t ",
sep: ":tab:",
},
{
input: `-`,
sep: ":nonword:",
},
{
input: `//$`,
sep: ":special:",
},
}
for _, testdata := range tests {
testname := fmt.Sprintf("parse-%s", testdata.sep)
t.Run(testname, func(t *testing.T) {
header := strings.Join(list, testdata.input)
row := header
content := header + "\n" + row
readFd := strings.NewReader(strings.TrimSpace(content))
conf := cfg.Config{Separator: testdata.sep}
conf.ApplyDefaults()
gotdata, err := wrapValidateParser(conf, readFd)
assert.NoError(t, err)
assert.EqualValues(t, [][]string{list}, gotdata.entries)
})
}
}
func wrapValidateParser(conf cfg.Config, input io.Reader) (Tabdata, error) { func wrapValidateParser(conf cfg.Config, input io.Reader) (Tabdata, error) {
data, err := Parse(conf, input) data, err := Parse(conf, input)

View File

@@ -292,6 +292,7 @@ func TestPrinter(t *testing.T) {
conf.UseSortByColumn = []int{testdata.column} conf.UseSortByColumn = []int{testdata.column}
} }
conf.Separator = cfg.SeparatorTemplates[":default:"]
conf.ApplyDefaults() conf.ApplyDefaults()
// the test checks the len! // the test checks the len!

View File

@@ -133,7 +133,7 @@
.\" ======================================================================== .\" ========================================================================
.\" .\"
.IX Title "TABLIZER 1" .IX Title "TABLIZER 1"
.TH TABLIZER 1 "2025-10-01" "1" "User Commands" .TH TABLIZER 1 "2025-10-09" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents. .\" way too many mistakes in technical documents.
.if n .ad l .if n .ad l
@@ -152,7 +152,7 @@ tablizer \- Manipulate tabular output of other programs
\& \-n, \-\-numbering Enable header numbering \& \-n, \-\-numbering Enable header numbering
\& \-N, \-\-no\-color Disable pattern highlighting \& \-N, \-\-no\-color Disable pattern highlighting
\& \-H, \-\-no\-headers Disable headers display \& \-H, \-\-no\-headers Disable headers display
\& \-s, \-\-separator <string> Custom field separator \& \-s, \-\-separator <string> Custom field separator (maybe char, string or :class:)
\& \-k, \-\-sort\-by <int|name> Sort by column (default: 1) \& \-k, \-\-sort\-by <int|name> Sort by column (default: 1)
\& \-z, \-\-fuzzy Use fuzzy search [experimental] \& \-z, \-\-fuzzy Use fuzzy search [experimental]
\& \-F, \-\-filter <field[!]=reg> Filter given field with regex, can be used multiple times \& \-F, \-\-filter <field[!]=reg> Filter given field with regex, can be used multiple times
@@ -293,6 +293,62 @@ Sorts timestamps.
.PP .PP
Finally the \fB\-d\fR option enables debugging output which is mostly Finally the \fB\-d\fR option enables debugging output which is mostly
useful for the developer. useful for the developer.
.SS "\s-1SEPARATOR\s0"
.IX Subsection "SEPARATOR"
The option \fB\-s\fR can be a single character, in which case the \s-1CSV\s0
parser will be invoked. You can also specify a string as
separator. The string will be interpreted as literal string unless it
is a valid go regular expression. For example:
.PP
.Vb 1
\& \-s \*(Aq\et{2,}\e\*(Aq
.Ve
.PP
is being used as a regexp and will match two or more consecutive tabs.
.PP
.Vb 1
\& \-s \*(Aqfoo\*(Aq
.Ve
.PP
on the other hand is no regular expression and will be used literally.
.PP
To make live easier, there are a couple of predefined regular
expressions, which you can specify as classes:
.Sp
.RS 4
* :tab:
.Sp
Matches a tab and eats spaces around it.
.Sp
* :spaces:
.Sp
Matches 2 or more spaces.
.Sp
* :pipe:
.Sp
Matches a pipe character and eats spaces around it.
.Sp
* :default:
.Sp
Matches 2 or more spaces or tab. This is the default separator if none
is specified.
.Sp
* :nonword:
.Sp
Matches a non-word character.
.Sp
* :nondigit:
.Sp
Matches a non-digit character.
.Sp
* :special:
.Sp
Matches one or more special chars like brackets, dollar sign, slashes etc.
.Sp
* :nonprint:
.Sp
Matches one or more non-printable characters.
.RE
.SS "\s-1PATTERNS AND FILTERING\s0" .SS "\s-1PATTERNS AND FILTERING\s0"
.IX Subsection "PATTERNS AND FILTERING" .IX Subsection "PATTERNS AND FILTERING"
You can reduce the rows being displayed by using one or more regular You can reduce the rows being displayed by using one or more regular

View File

@@ -13,7 +13,7 @@ tablizer - Manipulate tabular output of other programs
-n, --numbering Enable header numbering -n, --numbering Enable header numbering
-N, --no-color Disable pattern highlighting -N, --no-color Disable pattern highlighting
-H, --no-headers Disable headers display -H, --no-headers Disable headers display
-s, --separator <string> Custom field separator -s, --separator <string> Custom field separator (maybe char, string or :class:)
-k, --sort-by <int|name> Sort by column (default: 1) -k, --sort-by <int|name> Sort by column (default: 1)
-z, --fuzzy Use fuzzy search [experimental] -z, --fuzzy Use fuzzy search [experimental]
-F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times -F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times
@@ -153,6 +153,62 @@ Sorts timestamps.
Finally the B<-d> option enables debugging output which is mostly Finally the B<-d> option enables debugging output which is mostly
useful for the developer. useful for the developer.
=head2 SEPARATOR
The option B<-s> can be a single character, in which case the CSV
parser will be invoked. You can also specify a string as
separator. The string will be interpreted as literal string unless it
is a valid go regular expression. For example:
-s '\t{2,}\'
is being used as a regexp and will match two or more consecutive tabs.
-s 'foo'
on the other hand is no regular expression and will be used literally.
To make live easier, there are a couple of predefined regular
expressions, which you can specify as classes:
=over
* :tab:
Matches a tab and eats spaces around it.
* :spaces:
Matches 2 or more spaces.
* :pipe:
Matches a pipe character and eats spaces around it.
* :default:
Matches 2 or more spaces or tab. This is the default separator if none
is specified.
* :nonword:
Matches a non-word character.
* :nondigit:
Matches a non-digit character.
* :special:
Matches one or more special chars like brackets, dollar sign, slashes etc.
* :nonprint:
Matches one or more non-printable characters.
=back
=head2 PATTERNS AND FILTERING =head2 PATTERNS AND FILTERING
You can reduce the rows being displayed by using one or more regular You can reduce the rows being displayed by using one or more regular