mirror of
https://codeberg.org/scip/tablizer.git
synced 2025-12-16 20:20:57 +01:00
add some handy builtin character classes as split separators (#84)
This commit is contained in:
@@ -27,13 +27,26 @@ import (
|
||||
"github.com/hashicorp/hcl/v2/hclsimple"
|
||||
)
|
||||
|
||||
const DefaultSeparator string = `(\s\s+|\t)`
|
||||
const Version string = "v1.5.8"
|
||||
const MAXPARTS = 2
|
||||
const (
|
||||
Version = "v1.5.9"
|
||||
MAXPARTS = 2
|
||||
)
|
||||
|
||||
var DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config"
|
||||
var (
|
||||
// DefaultConfigfile is the config file path below $HOME.
DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config"
|
||||
VERSION string // maintained by -x
|
||||
|
||||
var VERSION string // maintained by -x
|
||||
// SeparatorTemplates maps each builtin :class: separator name to the
// regular expression it stands for.
SeparatorTemplates = map[string]string{
|
||||
":tab:": `\s*\t\s*`, // one tab plus any whitespace around it
|
||||
":spaces:": `\s{2,}`, // 2 or more whitespace characters
|
||||
":pipe:": `\s*\|\s*`, // one pipe plus any whitespace around it
|
||||
":default:": `(\s\s+|\t)`, // 2 or more whitespace characters, or a single tab
|
||||
":nonword:": `\W`, // a single non-word character (not a word boundary)
|
||||
":nondigit:": `\D`, // a single non-digit character
|
||||
":special:": `[\*\+\-_\(\)\[\]\{\}?\\/<>=&$§"':,\^]+`, // one or more of the listed special characters
|
||||
":nonprint:": `[[:^print:]]+`, // one or more characters outside printable ASCII (includes non-ASCII runes)
|
||||
}
|
||||
)
|
||||
|
||||
// public config, set via config file or using defaults
|
||||
type Settings struct {
|
||||
@@ -356,6 +369,13 @@ func (conf *Config) ApplyDefaults() {
|
||||
if conf.OutputMode == Yaml || conf.OutputMode == CSV {
|
||||
conf.Numbering = false
|
||||
}
|
||||
|
||||
// Expand a builtin :class: separator name (e.g. ":pipe:") into the
// regular expression it stands for. The length is checked first because
// indexing an empty separator would panic; names that are not registered
// in SeparatorTemplates are left untouched.
if n := len(conf.Separator); n > 0 && conf.Separator[0] == ':' && conf.Separator[n-1] == ':' {
	if separator, ok := SeparatorTemplates[conf.Separator]; ok {
		conf.Separator = separator
	}
}
|
||||
}
|
||||
|
||||
func (conf *Config) PreparePattern(patterns []*Pattern) error {
|
||||
|
||||
@@ -123,7 +123,7 @@ func Execute() {
|
||||
"Use alternating background colors")
|
||||
rootCmd.PersistentFlags().StringVarP(&ShowCompletion, "completion", "", "",
|
||||
"Display completion code")
|
||||
rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.DefaultSeparator,
|
||||
rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.SeparatorTemplates[":default:"],
|
||||
"Custom field separator")
|
||||
rootCmd.PersistentFlags().StringVarP(&conf.Columns, "columns", "c", "",
|
||||
"Only show the speficied columns (separated by ,)")
|
||||
|
||||
@@ -14,7 +14,7 @@ SYNOPSIS
|
||||
-n, --numbering Enable header numbering
|
||||
-N, --no-color Disable pattern highlighting
|
||||
-H, --no-headers Disable headers display
|
||||
-s, --separator <string> Custom field separator
|
||||
-s, --separator <string> Custom field separator (may be char, string or :class:)
|
||||
-k, --sort-by <int|name> Sort by column (default: 1)
|
||||
-z, --fuzzy Use fuzzy search [experimental]
|
||||
-F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times
|
||||
@@ -141,6 +141,57 @@ DESCRIPTION
|
||||
Finally the -d option enables debugging output which is mostly useful
|
||||
for the developer.
|
||||
|
||||
SEPARATOR
|
||||
The option -s can be a single character, in which case the CSV parser
|
||||
will be invoked. You can also specify a string as separator. The string
|
||||
will be interpreted as literal string unless it is a valid go regular
|
||||
expression. For example:
|
||||
|
||||
-s '\t{2,}'
|
||||
|
||||
is being used as a regexp and will match two or more consecutive tabs.
|
||||
|
||||
-s 'foo'
|
||||
|
||||
on the other hand is no regular expression and will be used literally.
|
||||
|
||||
To make life easier, there are a couple of predefined regular
|
||||
expressions, which you can specify as classes:
|
||||
|
||||
* :tab:
|
||||
|
||||
Matches a tab and eats spaces around it.
|
||||
|
||||
* :spaces:
|
||||
|
||||
Matches 2 or more spaces.
|
||||
|
||||
* :pipe:
|
||||
|
||||
Matches a pipe character and eats spaces around it.
|
||||
|
||||
* :default:
|
||||
|
||||
Matches 2 or more spaces or tab. This is the default separator if
|
||||
none is specified.
|
||||
|
||||
* :nonword:
|
||||
|
||||
Matches a non-word character.
|
||||
|
||||
* :nondigit:
|
||||
|
||||
Matches a non-digit character.
|
||||
|
||||
* :special:
|
||||
|
||||
Matches one or more special chars like brackets, dollar sign,
|
||||
slashes etc.
|
||||
|
||||
* :nonprint:
|
||||
|
||||
Matches one or more non-printable characters.
|
||||
|
||||
PATTERNS AND FILTERING
|
||||
You can reduce the rows being displayed by using one or more regular
|
||||
expression patterns. The regexp language being used is the one of
|
||||
@@ -458,7 +509,7 @@ Operational Flags:
|
||||
-n, --numbering Enable header numbering
|
||||
-N, --no-color Disable pattern highlighting
|
||||
-H, --no-headers Disable headers display
|
||||
-s, --separator <string> Custom field separator
|
||||
-s, --separator <string> Custom field separator (may be char, string or :class:)
|
||||
-k, --sort-by <int|name> Sort by column (default: 1)
|
||||
-z, --fuzzy Use fuzzy search [experimental]
|
||||
-F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times
|
||||
|
||||
@@ -34,7 +34,7 @@ var input = []struct {
|
||||
}{
|
||||
{
|
||||
name: "tabular-data",
|
||||
separator: cfg.DefaultSeparator,
|
||||
separator: cfg.SeparatorTemplates[":default:"],
|
||||
text: `
|
||||
ONE TWO THREE
|
||||
asd igig cxxxncnc
|
||||
@@ -148,7 +148,7 @@ asd igig
|
||||
19191 EDD 1 X`
|
||||
|
||||
readFd := strings.NewReader(strings.TrimSpace(table))
|
||||
conf := cfg.Config{Separator: cfg.DefaultSeparator}
|
||||
conf := cfg.Config{Separator: cfg.SeparatorTemplates[":default:"]}
|
||||
gotdata, err := wrapValidateParser(conf, readFd)
|
||||
|
||||
assert.NoError(t, err)
|
||||
@@ -314,6 +314,58 @@ func TestParserJSONInput(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParserSeparators(t *testing.T) {
|
||||
list := []string{"alpha", "beta", "delta"}
|
||||
|
||||
tests := []struct {
|
||||
input string
|
||||
sep string
|
||||
}{
|
||||
{
|
||||
input: `🎲`,
|
||||
sep: ":nonprint:",
|
||||
},
|
||||
{
|
||||
input: `|`,
|
||||
sep: ":pipe:",
|
||||
},
|
||||
{
|
||||
input: ` `,
|
||||
sep: ":spaces:",
|
||||
},
|
||||
{
|
||||
input: " \t ",
|
||||
sep: ":tab:",
|
||||
},
|
||||
{
|
||||
input: `-`,
|
||||
sep: ":nonword:",
|
||||
},
|
||||
{
|
||||
input: `//$`,
|
||||
sep: ":special:",
|
||||
},
|
||||
}
|
||||
|
||||
for _, testdata := range tests {
|
||||
testname := fmt.Sprintf("parse-%s", testdata.sep)
|
||||
t.Run(testname, func(t *testing.T) {
|
||||
header := strings.Join(list, testdata.input)
|
||||
row := header
|
||||
content := header + "\n" + row
|
||||
|
||||
readFd := strings.NewReader(strings.TrimSpace(content))
|
||||
conf := cfg.Config{Separator: testdata.sep}
|
||||
conf.ApplyDefaults()
|
||||
|
||||
gotdata, err := wrapValidateParser(conf, readFd)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, [][]string{list}, gotdata.entries)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func wrapValidateParser(conf cfg.Config, input io.Reader) (Tabdata, error) {
|
||||
data, err := Parse(conf, input)
|
||||
|
||||
|
||||
@@ -292,6 +292,7 @@ func TestPrinter(t *testing.T) {
|
||||
conf.UseSortByColumn = []int{testdata.column}
|
||||
}
|
||||
|
||||
conf.Separator = cfg.SeparatorTemplates[":default:"]
|
||||
conf.ApplyDefaults()
|
||||
|
||||
// the test checks the len!
|
||||
|
||||
60
tablizer.1
60
tablizer.1
@@ -133,7 +133,7 @@
|
||||
.\" ========================================================================
|
||||
.\"
|
||||
.IX Title "TABLIZER 1"
|
||||
.TH TABLIZER 1 "2025-10-01" "1" "User Commands"
|
||||
.TH TABLIZER 1 "2025-10-09" "1" "User Commands"
|
||||
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
|
||||
.\" way too many mistakes in technical documents.
|
||||
.if n .ad l
|
||||
@@ -152,7 +152,7 @@ tablizer \- Manipulate tabular output of other programs
|
||||
\& \-n, \-\-numbering Enable header numbering
|
||||
\& \-N, \-\-no\-color Disable pattern highlighting
|
||||
\& \-H, \-\-no\-headers Disable headers display
|
||||
\& \-s, \-\-separator <string> Custom field separator
|
||||
\& \-s, \-\-separator <string> Custom field separator (may be char, string or :class:)
|
||||
\& \-k, \-\-sort\-by <int|name> Sort by column (default: 1)
|
||||
\& \-z, \-\-fuzzy Use fuzzy search [experimental]
|
||||
\& \-F, \-\-filter <field[!]=reg> Filter given field with regex, can be used multiple times
|
||||
@@ -293,6 +293,62 @@ Sorts timestamps.
|
||||
.PP
|
||||
Finally the \fB\-d\fR option enables debugging output which is mostly
|
||||
useful for the developer.
|
||||
.SS "\s-1SEPARATOR\s0"
|
||||
.IX Subsection "SEPARATOR"
|
||||
The option \fB\-s\fR can be a single character, in which case the \s-1CSV\s0
|
||||
parser will be invoked. You can also specify a string as
|
||||
separator. The string will be interpreted as literal string unless it
|
||||
is a valid go regular expression. For example:
|
||||
.PP
|
||||
.Vb 1
|
||||
\& \-s \*(Aq\et{2,}\*(Aq
|
||||
.Ve
|
||||
.PP
|
||||
is being used as a regexp and will match two or more consecutive tabs.
|
||||
.PP
|
||||
.Vb 1
|
||||
\& \-s \*(Aqfoo\*(Aq
|
||||
.Ve
|
||||
.PP
|
||||
on the other hand is no regular expression and will be used literally.
|
||||
.PP
|
||||
To make life easier, there are a couple of predefined regular
|
||||
expressions, which you can specify as classes:
|
||||
.Sp
|
||||
.RS 4
|
||||
* :tab:
|
||||
.Sp
|
||||
Matches a tab and eats spaces around it.
|
||||
.Sp
|
||||
* :spaces:
|
||||
.Sp
|
||||
Matches 2 or more spaces.
|
||||
.Sp
|
||||
* :pipe:
|
||||
.Sp
|
||||
Matches a pipe character and eats spaces around it.
|
||||
.Sp
|
||||
* :default:
|
||||
.Sp
|
||||
Matches 2 or more spaces or tab. This is the default separator if none
|
||||
is specified.
|
||||
.Sp
|
||||
* :nonword:
|
||||
.Sp
|
||||
Matches a non-word character.
|
||||
.Sp
|
||||
* :nondigit:
|
||||
.Sp
|
||||
Matches a non-digit character.
|
||||
.Sp
|
||||
* :special:
|
||||
.Sp
|
||||
Matches one or more special chars like brackets, dollar sign, slashes etc.
|
||||
.Sp
|
||||
* :nonprint:
|
||||
.Sp
|
||||
Matches one or more non-printable characters.
|
||||
.RE
|
||||
.SS "\s-1PATTERNS AND FILTERING\s0"
|
||||
.IX Subsection "PATTERNS AND FILTERING"
|
||||
You can reduce the rows being displayed by using one or more regular
|
||||
|
||||
58
tablizer.pod
58
tablizer.pod
@@ -13,7 +13,7 @@ tablizer - Manipulate tabular output of other programs
|
||||
-n, --numbering Enable header numbering
|
||||
-N, --no-color Disable pattern highlighting
|
||||
-H, --no-headers Disable headers display
|
||||
-s, --separator <string> Custom field separator
|
||||
-s, --separator <string> Custom field separator (may be char, string or :class:)
|
||||
-k, --sort-by <int|name> Sort by column (default: 1)
|
||||
-z, --fuzzy Use fuzzy search [experimental]
|
||||
-F, --filter <field[!]=reg> Filter given field with regex, can be used multiple times
|
||||
@@ -153,6 +153,62 @@ Sorts timestamps.
|
||||
Finally the B<-d> option enables debugging output which is mostly
|
||||
useful for the developer.
|
||||
|
||||
=head2 SEPARATOR
|
||||
|
||||
The option B<-s> can be a single character, in which case the CSV
|
||||
parser will be invoked. You can also specify a string as
|
||||
separator. The string will be interpreted as literal string unless it
|
||||
is a valid go regular expression. For example:
|
||||
|
||||
-s '\t{2,}'
|
||||
|
||||
is being used as a regexp and will match two or more consecutive tabs.
|
||||
|
||||
-s 'foo'
|
||||
|
||||
on the other hand is no regular expression and will be used literally.
|
||||
|
||||
To make life easier, there are a couple of predefined regular
|
||||
expressions, which you can specify as classes:
|
||||
|
||||
=over
|
||||
|
||||
* :tab:
|
||||
|
||||
Matches a tab and eats spaces around it.
|
||||
|
||||
* :spaces:
|
||||
|
||||
Matches 2 or more spaces.
|
||||
|
||||
* :pipe:
|
||||
|
||||
Matches a pipe character and eats spaces around it.
|
||||
|
||||
* :default:
|
||||
|
||||
Matches 2 or more spaces or tab. This is the default separator if none
|
||||
is specified.
|
||||
|
||||
* :nonword:
|
||||
|
||||
Matches a non-word character.
|
||||
|
||||
* :nondigit:
|
||||
|
||||
Matches a non-digit character.
|
||||
|
||||
* :special:
|
||||
|
||||
Matches one or more special chars like brackets, dollar sign, slashes etc.
|
||||
|
||||
* :nonprint:
|
||||
|
||||
Matches one or more non-printable characters.
|
||||
|
||||
|
||||
=back
|
||||
|
||||
=head2 PATTERNS AND FILTERING
|
||||
|
||||
You can reduce the rows being displayed by using one or more regular
|
||||
|
||||
Reference in New Issue
Block a user