implement multiple regex support and icase and negate flags

This commit is contained in:
2025-01-21 18:37:45 +01:00
parent 03f3225f24
commit f6e3075ea8
11 changed files with 267 additions and 118 deletions

View File

@@ -52,6 +52,12 @@ type Transposer struct {
Replace string Replace string
} }
type Pattern struct {
Pattern string
PatternRe *regexp.Regexp
Negate bool
}
// internal config // internal config
type Config struct { type Config struct {
Debug bool Debug bool
@@ -62,8 +68,7 @@ type Config struct {
Separator string Separator string
OutputMode int OutputMode int
InvertMatch bool InvertMatch bool
Pattern string Patterns []*Pattern
PatternR *regexp.Regexp
UseFuzzySearch bool UseFuzzySearch bool
UseHighlight bool UseHighlight bool
@@ -333,15 +338,39 @@ func (conf *Config) ApplyDefaults() {
} }
} }
func (conf *Config) PreparePattern(pattern string) error { func (conf *Config) PreparePattern(patterns []*Pattern) error {
PatternR, err := regexp.Compile(pattern) // regex checks if a pattern looks like /$pattern/[i!]
flagre := regexp.MustCompile(`^/(.*)/([i!]+)$`)
if err != nil { for _, pattern := range patterns {
return fmt.Errorf("regexp pattern %s is invalid: %w", conf.Pattern, err) matches := flagre.FindAllStringSubmatch(pattern.Pattern, -1)
if matches != nil {
// we have a regex with flags
for _, match := range matches {
pattern.Pattern = match[1] // the inner part is our actual pattern
flags := match[2] // the flags
for _, flag := range flags {
switch flag {
case 'i':
pattern.Pattern = `(?i)` + pattern.Pattern
case '!':
pattern.Negate = true
}
}
}
}
PatternRe, err := regexp.Compile(pattern.Pattern)
if err != nil {
return fmt.Errorf("regexp pattern %s is invalid: %w", pattern.Pattern, err)
}
pattern.PatternRe = PatternRe
} }
conf.PatternR = PatternR conf.Patterns = patterns
conf.Pattern = pattern
return nil return nil
} }

View File

@@ -79,20 +79,55 @@ func TestPrepareSortFlags(t *testing.T) {
func TestPreparePattern(t *testing.T) { func TestPreparePattern(t *testing.T) {
var tests = []struct { var tests = []struct {
pattern string patterns []*Pattern
wanterr bool name string
wanterr bool
wanticase bool
wantneg bool
}{ }{
{"[A-Z]+", false}, {
{"[a-z", true}, []*Pattern{{Pattern: "[A-Z]+"}},
"simple",
false,
false,
false,
},
{
[]*Pattern{{Pattern: "[a-z"}},
"regfail",
true,
false,
false,
},
{
[]*Pattern{{Pattern: "/[A-Z]+/i"}},
"icase",
false,
true,
false,
},
{
[]*Pattern{{Pattern: "/[A-Z]+/!"}},
"negate",
false,
false,
true,
},
{
[]*Pattern{{Pattern: "/[A-Z]+/!i"}},
"negicase",
false,
true,
true,
},
} }
for _, testdata := range tests { for _, testdata := range tests {
testname := fmt.Sprintf("PreparePattern-pattern-%s-wanterr-%t", testname := fmt.Sprintf("PreparePattern-pattern-%s-wanterr-%t", testdata.name, testdata.wanterr)
testdata.pattern, testdata.wanterr)
t.Run(testname, func(t *testing.T) { t.Run(testname, func(t *testing.T) {
conf := Config{} conf := Config{}
err := conf.PreparePattern(testdata.pattern) err := conf.PreparePattern(testdata.patterns)
if err != nil { if err != nil {
if !testdata.wanterr { if !testdata.wanterr {

View File

@@ -6,7 +6,7 @@ NAME
SYNOPSIS SYNOPSIS
Usage: Usage:
tablizer [regex] [file, ...] [flags] tablizer [regex,...] [file, ...] [flags]
Operational Flags: Operational Flags:
-c, --columns string Only show the specified columns (separated by ,) -c, --columns string Only show the specified columns (separated by ,)
@@ -130,30 +130,43 @@ DESCRIPTION
for the developer. for the developer.
PATTERNS AND FILTERING PATTERNS AND FILTERING
You can reduce the rows being displayed by using a regular expression You can reduce the rows being displayed by using one or more regular
pattern. The regexp is PCRE compatible, refer to the syntax cheat sheet expression patterns. The regexp language being used is the one of
here: <https://github.com/google/re2/wiki/Syntax>. If you want to read a GOLANG, refer to the syntax cheat sheet here:
more comprehensive documentation about the topic and have perl installed <https://pkg.go.dev/regexp/syntax>.
you can read it with:
If you want to read more comprehensive documentation about the topic
and have perl installed, you can read it with:
perldoc perlre perldoc perlre
Or read it online: <https://perldoc.perl.org/perlre>. Or read it online: <https://perldoc.perl.org/perlre>. But please note
that the Go regexp engine does NOT support all Perl regex constructs,
especially look-ahead and look-behind.
A note on modifiers: the regexp engine used in tablizer uses another If you want to supply flags to a regex, then surround it with slashes
modifier syntax: and append the flag. The following flags are supported:
(?MODIFIER) i => case insensitive
! => negative match
The most important modifiers are:
"i" ignore case "m" multiline mode "s" single line mode
Example for a case insensitive search: Example for a case insensitive search:
kubectl get pods -A | tablizer "(?i)account" kubectl get pods -A | tablizer "/account/i"
You can use the experimental fuzzy search feature by providing the If you use the "!" flag, then the regex match will be negated, that is,
if a line in the input matches the given regex, but "!" is supplied,
tablizer will NOT include it in the output.
For example, here we want to get all lines matching "foo" but not "bar":
cat table | tablizer foo '/bar/!'
This would match a line "foo zorro" but not "foo bar".
The flags can also be combined.
You can also use the experimental fuzzy search feature by providing the
option -z, in which case the pattern is regarded as a fuzzy search term, option -z, in which case the pattern is regarded as a fuzzy search term,
not a regexp. not a regexp.
@@ -392,7 +405,7 @@ AUTHORS
var usage = ` var usage = `
Usage: Usage:
tablizer [regex] [file, ...] [flags] tablizer [regex,...] [file, ...] [flags]
Operational Flags: Operational Flags:
-c, --columns string Only show the speficied columns (separated by ,) -c, --columns string Only show the speficied columns (separated by ,)

View File

@@ -27,15 +27,42 @@ import (
) )
/* /*
* [!]Match a line, use fuzzy search for normal pattern strings and * [!]Match a line, use fuzzy search for normal pattern strings and
* regexp otherwise. * regexp otherwise.
*/
'foo bar' foo, /bar/! => false => line contains foo and not (not bar)
'foo nix' foo, /bar/! => true => line contains foo and (not bar)
'foo bar' foo, /bar/ => true => line contains both foo and bar
'foo nix' foo, /bar/ => false => line does not contain bar
'foo bar' foo, /nix/ => false => line does not contain nix
*/
func matchPattern(conf cfg.Config, line string) bool { func matchPattern(conf cfg.Config, line string) bool {
if conf.UseFuzzySearch { if len(conf.Patterns) == 0 {
return fuzzy.MatchFold(conf.Pattern, line) // any line always matches ""
return true
} }
return conf.PatternR.MatchString(line) if conf.UseFuzzySearch {
// fuzzy search only considers the 1st pattern
return fuzzy.MatchFold(conf.Patterns[0].Pattern, line)
}
var match bool
for _, re := range conf.Patterns {
patmatch := re.PatternRe.MatchString(line)
if re.Negate {
// toggle the meaning of match
patmatch = !patmatch
}
if match != patmatch {
// toggles match if the last match and current match are different
match = !match
}
}
return match
} }
/* /*
@@ -123,8 +150,11 @@ func Exists[K comparable, V any](m map[K]V, v K) bool {
return false return false
} }
/*
* Filters the whole input lines, returns filtered lines
*/
func FilterByPattern(conf cfg.Config, input io.Reader) (io.Reader, error) { func FilterByPattern(conf cfg.Config, input io.Reader) (io.Reader, error) {
if conf.Pattern == "" { if len(conf.Patterns) == 0 {
return input, nil return input, nil
} }
@@ -136,7 +166,7 @@ func FilterByPattern(conf cfg.Config, input io.Reader) (io.Reader, error) {
line := strings.TrimSpace(scanner.Text()) line := strings.TrimSpace(scanner.Text())
if hadFirst { if hadFirst {
// don't match 1st line, it's the header // don't match 1st line, it's the header
if conf.Pattern != "" && matchPattern(conf, line) == conf.InvertMatch { if matchPattern(conf, line) == conf.InvertMatch {
// by default -v is false, so if a line does NOT // by default -v is false, so if a line does NOT
// match the pattern, we will ignore it. However, // match the pattern, we will ignore it. However,
// if the user specified -v, the matching is inverted, // if the user specified -v, the matching is inverted,

View File

@@ -27,21 +27,21 @@ import (
func TestMatchPattern(t *testing.T) { func TestMatchPattern(t *testing.T) {
var input = []struct { var input = []struct {
name string name string
fuzzy bool fuzzy bool
pattern string patterns []*cfg.Pattern
line string line string
}{ }{
{ {
name: "normal", name: "normal",
pattern: "haus", patterns: []*cfg.Pattern{{Pattern: "haus"}},
line: "hausparty", line: "hausparty",
}, },
{ {
name: "fuzzy", name: "fuzzy",
pattern: "hpt", patterns: []*cfg.Pattern{{Pattern: "hpt"}},
line: "haus-party-termin", line: "haus-party-termin",
fuzzy: true, fuzzy: true,
}, },
} }
@@ -55,7 +55,7 @@ func TestMatchPattern(t *testing.T) {
conf.UseFuzzySearch = true conf.UseFuzzySearch = true
} }
err := conf.PreparePattern(inputdata.pattern) err := conf.PreparePattern(inputdata.patterns)
if err != nil { if err != nil {
t.Errorf("PreparePattern returned error: %s", err) t.Errorf("PreparePattern returned error: %s", err)
} }

View File

@@ -293,12 +293,20 @@ func colorizeData(conf cfg.Config, output string) string {
return colorized return colorized
case len(conf.Pattern) > 0 && !conf.NoColor && color.IsConsole(os.Stdout): case len(conf.Patterns) > 0 && !conf.NoColor && color.IsConsole(os.Stdout):
r := regexp.MustCompile("(" + conf.Pattern + ")") out := output
return r.ReplaceAllStringFunc(output, func(in string) string { for _, re := range conf.Patterns {
return conf.ColorStyle.Sprint(in) if !re.Negate {
}) r := regexp.MustCompile("(" + re.Pattern + ")")
out = r.ReplaceAllStringFunc(out, func(in string) string {
return conf.ColorStyle.Sprint(in)
})
}
}
return out
default: default:
return output return output

View File

@@ -29,13 +29,13 @@ import (
const RWRR = 0755 const RWRR = 0755
func ProcessFiles(conf *cfg.Config, args []string) error { func ProcessFiles(conf *cfg.Config, args []string) error {
fd, pattern, err := determineIO(conf, args) fd, patterns, err := determineIO(conf, args)
if err != nil { if err != nil {
return err return err
} }
if err := conf.PreparePattern(pattern); err != nil { if err := conf.PreparePattern(patterns); err != nil {
return err return err
} }
@@ -63,9 +63,9 @@ func ProcessFiles(conf *cfg.Config, args []string) error {
return nil return nil
} }
func determineIO(conf *cfg.Config, args []string) (io.Reader, string, error) { func determineIO(conf *cfg.Config, args []string) (io.Reader, []*cfg.Pattern, error) {
var filehandle io.Reader var filehandle io.Reader
var pattern string var patterns []*cfg.Pattern
var haveio bool var haveio bool
switch { switch {
@@ -76,7 +76,7 @@ func determineIO(conf *cfg.Config, args []string) (io.Reader, string, error) {
fd, err := os.OpenFile(conf.InputFile, os.O_RDONLY, RWRR) fd, err := os.OpenFile(conf.InputFile, os.O_RDONLY, RWRR)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("failed to read input file %s: %w", conf.InputFile, err) return nil, nil, fmt.Errorf("failed to read input file %s: %w", conf.InputFile, err)
} }
filehandle = fd filehandle = fd
@@ -93,13 +93,15 @@ func determineIO(conf *cfg.Config, args []string) (io.Reader, string, error) {
} }
if len(args) > 0 { if len(args) > 0 {
pattern = args[0] patterns = make([]*cfg.Pattern, len(args))
conf.Pattern = args[0] for i, arg := range args {
patterns[i] = &cfg.Pattern{Pattern: arg}
}
} }
if !haveio { if !haveio {
return nil, "", errors.New("no file specified and nothing to read on stdin") return nil, nil, errors.New("no file specified and nothing to read on stdin")
} }
return filehandle, pattern, nil return filehandle, patterns, nil
} }

View File

@@ -137,7 +137,7 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
} }
} else { } else {
// data processing // data processing
if conf.Pattern != "" && matchPattern(conf, line) == conf.InvertMatch { if matchPattern(conf, line) == conf.InvertMatch {
// by default -v is false, so if a line does NOT // by default -v is false, so if a line does NOT
// match the pattern, we will ignore it. However, // match the pattern, we will ignore it. However,
// if the user specified -v, the matching is inverted, // if the user specified -v, the matching is inverted,

View File

@@ -83,36 +83,42 @@ func TestParser(t *testing.T) {
func TestParserPatternmatching(t *testing.T) { func TestParserPatternmatching(t *testing.T) {
var tests = []struct { var tests = []struct {
entries [][]string name string
pattern string entries [][]string
invert bool patterns []*cfg.Pattern
want bool invert bool
want bool
}{ }{
{ {
name: "match",
entries: [][]string{ entries: [][]string{
{"asd", "igig", "cxxxncnc"}, {"asd", "igig", "cxxxncnc"},
}, },
pattern: "ig", patterns: []*cfg.Pattern{{Pattern: "ig"}},
invert: false, invert: false,
}, },
{ {
name: "invert",
entries: [][]string{ entries: [][]string{
{"19191", "EDD 1", "X"}, {"19191", "EDD 1", "X"},
}, },
pattern: "ig", patterns: []*cfg.Pattern{{Pattern: "ig"}},
invert: true, invert: true,
}, },
} }
for _, inputdata := range input { for _, inputdata := range input {
for _, testdata := range tests { for _, testdata := range tests {
testname := fmt.Sprintf("parse-%s-with-pattern-%s-inverted-%t", testname := fmt.Sprintf("parse-%s-with-pattern-%s-inverted-%t",
inputdata.name, testdata.pattern, testdata.invert) inputdata.name, testdata.name, testdata.invert)
t.Run(testname, func(t *testing.T) { t.Run(testname, func(t *testing.T) {
conf := cfg.Config{InvertMatch: testdata.invert, Pattern: testdata.pattern, conf := cfg.Config{
Separator: inputdata.separator} InvertMatch: testdata.invert,
Patterns: testdata.patterns,
Separator: inputdata.separator,
}
_ = conf.PreparePattern(testdata.pattern) _ = conf.PreparePattern(testdata.patterns)
readFd := strings.NewReader(strings.TrimSpace(inputdata.text)) readFd := strings.NewReader(strings.TrimSpace(inputdata.text))
gotdata, err := Parse(conf, readFd) gotdata, err := Parse(conf, readFd)
@@ -125,7 +131,7 @@ func TestParserPatternmatching(t *testing.T) {
} else { } else {
if !reflect.DeepEqual(testdata.entries, gotdata.entries) { if !reflect.DeepEqual(testdata.entries, gotdata.entries) {
t.Errorf("Parser returned invalid data (pattern: %s, invert: %t)\nExp: %+v\nGot: %+v\n", t.Errorf("Parser returned invalid data (pattern: %s, invert: %t)\nExp: %+v\nGot: %+v\n",
testdata.pattern, testdata.invert, testdata.entries, gotdata.entries) testdata.name, testdata.invert, testdata.entries, gotdata.entries)
} }
} }
}) })

View File

@@ -133,7 +133,7 @@
.\" ======================================================================== .\" ========================================================================
.\" .\"
.IX Title "TABLIZER 1" .IX Title "TABLIZER 1"
.TH TABLIZER 1 "2025-01-15" "1" "User Commands" .TH TABLIZER 1 "2025-01-21" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents. .\" way too many mistakes in technical documents.
.if n .ad l .if n .ad l
@@ -144,7 +144,7 @@ tablizer \- Manipulate tabular output of other programs
.IX Header "SYNOPSIS" .IX Header "SYNOPSIS"
.Vb 2 .Vb 2
\& Usage: \& Usage:
\& tablizer [regex] [file, ...] [flags] \& tablizer [regex,...] [file, ...] [flags]
\& \&
\& Operational Flags: \& Operational Flags:
\& \-c, \-\-columns string Only show the specified columns (separated by ,) \& \-c, \-\-columns string Only show the specified columns (separated by ,)
@@ -278,38 +278,52 @@ Finally the \fB\-d\fR option enables debugging output which is mostly
useful for the developer. useful for the developer.
.SS "\s-1PATTERNS AND FILTERING\s0" .SS "\s-1PATTERNS AND FILTERING\s0"
.IX Subsection "PATTERNS AND FILTERING" .IX Subsection "PATTERNS AND FILTERING"
You can reduce the rows being displayed by using a regular expression You can reduce the rows being displayed by using one or more regular
pattern. The regexp is \s-1PCRE\s0 compatible, refer to the syntax cheat expression patterns. The regexp language being used is the one of
sheet here: <https://github.com/google/re2/wiki/Syntax>. If you want \&\s-1GOLANG,\s0 refer to the syntax cheat sheet here:
to read a more comprehensive documentation about the topic and have <https://pkg.go.dev/regexp/syntax>.
perl installed you can read it with: .PP
If you want to read a more comprehensive documentation about the
topic and have perl installed you can read it with:
.PP .PP
.Vb 1 .Vb 1
\& perldoc perlre \& perldoc perlre
.Ve .Ve
.PP .PP
Or read it online: <https://perldoc.perl.org/perlre>. Or read it online: <https://perldoc.perl.org/perlre>. But please note
that the \s-1GO\s0 regexp engine does \s-1NOT\s0 support all perl regex terms,
especially look-ahead and look-behind.
.PP .PP
A note on modifiers: the regexp engine used in tablizer uses another If you want to supply flags to a regex, then surround it with slashes
modifier syntax: and append the flag. The following flags are supported:
.PP .PP
.Vb 1 .Vb 2
\& (?MODIFIER) \& i => case insensitive
\& ! => negative match
.Ve .Ve
.PP .PP
The most important modifiers are:
.PP
\&\f(CW\*(C`i\*(C'\fR ignore case
\&\f(CW\*(C`m\*(C'\fR multiline mode
\&\f(CW\*(C`s\*(C'\fR single line mode
.PP
Example for a case insensitive search: Example for a case insensitive search:
.PP .PP
.Vb 1 .Vb 1
\& kubectl get pods \-A | tablizer "(?i)account" \& kubectl get pods \-A | tablizer "/account/i"
.Ve .Ve
.PP .PP
You can use the experimental fuzzy search feature by providing the If you use the \f(CW\*(C`!\*(C'\fR flag, then the regex match will be negated, that
is, if a line in the input matches the given regex, but \f(CW\*(C`!\*(C'\fR is
supplied, tablizer will \s-1NOT\s0 include it in the output.
.PP
For example, here we want to get all lines matching \*(L"foo\*(R" but not
\&\*(L"bar\*(R":
.PP
.Vb 1
\& cat table | tablizer foo \*(Aq/bar/!\*(Aq
.Ve
.PP
This would match a line \*(L"foo zorro\*(R" but not \*(L"foo bar\*(R".
.PP
The flags can also be combined.
.PP
You can also use the experimental fuzzy search feature by providing the
option \fB\-z\fR, in which case the pattern is regarded as a fuzzy search option \fB\-z\fR, in which case the pattern is regarded as a fuzzy search
term, not a regexp. term, not a regexp.
.PP .PP

View File

@@ -5,7 +5,7 @@ tablizer - Manipulate tabular output of other programs
=head1 SYNOPSIS =head1 SYNOPSIS
Usage: Usage:
tablizer [regex] [file, ...] [flags] tablizer [regex,...] [file, ...] [flags]
Operational Flags: Operational Flags:
-c, --columns string Only show the specified columns (separated by ,) -c, --columns string Only show the specified columns (separated by ,)
@@ -142,32 +142,44 @@ useful for the developer.
=head2 PATTERNS AND FILTERING =head2 PATTERNS AND FILTERING
You can reduce the rows being displayed by using a regular expression You can reduce the rows being displayed by using one or more regular
pattern. The regexp is PCRE compatible, refer to the syntax cheat expression patterns. The regexp language being used is the one of
sheet here: L<https://github.com/google/re2/wiki/Syntax>. If you want GOLANG, refer to the syntax cheat sheet here:
to read a more comprehensive documentation about the topic and have L<https://pkg.go.dev/regexp/syntax>.
perl installed you can read it with:
If you want to read more comprehensive documentation about the
topic and have perl installed, you can read it with:
perldoc perlre perldoc perlre
Or read it online: L<https://perldoc.perl.org/perlre>. Or read it online: L<https://perldoc.perl.org/perlre>. But please note
that the Go regexp engine does NOT support all Perl regex constructs,
especially look-ahead and look-behind.
A note on modifiers: the regexp engine used in tablizer uses another If you want to supply flags to a regex, then surround it with slashes
modifier syntax: and append the flag. The following flags are supported:
(?MODIFIER) i => case insensitive
! => negative match
The most important modifiers are:
C<i> ignore case
C<m> multiline mode
C<s> single line mode
Example for a case insensitive search: Example for a case insensitive search:
kubectl get pods -A | tablizer "(?i)account" kubectl get pods -A | tablizer "/account/i"
You can use the experimental fuzzy search feature by providing the If you use the C<!> flag, then the regex match will be negated, that
is, if a line in the input matches the given regex, but C<!> is
supplied, tablizer will NOT include it in the output.
For example, here we want to get all lines matching "foo" but not
"bar":
cat table | tablizer foo '/bar/!'
This would match a line "foo zorro" but not "foo bar".
The flags can also be combined.
You can also use the experimental fuzzy search feature by providing the
option B<-z>, in which case the pattern is regarded as a fuzzy search option B<-z>, in which case the pattern is regarded as a fuzzy search
term, not a regexp. term, not a regexp.