implement multiple regex support and icase and negate flags

This commit is contained in:
2025-01-21 18:37:45 +01:00
parent 03f3225f24
commit f6e3075ea8
11 changed files with 267 additions and 118 deletions

View File

@@ -52,6 +52,12 @@ type Transposer struct {
Replace string
}
type Pattern struct {
Pattern string
PatternRe *regexp.Regexp
Negate bool
}
// internal config
type Config struct {
Debug bool
@@ -62,8 +68,7 @@ type Config struct {
Separator string
OutputMode int
InvertMatch bool
Pattern string
PatternR *regexp.Regexp
Patterns []*Pattern
UseFuzzySearch bool
UseHighlight bool
@@ -333,15 +338,39 @@ func (conf *Config) ApplyDefaults() {
}
}
func (conf *Config) PreparePattern(pattern string) error {
PatternR, err := regexp.Compile(pattern)
func (conf *Config) PreparePattern(patterns []*Pattern) error {
// regex checks if a pattern looks like /$pattern/[i!]
flagre := regexp.MustCompile(`^/(.*)/([i!]+)$`)
if err != nil {
return fmt.Errorf("regexp pattern %s is invalid: %w", conf.Pattern, err)
for _, pattern := range patterns {
matches := flagre.FindAllStringSubmatch(pattern.Pattern, -1)
if matches != nil {
// we have a regex with flags
for _, match := range matches {
pattern.Pattern = match[1] // the inner part is our actual pattern
flags := match[2] // the flags
for _, flag := range flags {
switch flag {
case 'i':
pattern.Pattern = `(?i)` + pattern.Pattern
case '!':
pattern.Negate = true
}
}
}
}
PatternRe, err := regexp.Compile(pattern.Pattern)
if err != nil {
return fmt.Errorf("regexp pattern %s is invalid: %w", pattern.Pattern, err)
}
pattern.PatternRe = PatternRe
}
conf.PatternR = PatternR
conf.Pattern = pattern
conf.Patterns = patterns
return nil
}

View File

@@ -79,20 +79,55 @@ func TestPrepareSortFlags(t *testing.T) {
func TestPreparePattern(t *testing.T) {
var tests = []struct {
pattern string
wanterr bool
patterns []*Pattern
name string
wanterr bool
wanticase bool
wantneg bool
}{
{"[A-Z]+", false},
{"[a-z", true},
{
[]*Pattern{{Pattern: "[A-Z]+"}},
"simple",
false,
false,
false,
},
{
[]*Pattern{{Pattern: "[a-z"}},
"regfail",
true,
false,
false,
},
{
[]*Pattern{{Pattern: "/[A-Z]+/i"}},
"icase",
false,
true,
false,
},
{
[]*Pattern{{Pattern: "/[A-Z]+/!"}},
"negate",
false,
false,
true,
},
{
[]*Pattern{{Pattern: "/[A-Z]+/!i"}},
"negicase",
false,
true,
true,
},
}
for _, testdata := range tests {
testname := fmt.Sprintf("PreparePattern-pattern-%s-wanterr-%t",
testdata.pattern, testdata.wanterr)
testname := fmt.Sprintf("PreparePattern-pattern-%s-wanterr-%t", testdata.name, testdata.wanterr)
t.Run(testname, func(t *testing.T) {
conf := Config{}
err := conf.PreparePattern(testdata.pattern)
err := conf.PreparePattern(testdata.patterns)
if err != nil {
if !testdata.wanterr {

View File

@@ -6,7 +6,7 @@ NAME
SYNOPSIS
Usage:
tablizer [regex] [file, ...] [flags]
tablizer [regex,...] [file, ...] [flags]
Operational Flags:
-c, --columns string Only show the speficied columns (separated by ,)
@@ -130,30 +130,43 @@ DESCRIPTION
for the developer.
PATTERNS AND FILTERING
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is PCRE compatible, refer to the syntax cheat sheet
here: <https://github.com/google/re2/wiki/Syntax>. If you want to read a
more comprehensive documentation about the topic and have perl installed
you can read it with:
You can reduce the rows being displayed by using one or more regular
expression patterns. The regexp language being used is the one of
GOLANG, refer to the syntax cheat sheet here:
<https://pkg.go.dev/regexp/syntax>.
If you want to read a more comprehensive documentation about the topic
and have perl installed you can read it with:
perldoc perlre
Or read it online: <https://perldoc.perl.org/perlre>.
Or read it online: <https://perldoc.perl.org/perlre>. But please note
that the GO regexp engine does NOT support all perl regex terms,
especially look-ahead and look-behind.
A note on modifiers: the regexp engine used in tablizer uses another
modifier syntax:
If you want to supply flags to a regex, then surround it with slashes
and append the flag. The following flags are supported:
(?MODIFIER)
The most important modifiers are:
"i" ignore case "m" multiline mode "s" single line mode
i => case insensitive
! => negative match
Example for a case insensitive search:
kubectl get pods -A | tablizer "(?i)account"
kubectl get pods -A | tablizer "/account/i"
You can use the experimental fuzzy search feature by providing the
If you use the "!" flag, then the regex match will be negated, that is,
if a line in the input matches the given regex, but "!" is supplied,
tablizer will NOT include it in the output.
For example, here we want to get all lines matching "foo" but not "bar":
cat table | tablizer foo '/bar/!'
This would match a line "foo zorro" but not "foo bar".
The flags can also be combined.
You can also use the experimental fuzzy search feature by providing the
option -z, in which case the pattern is regarded as a fuzzy search term,
not a regexp.
@@ -392,7 +405,7 @@ AUTHORS
var usage = `
Usage:
tablizer [regex] [file, ...] [flags]
tablizer [regex,...] [file, ...] [flags]
Operational Flags:
-c, --columns string Only show the speficied columns (separated by ,)

View File

@@ -27,15 +27,42 @@ import (
)
/*
* [!]Match a line, use fuzzy search for normal pattern strings and
* regexp otherwise.
*/
* [!]Match a line, use fuzzy search for normal pattern strings and
* regexp otherwise.
'foo bar' foo, /bar/! => false => line contains foo and not (not bar)
'foo nix' foo, /bar/! => ture => line contains foo and (not bar)
'foo bar' foo, /bar/ => true => line contains both foo and bar
'foo nix' foo, /bar/ => false => line does not contain bar
'foo bar' foo, /nix/ => false => line does not contain nix
*/
func matchPattern(conf cfg.Config, line string) bool {
if conf.UseFuzzySearch {
return fuzzy.MatchFold(conf.Pattern, line)
if len(conf.Patterns) == 0 {
// any line always matches ""
return true
}
return conf.PatternR.MatchString(line)
if conf.UseFuzzySearch {
// fuzzy search only considers the 1st pattern
return fuzzy.MatchFold(conf.Patterns[0].Pattern, line)
}
var match bool
for _, re := range conf.Patterns {
patmatch := re.PatternRe.MatchString(line)
if re.Negate {
// toggle the meaning of match
patmatch = !patmatch
}
if match != patmatch {
// toggles match if the last match and current match are different
match = !match
}
}
return match
}
/*
@@ -123,8 +150,11 @@ func Exists[K comparable, V any](m map[K]V, v K) bool {
return false
}
/*
* Filters the whole input lines, returns filtered lines
*/
func FilterByPattern(conf cfg.Config, input io.Reader) (io.Reader, error) {
if conf.Pattern == "" {
if len(conf.Patterns) == 0 {
return input, nil
}
@@ -136,7 +166,7 @@ func FilterByPattern(conf cfg.Config, input io.Reader) (io.Reader, error) {
line := strings.TrimSpace(scanner.Text())
if hadFirst {
// don't match 1st line, it's the header
if conf.Pattern != "" && matchPattern(conf, line) == conf.InvertMatch {
if matchPattern(conf, line) == conf.InvertMatch {
// by default -v is false, so if a line does NOT
// match the pattern, we will ignore it. However,
// if the user specified -v, the matching is inverted,

View File

@@ -27,21 +27,21 @@ import (
func TestMatchPattern(t *testing.T) {
var input = []struct {
name string
fuzzy bool
pattern string
line string
name string
fuzzy bool
patterns []*cfg.Pattern
line string
}{
{
name: "normal",
pattern: "haus",
line: "hausparty",
name: "normal",
patterns: []*cfg.Pattern{{Pattern: "haus"}},
line: "hausparty",
},
{
name: "fuzzy",
pattern: "hpt",
line: "haus-party-termin",
fuzzy: true,
name: "fuzzy",
patterns: []*cfg.Pattern{{Pattern: "hpt"}},
line: "haus-party-termin",
fuzzy: true,
},
}
@@ -55,7 +55,7 @@ func TestMatchPattern(t *testing.T) {
conf.UseFuzzySearch = true
}
err := conf.PreparePattern(inputdata.pattern)
err := conf.PreparePattern(inputdata.patterns)
if err != nil {
t.Errorf("PreparePattern returned error: %s", err)
}

View File

@@ -293,12 +293,20 @@ func colorizeData(conf cfg.Config, output string) string {
return colorized
case len(conf.Pattern) > 0 && !conf.NoColor && color.IsConsole(os.Stdout):
r := regexp.MustCompile("(" + conf.Pattern + ")")
case len(conf.Patterns) > 0 && !conf.NoColor && color.IsConsole(os.Stdout):
out := output
return r.ReplaceAllStringFunc(output, func(in string) string {
return conf.ColorStyle.Sprint(in)
})
for _, re := range conf.Patterns {
if !re.Negate {
r := regexp.MustCompile("(" + re.Pattern + ")")
out = r.ReplaceAllStringFunc(out, func(in string) string {
return conf.ColorStyle.Sprint(in)
})
}
}
return out
default:
return output

View File

@@ -29,13 +29,13 @@ import (
const RWRR = 0755
func ProcessFiles(conf *cfg.Config, args []string) error {
fd, pattern, err := determineIO(conf, args)
fd, patterns, err := determineIO(conf, args)
if err != nil {
return err
}
if err := conf.PreparePattern(pattern); err != nil {
if err := conf.PreparePattern(patterns); err != nil {
return err
}
@@ -63,9 +63,9 @@ func ProcessFiles(conf *cfg.Config, args []string) error {
return nil
}
func determineIO(conf *cfg.Config, args []string) (io.Reader, string, error) {
func determineIO(conf *cfg.Config, args []string) (io.Reader, []*cfg.Pattern, error) {
var filehandle io.Reader
var pattern string
var patterns []*cfg.Pattern
var haveio bool
switch {
@@ -76,7 +76,7 @@ func determineIO(conf *cfg.Config, args []string) (io.Reader, string, error) {
fd, err := os.OpenFile(conf.InputFile, os.O_RDONLY, RWRR)
if err != nil {
return nil, "", fmt.Errorf("failed to read input file %s: %w", conf.InputFile, err)
return nil, nil, fmt.Errorf("failed to read input file %s: %w", conf.InputFile, err)
}
filehandle = fd
@@ -93,13 +93,15 @@ func determineIO(conf *cfg.Config, args []string) (io.Reader, string, error) {
}
if len(args) > 0 {
pattern = args[0]
conf.Pattern = args[0]
patterns = make([]*cfg.Pattern, len(args))
for i, arg := range args {
patterns[i] = &cfg.Pattern{Pattern: arg}
}
}
if !haveio {
return nil, "", errors.New("no file specified and nothing to read on stdin")
return nil, nil, errors.New("no file specified and nothing to read on stdin")
}
return filehandle, pattern, nil
return filehandle, patterns, nil
}

View File

@@ -137,7 +137,7 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
}
} else {
// data processing
if conf.Pattern != "" && matchPattern(conf, line) == conf.InvertMatch {
if matchPattern(conf, line) == conf.InvertMatch {
// by default -v is false, so if a line does NOT
// match the pattern, we will ignore it. However,
// if the user specified -v, the matching is inverted,

View File

@@ -83,36 +83,42 @@ func TestParser(t *testing.T) {
func TestParserPatternmatching(t *testing.T) {
var tests = []struct {
entries [][]string
pattern string
invert bool
want bool
name string
entries [][]string
patterns []*cfg.Pattern
invert bool
want bool
}{
{
name: "match",
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
},
pattern: "ig",
invert: false,
patterns: []*cfg.Pattern{{Pattern: "ig"}},
invert: false,
},
{
name: "invert",
entries: [][]string{
{"19191", "EDD 1", "X"},
},
pattern: "ig",
invert: true,
patterns: []*cfg.Pattern{{Pattern: "ig"}},
invert: true,
},
}
for _, inputdata := range input {
for _, testdata := range tests {
testname := fmt.Sprintf("parse-%s-with-pattern-%s-inverted-%t",
inputdata.name, testdata.pattern, testdata.invert)
inputdata.name, testdata.name, testdata.invert)
t.Run(testname, func(t *testing.T) {
conf := cfg.Config{InvertMatch: testdata.invert, Pattern: testdata.pattern,
Separator: inputdata.separator}
conf := cfg.Config{
InvertMatch: testdata.invert,
Patterns: testdata.patterns,
Separator: inputdata.separator,
}
_ = conf.PreparePattern(testdata.pattern)
_ = conf.PreparePattern(testdata.patterns)
readFd := strings.NewReader(strings.TrimSpace(inputdata.text))
gotdata, err := Parse(conf, readFd)
@@ -125,7 +131,7 @@ func TestParserPatternmatching(t *testing.T) {
} else {
if !reflect.DeepEqual(testdata.entries, gotdata.entries) {
t.Errorf("Parser returned invalid data (pattern: %s, invert: %t)\nExp: %+v\nGot: %+v\n",
testdata.pattern, testdata.invert, testdata.entries, gotdata.entries)
testdata.name, testdata.invert, testdata.entries, gotdata.entries)
}
}
})

View File

@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "TABLIZER 1"
.TH TABLIZER 1 "2025-01-15" "1" "User Commands"
.TH TABLIZER 1 "2025-01-21" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@@ -144,7 +144,7 @@ tablizer \- Manipulate tabular output of other programs
.IX Header "SYNOPSIS"
.Vb 2
\& Usage:
\& tablizer [regex] [file, ...] [flags]
\& tablizer [regex,...] [file, ...] [flags]
\&
\& Operational Flags:
\& \-c, \-\-columns string Only show the speficied columns (separated by ,)
@@ -278,38 +278,52 @@ Finally the \fB\-d\fR option enables debugging output which is mostly
useful for the developer.
.SS "\s-1PATTERNS AND FILTERING\s0"
.IX Subsection "PATTERNS AND FILTERING"
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is \s-1PCRE\s0 compatible, refer to the syntax cheat
sheet here: <https://github.com/google/re2/wiki/Syntax>. If you want
to read a more comprehensive documentation about the topic and have
perl installed you can read it with:
You can reduce the rows being displayed by using one or more regular
expression patterns. The regexp language being used is the one of
\&\s-1GOLANG,\s0 refer to the syntax cheat sheet here:
<https://pkg.go.dev/regexp/syntax>.
.PP
If you want to read a more comprehensive documentation about the
topic and have perl installed you can read it with:
.PP
.Vb 1
\& perldoc perlre
.Ve
.PP
Or read it online: <https://perldoc.perl.org/perlre>.
Or read it online: <https://perldoc.perl.org/perlre>. But please note
that the \s-1GO\s0 regexp engine does \s-1NOT\s0 support all perl regex terms,
especially look-ahead and look-behind.
.PP
A note on modifiers: the regexp engine used in tablizer uses another
modifier syntax:
If you want to supply flags to a regex, then surround it with slashes
and append the flag. The following flags are supported:
.PP
.Vb 1
\& (?MODIFIER)
.Vb 2
\& i => case insensitive
\& ! => negative match
.Ve
.PP
The most important modifiers are:
.PP
\&\f(CW\*(C`i\*(C'\fR ignore case
\&\f(CW\*(C`m\*(C'\fR multiline mode
\&\f(CW\*(C`s\*(C'\fR single line mode
.PP
Example for a case insensitive search:
.PP
.Vb 1
\& kubectl get pods \-A | tablizer "(?i)account"
\& kubectl get pods \-A | tablizer "/account/i"
.Ve
.PP
You can use the experimental fuzzy search feature by providing the
If you use the \f(CW\*(C`!\*(C'\fR flag, then the regex match will be negated, that
is, if a line in the input matches the given regex, but \f(CW\*(C`!\*(C'\fR is
supplied, tablizer will \s-1NOT\s0 include it in the output.
.PP
For example, here we want to get all lines matching \*(L"foo\*(R" but not
\&\*(L"bar\*(R":
.PP
.Vb 1
\& cat table | tablizer foo \*(Aq/bar/!\*(Aq
.Ve
.PP
This would match a line \*(L"foo zorro\*(R" but not \*(L"foo bar\*(R".
.PP
The flags can also be combined.
.PP
You can also use the experimental fuzzy search feature by providing the
option \fB\-z\fR, in which case the pattern is regarded as a fuzzy search
term, not a regexp.
.PP

View File

@@ -5,7 +5,7 @@ tablizer - Manipulate tabular output of other programs
=head1 SYNOPSIS
Usage:
tablizer [regex] [file, ...] [flags]
tablizer [regex,...] [file, ...] [flags]
Operational Flags:
-c, --columns string Only show the speficied columns (separated by ,)
@@ -142,32 +142,44 @@ useful for the developer.
=head2 PATTERNS AND FILTERING
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is PCRE compatible, refer to the syntax cheat
sheet here: L<https://github.com/google/re2/wiki/Syntax>. If you want
to read a more comprehensive documentation about the topic and have
perl installed you can read it with:
You can reduce the rows being displayed by using one or more regular
expression patterns. The regexp language being used is the one of
GOLANG, refer to the syntax cheat sheet here:
L<https://pkg.go.dev/regexp/syntax>.
If you want to read a more comprehensive documentation about the
topic and have perl installed you can read it with:
perldoc perlre
Or read it online: L<https://perldoc.perl.org/perlre>.
Or read it online: L<https://perldoc.perl.org/perlre>. But please note
that the GO regexp engine does NOT support all perl regex terms,
especially look-ahead and look-behind.
A note on modifiers: the regexp engine used in tablizer uses another
modifier syntax:
If you want to supply flags to a regex, then surround it with slashes
and append the flag. The following flags are supported:
(?MODIFIER)
The most important modifiers are:
C<i> ignore case
C<m> multiline mode
C<s> single line mode
i => case insensitive
! => negative match
Example for a case insensitive search:
kubectl get pods -A | tablizer "(?i)account"
kubectl get pods -A | tablizer "/account/i"
You can use the experimental fuzzy search feature by providing the
If you use the C<!> flag, then the regex match will be negated, that
is, if a line in the input matches the given regex, but C<!> is
supplied, tablizer will NOT include it in the output.
For example, here we want to get all lines matching "foo" but not
"bar":
cat table | tablizer foo '/bar/!'
This would match a line "foo zorro" but not "foo bar".
The flags can also be combined.
You can also use the experimental fuzzy search feature by providing the
option B<-z>, in which case the pattern is regarded as a fuzzy search
term, not a regexp.