From f6e3075ea8ecd7065e490d5150651195a16c6656 Mon Sep 17 00:00:00 2001 From: Thomas von Dein Date: Tue, 21 Jan 2025 18:37:45 +0100 Subject: [PATCH] implement multiple regex support and icase and negate flags --- cfg/config.go | 45 +++++++++++++++++++++++++++++++------- cfg/config_test.go | 49 +++++++++++++++++++++++++++++++++++------ cmd/tablizer.go | 47 +++++++++++++++++++++++++--------------- lib/filter.go | 46 ++++++++++++++++++++++++++++++++------- lib/filter_test.go | 24 ++++++++++----------- lib/helpers.go | 18 +++++++++++----- lib/io.go | 20 +++++++++-------- lib/parser.go | 2 +- lib/parser_test.go | 32 ++++++++++++++++----------- tablizer.1 | 54 +++++++++++++++++++++++++++++----------------- tablizer.pod | 48 +++++++++++++++++++++++++---------------- 11 files changed, 267 insertions(+), 118 deletions(-) diff --git a/cfg/config.go b/cfg/config.go index 3d4840a..b7a867d 100644 --- a/cfg/config.go +++ b/cfg/config.go @@ -52,6 +52,12 @@ type Transposer struct { Replace string } +type Pattern struct { + Pattern string + PatternRe *regexp.Regexp + Negate bool +} + // internal config type Config struct { Debug bool @@ -62,8 +68,7 @@ type Config struct { Separator string OutputMode int InvertMatch bool - Pattern string - PatternR *regexp.Regexp + Patterns []*Pattern UseFuzzySearch bool UseHighlight bool @@ -333,15 +338,39 @@ func (conf *Config) ApplyDefaults() { } } -func (conf *Config) PreparePattern(pattern string) error { - PatternR, err := regexp.Compile(pattern) +func (conf *Config) PreparePattern(patterns []*Pattern) error { + // regex checks if a pattern looks like /$pattern/[i!] 
+ flagre := regexp.MustCompile(`^/(.*)/([i!]+)$`) - if err != nil { - return fmt.Errorf("regexp pattern %s is invalid: %w", conf.Pattern, err) + for _, pattern := range patterns { + matches := flagre.FindAllStringSubmatch(pattern.Pattern, -1) + + if matches != nil { + // we have a regex with flags + for _, match := range matches { + pattern.Pattern = match[1] // the inner part is our actual pattern + flags := match[2] // the flags + + for _, flag := range flags { + switch flag { + case 'i': + pattern.Pattern = `(?i)` + pattern.Pattern + case '!': + pattern.Negate = true + } + } + } + } + + PatternRe, err := regexp.Compile(pattern.Pattern) + if err != nil { + return fmt.Errorf("regexp pattern %s is invalid: %w", pattern.Pattern, err) + } + + pattern.PatternRe = PatternRe } - conf.PatternR = PatternR - conf.Pattern = pattern + conf.Patterns = patterns return nil } diff --git a/cfg/config_test.go b/cfg/config_test.go index 84a477f..7b60f79 100644 --- a/cfg/config_test.go +++ b/cfg/config_test.go @@ -79,20 +79,55 @@ func TestPrepareSortFlags(t *testing.T) { func TestPreparePattern(t *testing.T) { var tests = []struct { - pattern string - wanterr bool + patterns []*Pattern + name string + wanterr bool + wanticase bool + wantneg bool }{ - {"[A-Z]+", false}, - {"[a-z", true}, + { + []*Pattern{{Pattern: "[A-Z]+"}}, + "simple", + false, + false, + false, + }, + { + []*Pattern{{Pattern: "[a-z"}}, + "regfail", + true, + false, + false, + }, + { + []*Pattern{{Pattern: "/[A-Z]+/i"}}, + "icase", + false, + true, + false, + }, + { + []*Pattern{{Pattern: "/[A-Z]+/!"}}, + "negate", + false, + false, + true, + }, + { + []*Pattern{{Pattern: "/[A-Z]+/!i"}}, + "negicase", + false, + true, + true, + }, } for _, testdata := range tests { - testname := fmt.Sprintf("PreparePattern-pattern-%s-wanterr-%t", - testdata.pattern, testdata.wanterr) + testname := fmt.Sprintf("PreparePattern-pattern-%s-wanterr-%t", testdata.name, testdata.wanterr) t.Run(testname, func(t *testing.T) { conf := 
Config{} - err := conf.PreparePattern(testdata.pattern) + err := conf.PreparePattern(testdata.patterns) if err != nil { if !testdata.wanterr { diff --git a/cmd/tablizer.go b/cmd/tablizer.go index 71f5b21..d4768d2 100644 --- a/cmd/tablizer.go +++ b/cmd/tablizer.go @@ -6,7 +6,7 @@ NAME SYNOPSIS Usage: - tablizer [regex] [file, ...] [flags] + tablizer [regex,...] [file, ...] [flags] Operational Flags: -c, --columns string Only show the speficied columns (separated by ,) @@ -130,30 +130,43 @@ DESCRIPTION for the developer. PATTERNS AND FILTERING - You can reduce the rows being displayed by using a regular expression - pattern. The regexp is PCRE compatible, refer to the syntax cheat sheet - here: . If you want to read a - more comprehensive documentation about the topic and have perl installed - you can read it with: + You can reduce the rows being displayed by using one or more regular + expression patterns. The regexp language being used is the one of + GOLANG, refer to the syntax cheat sheet here: + . + + If you want to read a more comprehensive documentation about the topic + and have perl installed you can read it with: perldoc perlre - Or read it online: . + Or read it online: . But please note + that the GO regexp engine does NOT support all perl regex terms, + especially look-ahead and look-behind. - A note on modifiers: the regexp engine used in tablizer uses another - modifier syntax: + If you want to supply flags to a regex, then surround it with slashes + and append the flag. The following flags are supported: - (?MODIFIER) - - The most important modifiers are: - - "i" ignore case "m" multiline mode "s" single line mode + i => case insensitive + ! => negative match Example for a case insensitive search: - kubectl get pods -A | tablizer "(?i)account" + kubectl get pods -A | tablizer "/account/i" - You can use the experimental fuzzy search feature by providing the + If you use the "!" 
flag, then the regex match will be negated, that is, + if a line in the input matches the given regex, but "!" is supplied, + tablizer will NOT include it in the output. + + For example, here we want to get all lines matching "foo" but not "bar": + + cat table | tablizer foo '/bar/!' + + This would match a line "foo zorro" but not "foo bar". + + The flags can also be combined. + + You can also use the experimental fuzzy search feature by providing the option -z, in which case the pattern is regarded as a fuzzy search term, not a regexp. @@ -392,7 +405,7 @@ AUTHORS var usage = ` Usage: - tablizer [regex] [file, ...] [flags] + tablizer [regex,...] [file, ...] [flags] Operational Flags: -c, --columns string Only show the speficied columns (separated by ,) diff --git a/lib/filter.go b/lib/filter.go index dadbb06..816732d 100644 --- a/lib/filter.go +++ b/lib/filter.go @@ -27,15 +27,42 @@ import ( ) /* - * [!]Match a line, use fuzzy search for normal pattern strings and - * regexp otherwise. - */ +* [!]Match a line, use fuzzy search for normal pattern strings and +* regexp otherwise. + + 'foo bar' foo, /bar/! => false => line contains foo and not (not bar) + 'foo nix' foo, /bar/! 
=> true => line contains foo and (not bar) + 'foo bar' foo, /bar/ => true => line contains both foo and bar + 'foo nix' foo, /bar/ => false => line does not contain bar + 'foo bar' foo, /nix/ => false => line does not contain nix +*/ func matchPattern(conf cfg.Config, line string) bool { - if conf.UseFuzzySearch { - return fuzzy.MatchFold(conf.Pattern, line) + if len(conf.Patterns) == 0 { + // any line always matches "" + return true } - return conf.PatternR.MatchString(line) + if conf.UseFuzzySearch { + // fuzzy search only considers the 1st pattern + return fuzzy.MatchFold(conf.Patterns[0].Pattern, line) + } + + var match bool + + for _, re := range conf.Patterns { + patmatch := re.PatternRe.MatchString(line) + if re.Negate { + // toggle the meaning of match + patmatch = !patmatch + } + + if match != patmatch { + // toggles match if the last match and current match are different + match = !match + } + } + + return match } /* @@ -123,8 +150,11 @@ func Exists[K comparable, V any](m map[K]V, v K) bool { return false } +/* + * Filters the whole input lines, returns filtered lines + */ func FilterByPattern(conf cfg.Config, input io.Reader) (io.Reader, error) { - if conf.Pattern == "" { + if len(conf.Patterns) == 0 { return input, nil } @@ -136,7 +166,7 @@ func FilterByPattern(conf cfg.Config, input io.Reader) (io.Reader, error) { line := strings.TrimSpace(scanner.Text()) if hadFirst { // don't match 1st line, it's the header - if conf.Pattern != "" && matchPattern(conf, line) == conf.InvertMatch { + if matchPattern(conf, line) == conf.InvertMatch { // by default -v is false, so if a line does NOT // match the pattern, we will ignore it. 
However, // if the user specified -v, the matching is inverted, diff --git a/lib/filter_test.go b/lib/filter_test.go index e779d31..5562fc0 100644 --- a/lib/filter_test.go +++ b/lib/filter_test.go @@ -27,21 +27,21 @@ import ( func TestMatchPattern(t *testing.T) { var input = []struct { - name string - fuzzy bool - pattern string - line string + name string + fuzzy bool + patterns []*cfg.Pattern + line string }{ { - name: "normal", - pattern: "haus", - line: "hausparty", + name: "normal", + patterns: []*cfg.Pattern{{Pattern: "haus"}}, + line: "hausparty", }, { - name: "fuzzy", - pattern: "hpt", - line: "haus-party-termin", - fuzzy: true, + name: "fuzzy", + patterns: []*cfg.Pattern{{Pattern: "hpt"}}, + line: "haus-party-termin", + fuzzy: true, }, } @@ -55,7 +55,7 @@ func TestMatchPattern(t *testing.T) { conf.UseFuzzySearch = true } - err := conf.PreparePattern(inputdata.pattern) + err := conf.PreparePattern(inputdata.patterns) if err != nil { t.Errorf("PreparePattern returned error: %s", err) } diff --git a/lib/helpers.go b/lib/helpers.go index 32b6705..c2ae9e9 100644 --- a/lib/helpers.go +++ b/lib/helpers.go @@ -293,12 +293,20 @@ func colorizeData(conf cfg.Config, output string) string { return colorized - case len(conf.Pattern) > 0 && !conf.NoColor && color.IsConsole(os.Stdout): - r := regexp.MustCompile("(" + conf.Pattern + ")") + case len(conf.Patterns) > 0 && !conf.NoColor && color.IsConsole(os.Stdout): + out := output - return r.ReplaceAllStringFunc(output, func(in string) string { - return conf.ColorStyle.Sprint(in) - }) + for _, re := range conf.Patterns { + if !re.Negate { + r := regexp.MustCompile("(" + re.Pattern + ")") + + out = r.ReplaceAllStringFunc(out, func(in string) string { + return conf.ColorStyle.Sprint(in) + }) + } + } + + return out default: return output diff --git a/lib/io.go b/lib/io.go index 86954b8..3e39ef8 100644 --- a/lib/io.go +++ b/lib/io.go @@ -29,13 +29,13 @@ import ( const RWRR = 0755 func ProcessFiles(conf *cfg.Config, args 
[]string) error { - fd, pattern, err := determineIO(conf, args) + fd, patterns, err := determineIO(conf, args) if err != nil { return err } - if err := conf.PreparePattern(pattern); err != nil { + if err := conf.PreparePattern(patterns); err != nil { return err } @@ -63,9 +63,9 @@ func ProcessFiles(conf *cfg.Config, args []string) error { return nil } -func determineIO(conf *cfg.Config, args []string) (io.Reader, string, error) { +func determineIO(conf *cfg.Config, args []string) (io.Reader, []*cfg.Pattern, error) { var filehandle io.Reader - var pattern string + var patterns []*cfg.Pattern var haveio bool switch { @@ -76,7 +76,7 @@ func determineIO(conf *cfg.Config, args []string) (io.Reader, string, error) { fd, err := os.OpenFile(conf.InputFile, os.O_RDONLY, RWRR) if err != nil { - return nil, "", fmt.Errorf("failed to read input file %s: %w", conf.InputFile, err) + return nil, nil, fmt.Errorf("failed to read input file %s: %w", conf.InputFile, err) } filehandle = fd @@ -93,13 +93,15 @@ func determineIO(conf *cfg.Config, args []string) (io.Reader, string, error) { } if len(args) > 0 { - pattern = args[0] - conf.Pattern = args[0] + patterns = make([]*cfg.Pattern, len(args)) + for i, arg := range args { + patterns[i] = &cfg.Pattern{Pattern: arg} + } } if !haveio { - return nil, "", errors.New("no file specified and nothing to read on stdin") + return nil, nil, errors.New("no file specified and nothing to read on stdin") } - return filehandle, pattern, nil + return filehandle, patterns, nil } diff --git a/lib/parser.go b/lib/parser.go index 40fe69b..c664bf5 100644 --- a/lib/parser.go +++ b/lib/parser.go @@ -137,7 +137,7 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) { } } else { // data processing - if conf.Pattern != "" && matchPattern(conf, line) == conf.InvertMatch { + if matchPattern(conf, line) == conf.InvertMatch { // by default -v is false, so if a line does NOT // match the pattern, we will ignore it. 
However, // if the user specified -v, the matching is inverted, diff --git a/lib/parser_test.go b/lib/parser_test.go index e382f59..a9abbe5 100644 --- a/lib/parser_test.go +++ b/lib/parser_test.go @@ -83,36 +83,42 @@ func TestParser(t *testing.T) { func TestParserPatternmatching(t *testing.T) { var tests = []struct { - entries [][]string - pattern string - invert bool - want bool + name string + entries [][]string + patterns []*cfg.Pattern + invert bool + want bool }{ { + name: "match", entries: [][]string{ {"asd", "igig", "cxxxncnc"}, }, - pattern: "ig", - invert: false, + patterns: []*cfg.Pattern{{Pattern: "ig"}}, + invert: false, }, { + name: "invert", entries: [][]string{ {"19191", "EDD 1", "X"}, }, - pattern: "ig", - invert: true, + patterns: []*cfg.Pattern{{Pattern: "ig"}}, + invert: true, }, } for _, inputdata := range input { for _, testdata := range tests { testname := fmt.Sprintf("parse-%s-with-pattern-%s-inverted-%t", - inputdata.name, testdata.pattern, testdata.invert) + inputdata.name, testdata.name, testdata.invert) t.Run(testname, func(t *testing.T) { - conf := cfg.Config{InvertMatch: testdata.invert, Pattern: testdata.pattern, - Separator: inputdata.separator} + conf := cfg.Config{ + InvertMatch: testdata.invert, + Patterns: testdata.patterns, + Separator: inputdata.separator, + } - _ = conf.PreparePattern(testdata.pattern) + _ = conf.PreparePattern(testdata.patterns) readFd := strings.NewReader(strings.TrimSpace(inputdata.text)) gotdata, err := Parse(conf, readFd) @@ -125,7 +131,7 @@ func TestParserPatternmatching(t *testing.T) { } else { if !reflect.DeepEqual(testdata.entries, gotdata.entries) { t.Errorf("Parser returned invalid data (pattern: %s, invert: %t)\nExp: %+v\nGot: %+v\n", - testdata.pattern, testdata.invert, testdata.entries, gotdata.entries) + testdata.name, testdata.invert, testdata.entries, gotdata.entries) } } }) diff --git a/tablizer.1 b/tablizer.1 index 73dd6b4..aa74953 100644 --- a/tablizer.1 +++ b/tablizer.1 @@ -133,7 +133,7 @@ 
.\" ======================================================================== .\" .IX Title "TABLIZER 1" -.TH TABLIZER 1 "2025-01-15" "1" "User Commands" +.TH TABLIZER 1 "2025-01-21" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -144,7 +144,7 @@ tablizer \- Manipulate tabular output of other programs .IX Header "SYNOPSIS" .Vb 2 \& Usage: -\& tablizer [regex] [file, ...] [flags] +\& tablizer [regex,...] [file, ...] [flags] \& \& Operational Flags: \& \-c, \-\-columns string Only show the speficied columns (separated by ,) @@ -278,38 +278,52 @@ Finally the \fB\-d\fR option enables debugging output which is mostly useful for the developer. .SS "\s-1PATTERNS AND FILTERING\s0" .IX Subsection "PATTERNS AND FILTERING" -You can reduce the rows being displayed by using a regular expression -pattern. The regexp is \s-1PCRE\s0 compatible, refer to the syntax cheat -sheet here: . If you want -to read a more comprehensive documentation about the topic and have -perl installed you can read it with: +You can reduce the rows being displayed by using one or more regular +expression patterns. The regexp language being used is the one of +\&\s-1GOLANG,\s0 refer to the syntax cheat sheet here: +. +.PP +If you want to read a more comprehensive documentation about the +topic and have perl installed you can read it with: .PP .Vb 1 \& perldoc perlre .Ve .PP -Or read it online: . +Or read it online: . But please note +that the \s-1GO\s0 regexp engine does \s-1NOT\s0 support all perl regex terms, +especially look-ahead and look-behind. .PP -A note on modifiers: the regexp engine used in tablizer uses another -modifier syntax: +If you want to supply flags to a regex, then surround it with slashes +and append the flag. The following flags are supported: .PP -.Vb 1 -\& (?MODIFIER) +.Vb 2 +\& i => case insensitive +\& ! 
=> negative match .Ve .PP -The most important modifiers are: -.PP -\&\f(CW\*(C`i\*(C'\fR ignore case -\&\f(CW\*(C`m\*(C'\fR multiline mode -\&\f(CW\*(C`s\*(C'\fR single line mode -.PP Example for a case insensitive search: .PP .Vb 1 -\& kubectl get pods \-A | tablizer "(?i)account" +\& kubectl get pods \-A | tablizer "/account/i" .Ve .PP -You can use the experimental fuzzy search feature by providing the +If you use the \f(CW\*(C`!\*(C'\fR flag, then the regex match will be negated, that +is, if a line in the input matches the given regex, but \f(CW\*(C`!\*(C'\fR is +supplied, tablizer will \s-1NOT\s0 include it in the output. +.PP +For example, here we want to get all lines matching \*(L"foo\*(R" but not +\&\*(L"bar\*(R": +.PP +.Vb 1 +\& cat table | tablizer foo \*(Aq/bar/!\*(Aq +.Ve +.PP +This would match a line \*(L"foo zorro\*(R" but not \*(L"foo bar\*(R". +.PP +The flags can also be combined. +.PP +You can also use the experimental fuzzy search feature by providing the option \fB\-z\fR, in which case the pattern is regarded as a fuzzy search term, not a regexp. .PP diff --git a/tablizer.pod b/tablizer.pod index 8f170ae..11846ea 100644 --- a/tablizer.pod +++ b/tablizer.pod @@ -5,7 +5,7 @@ tablizer - Manipulate tabular output of other programs =head1 SYNOPSIS Usage: - tablizer [regex] [file, ...] [flags] + tablizer [regex,...] [file, ...] [flags] Operational Flags: -c, --columns string Only show the speficied columns (separated by ,) @@ -142,32 +142,44 @@ useful for the developer. =head2 PATTERNS AND FILTERING -You can reduce the rows being displayed by using a regular expression -pattern. The regexp is PCRE compatible, refer to the syntax cheat -sheet here: L. If you want -to read a more comprehensive documentation about the topic and have -perl installed you can read it with: +You can reduce the rows being displayed by using one or more regular +expression patterns. The regexp language being used is the one of +GOLANG, refer to the syntax cheat sheet here: +L. 
+ +If you want to read a more comprehensive documentation about the +topic and have perl installed you can read it with: perldoc perlre -Or read it online: L. +Or read it online: L. But please note +that the GO regexp engine does NOT support all perl regex terms, +especially look-ahead and look-behind. -A note on modifiers: the regexp engine used in tablizer uses another -modifier syntax: +If you want to supply flags to a regex, then surround it with slashes +and append the flag. The following flags are supported: - (?MODIFIER) - -The most important modifiers are: - -C ignore case -C multiline mode -C single line mode + i => case insensitive + ! => negative match Example for a case insensitive search: - kubectl get pods -A | tablizer "(?i)account" + kubectl get pods -A | tablizer "/account/i" -You can use the experimental fuzzy search feature by providing the +If you use the C flag, then the regex match will be negated, that +is, if a line in the input matches the given regex, but C is +supplied, tablizer will NOT include it in the output. + +For example, here we want to get all lines matching "foo" but not +"bar": + + cat table | tablizer foo '/bar/!' + +This would match a line "foo zorro" but not "foo bar". + +The flags can also be combined. + +You can also use the experimental fuzzy search feature by providing the option B<-z>, in which case the pattern is regarded as a fuzzy search term, not a regexp.