diff --git a/cfg/config.go b/cfg/config.go index ce1bab5..dbc3d5c 100644 --- a/cfg/config.go +++ b/cfg/config.go @@ -1,5 +1,5 @@ /* -Copyright © 2022 Thomas von Dein +Copyright © 2022-2024 Thomas von Dein This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -22,6 +22,7 @@ import ( "log" "os" "regexp" + "strings" "github.com/glycerine/zygomys/zygo" "github.com/gookit/color" @@ -29,7 +30,7 @@ import ( ) const DefaultSeparator string = `(\s\s+|\t)` -const Version string = "v1.1.0" +const Version string = "v1.2.0" var DefaultLoadPath string = os.Getenv("HOME") + "/.config/tablizer/lisp" var DefaultConfigfile string = os.Getenv("HOME") + "/.config/tablizer/config" @@ -89,6 +90,10 @@ type Config struct { Configfile string Configuration Configuration + + // used for field filtering + Rawfilters []string + Filters map[string]*regexp.Regexp } // maps outputmode short flags to output mode, ie. -O => -o orgtbl @@ -260,6 +265,26 @@ func (conf *Config) PrepareModeFlags(flag Modeflag) { } } +func (conf *Config) PrepareFilters() error { + conf.Filters = make(map[string]*regexp.Regexp, len(conf.Rawfilters)) + + for _, filter := range conf.Rawfilters { + parts := strings.Split(filter, "=") + if len(parts) != 2 { + return errors.New("filter field and value must be separated by =") + } + + reg, err := regexp.Compile(parts[1]) + if err != nil { + return err + } + + conf.Filters[strings.ToLower(parts[0])] = reg + } + + return nil +} + func (c *Config) CheckEnv() { // check for environment vars, command line flags have precedence, // NO_COLOR is being checked by the color module itself. 
diff --git a/cmd/root.go b/cmd/root.go index 1edfb43..50352b6 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -1,5 +1,5 @@ /* -Copyright © 2022 Thomas von Dein +Copyright © 2022-2024 Thomas von Dein This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -100,6 +100,11 @@ func Execute() { conf.CheckEnv() conf.PrepareModeFlags(modeflag) conf.PrepareSortFlags(sortmode) + + if err = conf.PrepareFilters(); err != nil { + return err + } + conf.DetermineColormode() conf.ApplyDefaults() @@ -149,10 +154,15 @@ func Execute() { rootCmd.MarkFlagsMutuallyExclusive("extended", "markdown", "orgtbl", "shell", "yaml", "csv") // lisp options - rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath, "Load path for lisp plugins (expects *.zy files)") + rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath, + "Load path for lisp plugins (expects *.zy files)") // config file - rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile, "config file (default: ~/.config/tablizer/config)") + rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile, + "config file (default: ~/.config/tablizer/config)") + + // filters + rootCmd.PersistentFlags().StringArrayVarP(&conf.Rawfilters, "filter", "F", nil, "Filter by field (field=regexp)") rootCmd.SetUsageTemplate(strings.TrimSpace(usage) + "\n") diff --git a/cmd/tablizer.go b/cmd/tablizer.go index e49dc17..5f0d23e 100644 --- a/cmd/tablizer.go +++ b/cmd/tablizer.go @@ -17,6 +17,7 @@ SYNOPSIS -s, --separator string Custom field separator -k, --sort-by int Sort by column (default: 1) -z, --fuzzy Use fuzzy seach [experimental] + -F, --filter field=reg Filter given field with regex, can be used multiple times Output Flags (mutually exclusive): -X, --extended Enable extended output @@ -117,7 +118,7 @@ DESCRIPTION Finally the 
-d option enables debugging output which is mostly useful for the developer. - PATTERNS + PATTERNS AND FILTERING You can reduce the rows being displayed by using a regular expression pattern. The regexp is PCRE compatible, refer to the syntax cheat sheet here: . If you want to read a @@ -145,6 +146,19 @@ DESCRIPTION -z, in which case the pattern is regarded as a fuzzy search term, not a regexp. + Sometimes you want to filter by one or more columns. You can do that + using the -F option. The option can be specified multiple times and has + the following format: + + fieldname=regexp + + Fieldnames (== column headers) are case insensitive. + + If you specify more than one filter, all filters have to match (AND + operation). + + If the option -v is specified, the filtering is inverted. + COLUMNS The parameter -c can be used to specify, which columns to display. By default tablizer numerizes the header names and these numbers can be @@ -298,7 +312,7 @@ LICENSE This software is licensed under the GNU GENERAL PUBLIC LICENSE version 3. - Copyright (c) 2023 by Thomas von Dein + Copyright (c) 2022-2024 by Thomas von Dein This software uses the following GO modules: @@ -340,6 +354,7 @@ Operational Flags: -s, --separator string Custom field separator -k, --sort-by int Sort by column (default: 1) -z, --fuzzy Use fuzzy seach [experimental] + -F, --filter field=reg Filter given field with regex, can be used multiple times Output Flags (mutually exclusive): -X, --extended Enable extended output diff --git a/lib/common.go b/lib/common.go index 5e6e920..0c9b915 100644 --- a/lib/common.go +++ b/lib/common.go @@ -1,5 +1,5 @@ /* -Copyright © 2022 Thomas von Dein +Copyright © 2022-2024 Thomas von Dein This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,3 +24,13 @@ type Tabdata struct { headers []string // [ "ID", "NAME", ...] 
entries [][]string } + +func (data *Tabdata) CloneEmpty() Tabdata { + new := Tabdata{ + maxwidthHeader: data.maxwidthHeader, + columns: data.columns, + headers: data.headers, + } + + return new +} diff --git a/lib/filter.go b/lib/filter.go new file mode 100644 index 0000000..022fd91 --- /dev/null +++ b/lib/filter.go @@ -0,0 +1,82 @@ +/* +Copyright © 2022-2024 Thomas von Dein + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +*/ + +package lib + +import ( + "strings" + + "github.com/lithammer/fuzzysearch/fuzzy" + "github.com/tlinden/tablizer/cfg" +) + +/* + * [!]Match a line, use fuzzy search for normal pattern strings and + * regexp otherwise. + */ +func matchPattern(c cfg.Config, line string) bool { + if c.UseFuzzySearch { + return fuzzy.MatchFold(c.Pattern, line) + } + + return c.PatternR.MatchString(line) +} + +/* + * Filter parsed data by fields. The filter is positive, so if one or + * more filters match on a row, it will be kept, otherwise it will be + * excluded. 
+ */ +func FilterByFields(conf cfg.Config, data Tabdata) (Tabdata, bool, error) { + if len(conf.Filters) == 0 { + // no filters, no checking + return Tabdata{}, false, nil + } + + newdata := data.CloneEmpty() + + for _, row := range data.entries { + keep := true + + for idx, header := range data.headers { + if !Exists(conf.Filters, strings.ToLower(header)) { + // do not filter by unspecified field + continue + } + + if !conf.Filters[strings.ToLower(header)].MatchString(row[idx]) { + // there IS a filter, but it doesn't match + keep = false + break + } + } + + if keep == !conf.InvertMatch { + // also apply -v + newdata.entries = append(newdata.entries, row) + } + } + + return newdata, true, nil +} + +func Exists[K comparable, V any](m map[K]V, v K) bool { + if _, ok := m[v]; ok { + return true + } + return false +} diff --git a/lib/filter_test.go b/lib/filter_test.go new file mode 100644 index 0000000..7124d0b --- /dev/null +++ b/lib/filter_test.go @@ -0,0 +1,164 @@ +/* +Copyright © 2024 Thomas von Dein + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+*/ + +package lib + +import ( + "fmt" + "reflect" + "testing" + + "github.com/tlinden/tablizer/cfg" +) + +func TestMatchPattern(t *testing.T) { + var input = []struct { + name string + fuzzy bool + pattern string + line string + }{ + { + name: "normal", + pattern: "haus", + line: "hausparty", + }, + { + name: "fuzzy", + pattern: "hpt", + line: "haus-party-termin", + fuzzy: true, + }, + } + + for _, in := range input { + testname := fmt.Sprintf("match-pattern-%s", in.name) + + t.Run(testname, func(t *testing.T) { + c := cfg.Config{} + + if in.fuzzy { + c.UseFuzzySearch = true + } + + err := c.PreparePattern(in.pattern) + if err != nil { + t.Errorf("PreparePattern returned error: %s", err) + } + + if !matchPattern(c, in.line) { + t.Errorf("matchPattern() did not match\nExp: true\nGot: false\n") + } + }) + } + +} + +func TestFilterByFields(t *testing.T) { + data := Tabdata{ + headers: []string{ + "ONE", "TWO", "THREE", + }, + entries: [][]string{ + {"asd", "igig", "cxxxncnc"}, + {"19191", "EDD 1", "x"}, + {"8d8", "AN 1", "y"}, + }, + } + + var input = []struct { + name string + filter []string + expect Tabdata + invert bool + }{ + { + name: "one-field", + filter: []string{"one=19"}, + expect: Tabdata{ + headers: []string{ + "ONE", "TWO", "THREE", + }, + entries: [][]string{ + {"19191", "EDD 1", "x"}, + }, + }, + }, + + { + name: "one-field-inverted", + filter: []string{"one=19"}, + invert: true, + expect: Tabdata{ + headers: []string{ + "ONE", "TWO", "THREE", + }, + entries: [][]string{ + {"asd", "igig", "cxxxncnc"}, + {"8d8", "AN 1", "y"}, + }, + }, + }, + + { + name: "many-fields", + filter: []string{"one=19", "two=DD"}, + expect: Tabdata{ + headers: []string{ + "ONE", "TWO", "THREE", + }, + entries: [][]string{ + {"19191", "EDD 1", "x"}, + }, + }, + }, + + { + name: "many-fields-inverted", + filter: []string{"one=19", "two=DD"}, + invert: true, + expect: Tabdata{ + headers: []string{ + "ONE", "TWO", "THREE", + }, + entries: [][]string{ + {"asd", "igig", 
"cxxxncnc"}, + {"8d8", "AN 1", "y"}, + }, + }, + }, + } + + for _, in := range input { + testname := fmt.Sprintf("filter-by-fields-%s", in.name) + + t.Run(testname, func(t *testing.T) { + c := cfg.Config{Rawfilters: in.filter, InvertMatch: in.invert} + + err := c.PrepareFilters() + if err != nil { + t.Errorf("PrepareFilters returned error: %s", err) + } + + data, _, _ := FilterByFields(c, data) + if !reflect.DeepEqual(data, in.expect) { + t.Errorf("Filtered data does not match expected data:\ngot: %+v\nexp: %+v", data, in.expect) + } + }) + } + +} diff --git a/tablizer.1 b/tablizer.1 index 2d8285c..4ea873f 100644 --- a/tablizer.1 +++ b/tablizer.1 @@ -133,7 +133,7 @@ .\" ======================================================================== .\" .IX Title "TABLIZER 1" -.TH TABLIZER 1 "2023-11-22" "1" "User Commands" +.TH TABLIZER 1 "2024-05-07" "1" "User Commands" .\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" way too many mistakes in technical documents. .if n .ad l @@ -155,6 +155,7 @@ tablizer \- Manipulate tabular output of other programs \& \-s, \-\-separator string Custom field separator \& \-k, \-\-sort\-by int Sort by column (default: 1) \& \-z, \-\-fuzzy Use fuzzy seach [experimental] +\& \-F, \-\-filter field=reg Filter given field with regex, can be used multiple times \& \& Output Flags (mutually exclusive): \& \-X, \-\-extended Enable extended output @@ -264,8 +265,8 @@ Sorts timestamps. .PP Finally the \fB\-d\fR option enables debugging output which is mostly useful for the developer. -.SS "\s-1PATTERNS\s0" -.IX Subsection "PATTERNS" +.SS "\s-1PATTERNS AND FILTERING\s0" +.IX Subsection "PATTERNS AND FILTERING" You can reduce the rows being displayed by using a regular expression pattern. The regexp is \s-1PCRE\s0 compatible, refer to the syntax cheat sheet here: . 
If you want @@ -300,6 +301,21 @@ Example for a case insensitive search: You can use the experimental fuzzy seach feature by providing the option \fB\-z\fR, in which case the pattern is regarded as a fuzzy search term, not a regexp. +.PP +Sometimes you want to filter by one or more columns. You can do that +using the \fB\-F\fR option. The option can be specified multiple times and +has the following format: +.PP +.Vb 1 +\& fieldname=regexp +.Ve +.PP +Fieldnames (== column headers) are case insensitive. +.PP +If you specify more than one filter, all filters have to match (\s-1AND\s0 +operation). +.PP +If the option \fB\-v\fR is specified, the filtering is inverted. .SS "\s-1COLUMNS\s0" .IX Subsection "COLUMNS" The parameter \fB\-c\fR can be used to specify, which columns to @@ -487,7 +503,7 @@ or to submit a patch, please open an issue on github: .IX Header "LICENSE" This software is licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3. .PP -Copyright (c) 2023 by Thomas von Dein +Copyright (c) 2022\-2024 by Thomas von Dein .PP This software uses the following \s-1GO\s0 modules: .IP "repr (https://github.com/alecthomas/repr)" 4 diff --git a/tablizer.pod b/tablizer.pod index 8bfcfde..79884cd 100644 --- a/tablizer.pod +++ b/tablizer.pod @@ -16,6 +16,7 @@ tablizer - Manipulate tabular output of other programs -s, --separator string Custom field separator -k, --sort-by int Sort by column (default: 1) -z, --fuzzy Use fuzzy seach [experimental] + -F, --filter field=reg Filter given field with regex, can be used multiple times Output Flags (mutually exclusive): -X, --extended Enable extended output @@ -128,7 +129,7 @@ Sorts timestamps. Finally the B<-d> option enables debugging output which is mostly useful for the developer. -=head2 PATTERNS +=head2 PATTERNS AND FILTERING You can reduce the rows being displayed by using a regular expression pattern. 
The regexp is PCRE compatible, refer to the syntax cheat @@ -159,6 +160,20 @@ You can use the experimental fuzzy seach feature by providing the option B<-z>, in which case the pattern is regarded as a fuzzy search term, not a regexp. +Sometimes you want to filter by one or more columns. You can do that +using the B<-F> option. The option can be specified multiple times and +has the following format: + + fieldname=regexp + +Fieldnames (== column headers) are case insensitive. + +If you specify more than one filter, all filters have to match (AND +operation). + +If the option B<-v> is specified, the filtering is inverted. + + =head2 COLUMNS The parameter B<-c> can be used to specify, which columns to @@ -336,7 +351,7 @@ L. This software is licensed under the GNU GENERAL PUBLIC LICENSE version 3. -Copyright (c) 2023 by Thomas von Dein +Copyright (c) 2022-2024 by Thomas von Dein This software uses the following GO modules: