add -F filter by column flag (closes #13)

This commit is contained in:
2024-05-07 13:28:11 +02:00
parent 96f7881c16
commit ba2a2e8460
8 changed files with 351 additions and 14 deletions

View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2022 Thomas von Dein Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -22,6 +22,7 @@ import (
"log" "log"
"os" "os"
"regexp" "regexp"
"strings"
"github.com/glycerine/zygomys/zygo" "github.com/glycerine/zygomys/zygo"
"github.com/gookit/color" "github.com/gookit/color"
@@ -29,7 +30,7 @@ import (
) )
const DefaultSeparator string = `(\s\s+|\t)` const DefaultSeparator string = `(\s\s+|\t)`
const Version string = "v1.1.0" const Version string = "v1.2.0"
var DefaultLoadPath string = os.Getenv("HOME") + "/.config/tablizer/lisp" var DefaultLoadPath string = os.Getenv("HOME") + "/.config/tablizer/lisp"
var DefaultConfigfile string = os.Getenv("HOME") + "/.config/tablizer/config" var DefaultConfigfile string = os.Getenv("HOME") + "/.config/tablizer/config"
@@ -89,6 +90,10 @@ type Config struct {
Configfile string Configfile string
Configuration Configuration Configuration Configuration
// used for field filtering
Rawfilters []string
Filters map[string]*regexp.Regexp
} }
// maps outputmode short flags to output mode, ie. -O => -o orgtbl // maps outputmode short flags to output mode, ie. -O => -o orgtbl
@@ -260,6 +265,26 @@ func (conf *Config) PrepareModeFlags(flag Modeflag) {
} }
} }
func (conf *Config) PrepareFilters() error {
conf.Filters = make(map[string]*regexp.Regexp, len(conf.Rawfilters))
for _, filter := range conf.Rawfilters {
parts := strings.Split(filter, "=")
if len(parts) != 2 {
return errors.New("filter field and value must be separated by =")
}
reg, err := regexp.Compile(parts[1])
if err != nil {
return err
}
conf.Filters[strings.ToLower(parts[0])] = reg
}
return nil
}
func (c *Config) CheckEnv() { func (c *Config) CheckEnv() {
// check for environment vars, command line flags have precedence, // check for environment vars, command line flags have precedence,
// NO_COLOR is being checked by the color module itself. // NO_COLOR is being checked by the color module itself.

View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2022 Thomas von Dein Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -100,6 +100,11 @@ func Execute() {
conf.CheckEnv() conf.CheckEnv()
conf.PrepareModeFlags(modeflag) conf.PrepareModeFlags(modeflag)
conf.PrepareSortFlags(sortmode) conf.PrepareSortFlags(sortmode)
if err = conf.PrepareFilters(); err != nil {
return err
}
conf.DetermineColormode() conf.DetermineColormode()
conf.ApplyDefaults() conf.ApplyDefaults()
@@ -149,10 +154,15 @@ func Execute() {
rootCmd.MarkFlagsMutuallyExclusive("extended", "markdown", "orgtbl", "shell", "yaml", "csv") rootCmd.MarkFlagsMutuallyExclusive("extended", "markdown", "orgtbl", "shell", "yaml", "csv")
// lisp options // lisp options
rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath, "Load path for lisp plugins (expects *.zy files)") rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath,
"Load path for lisp plugins (expects *.zy files)")
// config file // config file
rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile, "config file (default: ~/.config/tablizer/config)") rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile,
"config file (default: ~/.config/tablizer/config)")
// filters
rootCmd.PersistentFlags().StringArrayVarP(&conf.Rawfilters, "filter", "F", nil, "Filter by field (field=regexp)")
rootCmd.SetUsageTemplate(strings.TrimSpace(usage) + "\n") rootCmd.SetUsageTemplate(strings.TrimSpace(usage) + "\n")

View File

@@ -17,6 +17,7 @@ SYNOPSIS
-s, --separator string Custom field separator -s, --separator string Custom field separator
-k, --sort-by int Sort by column (default: 1) -k, --sort-by int Sort by column (default: 1)
-z, --fuzzy Use fuzzy seach [experimental] -z, --fuzzy Use fuzzy seach [experimental]
-F, --filter field=reg Filter given field with regex, can be used multiple times
Output Flags (mutually exclusive): Output Flags (mutually exclusive):
-X, --extended Enable extended output -X, --extended Enable extended output
@@ -117,7 +118,7 @@ DESCRIPTION
Finally the -d option enables debugging output which is mostly useful Finally the -d option enables debugging output which is mostly useful
for the developer. for the developer.
PATTERNS PATTERNS AND FILTERING
You can reduce the rows being displayed by using a regular expression You can reduce the rows being displayed by using a regular expression
pattern. The regexp is PCRE compatible, refer to the syntax cheat sheet pattern. The regexp is PCRE compatible, refer to the syntax cheat sheet
here: <https://github.com/google/re2/wiki/Syntax>. If you want to read a here: <https://github.com/google/re2/wiki/Syntax>. If you want to read a
@@ -145,6 +146,19 @@ DESCRIPTION
-z, in which case the pattern is regarded as a fuzzy search term, not a -z, in which case the pattern is regarded as a fuzzy search term, not a
regexp. regexp.
Sometimes you want to filter by one or more columns. You can do that
using the -F option. The option can be specified multiple times and has
the following format:
fieldname=regexp
Fieldnames (== columns headers) are case insensitive.
If you specify more than one filter, both filters have to match (AND
operation).
If the option -v is specified, the filtering is inverted.
COLUMNS COLUMNS
The parameter -c can be used to specify, which columns to display. By The parameter -c can be used to specify, which columns to display. By
default tablizer numerizes the header names and these numbers can be default tablizer numerizes the header names and these numbers can be
@@ -298,7 +312,7 @@ LICENSE
This software is licensed under the GNU GENERAL PUBLIC LICENSE version This software is licensed under the GNU GENERAL PUBLIC LICENSE version
3. 3.
Copyright (c) 2023 by Thomas von Dein Copyright (c) 2022-2024 by Thomas von Dein
This software uses the following GO modules: This software uses the following GO modules:
@@ -340,6 +354,7 @@ Operational Flags:
-s, --separator string Custom field separator -s, --separator string Custom field separator
-k, --sort-by int Sort by column (default: 1) -k, --sort-by int Sort by column (default: 1)
-z, --fuzzy Use fuzzy seach [experimental] -z, --fuzzy Use fuzzy seach [experimental]
-F, --filter field=reg Filter given field with regex, can be used multiple times
Output Flags (mutually exclusive): Output Flags (mutually exclusive):
-X, --extended Enable extended output -X, --extended Enable extended output

View File

@@ -1,5 +1,5 @@
/* /*
Copyright © 2022 Thomas von Dein Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -24,3 +24,13 @@ type Tabdata struct {
headers []string // [ "ID", "NAME", ...] headers []string // [ "ID", "NAME", ...]
entries [][]string entries [][]string
} }
func (data *Tabdata) CloneEmpty() Tabdata {
new := Tabdata{
maxwidthHeader: data.maxwidthHeader,
columns: data.columns,
headers: data.headers,
}
return new
}

82
lib/filter.go Normal file
View File

@@ -0,0 +1,82 @@
/*
Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"strings"
"github.com/lithammer/fuzzysearch/fuzzy"
"github.com/tlinden/tablizer/cfg"
)
/*
* [!]Match a line, use fuzzy search for normal pattern strings and
* regexp otherwise.
*/
func matchPattern(c cfg.Config, line string) bool {
if c.UseFuzzySearch {
return fuzzy.MatchFold(c.Pattern, line)
}
return c.PatternR.MatchString(line)
}
/*
* Filter parsed data by fields. The filter is positive, so if one or
* more filters match on a row, it will be kept, otherwise it will be
* excluded.
*/
func FilterByFields(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
if len(conf.Filters) == 0 {
// no filters, no checking
return Tabdata{}, false, nil
}
newdata := data.CloneEmpty()
for _, row := range data.entries {
keep := true
for idx, header := range data.headers {
if !Exists(conf.Filters, strings.ToLower(header)) {
// do not filter by unspecified field
continue
}
if !conf.Filters[strings.ToLower(header)].MatchString(row[idx]) {
// there IS a filter, but it doesn't match
keep = false
break
}
}
if keep == !conf.InvertMatch {
// also apply -v
newdata.entries = append(newdata.entries, row)
}
}
return newdata, true, nil
}
func Exists[K comparable, V any](m map[K]V, v K) bool {
if _, ok := m[v]; ok {
return true
}
return false
}

164
lib/filter_test.go Normal file
View File

@@ -0,0 +1,164 @@
/*
Copyright © 2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"fmt"
"reflect"
"testing"
"github.com/tlinden/tablizer/cfg"
)
func TestMatchPattern(t *testing.T) {
var input = []struct {
name string
fuzzy bool
pattern string
line string
}{
{
name: "normal",
pattern: "haus",
line: "hausparty",
},
{
name: "fuzzy",
pattern: "hpt",
line: "haus-party-termin",
fuzzy: true,
},
}
for _, in := range input {
testname := fmt.Sprintf("match-pattern-%s", in.name)
t.Run(testname, func(t *testing.T) {
c := cfg.Config{}
if in.fuzzy {
c.UseFuzzySearch = true
}
err := c.PreparePattern(in.pattern)
if err != nil {
t.Errorf("PreparePattern returned error: %s", err)
}
if !matchPattern(c, in.line) {
t.Errorf("matchPattern() did not match\nExp: true\nGot: false\n")
}
})
}
}
func TestFilterByFields(t *testing.T) {
data := Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
{"19191", "EDD 1", "x"},
{"8d8", "AN 1", "y"},
},
}
var input = []struct {
name string
filter []string
expect Tabdata
invert bool
}{
{
name: "one-field",
filter: []string{"one=19"},
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"19191", "EDD 1", "x"},
},
},
},
{
name: "one-field-inverted",
filter: []string{"one=19"},
invert: true,
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
{"8d8", "AN 1", "y"},
},
},
},
{
name: "many-fields",
filter: []string{"one=19", "two=DD"},
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"19191", "EDD 1", "x"},
},
},
},
{
name: "many-fields-inverted",
filter: []string{"one=19", "two=DD"},
invert: true,
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
{"8d8", "AN 1", "y"},
},
},
},
}
for _, in := range input {
testname := fmt.Sprintf("filter-by-fields-%s", in.name)
t.Run(testname, func(t *testing.T) {
c := cfg.Config{Rawfilters: in.filter, InvertMatch: in.invert}
err := c.PrepareFilters()
if err != nil {
t.Errorf("PrepareFilters returned error: %s", err)
}
data, _, _ := FilterByFields(c, data)
if !reflect.DeepEqual(data, in.expect) {
t.Errorf("Filtered data does not match expected data:\ngot: %+v\nexp: %+v", data, in.expect)
}
})
}
}

View File

@@ -133,7 +133,7 @@
.\" ======================================================================== .\" ========================================================================
.\" .\"
.IX Title "TABLIZER 1" .IX Title "TABLIZER 1"
.TH TABLIZER 1 "2023-11-22" "1" "User Commands" .TH TABLIZER 1 "2024-05-07" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes .\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents. .\" way too many mistakes in technical documents.
.if n .ad l .if n .ad l
@@ -155,6 +155,7 @@ tablizer \- Manipulate tabular output of other programs
\& \-s, \-\-separator string Custom field separator \& \-s, \-\-separator string Custom field separator
\& \-k, \-\-sort\-by int Sort by column (default: 1) \& \-k, \-\-sort\-by int Sort by column (default: 1)
\& \-z, \-\-fuzzy Use fuzzy seach [experimental] \& \-z, \-\-fuzzy Use fuzzy seach [experimental]
\& \-F, \-\-filter field=reg Filter given field with regex, can be used multiple times
\& \&
\& Output Flags (mutually exclusive): \& Output Flags (mutually exclusive):
\& \-X, \-\-extended Enable extended output \& \-X, \-\-extended Enable extended output
@@ -264,8 +265,8 @@ Sorts timestamps.
.PP .PP
Finally the \fB\-d\fR option enables debugging output which is mostly Finally the \fB\-d\fR option enables debugging output which is mostly
useful for the developer. useful for the developer.
.SS "\s-1PATTERNS\s0" .SS "\s-1PATTERNS AND FILTERING\s0"
.IX Subsection "PATTERNS" .IX Subsection "PATTERNS AND FILTERING"
You can reduce the rows being displayed by using a regular expression You can reduce the rows being displayed by using a regular expression
pattern. The regexp is \s-1PCRE\s0 compatible, refer to the syntax cheat pattern. The regexp is \s-1PCRE\s0 compatible, refer to the syntax cheat
sheet here: <https://github.com/google/re2/wiki/Syntax>. If you want sheet here: <https://github.com/google/re2/wiki/Syntax>. If you want
@@ -300,6 +301,21 @@ Example for a case insensitive search:
You can use the experimental fuzzy seach feature by providing the You can use the experimental fuzzy seach feature by providing the
option \fB\-z\fR, in which case the pattern is regarded as a fuzzy search option \fB\-z\fR, in which case the pattern is regarded as a fuzzy search
term, not a regexp. term, not a regexp.
.PP
Sometimes you want to filter by one or more columns. You can do that
using the \fB\-F\fR option. The option can be specified multiple times and
has the following format:
.PP
.Vb 1
\& fieldname=regexp
.Ve
.PP
Fieldnames (== columns headers) are case insensitive.
.PP
If you specify more than one filter, both filters have to match (\s-1AND\s0
operation).
.PP
If the option \fB\-v\fR is specified, the filtering is inverted.
.SS "\s-1COLUMNS\s0" .SS "\s-1COLUMNS\s0"
.IX Subsection "COLUMNS" .IX Subsection "COLUMNS"
The parameter \fB\-c\fR can be used to specify, which columns to The parameter \fB\-c\fR can be used to specify, which columns to
@@ -487,7 +503,7 @@ or to submit a patch, please open an issue on github:
.IX Header "LICENSE" .IX Header "LICENSE"
This software is licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3. This software is licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3.
.PP .PP
Copyright (c) 2023 by Thomas von Dein Copyright (c) 2022\-2024 by Thomas von Dein
.PP .PP
This software uses the following \s-1GO\s0 modules: This software uses the following \s-1GO\s0 modules:
.IP "repr (https://github.com/alecthomas/repr)" 4 .IP "repr (https://github.com/alecthomas/repr)" 4

View File

@@ -16,6 +16,7 @@ tablizer - Manipulate tabular output of other programs
-s, --separator string Custom field separator -s, --separator string Custom field separator
-k, --sort-by int Sort by column (default: 1) -k, --sort-by int Sort by column (default: 1)
-z, --fuzzy Use fuzzy seach [experimental] -z, --fuzzy Use fuzzy seach [experimental]
-F, --filter field=reg Filter given field with regex, can be used multiple times
Output Flags (mutually exclusive): Output Flags (mutually exclusive):
-X, --extended Enable extended output -X, --extended Enable extended output
@@ -128,7 +129,7 @@ Sorts timestamps.
Finally the B<-d> option enables debugging output which is mostly Finally the B<-d> option enables debugging output which is mostly
useful for the developer. useful for the developer.
=head2 PATTERNS =head2 PATTERNS AND FILTERING
You can reduce the rows being displayed by using a regular expression You can reduce the rows being displayed by using a regular expression
pattern. The regexp is PCRE compatible, refer to the syntax cheat pattern. The regexp is PCRE compatible, refer to the syntax cheat
@@ -159,6 +160,20 @@ You can use the experimental fuzzy seach feature by providing the
option B<-z>, in which case the pattern is regarded as a fuzzy search option B<-z>, in which case the pattern is regarded as a fuzzy search
term, not a regexp. term, not a regexp.
Sometimes you want to filter by one or more columns. You can do that
using the B<-F> option. The option can be specified multiple times and
has the following format:
fieldname=regexp
Fieldnames (== columns headers) are case insensitive.
If you specify more than one filter, both filters have to match (AND
operation).
If the option B<-v> is specified, the filtering is inverted.
=head2 COLUMNS =head2 COLUMNS
The parameter B<-c> can be used to specify, which columns to The parameter B<-c> can be used to specify, which columns to
@@ -336,7 +351,7 @@ L<https://github.com/TLINDEN/tablizer/issues>.
This software is licensed under the GNU GENERAL PUBLIC LICENSE version 3. This software is licensed under the GNU GENERAL PUBLIC LICENSE version 3.
Copyright (c) 2023 by Thomas von Dein Copyright (c) 2022-2024 by Thomas von Dein
This software uses the following GO modules: This software uses the following GO modules: