add -F filter by column flag (closes #13)

This commit is contained in:
2024-05-07 13:28:11 +02:00
parent 96f7881c16
commit ba2a2e8460
8 changed files with 351 additions and 14 deletions

View File

@@ -1,5 +1,5 @@
/*
Copyright © 2022 Thomas von Dein
Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -22,6 +22,7 @@ import (
"log"
"os"
"regexp"
"strings"
"github.com/glycerine/zygomys/zygo"
"github.com/gookit/color"
@@ -29,7 +30,7 @@ import (
)
const DefaultSeparator string = `(\s\s+|\t)`
const Version string = "v1.1.0"
const Version string = "v1.2.0"
var DefaultLoadPath string = os.Getenv("HOME") + "/.config/tablizer/lisp"
var DefaultConfigfile string = os.Getenv("HOME") + "/.config/tablizer/config"
@@ -89,6 +90,10 @@ type Config struct {
Configfile string
Configuration Configuration
// used for field filtering
Rawfilters []string
Filters map[string]*regexp.Regexp
}
// maps outputmode short flags to output mode, ie. -O => -o orgtbl
@@ -260,6 +265,26 @@ func (conf *Config) PrepareModeFlags(flag Modeflag) {
}
}
func (conf *Config) PrepareFilters() error {
conf.Filters = make(map[string]*regexp.Regexp, len(conf.Rawfilters))
for _, filter := range conf.Rawfilters {
parts := strings.Split(filter, "=")
if len(parts) != 2 {
return errors.New("filter field and value must be separated by =")
}
reg, err := regexp.Compile(parts[1])
if err != nil {
return err
}
conf.Filters[strings.ToLower(parts[0])] = reg
}
return nil
}
func (c *Config) CheckEnv() {
// check for environment vars, command line flags have precedence,
// NO_COLOR is being checked by the color module itself.

View File

@@ -1,5 +1,5 @@
/*
Copyright © 2022 Thomas von Dein
Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -100,6 +100,11 @@ func Execute() {
conf.CheckEnv()
conf.PrepareModeFlags(modeflag)
conf.PrepareSortFlags(sortmode)
if err = conf.PrepareFilters(); err != nil {
return err
}
conf.DetermineColormode()
conf.ApplyDefaults()
@@ -149,10 +154,15 @@ func Execute() {
rootCmd.MarkFlagsMutuallyExclusive("extended", "markdown", "orgtbl", "shell", "yaml", "csv")
// lisp options
rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath, "Load path for lisp plugins (expects *.zy files)")
rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath,
"Load path for lisp plugins (expects *.zy files)")
// config file
rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile, "config file (default: ~/.config/tablizer/config)")
rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile,
"config file (default: ~/.config/tablizer/config)")
// filters
rootCmd.PersistentFlags().StringArrayVarP(&conf.Rawfilters, "filter", "F", nil, "Filter by field (field=regexp)")
rootCmd.SetUsageTemplate(strings.TrimSpace(usage) + "\n")

View File

@@ -17,6 +17,7 @@ SYNOPSIS
-s, --separator string Custom field separator
-k, --sort-by int Sort by column (default: 1)
-z, --fuzzy Use fuzzy seach [experimental]
-F, --filter field=reg Filter given field with regex, can be used multiple times
Output Flags (mutually exclusive):
-X, --extended Enable extended output
@@ -117,7 +118,7 @@ DESCRIPTION
Finally the -d option enables debugging output which is mostly useful
for the developer.
PATTERNS
PATTERNS AND FILTERING
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is PCRE compatible, refer to the syntax cheat sheet
here: <https://github.com/google/re2/wiki/Syntax>. If you want to read a
@@ -145,6 +146,19 @@ DESCRIPTION
-z, in which case the pattern is regarded as a fuzzy search term, not a
regexp.
Sometimes you want to filter by one or more columns. You can do that
using the -F option. The option can be specified multiple times and has
the following format:
fieldname=regexp
Fieldnames (== columns headers) are case insensitive.
If you specify more than one filter, both filters have to match (AND
operation).
If the option -v is specified, the filtering is inverted.
COLUMNS
The parameter -c can be used to specify, which columns to display. By
default tablizer numerizes the header names and these numbers can be
@@ -298,7 +312,7 @@ LICENSE
This software is licensed under the GNU GENERAL PUBLIC LICENSE version
3.
Copyright (c) 2023 by Thomas von Dein
Copyright (c) 2022-2024 by Thomas von Dein
This software uses the following GO modules:
@@ -340,6 +354,7 @@ Operational Flags:
-s, --separator string Custom field separator
-k, --sort-by int Sort by column (default: 1)
-z, --fuzzy Use fuzzy seach [experimental]
-F, --filter field=reg Filter given field with regex, can be used multiple times
Output Flags (mutually exclusive):
-X, --extended Enable extended output

View File

@@ -1,5 +1,5 @@
/*
Copyright © 2022 Thomas von Dein
Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -24,3 +24,13 @@ type Tabdata struct {
headers []string // [ "ID", "NAME", ...]
entries [][]string
}
func (data *Tabdata) CloneEmpty() Tabdata {
new := Tabdata{
maxwidthHeader: data.maxwidthHeader,
columns: data.columns,
headers: data.headers,
}
return new
}

82
lib/filter.go Normal file
View File

@@ -0,0 +1,82 @@
/*
Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"strings"
"github.com/lithammer/fuzzysearch/fuzzy"
"github.com/tlinden/tablizer/cfg"
)
/*
* [!]Match a line, use fuzzy search for normal pattern strings and
* regexp otherwise.
*/
func matchPattern(c cfg.Config, line string) bool {
if c.UseFuzzySearch {
return fuzzy.MatchFold(c.Pattern, line)
}
return c.PatternR.MatchString(line)
}
/*
* Filter parsed data by fields. The filter is positive, so if one or
* more filters match on a row, it will be kept, otherwise it will be
* excluded.
*/
func FilterByFields(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
if len(conf.Filters) == 0 {
// no filters, no checking
return Tabdata{}, false, nil
}
newdata := data.CloneEmpty()
for _, row := range data.entries {
keep := true
for idx, header := range data.headers {
if !Exists(conf.Filters, strings.ToLower(header)) {
// do not filter by unspecified field
continue
}
if !conf.Filters[strings.ToLower(header)].MatchString(row[idx]) {
// there IS a filter, but it doesn't match
keep = false
break
}
}
if keep == !conf.InvertMatch {
// also apply -v
newdata.entries = append(newdata.entries, row)
}
}
return newdata, true, nil
}
func Exists[K comparable, V any](m map[K]V, v K) bool {
if _, ok := m[v]; ok {
return true
}
return false
}

164
lib/filter_test.go Normal file
View File

@@ -0,0 +1,164 @@
/*
Copyright © 2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"fmt"
"reflect"
"testing"
"github.com/tlinden/tablizer/cfg"
)
func TestMatchPattern(t *testing.T) {
var input = []struct {
name string
fuzzy bool
pattern string
line string
}{
{
name: "normal",
pattern: "haus",
line: "hausparty",
},
{
name: "fuzzy",
pattern: "hpt",
line: "haus-party-termin",
fuzzy: true,
},
}
for _, in := range input {
testname := fmt.Sprintf("match-pattern-%s", in.name)
t.Run(testname, func(t *testing.T) {
c := cfg.Config{}
if in.fuzzy {
c.UseFuzzySearch = true
}
err := c.PreparePattern(in.pattern)
if err != nil {
t.Errorf("PreparePattern returned error: %s", err)
}
if !matchPattern(c, in.line) {
t.Errorf("matchPattern() did not match\nExp: true\nGot: false\n")
}
})
}
}
func TestFilterByFields(t *testing.T) {
data := Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
{"19191", "EDD 1", "x"},
{"8d8", "AN 1", "y"},
},
}
var input = []struct {
name string
filter []string
expect Tabdata
invert bool
}{
{
name: "one-field",
filter: []string{"one=19"},
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"19191", "EDD 1", "x"},
},
},
},
{
name: "one-field-inverted",
filter: []string{"one=19"},
invert: true,
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
{"8d8", "AN 1", "y"},
},
},
},
{
name: "many-fields",
filter: []string{"one=19", "two=DD"},
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"19191", "EDD 1", "x"},
},
},
},
{
name: "many-fields-inverted",
filter: []string{"one=19", "two=DD"},
invert: true,
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
{"8d8", "AN 1", "y"},
},
},
},
}
for _, in := range input {
testname := fmt.Sprintf("filter-by-fields-%s", in.name)
t.Run(testname, func(t *testing.T) {
c := cfg.Config{Rawfilters: in.filter, InvertMatch: in.invert}
err := c.PrepareFilters()
if err != nil {
t.Errorf("PrepareFilters returned error: %s", err)
}
data, _, _ := FilterByFields(c, data)
if !reflect.DeepEqual(data, in.expect) {
t.Errorf("Filtered data does not match expected data:\ngot: %+v\nexp: %+v", data, in.expect)
}
})
}
}

View File

@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "TABLIZER 1"
.TH TABLIZER 1 "2023-11-22" "1" "User Commands"
.TH TABLIZER 1 "2024-05-07" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@@ -155,6 +155,7 @@ tablizer \- Manipulate tabular output of other programs
\& \-s, \-\-separator string Custom field separator
\& \-k, \-\-sort\-by int Sort by column (default: 1)
\& \-z, \-\-fuzzy Use fuzzy seach [experimental]
\& \-F, \-\-filter field=reg Filter given field with regex, can be used multiple times
\&
\& Output Flags (mutually exclusive):
\& \-X, \-\-extended Enable extended output
@@ -264,8 +265,8 @@ Sorts timestamps.
.PP
Finally the \fB\-d\fR option enables debugging output which is mostly
useful for the developer.
.SS "\s-1PATTERNS\s0"
.IX Subsection "PATTERNS"
.SS "\s-1PATTERNS AND FILTERING\s0"
.IX Subsection "PATTERNS AND FILTERING"
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is \s-1PCRE\s0 compatible, refer to the syntax cheat
sheet here: <https://github.com/google/re2/wiki/Syntax>. If you want
@@ -300,6 +301,21 @@ Example for a case insensitive search:
You can use the experimental fuzzy seach feature by providing the
option \fB\-z\fR, in which case the pattern is regarded as a fuzzy search
term, not a regexp.
.PP
Sometimes you want to filter by one or more columns. You can do that
using the \fB\-F\fR option. The option can be specified multiple times and
has the following format:
.PP
.Vb 1
\& fieldname=regexp
.Ve
.PP
Fieldnames (== columns headers) are case insensitive.
.PP
If you specify more than one filter, both filters have to match (\s-1AND\s0
operation).
.PP
If the option \fB\-v\fR is specified, the filtering is inverted.
.SS "\s-1COLUMNS\s0"
.IX Subsection "COLUMNS"
The parameter \fB\-c\fR can be used to specify, which columns to
@@ -487,7 +503,7 @@ or to submit a patch, please open an issue on github:
.IX Header "LICENSE"
This software is licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3.
.PP
Copyright (c) 2023 by Thomas von Dein
Copyright (c) 2022\-2024 by Thomas von Dein
.PP
This software uses the following \s-1GO\s0 modules:
.IP "repr (https://github.com/alecthomas/repr)" 4

View File

@@ -16,6 +16,7 @@ tablizer - Manipulate tabular output of other programs
-s, --separator string Custom field separator
-k, --sort-by int Sort by column (default: 1)
-z, --fuzzy Use fuzzy seach [experimental]
-F, --filter field=reg Filter given field with regex, can be used multiple times
Output Flags (mutually exclusive):
-X, --extended Enable extended output
@@ -128,7 +129,7 @@ Sorts timestamps.
Finally the B<-d> option enables debugging output which is mostly
useful for the developer.
=head2 PATTERNS
=head2 PATTERNS AND FILTERING
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is PCRE compatible, refer to the syntax cheat
@@ -159,6 +160,20 @@ You can use the experimental fuzzy seach feature by providing the
option B<-z>, in which case the pattern is regarded as a fuzzy search
term, not a regexp.
Sometimes you want to filter by one or more columns. You can do that
using the B<-F> option. The option can be specified multiple times and
has the following format:
fieldname=regexp
Fieldnames (== columns headers) are case insensitive.
If you specify more than one filter, both filters have to match (AND
operation).
If the option B<-v> is specified, the filtering is inverted.
=head2 COLUMNS
The parameter B<-c> can be used to specify, which columns to
@@ -336,7 +351,7 @@ L<https://github.com/TLINDEN/tablizer/issues>.
This software is licensed under the GNU GENERAL PUBLIC LICENSE version 3.
Copyright (c) 2023 by Thomas von Dein
Copyright (c) 2022-2024 by Thomas von Dein
This software uses the following GO modules: