add -F filter by column flag (closes #13)

2026-02-04 02:20:56 +01:00 · 2024-05-07 13:28:11 +02:00
parent 96f7881c16
commit ba2a2e8460
8 changed files with 351 additions and 14 deletions
--- a/cfg/config.go
+++ b/cfg/config.go
@@ -1,5 +1,5 @@
 /*
-Copyright © 2022 Thomas von Dein
+Copyright © 2022-2024 Thomas von Dein

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -22,6 +22,7 @@ import (
 	"log"
 	"os"
 	"regexp"
+	"strings"

 	"github.com/glycerine/zygomys/zygo"
 	"github.com/gookit/color"
@@ -29,7 +30,7 @@ import (
 )

 const DefaultSeparator string = `(\s\s+|\t)`
-const Version string = "v1.1.0"
+const Version string = "v1.2.0"

 var DefaultLoadPath string = os.Getenv("HOME") + "/.config/tablizer/lisp"
 var DefaultConfigfile string = os.Getenv("HOME") + "/.config/tablizer/config"
@@ -89,6 +90,10 @@ type Config struct {
 	Configfile string

 	Configuration Configuration
+
+	// used for field filtering
+	Rawfilters []string
+	Filters    map[string]*regexp.Regexp
 }

 // maps outputmode short flags to output mode, ie. -O => -o orgtbl
@@ -260,6 +265,26 @@ func (conf *Config) PrepareModeFlags(flag Modeflag) {
 	}
 }

+// PrepareFilters compiles the raw filter arguments in conf.Rawfilters
+// into conf.Filters, keyed by the lower-cased field name.
+//
+// Each raw filter has the form field=regexp. Only the first "="
+// separates the field from the expression, so the regexp itself may
+// contain "=". If the same field is given more than once, the last
+// expression wins.
+//
+// An error is returned if a filter contains no "=" or if its regexp
+// does not compile.
+func (conf *Config) PrepareFilters() error {
+	conf.Filters = make(map[string]*regexp.Regexp, len(conf.Rawfilters))

+	for _, filter := range conf.Rawfilters {
+		// cut at the first "=" only, splitting on every "=" would
+		// reject valid expressions like "name=a=b"
+		field, expr, found := strings.Cut(filter, "=")
+		if !found {
+			return errors.New("filter field and value must be separated by =")
+		}

+		reg, err := regexp.Compile(expr)
+		if err != nil {
+			return err
+		}

+		conf.Filters[strings.ToLower(field)] = reg
+	}

+	return nil
+}
+
 func (c *Config) CheckEnv() {
 	// check for environment vars, command line flags have precedence,
 	// NO_COLOR is being checked by the color module itself.
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -1,5 +1,5 @@
 /*
-Copyright © 2022 Thomas von Dein
+Copyright © 2022-2024 Thomas von Dein

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -100,6 +100,11 @@ func Execute() {
 			conf.CheckEnv()
 			conf.PrepareModeFlags(modeflag)
 			conf.PrepareSortFlags(sortmode)
+
+			if err = conf.PrepareFilters(); err != nil {
+				return err
+			}
+
 			conf.DetermineColormode()
 			conf.ApplyDefaults()

@@ -149,10 +154,15 @@ func Execute() {
 	rootCmd.MarkFlagsMutuallyExclusive("extended", "markdown", "orgtbl", "shell", "yaml", "csv")

 	// lisp options
-	rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath, "Load path for lisp plugins (expects *.zy files)")
+	rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath,
+		"Load path for lisp plugins (expects *.zy files)")

 	// config file
-	rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile, "config file (default: ~/.config/tablizer/config)")
+	rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile,
+		"config file (default: ~/.config/tablizer/config)")
+
+	// filters
+	rootCmd.PersistentFlags().StringArrayVarP(&conf.Rawfilters, "filter", "F", nil, "Filter by field (field=regexp)")

 	rootCmd.SetUsageTemplate(strings.TrimSpace(usage) + "\n")

--- a/cmd/tablizer.go
+++ b/cmd/tablizer.go
@@ -17,6 +17,7 @@ SYNOPSIS
          -s, --separator string   Custom field separator
          -k, --sort-by int        Sort by column (default: 1)
          -z, --fuzzy              Use fuzzy seach [experimental]
+          -F, --filter field=reg   Filter given field with regex, can be used multiple times

        Output Flags (mutually exclusive):
          -X, --extended           Enable extended output
@@ -117,7 +118,7 @@ DESCRIPTION
    Finally the -d option enables debugging output which is mostly useful
    for the developer.

-  PATTERNS
+  PATTERNS AND FILTERING
    You can reduce the rows being displayed by using a regular expression
    pattern. The regexp is PCRE compatible, refer to the syntax cheat sheet
    here: <https://github.com/google/re2/wiki/Syntax>. If you want to read a
@@ -145,6 +146,19 @@ DESCRIPTION
    -z, in which case the pattern is regarded as a fuzzy search term, not a
    regexp.

+    Sometimes you want to filter by one or more columns. You can do that
+    using the -F option. The option can be specified multiple times and has
+    the following format:
+
+        fieldname=regexp
+
+    Fieldnames (== columns headers) are case insensitive.
+
+    If you specify more than one filter, both filters have to match (AND
+    operation).
+
+    If the option -v is specified, the filtering is inverted.
+
  COLUMNS
    The parameter -c can be used to specify, which columns to display. By
    default tablizer numerizes the header names and these numbers can be
@@ -298,7 +312,7 @@ LICENSE
    This software is licensed under the GNU GENERAL PUBLIC LICENSE version
    3.

-    Copyright (c) 2023 by Thomas von Dein
+    Copyright (c) 2022-2024 by Thomas von Dein

    This software uses the following GO modules:

@@ -340,6 +354,7 @@ Operational Flags:
  -s, --separator string   Custom field separator
  -k, --sort-by int        Sort by column (default: 1)
  -z, --fuzzy              Use fuzzy seach [experimental]
+  -F, --filter field=reg   Filter given field with regex, can be used multiple times

 Output Flags (mutually exclusive):
  -X, --extended           Enable extended output
--- a/lib/common.go
+++ b/lib/common.go
@@ -1,5 +1,5 @@
 /*
-Copyright © 2022 Thomas von Dein
+Copyright © 2022-2024 Thomas von Dein

 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -24,3 +24,13 @@ type Tabdata struct {
 	headers        []string // [ "ID", "NAME", ...]
 	entries        [][]string
 }
+
+// CloneEmpty returns a Tabdata which carries over maxwidthHeader,
+// columns and headers from data. All other fields, including entries,
+// are left at their zero value. The headers slice is not copied, so
+// both values share the same backing array.
+func (data *Tabdata) CloneEmpty() Tabdata {
+	return Tabdata{
+		maxwidthHeader: data.maxwidthHeader,
+		columns:        data.columns,
+		headers:        data.headers,
+	}
+}
--- a/lib/filter.go
+++ b/lib/filter.go
@@ -0,0 +1,82 @@
+/*
+Copyright © 2022-2024 Thomas von Dein
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package lib
+
+import (
+	"strings"
+
+	"github.com/lithammer/fuzzysearch/fuzzy"
+	"github.com/tlinden/tablizer/cfg"
+)
+
+/*
+ * Report whether  line matches the  configured pattern. The  line is
+ * checked against the compiled regexp c.PatternR, unless fuzzy search
+ * is enabled, in which case c.Pattern is used as fuzzy search term.
+ */
+func matchPattern(c cfg.Config, line string) bool {
+	if !c.UseFuzzySearch {
+		return c.PatternR.MatchString(line)
+	}

+	return fuzzy.MatchFold(c.Pattern, line)
+}
+
+/*
+ * Filter parsed data by fields. A  row matches only if every filter,
+ * whose field  name equals one  of the headers  (case insensitive),
+ * matches the  cell of that  column (AND operation).  Matching rows
+ * are kept, or dropped instead if conf.InvertMatch is set. Filters
+ * naming a field which is not among the headers are ignored.
+ *
+ * Returns the filtered copy of data and true. If no filters are
+ * configured, an empty Tabdata and false are returned, nothing has
+ * been filtered then. The returned error is currently always nil.
+ */
+func FilterByFields(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
+	if len(conf.Filters) == 0 {
+		// no filters, no checking
+		return Tabdata{}, false, nil
+	}

+	// lower-case the headers once, instead of once per row and column
+	fields := make([]string, len(data.headers))
+	for idx, header := range data.headers {
+		fields[idx] = strings.ToLower(header)
+	}

+	newdata := data.CloneEmpty()

+	for _, row := range data.entries {
+		keep := true

+		for idx, field := range fields {
+			reg, exists := conf.Filters[field]
+			if !exists {
+				// do not filter by unspecified field
+				continue
+			}

+			// a row may have fewer cells than there are headers,
+			// treat a missing cell as empty instead of panicking
+			cell := ""
+			if idx < len(row) {
+				cell = row[idx]
+			}

+			if !reg.MatchString(cell) {
+				// there IS a filter, but it doesn't match
+				keep = false
+				break
+			}
+		}

+		if keep == !conf.InvertMatch {
+			// also apply -v
+			newdata.entries = append(newdata.entries, row)
+		}
+	}

+	return newdata, true, nil
+}
+
+// Exists reports whether the key v is present in the map m.
+func Exists[K comparable, V any](m map[K]V, v K) bool {
+	_, found := m[v]

+	return found
+}
--- a/lib/filter_test.go
+++ b/lib/filter_test.go
@@ -0,0 +1,164 @@
+/*
+Copyright © 2024 Thomas von Dein
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package lib
+
+import (
+	"fmt"
+	"reflect"
+	"testing"
+
+	"github.com/tlinden/tablizer/cfg"
+)
+
+// TestMatchPattern checks that matchPattern() accepts a matching line,
+// once using a regular expression and once using fuzzy search.
+func TestMatchPattern(t *testing.T) {
+	var input = []struct {
+		name    string
+		fuzzy   bool
+		pattern string
+		line    string
+	}{
+		{
+			name:    "normal",
+			pattern: "haus",
+			line:    "hausparty",
+		},
+		{
+			name:    "fuzzy",
+			pattern: "hpt",
+			line:    "haus-party-termin",
+			fuzzy:   true,
+		},
+	}

+	for _, in := range input {
+		testname := fmt.Sprintf("match-pattern-%s", in.name)

+		t.Run(testname, func(t *testing.T) {
+			c := cfg.Config{UseFuzzySearch: in.fuzzy}

+			// matchPattern() needs a prepared pattern, so stop this
+			// subtest right away if the preparation fails
+			if err := c.PreparePattern(in.pattern); err != nil {
+				t.Fatalf("PreparePattern returned error: %s", err)
+			}

+			if !matchPattern(c, in.line) {
+				t.Errorf("matchPattern() did not match\nExp: true\nGot: false\n")
+			}
+		})
+	}
+}
+
+// TestFilterByFields runs FilterByFields() with one and with several
+// field filters, each of them plain and inverted, against a small
+// fixed table and compares the outcome with the expected rows.
+func TestFilterByFields(t *testing.T) {
+	data := Tabdata{
+		headers: []string{
+			"ONE", "TWO", "THREE",
+		},
+		entries: [][]string{
+			{"asd", "igig", "cxxxncnc"},
+			{"19191", "EDD 1", "x"},
+			{"8d8", "AN 1", "y"},
+		},
+	}

+	var input = []struct {
+		name   string
+		filter []string
+		expect Tabdata
+		invert bool
+	}{
+		{
+			name:   "one-field",
+			filter: []string{"one=19"},
+			expect: Tabdata{
+				headers: []string{
+					"ONE", "TWO", "THREE",
+				},
+				entries: [][]string{
+					{"19191", "EDD 1", "x"},
+				},
+			},
+		},

+		{
+			name:   "one-field-inverted",
+			filter: []string{"one=19"},
+			invert: true,
+			expect: Tabdata{
+				headers: []string{
+					"ONE", "TWO", "THREE",
+				},
+				entries: [][]string{
+					{"asd", "igig", "cxxxncnc"},
+					{"8d8", "AN 1", "y"},
+				},
+			},
+		},

+		{
+			name:   "many-fields",
+			filter: []string{"one=19", "two=DD"},
+			expect: Tabdata{
+				headers: []string{
+					"ONE", "TWO", "THREE",
+				},
+				entries: [][]string{
+					{"19191", "EDD 1", "x"},
+				},
+			},
+		},

+		{
+			name:   "many-fields-inverted",
+			filter: []string{"one=19", "two=DD"},
+			invert: true,
+			expect: Tabdata{
+				headers: []string{
+					"ONE", "TWO", "THREE",
+				},
+				entries: [][]string{
+					{"asd", "igig", "cxxxncnc"},
+					{"8d8", "AN 1", "y"},
+				},
+			},
+		},
+	}

+	for _, in := range input {
+		testname := fmt.Sprintf("filter-by-fields-%s", in.name)

+		t.Run(testname, func(t *testing.T) {
+			c := cfg.Config{Rawfilters: in.filter, InvertMatch: in.invert}

+			// without compiled filters there is nothing to test
+			if err := c.PrepareFilters(); err != nil {
+				t.Fatalf("PrepareFilters returned error: %s", err)
+			}

+			// use a separate name, so that the input table is not shadowed
+			filtered, applied, err := FilterByFields(c, data)
+			if err != nil {
+				t.Fatalf("FilterByFields returned error: %s", err)
+			}

+			if !applied {
+				t.Fatalf("FilterByFields reported that no filter has been applied")
+			}

+			if !reflect.DeepEqual(filtered, in.expect) {
+				t.Errorf("Filtered data does not match expected data:\ngot: %+v\nexp: %+v", filtered, in.expect)
+			}
+		})
+	}
+}
--- a/tablizer.1
+++ b/tablizer.1
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "TABLIZER 1"
-.TH TABLIZER 1 "2023-11-22" "1" "User Commands"
+.TH TABLIZER 1 "2024-05-07" "1" "User Commands"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -155,6 +155,7 @@ tablizer \- Manipulate tabular output of other programs
 \&      \-s, \-\-separator string   Custom field separator
 \&      \-k, \-\-sort\-by int        Sort by column (default: 1)
 \&      \-z, \-\-fuzzy              Use fuzzy seach [experimental]
+\&      \-F, \-\-filter field=reg   Filter given field with regex, can be used multiple times
 \&
 \&    Output Flags (mutually exclusive):
 \&      \-X, \-\-extended           Enable extended output
@@ -264,8 +265,8 @@ Sorts timestamps.
 .PP
 Finally the  \fB\-d\fR option  enables debugging  output which  is mostly
 useful for the developer.
-.SS "\s-1PATTERNS\s0"
-.IX Subsection "PATTERNS"
+.SS "\s-1PATTERNS AND FILTERING\s0"
+.IX Subsection "PATTERNS AND FILTERING"
 You can reduce the rows being  displayed by using a regular expression
 pattern.  The  regexp is  \s-1PCRE\s0 compatible, refer  to the  syntax cheat
 sheet here: <https://github.com/google/re2/wiki/Syntax>.  If you want
@@ -300,6 +301,21 @@ Example for a case insensitive search:
 You  can use  the experimental  fuzzy seach  feature by  providing the
 option \fB\-z\fR, in which case the  pattern is regarded as a fuzzy search
 term, not a regexp.
+.PP
+Sometimes you want to  filter by one or more columns.  You can do that
+using the \fB\-F\fR option. The option can be specified multiple times and
+has the following format:
+.PP
+.Vb 1
+\&    fieldname=regexp
+.Ve
+.PP
+Field names (== column headers) are case insensitive.
+.PP
+If you specify more than one filter, all filters have to match (\s-1AND\s0
+operation).
+.PP
+If the option \fB\-v\fR is specified, the filtering is inverted.
 .SS "\s-1COLUMNS\s0"
 .IX Subsection "COLUMNS"
 The  parameter  \fB\-c\fR  can  be  used  to  specify,  which  columns  to
@@ -487,7 +503,7 @@ or to submit a patch, please open an issue on github:
 .IX Header "LICENSE"
 This software is licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3.
 .PP
-Copyright (c) 2023 by Thomas von Dein
+Copyright (c) 2022\-2024 by Thomas von Dein
 .PP
 This software uses the following \s-1GO\s0 modules:
 .IP "repr (https://github.com/alecthomas/repr)" 4
--- a/tablizer.pod
+++ b/tablizer.pod
@@ -16,6 +16,7 @@ tablizer - Manipulate tabular output of other programs
      -s, --separator string   Custom field separator
      -k, --sort-by int        Sort by column (default: 1)
      -z, --fuzzy              Use fuzzy seach [experimental]
+      -F, --filter field=reg   Filter given field with regex, can be used multiple times

    Output Flags (mutually exclusive):
      -X, --extended           Enable extended output
@@ -128,7 +129,7 @@ Sorts timestamps.
 Finally the  B<-d> option  enables debugging  output which  is mostly
 useful for the developer.

-=head2 PATTERNS
+=head2 PATTERNS AND FILTERING

 You can reduce the rows being  displayed by using a regular expression
 pattern.  The  regexp is  PCRE compatible, refer  to the  syntax cheat
@@ -159,6 +160,20 @@ You  can use  the experimental  fuzzy seach  feature by  providing the
 option B<-z>, in which case the  pattern is regarded as a fuzzy search
 term, not a regexp.

+Sometimes you want to  filter by one or more columns.  You can do that
+using the B<-F> option. The option can be specified multiple times and
+has the following format:
+
+    fieldname=regexp
+
+Field names (== column headers) are case insensitive.
+
+If you specify more than one filter, all filters have to match (AND
+operation).
+
+If the option B<-v> is specified, the filtering is inverted.
+
+
 =head2 COLUMNS

 The  parameter  B<-c>  can  be  used  to  specify,  which  columns  to
@@ -336,7 +351,7 @@ L<https://github.com/TLINDEN/tablizer/issues>.

 This software is licensed under the GNU GENERAL PUBLIC LICENSE version 3.

-Copyright (c) 2023 by Thomas von Dein
+Copyright (c) 2022-2024 by Thomas von Dein

 This software uses the following GO modules: