add -F filter by column flag (closes #13)

2026-02-04 10:20:59 +01:00 · 2024-05-07 13:28:11 +02:00
parent 96f7881c16
commit ba2a2e8460
8 changed files with 351 additions and 14 deletions
--- a/cfg/config.go
+++ b/cfg/config.go
@@ -1,5 +1,5 @@
 /*
-Copyright © 2022 Thomas von Dein
+Copyright © 2022-2024 Thomas von Dein
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -22,6 +22,7 @@ import (
 	"log"
 	"os"
 	"regexp"
 	"strings"
 	"github.com/glycerine/zygomys/zygo"
 	"github.com/gookit/color"
@@ -29,7 +30,7 @@ import (
 )
 const DefaultSeparator string = `(\s\s+|\t)`
-const Version string = "v1.1.0"
+const Version string = "v1.2.0"
 var DefaultLoadPath string = os.Getenv("HOME") + "/.config/tablizer/lisp"
 var DefaultConfigfile string = os.Getenv("HOME") + "/.config/tablizer/config"
@@ -89,6 +90,10 @@ type Config struct {
 	Configfile string
 	Configuration Configuration
 	// used for field filtering
 	Rawfilters []string
 	Filters    map[string]*regexp.Regexp
 }
 // maps outputmode short flags to output mode, ie. -O => -o orgtbl
@@ -260,6 +265,26 @@ func (conf *Config) PrepareModeFlags(flag Modeflag) {
 	}
 }
 // PrepareFilters compiles the raw --filter/-F arguments stored in
 // conf.Rawfilters and stores the result in conf.Filters, replacing any
 // previous content of that map.
 //
 // Each raw filter must have the form "field=regexp". Only the first "="
 // separates the field name from the expression, so the regexp itself
 // may contain further "=" characters. Field names are lower-cased before
 // they are used as map keys; if the same field is given more than once
 // the last expression wins.
 //
 // An error is returned if a filter contains no "=" or if its regexp
 // does not compile.
 func (conf *Config) PrepareFilters() error {
 	conf.Filters = make(map[string]*regexp.Regexp, len(conf.Rawfilters))
 	for _, filter := range conf.Rawfilters {
 		// Cut at the first "=" only: "name=a=b" filters field "name"
 		// with the expression "a=b".
 		field, expr, found := strings.Cut(filter, "=")
 		if !found {
 			return errors.New("filter field and value must be separated by =")
 		}
 		reg, err := regexp.Compile(expr)
 		if err != nil {
 			return err
 		}
 		conf.Filters[strings.ToLower(field)] = reg
 	}
 	return nil
 }
 func (c *Config) CheckEnv() {
 	// check for environment vars, command line flags have precedence,
 	// NO_COLOR is being checked by the color module itself.
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -1,5 +1,5 @@
 /*
-Copyright © 2022 Thomas von Dein
+Copyright © 2022-2024 Thomas von Dein
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -100,6 +100,11 @@ func Execute() {
 			conf.CheckEnv()
 			conf.PrepareModeFlags(modeflag)
 			conf.PrepareSortFlags(sortmode)
 			if err = conf.PrepareFilters(); err != nil {
 				return err
 			}
 			conf.DetermineColormode()
 			conf.ApplyDefaults()
@@ -149,10 +154,15 @@ func Execute() {
 	rootCmd.MarkFlagsMutuallyExclusive("extended", "markdown", "orgtbl", "shell", "yaml", "csv")
 	// lisp options
-	rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath, "Load path for lisp plugins (expects *.zy files)")
+	rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath,
 		"Load path for lisp plugins (expects *.zy files)")
 	// config file
-	rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile, "config file (default: ~/.config/tablizer/config)")
+	rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile,
 		"config file (default: ~/.config/tablizer/config)")
 	// filters
 	rootCmd.PersistentFlags().StringArrayVarP(&conf.Rawfilters, "filter", "F", nil, "Filter by field (field=regexp)")
 	rootCmd.SetUsageTemplate(strings.TrimSpace(usage) + "\n")
--- a/cmd/tablizer.go
+++ b/cmd/tablizer.go
@@ -17,6 +17,7 @@ SYNOPSIS
          -s, --separator string   Custom field separator
          -k, --sort-by int        Sort by column (default: 1)
          -z, --fuzzy              Use fuzzy search [experimental]
          -F, --filter field=reg   Filter given field with regex, can be used multiple times
        Output Flags (mutually exclusive):
          -X, --extended           Enable extended output
@@ -117,7 +118,7 @@ DESCRIPTION
    Finally the -d option enables debugging output which is mostly useful
    for the developer.
-  PATTERNS
+  PATTERNS AND FILTERING
    You can reduce the rows being displayed by using a regular expression
    pattern. The regexp uses RE2 syntax, which is similar to but not fully
    compatible with PCRE; refer to the syntax cheat sheet here:
    <https://github.com/google/re2/wiki/Syntax>. If you want to read a
@@ -145,6 +146,19 @@ DESCRIPTION
    -z, in which case the pattern is regarded as a fuzzy search term, not a
    regexp.
    Sometimes you want to filter by one or more columns. You can do that
    using the -F option. The option can be specified multiple times and has
    the following format:
        fieldname=regexp
    Field names (== column headers) are case insensitive.
    If you specify more than one filter, all filters have to match (AND
    operation).
    If the option -v is specified, the filtering is inverted.
  COLUMNS
    The parameter -c can be used to specify, which columns to display. By
    default tablizer numerizes the header names and these numbers can be
@@ -298,7 +312,7 @@ LICENSE
    This software is licensed under the GNU GENERAL PUBLIC LICENSE version
    3.
-    Copyright (c) 2023 by Thomas von Dein
+    Copyright (c) 2022-2024 by Thomas von Dein
    This software uses the following GO modules:
@@ -340,6 +354,7 @@ Operational Flags:
  -s, --separator string   Custom field separator
  -k, --sort-by int        Sort by column (default: 1)
  -z, --fuzzy              Use fuzzy search [experimental]
  -F, --filter field=reg   Filter given field with regex, can be used multiple times
 Output Flags (mutually exclusive):
  -X, --extended           Enable extended output
--- a/lib/common.go
+++ b/lib/common.go
@@ -1,5 +1,5 @@
 /*
-Copyright © 2022 Thomas von Dein
+Copyright © 2022-2024 Thomas von Dein
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -24,3 +24,13 @@ type Tabdata struct {
 	headers        []string // [ "ID", "NAME", ...]
 	entries        [][]string
 }
 // CloneEmpty returns a Tabdata which carries over maxwidthHeader,
 // columns and headers from data, while all other fields (notably
 // entries) are left at their zero value. The headers slice is assigned,
 // not copied, so it is shared with data.
 func (data *Tabdata) CloneEmpty() Tabdata {
 	return Tabdata{
 		headers:        data.headers,
 		columns:        data.columns,
 		maxwidthHeader: data.maxwidthHeader,
 	}
 }
--- a/lib/filter.go
+++ b/lib/filter.go
@@ -0,0 +1,82 @@
 /*
 Copyright © 2022-2024 Thomas von Dein
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 package lib
 import (
 	"strings"
 	"github.com/lithammer/fuzzysearch/fuzzy"
 	"github.com/tlinden/tablizer/cfg"
 )
 /*
 * Report whether line  matches the configured pattern. The regexp
 * c.PatternR is used by default; if c.UseFuzzySearch is set, c.Pattern
 * is handed to fuzzy.MatchFold as a fuzzy search term instead.
 */
 func matchPattern(c cfg.Config, line string) bool {
 	if !c.UseFuzzySearch {
 		return c.PatternR.MatchString(line)
 	}
 	return fuzzy.MatchFold(c.Pattern, line)
 }
 /*
 * Filter parsed data by fields. A row is kept only if every filter
 * whose field name equals a (lower-cased) header matches the cell in
 * that column, i.e. multiple filters are ANDed. Filters naming a field
 * which is not among the headers are ignored. If conf.InvertMatch is
 * set, the selection is inverted and only the rows which would
 * otherwise be dropped are kept.
 *
 * Returns the result of data.CloneEmpty() with the kept rows appended
 * to its entries, and true. If no filters are configured, an empty
 * Tabdata and false are returned. The error is currently always nil.
 */
 func FilterByFields(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
 	if len(conf.Filters) == 0 {
 		// no filters, no checking
 		return Tabdata{}, false, nil
 	}
 	// The filter keys are lower case, so lower-case every header once
 	// up front instead of once per cell.
 	fields := make([]string, len(data.headers))
 	for idx, header := range data.headers {
 		fields[idx] = strings.ToLower(header)
 	}
 	newdata := data.CloneEmpty()
 	for _, row := range data.entries {
 		keep := true
 		for idx, field := range fields {
 			filter, ok := conf.Filters[field]
 			if !ok {
 				// do not filter by unspecified field
 				continue
 			}
 			// A row may hold fewer cells than there are headers; treat
 			// a missing cell as empty instead of indexing out of range.
 			cell := ""
 			if idx < len(row) {
 				cell = row[idx]
 			}
 			if !filter.MatchString(cell) {
 				// there IS a filter, but it doesn't match
 				keep = false
 				break
 			}
 		}
 		if keep == !conf.InvertMatch {
 			// also apply -v
 			newdata.entries = append(newdata.entries, row)
 		}
 	}
 	return newdata, true, nil
 }
 // Exists reports whether the map m contains the key v, regardless of
 // the value stored under it. A nil map contains no keys.
 func Exists[K comparable, V any](m map[K]V, v K) bool {
 	_, found := m[v]
 	return found
 }
--- a/lib/filter_test.go
+++ b/lib/filter_test.go
@@ -0,0 +1,164 @@
 /*
 Copyright © 2024 Thomas von Dein
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
 package lib
 import (
 	"fmt"
 	"reflect"
 	"testing"
 	"github.com/tlinden/tablizer/cfg"
 )
 // TestMatchPattern checks that matchPattern reports a match for a plain
 // regexp pattern and, with UseFuzzySearch enabled, for a fuzzy search
 // term.
 func TestMatchPattern(t *testing.T) {
 	var input = []struct {
 		name    string
 		fuzzy   bool
 		pattern string
 		line    string
 	}{
 		{
 			name:    "normal",
 			pattern: "haus",
 			line:    "hausparty",
 		},
 		{
 			name:    "fuzzy",
 			pattern: "hpt",
 			line:    "haus-party-termin",
 			fuzzy:   true,
 		},
 	}
 	for _, in := range input {
 		testname := fmt.Sprintf("match-pattern-%s", in.name)
 		t.Run(testname, func(t *testing.T) {
 			c := cfg.Config{UseFuzzySearch: in.fuzzy}
 			// Stop this subtest right away if the pattern could not be
 			// prepared; calling matchPattern on such a config would only
 			// add a misleading follow-up failure.
 			if err := c.PreparePattern(in.pattern); err != nil {
 				t.Fatalf("PreparePattern returned error: %s", err)
 			}
 			if !matchPattern(c, in.line) {
 				t.Errorf("matchPattern() did not match\nExp: true\nGot: false\n")
 			}
 		})
 	}
 }
 // TestFilterByFields runs FilterByFields against a small fixed table
 // with one and with two filters, each with and without InvertMatch, and
 // compares the returned Tabdata with the expected one.
 func TestFilterByFields(t *testing.T) {
 	data := Tabdata{
 		headers: []string{
 			"ONE", "TWO", "THREE",
 		},
 		entries: [][]string{
 			{"asd", "igig", "cxxxncnc"},
 			{"19191", "EDD 1", "x"},
 			{"8d8", "AN 1", "y"},
 		},
 	}
 	var input = []struct {
 		name   string
 		filter []string
 		expect Tabdata
 		invert bool
 	}{
 		{
 			name:   "one-field",
 			filter: []string{"one=19"},
 			expect: Tabdata{
 				headers: []string{
 					"ONE", "TWO", "THREE",
 				},
 				entries: [][]string{
 					{"19191", "EDD 1", "x"},
 				},
 			},
 		},
 		{
 			name:   "one-field-inverted",
 			filter: []string{"one=19"},
 			invert: true,
 			expect: Tabdata{
 				headers: []string{
 					"ONE", "TWO", "THREE",
 				},
 				entries: [][]string{
 					{"asd", "igig", "cxxxncnc"},
 					{"8d8", "AN 1", "y"},
 				},
 			},
 		},
 		{
 			name:   "many-fields",
 			filter: []string{"one=19", "two=DD"},
 			expect: Tabdata{
 				headers: []string{
 					"ONE", "TWO", "THREE",
 				},
 				entries: [][]string{
 					{"19191", "EDD 1", "x"},
 				},
 			},
 		},
 		{
 			name:   "many-fields-inverted",
 			filter: []string{"one=19", "two=DD"},
 			invert: true,
 			expect: Tabdata{
 				headers: []string{
 					"ONE", "TWO", "THREE",
 				},
 				entries: [][]string{
 					{"asd", "igig", "cxxxncnc"},
 					{"8d8", "AN 1", "y"},
 				},
 			},
 		},
 	}
 	for _, in := range input {
 		testname := fmt.Sprintf("filter-by-fields-%s", in.name)
 		t.Run(testname, func(t *testing.T) {
 			c := cfg.Config{Rawfilters: in.filter, InvertMatch: in.invert}
 			// Without compiled filters the comparison below is
 			// meaningless, so abort the subtest on error.
 			if err := c.PrepareFilters(); err != nil {
 				t.Fatalf("PrepareFilters returned error: %s", err)
 			}
 			// Use a distinct name for the result so the shared input
 			// fixture "data" is not shadowed inside the closure.
 			got, filtered, err := FilterByFields(c, data)
 			if err != nil {
 				t.Fatalf("FilterByFields returned error: %s", err)
 			}
 			if !filtered {
 				t.Error("FilterByFields reported that no filtering took place")
 			}
 			if !reflect.DeepEqual(got, in.expect) {
 				t.Errorf("Filtered data does not match expected data:\ngot: %+v\nexp: %+v", got, in.expect)
 			}
 		})
 	}
 }
--- a/tablizer.1
+++ b/tablizer.1
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "TABLIZER 1"
-.TH TABLIZER 1 "2023-11-22" "1" "User Commands"
+.TH TABLIZER 1 "2024-05-07" "1" "User Commands"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -155,6 +155,7 @@ tablizer \- Manipulate tabular output of other programs
 \&      \-s, \-\-separator string   Custom field separator
 \&      \-k, \-\-sort\-by int        Sort by column (default: 1)
 \&      \-z, \-\-fuzzy              Use fuzzy search [experimental]
 \&      \-F, \-\-filter field=reg   Filter given field with regex, can be used multiple times
 \&
 \&    Output Flags (mutually exclusive):
 \&      \-X, \-\-extended           Enable extended output
@@ -264,8 +265,8 @@ Sorts timestamps.
 .PP
 Finally the  \fB\-d\fR option  enables debugging  output which  is mostly
 useful for the developer.
-.SS "\s-1PATTERNS\s0"
+.SS "\s-1PATTERNS AND FILTERING\s0"
-.IX Subsection "PATTERNS"
+.IX Subsection "PATTERNS AND FILTERING"
 You can reduce the rows being  displayed by using a regular expression
 pattern.  The regexp uses \s-1RE2\s0 syntax, which is similar to but not
 fully compatible with \s-1PCRE\s0; refer to the syntax cheat
 sheet here: <https://github.com/google/re2/wiki/Syntax>.  If you want
@@ -300,6 +301,21 @@ Example for a case insensitive search:
 You  can use  the experimental  fuzzy search  feature by  providing the
 option \fB\-z\fR, in which case the  pattern is regarded as a fuzzy search
 term, not a regexp.
 .PP
 Sometimes you want to  filter by one or more columns.  You can do that
 using the \fB\-F\fR option. The option can be specified multiple times and
 has the following format:
 .PP
 .Vb 1
 \&    fieldname=regexp
 .Ve
 .PP
 Field names (== column headers) are case insensitive.
 .PP
 If you specify more than one filter, all filters have to match (\s-1AND\s0
 operation).
 .PP
 If the option \fB\-v\fR is specified, the filtering is inverted.
 .SS "\s-1COLUMNS\s0"
 .IX Subsection "COLUMNS"
 The  parameter  \fB\-c\fR  can  be  used  to  specify,  which  columns  to
@@ -487,7 +503,7 @@ or to submit a patch, please open an issue on github:
 .IX Header "LICENSE"
 This software is licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3.
 .PP
-Copyright (c) 2023 by Thomas von Dein
+Copyright (c) 2022\-2024 by Thomas von Dein
 .PP
 This software uses the following \s-1GO\s0 modules:
 .IP "repr (https://github.com/alecthomas/repr)" 4
--- a/tablizer.pod
+++ b/tablizer.pod
@@ -16,6 +16,7 @@ tablizer - Manipulate tabular output of other programs
      -s, --separator string   Custom field separator
      -k, --sort-by int        Sort by column (default: 1)
      -z, --fuzzy              Use fuzzy search [experimental]
      -F, --filter field=reg   Filter given field with regex, can be used multiple times
    Output Flags (mutually exclusive):
      -X, --extended           Enable extended output
@@ -128,7 +129,7 @@ Sorts timestamps.
 Finally the  B<-d> option  enables debugging  output which  is mostly
 useful for the developer.
-=head2 PATTERNS
+=head2 PATTERNS AND FILTERING
 You can reduce the rows being  displayed by using a regular expression
 pattern.  The regexp uses RE2 syntax, which is similar to but not
 fully compatible with PCRE; refer to the syntax cheat
@@ -159,6 +160,20 @@ You  can use  the experimental  fuzzy seach  feature by  providing the
 option B<-z>, in which case the  pattern is regarded as a fuzzy search
 term, not a regexp.
 Sometimes you want to  filter by one or more columns.  You can do that
 using the B<-F> option. The option can be specified multiple times and
 has the following format:
    fieldname=regexp
 Field names (== column headers) are case insensitive.
 If you specify more than one filter, all filters have to match (AND
 operation).
 If the option B<-v> is specified, the filtering is inverted.
 =head2 COLUMNS
 The  parameter  B<-c>  can  be  used  to  specify,  which  columns  to
@@ -336,7 +351,7 @@ L<https://github.com/TLINDEN/tablizer/issues>.
 This software is licensed under the GNU GENERAL PUBLIC LICENSE version 3.
-Copyright (c) 2023 by Thomas von Dein
+Copyright (c) 2022-2024 by Thomas von Dein
 This software uses the following GO modules: