fix #85 : add --auto-headers and --custom-headers (#86 )

fix short usage formatting
add some handy builtin character classes as split separators (#84 )
2025-12-18 21:11:03 +01:00 · 2025-10-10 13:08:16 +02:00 · 2025-10-09 23:16:07 +02:00 · 2025-10-09 23:03:57 +02:00
9 changed files with 382 additions and 19 deletions
--- a/cfg/config.go
+++ b/cfg/config.go
@@ -27,13 +27,26 @@ import (
 	"github.com/hashicorp/hcl/v2/hclsimple"
 )
-const DefaultSeparator string = `(\s\s+|\t)`
+const (
-const Version string = "v1.5.8"
+	Version  = "v1.5.10"
-const MAXPARTS = 2
+	MAXPARTS = 2
 )
-var DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config"
+var (
 	DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config"
 	VERSION           string // maintained by -x
-var VERSION string // maintained by -x
+	SeparatorTemplates = map[string]string{
 		":tab:":      `\s*\t\s*`,                               // tab but eats spaces around
 		":spaces:":   `\s{2,}`,                                 // 2 or more spaces
 		":pipe:":     `\s*\|\s*`,                               // one pipe eating spaces around
 		":default:":  `(\s\s+|\t)`,                             // 2 or more spaces or tab
 		":nonword:":  `\W`,                                     // any single non-word character
 		":nondigit:": `\D`,                                     // any single non-digit character
 		":special:":  `[\*\+\-_\(\)\[\]\{\}?\\/<>=&$§"':,\^]+`, // match any special char
 		":nonprint:": `[[:^print:]]+`,                          // non printables
 	}
 )
 // public config, set via config file or using defaults
 type Settings struct {
@@ -80,6 +93,8 @@ type Config struct {
 	UseHighlight   bool
 	Interactive    bool
 	InputJSON      bool
 	AutoHeaders    bool
 	CustomHeaders  []string
 	SortMode        string
 	SortDescending  bool
@@ -356,6 +371,13 @@ func (conf *Config) ApplyDefaults() {
 	if conf.OutputMode == Yaml || conf.OutputMode == CSV {
 		conf.Numbering = false
 	}
 	if conf.Separator[0] == ':' && conf.Separator[len(conf.Separator)-1] == ':' {
 		separator, ok := SeparatorTemplates[conf.Separator]
 		if ok {
 			conf.Separator = separator
 		}
 	}
 }
 func (conf *Config) PreparePattern(patterns []*Pattern) error {
@@ -393,6 +415,12 @@ func (conf *Config) PreparePattern(patterns []*Pattern) error {
 	return nil
 }
 // PrepareCustomHeaders splits the comma separated custom string and
 // stores the resulting names in conf.CustomHeaders. An empty string
 // leaves conf.CustomHeaders untouched.
 func (conf *Config) PrepareCustomHeaders(custom string) {
 	if custom == "" {
 		return
 	}
 	conf.CustomHeaders = strings.Split(custom, ",")
 }
 // Parse config file.  Ignore if the file doesn't exist  but return an
 // error if it exists but fails to read or parse
 func (conf *Config) ParseConfigfile() error {
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -59,6 +59,7 @@ func Execute() {
 		ShowCompletion string
 		modeflag       cfg.Modeflag
 		sortmode       cfg.Sortmode
 		headers        string
 	)
 	var rootCmd = &cobra.Command{
@@ -91,6 +92,7 @@ func Execute() {
 			conf.CheckEnv()
 			conf.PrepareModeFlags(modeflag)
 			conf.PrepareSortFlags(sortmode)
 			conf.PrepareCustomHeaders(headers)
 			wrapE(conf.PrepareFilters())
@@ -123,7 +125,7 @@ func Execute() {
 		"Use alternating background colors")
 	rootCmd.PersistentFlags().StringVarP(&ShowCompletion, "completion", "", "",
 		"Display completion code")
-	rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.DefaultSeparator,
+	rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.SeparatorTemplates[":default:"],
 		"Custom field separator")
 	rootCmd.PersistentFlags().StringVarP(&conf.Columns, "columns", "c", "",
 		"Only show the speficied columns (separated by ,)")
@@ -137,6 +139,10 @@ func Execute() {
 		"Output field separator (' ' for ascii table, ',' for CSV)")
 	rootCmd.PersistentFlags().BoolVarP(&conf.InputJSON, "json", "j", false,
 		"JSON input mode")
 	rootCmd.PersistentFlags().BoolVarP(&conf.AutoHeaders, "auto-headers", "", false,
 		"Generate headers automatically")
 	rootCmd.PersistentFlags().StringVarP(&headers, "custom-headers", "", "",
 		"Custom headers")
 	// sort options
 	rootCmd.PersistentFlags().StringVarP(&conf.SortByColumn, "sort-by", "k", "",
--- a/cmd/shortusage.go
+++ b/cmd/shortusage.go
@@ -7,7 +7,7 @@ const shortusage = `tablizer [regex,...] [-r file] [flags]
 -T col,...   transpose specified columns           -n  numberize columns
 -R /from/to/ apply replacement to columns in -T    -N  do not use colors
 -y col,...   yank columns to clipboard             -H  do not show headers
--ofs char   output field separator	               -s  specify field separator
+--ofs char   output field separator                -s  specify field separator
 -r file      read input from file                  -z  use fuzzy search
 -f file      read config from file                 -I  interactive filter mode
                                                   -d  debug
--- a/cmd/tablizer.go
+++ b/cmd/tablizer.go
@@ -14,7 +14,7 @@ SYNOPSIS
          -n, --numbering                    Enable header numbering
          -N, --no-color                     Disable pattern highlighting
          -H, --no-headers                   Disable headers display
-          -s, --separator <string>           Custom field separator
+          -s, --separator <string>           Custom field separator (maybe char, string or :class:)
          -k, --sort-by <int|name>           Sort by column (default: 1)
          -z, --fuzzy                        Use fuzzy search [experimental]
          -F, --filter <field[!]=reg>        Filter given field with regex, can be used multiple times
@@ -22,6 +22,8 @@ SYNOPSIS
          -R, --regex-transposer </from/to/> Apply /search/replace/ regexp to fields given in -T
          -j, --json                         Read JSON input (must be array of hashes)
          -I, --interactive                  Interactively filter and select rows
              --auto-headers                 Generate headers if there are none present in input
              --custom-headers a,b,...       Use custom headers, separated by comma
        Output Flags (mutually exclusive):
          -X, --extended                     Enable extended output
@@ -141,6 +143,57 @@ DESCRIPTION
    Finally the -d option enables debugging output which is mostly useful
    for the developer.
  SEPARATOR
    The option -s can be a single character, in which case the CSV parser
    will be invoked. You can also specify a string as separator. The string
    will be interpreted as literal string unless it is a valid go regular
    expression. For example:
        -s '\t{2,}\'
    is being used as a regexp and will match two or more consecutive tabs.
        -s 'foo'
    on the other hand is no regular expression and will be used literally.
    To make live easier, there are a couple of predefined regular
    expressions, which you can specify as classes:
        * :tab:
        Matches a tab and eats spaces around it.
        * :spaces:
        Matches 2 or more spaces.
        * :pipe:
        Matches a pipe character and eats spaces around it.
        * :default:
        Matches 2 or more spaces or tab. This is the default separator if
        none is specified.
        * :nonword:
        Matches a non-word character.
        * :nondigit:
        Matches a non-digit character.
        * :special:
        Matches one or more special chars like brackets, dollar sign,
        slashes etc.
        * :nonprint:
        Matches one or more non-printable characters.
  PATTERNS AND FILTERING
    You can reduce the rows being displayed by using one or more regular
    expression patterns. The regexp language being used is the one of
@@ -458,7 +511,7 @@ Operational Flags:
  -n, --numbering                    Enable header numbering
  -N, --no-color                     Disable pattern highlighting
  -H, --no-headers                   Disable headers display
-  -s, --separator <string>           Custom field separator
+  -s, --separator <string>           Custom field separator (maybe char, string or :class:)
  -k, --sort-by <int|name>           Sort by column (default: 1)
  -z, --fuzzy                        Use fuzzy search [experimental]
  -F, --filter <field[!]=reg>        Filter given field with regex, can be used multiple times
@@ -466,6 +519,8 @@ Operational Flags:
  -R, --regex-transposer </from/to/> Apply /search/replace/ regexp to fields given in -T
  -j, --json                         Read JSON input (must be array of hashes)
  -I, --interactive                  Interactively filter and select rows
      --auto-headers                 Generate headers if there are none present in input
      --custom-headers a,b,...       Use custom headers, separated by comma
 Output Flags (mutually exclusive):
  -X, --extended                     Enable extended output
--- a/lib/parser.go
+++ b/lib/parser.go
@@ -66,6 +66,43 @@ func Parse(conf cfg.Config, input io.Reader) (Tabdata, error) {
 	return data, err
 }
 /*
 * SetHeaders returns the header names to use for the parsed table.
 *
 * The headers argument is either a list of usable headers or just the
 * first row of the input; its length decides how many header names
 * are returned.
 *
 *   - neither conf.AutoHeaders nor conf.CustomHeaders set: headers is
 *     returned unchanged
 *   - conf.AutoHeaders set: every column is named by its 1-based
 *     position, conf.CustomHeaders is not consulted
 *   - conf.CustomHeaders has exactly as many names as headers: it is
 *     returned as is
 *   - otherwise: custom names are used as far as they reach, remaining
 *     columns get their 1-based position, surplus custom names are
 *     dropped
 */
 func SetHeaders(conf cfg.Config, headers []string) []string {
 	// custom stays empty in auto mode so that every column gets numbered
 	var custom []string
 	switch {
 	case conf.AutoHeaders:
 		// auto mode is checked first and therefore wins over custom headers
 	case len(conf.CustomHeaders) == 0:
 		return headers
 	case len(conf.CustomHeaders) == len(headers):
 		return conf.CustomHeaders
 	default:
 		custom = conf.CustomHeaders
 	}
 	names := make([]string, 0, len(headers))
 	for pos := range headers {
 		if pos < len(custom) {
 			names = append(names, custom[pos])
 			continue
 		}
 		// no custom name for this column, fall back to its position
 		names = append(names, fmt.Sprintf("%d", pos+1))
 	}
 	return names
 }
 /*
 Parse CSV input.
 */
@@ -87,7 +124,7 @@ func parseCSV(conf cfg.Config, input io.Reader) (Tabdata, error) {
 	}
 	if len(records) >= 1 {
-		data.headers = records[0]
+		data.headers = SetHeaders(conf, records[0])
 		data.columns = len(records)
 		for _, head := range data.headers {
@@ -98,9 +135,14 @@ func parseCSV(conf cfg.Config, input io.Reader) (Tabdata, error) {
 			}
 		}
-		if len(records) > 1 {
+		if len(records) >= 1 {
-			data.entries = records[1:]
+			if conf.AutoHeaders || len(conf.CustomHeaders) > 0 {
 				data.entries = records
 			} else {
 				data.entries = records[1:]
 			}
 		}
 	}
 	return data, nil
@@ -128,7 +170,9 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
 			data.columns = len(parts)
 			// process all header fields
-			for _, part := range parts {
+			firstrow := make([]string, len(parts))
 			for idx, part := range parts {
 				// register widest header field
 				headerlen := len(part)
 				if headerlen > data.maxwidthHeader {
@@ -136,11 +180,22 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
 				}
 				// register fields data
-				data.headers = append(data.headers, strings.TrimSpace(part))
+				firstrow[idx] = strings.TrimSpace(part)
 				// done
 				hadFirst = true
 			}
 			data.headers = SetHeaders(conf, firstrow)
 			if conf.AutoHeaders || len(conf.CustomHeaders) > 0 {
 				// we do not use generated headers, consider as row
 				if matchPattern(conf, line) == conf.InvertMatch {
 					continue
 				}
 				data.entries = append(data.entries, firstrow)
 			}
 		} else {
 			// data processing
 			if matchPattern(conf, line) == conf.InvertMatch {
--- a/lib/parser_test.go
+++ b/lib/parser_test.go
@@ -34,7 +34,7 @@ var input = []struct {
 }{
 	{
 		name:      "tabular-data",
-		separator: cfg.DefaultSeparator,
+		separator: cfg.SeparatorTemplates[":default:"],
 		text: `
 ONE    TWO    THREE  
 asd    igig   cxxxncnc  
@@ -148,7 +148,7 @@ asd    igig
 19191  EDD 1  X`
 	readFd := strings.NewReader(strings.TrimSpace(table))
-	conf := cfg.Config{Separator: cfg.DefaultSeparator}
+	conf := cfg.Config{Separator: cfg.SeparatorTemplates[":default:"]}
 	gotdata, err := wrapValidateParser(conf, readFd)
 	assert.NoError(t, err)
@@ -314,6 +314,108 @@ func TestParserJSONInput(t *testing.T) {
 	}
 }
 // TestParserSeparators joins a fixed list of fields with a sample
 // separator string, feeds the result to the parser as header line plus
 // one identical data line and expects exactly one entry holding the
 // original fields. The separator is given as a :class: name, which
 // conf.ApplyDefaults() is called to resolve before parsing.
 func TestParserSeparators(t *testing.T) {
 	fields := []string{"alpha", "beta", "delta"}
 	cases := []struct {
 		input string
 		sep   string
 	}{
 		{input: `🎲`, sep: ":nonprint:"},
 		{input: `|`, sep: ":pipe:"},
 		{input: `   `, sep: ":spaces:"},
 		{input: "   \t  ", sep: ":tab:"},
 		{input: `-`, sep: ":nonword:"},
 		{input: `//$`, sep: ":special:"},
 	}
 	for _, tc := range cases {
 		t.Run(fmt.Sprintf("parse-%s", tc.sep), func(t *testing.T) {
 			// header line and data line are the very same joined string
 			line := strings.Join(fields, tc.input)
 			reader := strings.NewReader(strings.TrimSpace(line + "\n" + line))
 			conf := cfg.Config{Separator: tc.sep}
 			conf.ApplyDefaults()
 			parsed, err := wrapValidateParser(conf, reader)
 			assert.NoError(t, err)
 			assert.EqualValues(t, [][]string{fields}, parsed.entries)
 		})
 	}
 }
 func TestParserSetHeaders(t *testing.T) {
 	row := []string{"c", "b", "c", "d", "e"}
 	tests := []struct {
 		name   string
 		custom []string
 		expect []string
 		auto   bool
 	}{
 		{
 			name:   "default",
 			expect: row,
 		},
 		{
 			name:   "auto",
 			expect: strings.Split("1 2 3 4 5", " "),
 			auto:   true,
 		},
 		{
 			name:   "custom-complete",
 			custom: strings.Split("A B C D E", " "),
 			expect: strings.Split("A B C D E", " "),
 		},
 		{
 			name:   "custom-too-short",
 			custom: strings.Split("A B", " "),
 			expect: strings.Split("A B 3 4 5", " "),
 		},
 		{
 			name:   "custom-too-long",
 			custom: strings.Split("A B C D E F G", " "),
 			expect: strings.Split("A B C D E", " "),
 		},
 	}
 	for _, testdata := range tests {
 		testname := fmt.Sprintf("parse-%s", testdata.name)
 		t.Run(testname, func(t *testing.T) {
 			conf := cfg.Config{
 				AutoHeaders:   testdata.auto,
 				CustomHeaders: testdata.custom,
 			}
 			headers := SetHeaders(conf, row)
 			assert.NotNil(t, headers)
 			assert.EqualValues(t, testdata.expect, headers)
 		})
 	}
 }
 func wrapValidateParser(conf cfg.Config, input io.Reader) (Tabdata, error) {
 	data, err := Parse(conf, input)
--- a/lib/printer_test.go
+++ b/lib/printer_test.go
@@ -292,6 +292,7 @@ func TestPrinter(t *testing.T) {
 				conf.UseSortByColumn = []int{testdata.column}
 			}
 			conf.Separator = cfg.SeparatorTemplates[":default:"]
 			conf.ApplyDefaults()
 			// the test checks the len!
--- a/tablizer.1
+++ b/tablizer.1
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "TABLIZER 1"
-.TH TABLIZER 1 "2025-10-01" "1" "User Commands"
+.TH TABLIZER 1 "2025-10-10" "1" "User Commands"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -152,7 +152,7 @@ tablizer \- Manipulate tabular output of other programs
 \&      \-n, \-\-numbering                    Enable header numbering
 \&      \-N, \-\-no\-color                     Disable pattern highlighting
 \&      \-H, \-\-no\-headers                   Disable headers display
-\&      \-s, \-\-separator <string>           Custom field separator
+\&      \-s, \-\-separator <string>           Custom field separator (may be a char, string or :class:)
 \&      \-k, \-\-sort\-by <int|name>           Sort by column (default: 1)
 \&      \-z, \-\-fuzzy                        Use fuzzy search [experimental]
 \&      \-F, \-\-filter <field[!]=reg>        Filter given field with regex, can be used multiple times
@@ -160,6 +160,8 @@ tablizer \- Manipulate tabular output of other programs
 \&      \-R, \-\-regex\-transposer </from/to/> Apply /search/replace/ regexp to fields given in \-T
 \&      \-j, \-\-json                         Read JSON input (must be array of hashes)
 \&      \-I, \-\-interactive                  Interactively filter and select rows
 \&          \-\-auto\-headers                 Generate headers if there are none present in input
 \&          \-\-custom\-headers a,b,...       Use custom headers, separated by comma
 \&
 \&    Output Flags (mutually exclusive):
 \&      \-X, \-\-extended                     Enable extended output
@@ -293,6 +295,62 @@ Sorts timestamps.
 .PP
 Finally the  \fB\-d\fR option  enables debugging  output which  is mostly
 useful for the developer.
 .SS "\s-1SEPARATOR\s0"
 .IX Subsection "SEPARATOR"
 The option \fB\-s\fR can be a single character, in which case the \s-1CSV\s0
 parser will be invoked. You can also specify a string as
 separator. The string will be interpreted as literal string unless it
 is a valid go regular expression. For example:
 .PP
 .Vb 1
 \&    \-s \*(Aq\et{2,}\*(Aq
 .Ve
 .PP
 is being used as a regexp and will match two or more consecutive tabs.
 .PP
 .Vb 1
 \&    \-s \*(Aqfoo\*(Aq
 .Ve
 .PP
 on the other hand is no regular expression and will be used literally.
 .PP
 To make life easier, there are a couple of predefined regular
 expressions, which you can specify as classes:
 .Sp
 .RS 4
 * 		:tab:
 .Sp
 Matches a tab and eats spaces around it.
 .Sp
 *		:spaces:
 .Sp
 Matches 2 or more spaces.
 .Sp
 *		:pipe:
 .Sp
 Matches a pipe character and eats spaces around it.
 .Sp
 *		:default:
 .Sp
 Matches 2 or more spaces or tab. This is the default separator if none
 is specified.
 .Sp
 *		:nonword:
 .Sp
 Matches a non-word character.
 .Sp
 *		:nondigit:
 .Sp
 Matches a non-digit character.
 .Sp
 *		:special:
 .Sp
 Matches one or more special chars like brackets, dollar sign, slashes etc.
 .Sp
 *		:nonprint:
 .Sp
 Matches one or more non-printable characters.
 .RE
 .SS "\s-1PATTERNS AND FILTERING\s0"
 .IX Subsection "PATTERNS AND FILTERING"
 You can reduce  the rows being displayed by using  one or more regular
--- a/tablizer.pod
+++ b/tablizer.pod
@@ -13,7 +13,7 @@ tablizer - Manipulate tabular output of other programs
      -n, --numbering                    Enable header numbering
      -N, --no-color                     Disable pattern highlighting
      -H, --no-headers                   Disable headers display
-      -s, --separator <string>           Custom field separator
+      -s, --separator <string>           Custom field separator (may be a char, string or :class:)
      -k, --sort-by <int|name>           Sort by column (default: 1)
      -z, --fuzzy                        Use fuzzy search [experimental]
      -F, --filter <field[!]=reg>        Filter given field with regex, can be used multiple times
@@ -21,6 +21,8 @@ tablizer - Manipulate tabular output of other programs
      -R, --regex-transposer </from/to/> Apply /search/replace/ regexp to fields given in -T
      -j, --json                         Read JSON input (must be array of hashes)
      -I, --interactive                  Interactively filter and select rows
          --auto-headers                 Generate headers if there are none present in input
          --custom-headers a,b,...       Use custom headers, separated by comma
    Output Flags (mutually exclusive):
      -X, --extended                     Enable extended output
@@ -153,6 +155,62 @@ Sorts timestamps.
 Finally the  B<-d> option  enables debugging  output which  is mostly
 useful for the developer.
 =head2 SEPARATOR
 The option B<-s> can be a single character, in which case the CSV
 parser will be invoked. You can also specify a string as
 separator. The string will be interpreted as literal string unless it
 is a valid go regular expression. For example:
    -s '\t{2,}'
 is being used as a regexp and will match two or more consecutive tabs.
    -s 'foo'
 on the other hand is no regular expression and will be used literally.
 To make life easier, there are a couple of predefined regular
 expressions, which you can specify as classes:
 =over
 * 		:tab:      
 Matches a tab and eats spaces around it.
 *		:spaces:
 Matches 2 or more spaces.
 *		:pipe:
 Matches a pipe character and eats spaces around it.
 *		:default:
 Matches 2 or more spaces or tab. This is the default separator if none
 is specified.
 *		:nonword:
 Matches a non-word character.
 *		:nondigit:
 Matches a non-digit character.
 *		:special:
 Matches one or more special chars like brackets, dollar sign, slashes etc.
 *		:nonprint:
 Matches one or more non-printable characters.
 =back
 =head2 PATTERNS AND FILTERING
 You can reduce  the rows being displayed by using  one or more regular
Author	SHA1	Message	Date
T.v.Dein	8bdb3db105	fix #85 : add --auto-headers and --custom-headers (#86 )	2025-10-10 13:08:16 +02:00
Thomas von Dein	4ce6c30f54	fix short usage formatting	2025-10-09 23:16:07 +02:00
T.v.Dein	ec0b210167	add some handy builtin character classes as split separators (#84 )	2025-10-09 23:03:57 +02:00