fix #85 : add --auto-headers and --custom-headers (#86 )

fix short usage formatting
add some handy builtin character classes as split separators (#84 )
2025-12-18 13:01:11 +01:00 · 2025-10-10 13:08:16 +02:00 · 2025-10-09 23:16:07 +02:00 · 2025-10-09 23:03:57 +02:00
9 changed files with 382 additions and 19 deletions
--- a/cfg/config.go
+++ b/cfg/config.go
@@ -27,13 +27,26 @@ import (
 	"github.com/hashicorp/hcl/v2/hclsimple"
 )

-const DefaultSeparator string = `(\s\s+|\t)`
-const Version string = "v1.5.8"
-const MAXPARTS = 2
+const (
+	Version  = "v1.5.10"
+	MAXPARTS = 2
+)

-var DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config"
+var (
+	DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config"
+	VERSION           string // maintained by -x

-var VERSION string // maintained by -x
+	SeparatorTemplates = map[string]string{
+		":tab:":      `\s*\t\s*`,                               // tab but eats spaces around
+		":spaces:":   `\s{2,}`,                                 // 2 or more spaces
+		":pipe:":     `\s*\|\s*`,                               // one pipe eating spaces around
+		":default:":  `(\s\s+|\t)`,                             // 2 or more spaces or tab
+		":nonword:":  `\W`,                                     // any non-word character
+		":nondigit:": `\D`,                                     // any non-digit character
+		":special:":  `[\*\+\-_\(\)\[\]\{\}?\\/<>=&$§"':,\^]+`, // match any special char
+		":nonprint:": `[[:^print:]]+`,                          // non printables
+	}
+)

 // public config, set via config file or using defaults
 type Settings struct {
@@ -80,6 +93,8 @@ type Config struct {
 	UseHighlight   bool
 	Interactive    bool
 	InputJSON      bool
+	AutoHeaders    bool
+	CustomHeaders  []string

 	SortMode        string
 	SortDescending  bool
@@ -356,6 +371,13 @@ func (conf *Config) ApplyDefaults() {
 	if conf.OutputMode == Yaml || conf.OutputMode == CSV {
 		conf.Numbering = false
 	}
+
+	if conf.Separator[0] == ':' && conf.Separator[len(conf.Separator)-1] == ':' {
+		separator, ok := SeparatorTemplates[conf.Separator]
+		if ok {
+			conf.Separator = separator
+		}
+	}
 }

 func (conf *Config) PreparePattern(patterns []*Pattern) error {
@@ -393,6 +415,12 @@ func (conf *Config) PreparePattern(patterns []*Pattern) error {
 	return nil
 }

+func (conf *Config) PrepareCustomHeaders(custom string) {
+	if len(custom) > 0 {
+		conf.CustomHeaders = strings.Split(custom, ",")
+	}
+}
+
 // Parse config file.  Ignore if the file doesn't exist  but return an
 // error if it exists but fails to read or parse
 func (conf *Config) ParseConfigfile() error {
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -59,6 +59,7 @@ func Execute() {
 		ShowCompletion string
 		modeflag       cfg.Modeflag
 		sortmode       cfg.Sortmode
+		headers        string
 	)

 	var rootCmd = &cobra.Command{
@@ -91,6 +92,7 @@ func Execute() {
 			conf.CheckEnv()
 			conf.PrepareModeFlags(modeflag)
 			conf.PrepareSortFlags(sortmode)
+			conf.PrepareCustomHeaders(headers)

 			wrapE(conf.PrepareFilters())

@@ -123,7 +125,7 @@ func Execute() {
 		"Use alternating background colors")
 	rootCmd.PersistentFlags().StringVarP(&ShowCompletion, "completion", "", "",
 		"Display completion code")
-	rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.DefaultSeparator,
+	rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.SeparatorTemplates[":default:"],
 		"Custom field separator")
 	rootCmd.PersistentFlags().StringVarP(&conf.Columns, "columns", "c", "",
 		"Only show the speficied columns (separated by ,)")
@@ -137,6 +139,10 @@ func Execute() {
 		"Output field separator (' ' for ascii table, ',' for CSV)")
 	rootCmd.PersistentFlags().BoolVarP(&conf.InputJSON, "json", "j", false,
 		"JSON input mode")
+	rootCmd.PersistentFlags().BoolVarP(&conf.AutoHeaders, "auto-headers", "", false,
+		"Generate headers automatically")
+	rootCmd.PersistentFlags().StringVarP(&headers, "custom-headers", "", "",
+		"Custom headers")

 	// sort options
 	rootCmd.PersistentFlags().StringVarP(&conf.SortByColumn, "sort-by", "k", "",
--- a/cmd/shortusage.go
+++ b/cmd/shortusage.go
@@ -7,7 +7,7 @@ const shortusage = `tablizer [regex,...] [-r file] [flags]
 -T col,...   transpose specified columns           -n  numberize columns
 -R /from/to/ apply replacement to columns in -T    -N  do not use colors
 -y col,...   yank columns to clipboard             -H  do not show headers
--ofs char   output field separator	               -s  specify field separator
+--ofs char   output field separator                -s  specify field separator
 -r file      read input from file                  -z  use fuzzy search
 -f file      read config from file                 -I  interactive filter mode
                                                   -d  debug
--- a/cmd/tablizer.go
+++ b/cmd/tablizer.go
@@ -14,7 +14,7 @@ SYNOPSIS
          -n, --numbering                    Enable header numbering
          -N, --no-color                     Disable pattern highlighting
          -H, --no-headers                   Disable headers display
-          -s, --separator <string>           Custom field separator
+          -s, --separator <string>           Custom field separator (may be char, string or :class:)
          -k, --sort-by <int|name>           Sort by column (default: 1)
          -z, --fuzzy                        Use fuzzy search [experimental]
          -F, --filter <field[!]=reg>        Filter given field with regex, can be used multiple times
@@ -22,6 +22,8 @@ SYNOPSIS
          -R, --regex-transposer </from/to/> Apply /search/replace/ regexp to fields given in -T
          -j, --json                         Read JSON input (must be array of hashes)
          -I, --interactive                  Interactively filter and select rows
+              --auto-headers                 Generate headers if there are none present in input
+              --custom-headers a,b,...       Use custom headers, separated by comma

        Output Flags (mutually exclusive):
          -X, --extended                     Enable extended output
@@ -141,6 +143,57 @@ DESCRIPTION
    Finally the -d option enables debugging output which is mostly useful
    for the developer.

+  SEPARATOR
+    The option -s can be a single character, in which case the CSV parser
+    will be invoked. You can also specify a string as separator. The string
+    will be interpreted as literal string unless it is a valid go regular
+    expression. For example:
+
+        -s '\t{2,}'
+
+    is being used as a regexp and will match two or more consecutive tabs.
+
+        -s 'foo'
+
+    on the other hand is no regular expression and will be used literally.
+
+    To make life easier, there are a couple of predefined regular
+    expressions, which you can specify as classes:
+
+        * :tab:
+
+        Matches a tab and eats spaces around it.
+
+        * :spaces:
+
+        Matches 2 or more spaces.
+
+        * :pipe:
+
+        Matches a pipe character and eats spaces around it.
+
+        * :default:
+
+        Matches 2 or more spaces or tab. This is the default separator if
+        none is specified.
+
+        * :nonword:
+
+        Matches a non-word character.
+
+        * :nondigit:
+
+        Matches a non-digit character.
+
+        * :special:
+
+        Matches one or more special chars like brackets, dollar sign,
+        slashes etc.
+
+        * :nonprint:
+
+        Matches one or more non-printable characters.
+
  PATTERNS AND FILTERING
    You can reduce the rows being displayed by using one or more regular
    expression patterns. The regexp language being used is the one of
@@ -458,7 +511,7 @@ Operational Flags:
  -n, --numbering                    Enable header numbering
  -N, --no-color                     Disable pattern highlighting
  -H, --no-headers                   Disable headers display
-  -s, --separator <string>           Custom field separator
+  -s, --separator <string>           Custom field separator (may be char, string or :class:)
  -k, --sort-by <int|name>           Sort by column (default: 1)
  -z, --fuzzy                        Use fuzzy search [experimental]
  -F, --filter <field[!]=reg>        Filter given field with regex, can be used multiple times
@@ -466,6 +519,8 @@ Operational Flags:
  -R, --regex-transposer </from/to/> Apply /search/replace/ regexp to fields given in -T
  -j, --json                         Read JSON input (must be array of hashes)
  -I, --interactive                  Interactively filter and select rows
+      --auto-headers                 Generate headers if there are none present in input
+      --custom-headers a,b,...       Use custom headers, separated by comma

 Output Flags (mutually exclusive):
  -X, --extended                     Enable extended output
--- a/lib/parser.go
+++ b/lib/parser.go
@@ -66,6 +66,43 @@ func Parse(conf cfg.Config, input io.Reader) (Tabdata, error) {
 	return data, err
 }

+/*
+ * Setup headers,  given headers might  be usable headers or  just the
+ * first row, which we use to  determine how many headers to generate,
+ * if enabled.
+ */
+func SetHeaders(conf cfg.Config, headers []string) []string {
+	if !conf.AutoHeaders && len(conf.CustomHeaders) == 0 {
+		return headers
+	}
+
+	if conf.AutoHeaders {
+		heads := make([]string, len(headers))
+		for idx := range headers {
+			heads[idx] = fmt.Sprintf("%d", idx+1)
+		}
+
+		return heads
+	}
+
+	if len(conf.CustomHeaders) == len(headers) {
+		return conf.CustomHeaders
+	}
+
+	// use as many custom ones as we have, generate the remainder
+	heads := make([]string, len(headers))
+
+	for idx := range headers {
+		if idx < len(conf.CustomHeaders) {
+			heads[idx] = conf.CustomHeaders[idx]
+		} else {
+			heads[idx] = fmt.Sprintf("%d", idx+1)
+		}
+	}
+
+	return heads
+}
+
 /*
 Parse CSV input.
 */
@@ -87,7 +124,7 @@ func parseCSV(conf cfg.Config, input io.Reader) (Tabdata, error) {
 	}

 	if len(records) >= 1 {
-		data.headers = records[0]
+		data.headers = SetHeaders(conf, records[0])
 		data.columns = len(records)

 		for _, head := range data.headers {
@@ -98,9 +135,14 @@ func parseCSV(conf cfg.Config, input io.Reader) (Tabdata, error) {
 			}
 		}

-		if len(records) > 1 {
-			data.entries = records[1:]
+		if len(records) >= 1 {
+			if conf.AutoHeaders || len(conf.CustomHeaders) > 0 {
+				data.entries = records
+			} else {
+				data.entries = records[1:]
+			}
 		}
+
 	}

 	return data, nil
@@ -128,7 +170,9 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
 			data.columns = len(parts)

 			// process all header fields
-			for _, part := range parts {
+			firstrow := make([]string, len(parts))
+
+			for idx, part := range parts {
 				// register widest header field
 				headerlen := len(part)
 				if headerlen > data.maxwidthHeader {
@@ -136,11 +180,22 @@ func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
 				}

 				// register fields data
-				data.headers = append(data.headers, strings.TrimSpace(part))
+				firstrow[idx] = strings.TrimSpace(part)

 				// done
 				hadFirst = true
 			}
+
+			data.headers = SetHeaders(conf, firstrow)
+
+			if conf.AutoHeaders || len(conf.CustomHeaders) > 0 {
+				// headers are generated or custom, so treat the first row as data
+				if matchPattern(conf, line) == conf.InvertMatch {
+					continue
+				}
+
+				data.entries = append(data.entries, firstrow)
+			}
 		} else {
 			// data processing
 			if matchPattern(conf, line) == conf.InvertMatch {
--- a/lib/parser_test.go
+++ b/lib/parser_test.go
@@ -34,7 +34,7 @@ var input = []struct {
 }{
 	{
 		name:      "tabular-data",
-		separator: cfg.DefaultSeparator,
+		separator: cfg.SeparatorTemplates[":default:"],
 		text: `
 ONE    TWO    THREE  
 asd    igig   cxxxncnc  
@@ -148,7 +148,7 @@ asd    igig
 19191  EDD 1  X`

 	readFd := strings.NewReader(strings.TrimSpace(table))
-	conf := cfg.Config{Separator: cfg.DefaultSeparator}
+	conf := cfg.Config{Separator: cfg.SeparatorTemplates[":default:"]}
 	gotdata, err := wrapValidateParser(conf, readFd)

 	assert.NoError(t, err)
@@ -314,6 +314,108 @@ func TestParserJSONInput(t *testing.T) {
 	}
 }

+func TestParserSeparators(t *testing.T) {
+	list := []string{"alpha", "beta", "delta"}
+
+	tests := []struct {
+		input string
+		sep   string
+	}{
+		{
+			input: `🎲`,
+			sep:   ":nonprint:",
+		},
+		{
+			input: `|`,
+			sep:   ":pipe:",
+		},
+		{
+			input: `   `,
+			sep:   ":spaces:",
+		},
+		{
+			input: "   \t  ",
+			sep:   ":tab:",
+		},
+		{
+			input: `-`,
+			sep:   ":nonword:",
+		},
+		{
+			input: `//$`,
+			sep:   ":special:",
+		},
+	}
+
+	for _, testdata := range tests {
+		testname := fmt.Sprintf("parse-%s", testdata.sep)
+		t.Run(testname, func(t *testing.T) {
+			header := strings.Join(list, testdata.input)
+			row := header
+			content := header + "\n" + row
+
+			readFd := strings.NewReader(strings.TrimSpace(content))
+			conf := cfg.Config{Separator: testdata.sep}
+			conf.ApplyDefaults()
+
+			gotdata, err := wrapValidateParser(conf, readFd)
+
+			assert.NoError(t, err)
+			assert.EqualValues(t, [][]string{list}, gotdata.entries)
+		})
+	}
+}
+
+func TestParserSetHeaders(t *testing.T) {
+	row := []string{"c", "b", "c", "d", "e"}
+
+	tests := []struct {
+		name   string
+		custom []string
+		expect []string
+		auto   bool
+	}{
+		{
+			name:   "default",
+			expect: row,
+		},
+		{
+			name:   "auto",
+			expect: strings.Split("1 2 3 4 5", " "),
+			auto:   true,
+		},
+		{
+			name:   "custom-complete",
+			custom: strings.Split("A B C D E", " "),
+			expect: strings.Split("A B C D E", " "),
+		},
+		{
+			name:   "custom-too-short",
+			custom: strings.Split("A B", " "),
+			expect: strings.Split("A B 3 4 5", " "),
+		},
+		{
+			name:   "custom-too-long",
+			custom: strings.Split("A B C D E F G", " "),
+			expect: strings.Split("A B C D E", " "),
+		},
+	}
+
+	for _, testdata := range tests {
+		testname := fmt.Sprintf("parse-%s", testdata.name)
+		t.Run(testname, func(t *testing.T) {
+			conf := cfg.Config{
+				AutoHeaders:   testdata.auto,
+				CustomHeaders: testdata.custom,
+			}
+			headers := SetHeaders(conf, row)
+
+			assert.NotNil(t, headers)
+			assert.EqualValues(t, testdata.expect, headers)
+		})
+	}
+}
+
 func wrapValidateParser(conf cfg.Config, input io.Reader) (Tabdata, error) {
 	data, err := Parse(conf, input)

--- a/lib/printer_test.go
+++ b/lib/printer_test.go
@@ -292,6 +292,7 @@ func TestPrinter(t *testing.T) {
 				conf.UseSortByColumn = []int{testdata.column}
 			}

+			conf.Separator = cfg.SeparatorTemplates[":default:"]
 			conf.ApplyDefaults()

 			// the test checks the len!
--- a/tablizer.1
+++ b/tablizer.1
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "TABLIZER 1"
-.TH TABLIZER 1 "2025-10-01" "1" "User Commands"
+.TH TABLIZER 1 "2025-10-10" "1" "User Commands"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -152,7 +152,7 @@ tablizer \- Manipulate tabular output of other programs
 \&      \-n, \-\-numbering                    Enable header numbering
 \&      \-N, \-\-no\-color                     Disable pattern highlighting
 \&      \-H, \-\-no\-headers                   Disable headers display
-\&      \-s, \-\-separator <string>           Custom field separator
+\&      \-s, \-\-separator <string>           Custom field separator (may be char, string or :class:)
 \&      \-k, \-\-sort\-by <int|name>           Sort by column (default: 1)
 \&      \-z, \-\-fuzzy                        Use fuzzy search [experimental]
 \&      \-F, \-\-filter <field[!]=reg>        Filter given field with regex, can be used multiple times
@@ -160,6 +160,8 @@ tablizer \- Manipulate tabular output of other programs
 \&      \-R, \-\-regex\-transposer </from/to/> Apply /search/replace/ regexp to fields given in \-T
 \&      \-j, \-\-json                         Read JSON input (must be array of hashes)
 \&      \-I, \-\-interactive                  Interactively filter and select rows
+\&          \-\-auto\-headers                 Generate headers if there are none present in input
+\&          \-\-custom\-headers a,b,...       Use custom headers, separated by comma
 \&
 \&    Output Flags (mutually exclusive):
 \&      \-X, \-\-extended                     Enable extended output
@@ -293,6 +295,62 @@ Sorts timestamps.
 .PP
 Finally the  \fB\-d\fR option  enables debugging  output which  is mostly
 useful for the developer.
+.SS "\s-1SEPARATOR\s0"
+.IX Subsection "SEPARATOR"
+The option \fB\-s\fR can be a single character, in which case the \s-1CSV\s0
+parser will be invoked. You can also specify a string as
+separator. The string will be interpreted as literal string unless it
+is a valid go regular expression. For example:
+.PP
+.Vb 1
+\&    \-s \*(Aq\et{2,}\*(Aq
+.Ve
+.PP
+is being used as a regexp and will match two or more consecutive tabs.
+.PP
+.Vb 1
+\&    \-s \*(Aqfoo\*(Aq
+.Ve
+.PP
+on the other hand is no regular expression and will be used literally.
+.PP
+To make life easier, there are a couple of predefined regular
+expressions, which you can specify as classes:
+.Sp
+.RS 4
+* 		:tab:
+.Sp
+Matches a tab and eats spaces around it.
+.Sp
+*		:spaces:
+.Sp
+Matches 2 or more spaces.
+.Sp
+*		:pipe:
+.Sp
+Matches a pipe character and eats spaces around it.
+.Sp
+*		:default:
+.Sp
+Matches 2 or more spaces or tab. This is the default separator if none
+is specified.
+.Sp
+*		:nonword:
+.Sp
+Matches a non-word character.
+.Sp
+*		:nondigit:
+.Sp
+Matches a non-digit character.
+.Sp
+*		:special:
+.Sp
+Matches one or more special chars like brackets, dollar sign, slashes etc.
+.Sp
+*		:nonprint:
+.Sp
+Matches one or more non-printable characters.
+.RE
 .SS "\s-1PATTERNS AND FILTERING\s0"
 .IX Subsection "PATTERNS AND FILTERING"
 You can reduce  the rows being displayed by using  one or more regular
--- a/tablizer.pod
+++ b/tablizer.pod
@@ -13,7 +13,7 @@ tablizer - Manipulate tabular output of other programs
      -n, --numbering                    Enable header numbering
      -N, --no-color                     Disable pattern highlighting
      -H, --no-headers                   Disable headers display
-      -s, --separator <string>           Custom field separator
+      -s, --separator <string>           Custom field separator (may be char, string or :class:)
      -k, --sort-by <int|name>           Sort by column (default: 1)
      -z, --fuzzy                        Use fuzzy search [experimental]
      -F, --filter <field[!]=reg>        Filter given field with regex, can be used multiple times
@@ -21,6 +21,8 @@ tablizer - Manipulate tabular output of other programs
      -R, --regex-transposer </from/to/> Apply /search/replace/ regexp to fields given in -T
      -j, --json                         Read JSON input (must be array of hashes)
      -I, --interactive                  Interactively filter and select rows
+          --auto-headers                 Generate headers if there are none present in input
+          --custom-headers a,b,...       Use custom headers, separated by comma

    Output Flags (mutually exclusive):
      -X, --extended                     Enable extended output
@@ -153,6 +155,62 @@ Sorts timestamps.
 Finally the  B<-d> option  enables debugging  output which  is mostly
 useful for the developer.

+=head2 SEPARATOR
+
+The option B<-s> can be a single character, in which case the CSV
+parser will be invoked. You can also specify a string as
+separator. The string will be interpreted as literal string unless it
+is a valid go regular expression. For example:
+
+    -s '\t{2,}'
+
+is being used as a regexp and will match two or more consecutive tabs.
+
+    -s 'foo'
+
+on the other hand is no regular expression and will be used literally.
+
+To make life easier, there are a couple of predefined regular
+expressions, which you can specify as classes:
+
+=over
+
+* 		:tab:      
+
+Matches a tab and eats spaces around it.
+
+*		:spaces:
+
+Matches 2 or more spaces.
+
+*		:pipe:
+
+Matches a pipe character and eats spaces around it.
+
+*		:default:
+
+Matches 2 or more spaces or tab. This is the default separator if none
+is specified.
+
+*		:nonword:
+
+Matches a non-word character.
+
+*		:nondigit:
+
+Matches a non-digit character.
+
+*		:special:
+
+Matches one or more special chars like brackets, dollar sign, slashes etc.
+
+*		:nonprint:
+
+Matches one or more non-printable characters.
+
+
+=back
+
 =head2 PATTERNS AND FILTERING

 You can reduce  the rows being displayed by using  one or more regular
Author	SHA1	Message	Date
T.v.Dein	8bdb3db105	fix #85 : add --auto-headers and --custom-headers (#86 )	2025-10-10 13:08:16 +02:00
Thomas von Dein	4ce6c30f54	fix short usage formatting	2025-10-09 23:16:07 +02:00
T.v.Dein	ec0b210167	add some handy builtin character classes as split separators (#84 )	2025-10-09 23:03:57 +02:00