diff --git a/TODO.md b/TODO.md
index 896806a..beac19c 100644
--- a/TODO.md
+++ b/TODO.md
@@ -2,8 +2,12 @@
 
 - rm printYamlData() log.Fatal(), maybe return error on all printers?
 
+- refactor parser, there's some duplicate code
+
 ## Features to be implemented
 
+- add comment support (csv.NewReader().Comment = '#')
+
 - add output mode csv
 
 - add --no-headers option
diff --git a/cmd/tablizer.go b/cmd/tablizer.go
index 61448cc..0eaf850 100644
--- a/cmd/tablizer.go
+++ b/cmd/tablizer.go
@@ -189,6 +189,15 @@ DESCRIPTION
     markdown which prints a Markdown table and yaml, which prints yaml
     encoding.
 
+ENVIRONMENT VARIABLES
+    tablizer supports certain environment variables which you can use to
+    influence program behavior. Commandline flags always take precedence
+    over environment variables.
+
+    - disable numbering of header fields, like -n.
+    - comma separated list of columns to output, like -c
+    <NO_COLORS> - disable colorization of matches, like -N
+
 BUGS
     In order to report a bug, unexpected behavior, feature requests or to
     submit a patch, please open an issue on github:
diff --git a/lib/parser.go b/lib/parser.go
index 0e2ca96..f44c37f 100644
--- a/lib/parser.go
+++ b/lib/parser.go
@@ -19,6 +19,7 @@ package lib
 
 import (
 	"bufio"
+	"encoding/csv"
 	"errors"
 	"fmt"
 	"github.com/alecthomas/repr"
@@ -28,10 +29,95 @@ import (
 	"strings"
 )
 
+/*
+   Parse CSV input.
+
+   The first character of c.Separator is used as field delimiter.  If
+   a pattern is given, the raw input lines are filtered with it
+   (honoring c.InvertMatch) before they are handed to the CSV reader.
+   The header line is never filtered.
+
+   Returns an error if the separator is empty, the pattern is not a
+   valid regular expression, or the input cannot be read or parsed.
+*/
+func parseCSV(c cfg.Config, input io.Reader, pattern string) (Tabdata, error) {
+	var content io.Reader = input
+	data := Tabdata{}
+
+	// convert to runes so that a multibyte separator character stays intact
+	separator := []rune(c.Separator)
+	if len(separator) == 0 {
+		return data, errors.New("CSV separator must not be empty")
+	}
+
+	patternR, err := regexp.Compile(pattern)
+	if err != nil {
+		return data, fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err)
+	}
+
+	if len(pattern) > 0 {
+		scanner := bufio.NewScanner(input)
+		lines := []string{}
+		hadHeader := false
+		for scanner.Scan() {
+			// NOTE(review): trimming also strips leading/trailing
+			// whitespace separators (e.g. tabs) - confirm this is intended.
+			line := strings.TrimSpace(scanner.Text())
+			if hadHeader && patternR.MatchString(line) == c.InvertMatch {
+				// by default -v is false, so if a line does NOT
+				// match the pattern, we will ignore it. However,
+				// if the user specified -v, the matching is inverted,
+				// so we ignore all lines, which DO match.
+				continue
+			}
+			if len(line) > 0 {
+				// the first non-empty line holds the headers, it
+				// is always kept regardless of the pattern
+				hadHeader = true
+			}
+			lines = append(lines, line)
+		}
+		if err := scanner.Err(); err != nil {
+			return data, fmt.Errorf("Could not read input: %w", err)
+		}
+		content = strings.NewReader(strings.Join(lines, "\n"))
+	}
+
+	csvreader := csv.NewReader(content)
+	csvreader.Comma = separator[0]
+
+	records, err := csvreader.ReadAll()
+	if err != nil {
+		// wrap instead of unwrap, otherwise the error gets lost
+		return data, fmt.Errorf("Could not parse CSV input: %w", err)
+	}
+
+	if len(records) >= 1 {
+		data.headers = records[0]
+
+		for _, head := range data.headers {
+			// register widest header field
+			headerlen := len(head)
+			if headerlen > data.maxwidthHeader {
+				data.maxwidthHeader = headerlen
+			}
+		}
+
+		if len(records) > 1 {
+			data.entries = records[1:]
+		}
+	}
+
+	return data, nil
+}
+
 /*
    Parse tabular input.
 */
 func parseFile(c cfg.Config, input io.Reader, pattern string) (Tabdata, error) {
+	if len(c.Separator) == 1 {
+		return parseCSV(c, input, pattern)
+	}
 	data := Tabdata{}
 
 	var scanner *bufio.Scanner
diff --git a/tablizer.1 b/tablizer.1
index b8112c8..4ced80c 100644
--- a/tablizer.1
+++ b/tablizer.1
@@ -133,7 +133,7 @@
 .\" ========================================================================
 .\"
 .IX Title "TABLIZER 1"
-.TH TABLIZER 1 "2022-10-21" "1" "User Commands"
+.TH TABLIZER 1 "2022-10-22" "1" "User Commands"
 .\" For nroff, turn off justification. Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -355,6 +355,19 @@ Beside normal ascii mode (the default) and extended mode there are
 more output modes available: \fBorgtbl\fR which prints an Emacs org-mode
 table and \fBmarkdown\fR which prints a Markdown table and \fByaml\fR, which
 prints yaml encoding.
+.SS "\s-1ENVIRONMENT VARIABLES\s0"
+.IX Subsection "ENVIRONMENT VARIABLES"
+\&\fBtablizer\fR supports certain environment variables which you can use
+to influence program behavior. Commandline flags always take
+precedence over environment variables.
+.IP " \- disable numbering of header fields, like \fB\-n\fR." 4
+.IX Item " - disable numbering of header fields, like -n."
+.PD 0
+.IP " \- comma separated list of columns to output, like \fB\-c\fR" 4
+.IX Item " - comma separated list of columns to output, like -c"
+.IP "<\s-1NO_COLORS\s0> \- disable colorization of matches, like \fB\-N\fR" 4
+.IX Item "<NO_COLORS> - disable colorization of matches, like -N"
+.PD
 .SH "BUGS"
 .IX Header "BUGS"
 In order to report a bug, unexpected behavior, feature requests