added CSV parsing support, enabled with -s

This commit is contained in:
2022-10-22 12:27:33 +02:00
parent e54435c2e4
commit b5c802403b
4 changed files with 87 additions and 1 deletions

View File

@@ -2,8 +2,12 @@
- rm printYamlData() log.Fatal(), maybe return error on all printers?
- refactor parser, there's some duplicate code
## Features to be implemented
- add comment support (csv.NewReader().Comment = '#')
- add output mode csv
- add --no-headers option

View File

@@ -189,6 +189,15 @@ DESCRIPTION
markdown which prints a Markdown table and yaml, which prints yaml
encoding.
ENVIRONMENT VARIABLES
tablizer supports certain environment variables which you can use to
influence program behavior. Command-line flags always take precedence
over environment variables.
<T_NO_HEADER_NUMBERING> - disable numbering of header fields, like -n.
<T_COLUMNS> - comma separated list of columns to output, like -c
<NO_COLORS> - disable colorization of matches, like -N
BUGS
In order to report a bug, unexpected behavior, feature requests or to
submit a patch, please open an issue on github:

View File

@@ -19,6 +19,7 @@ package lib
import (
"bufio"
"encoding/csv"
"errors"
"fmt"
"github.com/alecthomas/repr"
@@ -28,10 +29,69 @@ import (
"strings"
)
/*
Parse CSV input.

The field delimiter is the first byte of c.Separator. When pattern is
non-empty, the input is filtered line by line before it is handed to
the CSV reader: only lines matching the pattern are kept, or, if
c.InvertMatch is set, only lines NOT matching it.

The first record becomes the header row, all following records become
the entries. The byte length of the widest header field is stored in
maxwidthHeader.

An error is returned if the separator is empty, if pattern is not a
valid regular expression, if reading the input fails, or if the input
is not valid CSV.
*/
func parseCSV(c cfg.Config, input io.Reader, pattern string) (Tabdata, error) {
	data := Tabdata{}

	// Indexing c.Separator below would panic on an empty string.
	if len(c.Separator) == 0 {
		return data, errors.New("CSV separator must not be empty")
	}

	patternR, err := regexp.Compile(pattern)
	if err != nil {
		return data, fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err)
	}

	var content io.Reader = input

	if len(pattern) > 0 {
		// NOTE(review): the filter is applied to every line, including
		// the first one. A header line which is filtered out is lost and
		// the first remaining line is used as header instead. Confirm
		// whether this is intended.
		scanner := bufio.NewScanner(input)
		lines := []string{}

		for scanner.Scan() {
			line := strings.TrimSpace(scanner.Text())

			if patternR.MatchString(line) == c.InvertMatch {
				// by default -v is false, so if a line does NOT
				// match the pattern, we will ignore it. However,
				// if the user specified -v, the matching is inverted,
				// so we ignore all lines, which DO match.
				continue
			}

			lines = append(lines, line)
		}

		// Scan() also returns false on read errors and on lines which
		// exceed the scanner buffer; do not treat that as end of input.
		if err := scanner.Err(); err != nil {
			return data, fmt.Errorf("Could not read input: %w", err)
		}

		content = strings.NewReader(strings.Join(lines, "\n"))
	}

	csvreader := csv.NewReader(content)
	csvreader.Comma = rune(c.Separator[0])

	records, err := csvreader.ReadAll()
	if err != nil {
		return data, fmt.Errorf("Could not parse CSV input: %w", err)
	}

	if len(records) == 0 {
		return data, nil
	}

	data.headers = records[0]

	for _, head := range data.headers {
		// register widest header field
		if headerlen := len(head); headerlen > data.maxwidthHeader {
			data.maxwidthHeader = headerlen
		}
	}

	if len(records) > 1 {
		data.entries = records[1:]
	}

	return data, nil
}
/*
Parse tabular input.
*/
func parseFile(c cfg.Config, input io.Reader, pattern string) (Tabdata, error) {
if len(c.Separator) == 1 {
return parseCSV(c, input, pattern)
}
data := Tabdata{}
var scanner *bufio.Scanner

View File

@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "TABLIZER 1"
.TH TABLIZER 1 "2022-10-21" "1" "User Commands"
.TH TABLIZER 1 "2022-10-22" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@@ -355,6 +355,19 @@ Beside normal ascii mode (the default) and extended mode there are
more output modes available: \fBorgtbl\fR which prints an Emacs org-mode
table and \fBmarkdown\fR which prints a Markdown table and \fByaml\fR, which
prints yaml encoding.
.SS "\s-1ENVIRONMENT VARIABLES\s0"
.IX Subsection "ENVIRONMENT VARIABLES"
\&\fBtablizer\fR supports certain environment variables which you can use
to influence program behavior. Command-line flags always take
precedence over environment variables.
.IP "<T_NO_HEADER_NUMBERING> \- disable numbering of header fields, like \fB\-n\fR." 4
.IX Item "<T_NO_HEADER_NUMBERING> - disable numbering of header fields, like -n."
.PD 0
.IP "<T_COLUMNS> \- comma separated list of columns to output, like \fB\-c\fR" 4
.IX Item "<T_COLUMNS> - comma separated list of columns to output, like -c"
.IP "<\s-1NO_COLORS\s0> \- disable colorization of matches, like \fB\-N\fR" 4
.IX Item "<NO_COLORS> - disable colorization of matches, like -N"
.PD
.SH "BUGS"
.IX Header "BUGS"
In order to report a bug, unexpected behavior, feature requests