2022-09-30 14:08:59 +02:00
|
|
|
/*
|
|
|
|
|
Copyright © 2022 Thomas von Dein
|
|
|
|
|
|
|
|
|
|
This program is free software: you can redistribute it and/or modify
|
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
*/
|
|
|
|
|
|
2022-09-30 19:14:58 +02:00
|
|
|
package lib
|
2022-09-28 19:30:08 +02:00
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bufio"
|
2022-10-03 12:52:26 +02:00
|
|
|
"errors"
|
2022-09-28 19:30:08 +02:00
|
|
|
"fmt"
|
2022-10-02 14:22:31 +02:00
|
|
|
"github.com/alecthomas/repr"
|
2022-09-28 19:30:08 +02:00
|
|
|
"io"
|
|
|
|
|
"regexp"
|
|
|
|
|
"strings"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
/*
|
2022-10-05 12:55:33 +02:00
|
|
|
Parse tabular input.
|
2022-09-28 19:30:08 +02:00
|
|
|
*/
|
2022-10-03 12:52:26 +02:00
|
|
|
func parseFile(input io.Reader, pattern string) (Tabdata, error) {
|
2022-09-28 19:30:08 +02:00
|
|
|
data := Tabdata{}
|
|
|
|
|
|
|
|
|
|
var scanner *bufio.Scanner
|
|
|
|
|
|
|
|
|
|
hadFirst := false
|
2022-10-05 12:55:33 +02:00
|
|
|
separate := regexp.MustCompile(Separator)
|
|
|
|
|
patternR, err := regexp.Compile(pattern)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
|
|
|
|
|
}
|
2022-09-28 19:30:08 +02:00
|
|
|
|
|
|
|
|
scanner = bufio.NewScanner(input)
|
|
|
|
|
|
|
|
|
|
for scanner.Scan() {
|
2022-10-02 14:22:31 +02:00
|
|
|
line := strings.TrimSpace(scanner.Text())
|
2022-10-05 12:55:33 +02:00
|
|
|
parts := separate.Split(line, -1)
|
2022-09-28 19:30:08 +02:00
|
|
|
|
|
|
|
|
if !hadFirst {
|
|
|
|
|
// header processing
|
|
|
|
|
data.columns = len(parts)
|
|
|
|
|
// if Debug {
|
|
|
|
|
// fmt.Println(parts)
|
|
|
|
|
// }
|
|
|
|
|
|
|
|
|
|
// process all header fields
|
|
|
|
|
for _, part := range parts {
|
|
|
|
|
// if Debug {
|
|
|
|
|
// fmt.Printf("Part: <%s>\n", string(line[beg:part[0]]))
|
|
|
|
|
//}
|
|
|
|
|
|
|
|
|
|
// register widest header field
|
2022-10-05 12:55:33 +02:00
|
|
|
headerlen := len(part)
|
2022-09-28 19:30:08 +02:00
|
|
|
if headerlen > data.maxwidthHeader {
|
|
|
|
|
data.maxwidthHeader = headerlen
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// register fields data
|
2022-10-05 12:55:33 +02:00
|
|
|
data.headers = append(data.headers, strings.TrimSpace(part))
|
2022-09-28 19:30:08 +02:00
|
|
|
|
|
|
|
|
// done
|
|
|
|
|
hadFirst = true
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
// data processing
|
|
|
|
|
if len(pattern) > 0 {
|
2022-10-05 09:12:46 +02:00
|
|
|
if patternR.MatchString(line) == InvertMatch {
|
|
|
|
|
// by default -v is false, so if a line does NOT
|
|
|
|
|
// match the pattern, we will ignore it. However,
|
|
|
|
|
// if the user specified -v, the matching is inverted,
|
|
|
|
|
// so we ignore all lines, which DO match.
|
2022-09-28 19:30:08 +02:00
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
idx := 0 // we cannot use the header index, because we could exclude columns
|
2022-10-05 12:55:33 +02:00
|
|
|
values := []string{}
|
|
|
|
|
for _, part := range parts {
|
|
|
|
|
width := len(strings.TrimSpace(part))
|
2022-09-28 19:30:08 +02:00
|
|
|
|
|
|
|
|
if len(data.maxwidthPerCol)-1 < idx {
|
|
|
|
|
data.maxwidthPerCol = append(data.maxwidthPerCol, width)
|
|
|
|
|
} else {
|
|
|
|
|
if width > data.maxwidthPerCol[idx] {
|
|
|
|
|
data.maxwidthPerCol[idx] = width
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// if Debug {
|
|
|
|
|
// fmt.Printf("<%s> ", value)
|
|
|
|
|
// }
|
2022-10-05 12:55:33 +02:00
|
|
|
values = append(values, strings.TrimSpace(part))
|
2022-09-28 19:30:08 +02:00
|
|
|
|
|
|
|
|
idx++
|
|
|
|
|
}
|
2022-10-15 14:15:36 +02:00
|
|
|
|
|
|
|
|
// fill up missing fields, if any
|
|
|
|
|
for i := len(values); i < len(data.headers); i++ {
|
|
|
|
|
values = append(values, "")
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-28 19:30:08 +02:00
|
|
|
data.entries = append(data.entries, values)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if scanner.Err() != nil {
|
2022-10-05 09:12:46 +02:00
|
|
|
return data, errors.Unwrap(fmt.Errorf("Failed to read from io.Reader: %w", scanner.Err()))
|
2022-09-28 19:30:08 +02:00
|
|
|
}
|
|
|
|
|
|
2022-10-02 14:22:31 +02:00
|
|
|
if Debug {
|
|
|
|
|
repr.Print(data)
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-03 12:52:26 +02:00
|
|
|
return data, nil
|
2022-09-28 19:30:08 +02:00
|
|
|
}
|