initial commit

This commit is contained in:
2022-09-28 19:30:08 +02:00
parent 02a64a5c3f
commit 10f4a81751
9 changed files with 530 additions and 2 deletions

152
cmd/parser.go Normal file
View File

@@ -0,0 +1,152 @@
package cmd
import (
"bufio"
"fmt"
"io"
"os"
"regexp"
"strings"
)
// Tabdata contains a whole parsed table: the header layout taken from
// the first input line plus every data row split along that layout.
// It is filled by parseFile and consumed by the printer functions.
type Tabdata struct {
maxwidthHeader int // length (in bytes) of the longest header field
maxwidthPerCol []int // per column: longest whitespace-trimmed cell seen in the data rows
columns int // number of separator matches found in the header line
headerIndices []map[string]int // per column byte offsets [ {beg=>0, end=>17}, ... ]; end == 0 means "up to end of line"
headers []string // header texts [ "ID", "NAME", ...]
entries [][]string // data rows, one (untrimmed) string per column
}
// die writes its arguments to standard error, formatted like
// fmt.Println (space separated, trailing newline), and then terminates
// the process with exit status 1. It never returns.
func die(v ...interface{}) {
	message := fmt.Sprintln(v...)
	// A failed write to stderr cannot be reported anywhere useful.
	_, _ = os.Stderr.WriteString(message)
	os.Exit(1)
}
/*
parseFile parses tabular input read from input and returns it as Tabdata.

The header (the first line that yields at least one field) is split by
2 or more spaces, or by the regular expression in the global Separator
if that is set. The byte positions of the header fields are remembered
and every following line is cut at those positions. That way "tabular
data" with fields containing single whitespaces is turned into real
tabular data. We re-tabulate our input if you will.

If pattern is not empty it is compiled as a regular expression and only
data lines matching it are kept; the header line is never filtered.

Data lines shorter than the header layout do not fail: fields that lie
(partly) beyond the end of the line are truncated or left empty.

An invalid pattern or a read error terminates the program via die().
An invalid Separator expression panics (regexp.MustCompile).
*/
func parseFile(input io.Reader, pattern string) Tabdata {
	data := Tabdata{}

	separator := `\s\s+|$`
	if len(Separator) > 0 {
		separator = Separator
	}
	spacefinder := regexp.MustCompile(separator)

	// Compile the line filter once up front instead of once per line.
	var patternR *regexp.Regexp
	if len(pattern) > 0 {
		var err error
		patternR, err = regexp.Compile(pattern)
		if err != nil {
			die(err)
		}
	}

	hadFirst := false
	scanner := bufio.NewScanner(input)

	for scanner.Scan() {
		line := scanner.Text()

		if !hadFirst {
			// header processing
			parts := spacefinder.FindAllStringIndex(line, -1)
			data.columns = len(parts)

			beg := 0
			for _, part := range parts {
				// current field
				head := line[beg:part[0]]

				// register begin and end of the field within the line;
				// end == 0 marks a field that runs up to the end of line
				// (the separator matched the empty string at "$")
				indices := map[string]int{"beg": beg, "end": 0}
				if part[0] != part[1] {
					indices["end"] = part[1] - 1
				}

				// register widest header field
				if len(head) > data.maxwidthHeader {
					data.maxwidthHeader = len(head)
				}

				data.headerIndices = append(data.headerIndices, indices)
				data.headers = append(data.headers, head)

				// end of current field == begin of next one
				beg = part[1]
			}

			// a line without any field is not a header, try the next one
			hadFirst = len(parts) > 0
			continue
		}

		// data processing
		if patternR != nil && !patternR.MatchString(line) {
			continue
		}

		values := make([]string, 0, len(data.headerIndices))
		for idx, index := range data.headerIndices {
			beg, end := index["beg"], index["end"]

			// Clamp both offsets to the actual line so that rows shorter
			// than the header cannot trigger an out-of-range slice panic.
			if end == 0 || end > len(line) {
				end = len(line)
			}
			if beg > end {
				beg = end
			}
			value := line[beg:end]

			width := len(strings.TrimSpace(value))
			if idx >= len(data.maxwidthPerCol) {
				data.maxwidthPerCol = append(data.maxwidthPerCol, width)
			} else if width > data.maxwidthPerCol[idx] {
				data.maxwidthPerCol[idx] = width
			}

			values = append(values, value)
		}

		// kept from the original: one empty line per data row in debug mode
		if Debug {
			fmt.Println()
		}

		data.entries = append(data.entries, values)
	}

	if err := scanner.Err(); err != nil {
		die(err)
	}

	return data
}

109
cmd/printer.go Normal file
View File

@@ -0,0 +1,109 @@
package cmd
import (
"fmt"
"strings"
)
// printTable writes data to standard output as an aligned ASCII table:
// one header line followed by one line per entry. Nothing is printed
// if data has no entries.
//
// It honours the following globals:
//   - XtendedOut: delegate to printExtended instead.
//   - Columns / UseColumns: if Columns is not empty only the columns
//     whose 1-based number is listed in UseColumns are printed.
//   - NoNumbering: suppress the "(N)" suffix on the header fields.
//
// Each column is padded to the wider of its header and its widest
// (trimmed) data cell, plus room for the "(N) " suffix.
func printTable(data Tabdata) {
	if XtendedOut {
		printExtended(data)
		return
	}

	if len(data.entries) == 0 {
		return
	}

	// padded width of every printed column, in print order
	widths := make([]int, 0, len(data.headers))

	// headers
	for i, head := range data.headers {
		if len(Columns) > 0 && !contains(UseColumns, i+1) {
			continue
		}

		width := data.maxwidthPerCol[i]
		if len(head) > width {
			width = len(head)
		}

		// 3 == the two parens (or spare padding) plus the trailing space
		padded := 3 + width
		label := head + " "
		if !NoNumbering {
			// The printed number is i+1, so its digits must be counted,
			// not those of i; otherwise column 10 is one char too narrow.
			number := fmt.Sprintf("%d", i+1)
			padded += len(number)
			label = head + "(" + number + ") "
		}

		fmt.Printf("%-*s", padded, label)
		widths = append(widths, padded)
	}
	fmt.Println()

	// entries
	for _, entry := range data.entries {
		// we cannot use the entry index for widths, because columns
		// may have been excluded
		col := 0
		for i, value := range entry {
			if len(Columns) > 0 && !contains(UseColumns, i+1) {
				continue
			}
			fmt.Printf("%-*s", widths[col], strings.TrimSpace(value))
			col++
		}
		fmt.Println()
	}
}
/*
printExtended simulates the \x command of psql (the PostgreSQL client):
every entry is printed as a block of "HEADER: value" lines, one line per
column, followed by an empty line. Nothing is printed if data has no
entries.

If the global Columns is not empty, only the columns whose 1-based
number is listed in UseColumns are printed. Headers are right-aligned
to the longest header among the printed columns and values are printed
with surrounding whitespace removed.
*/
func printExtended(data Tabdata) {
	if len(data.entries) == 0 {
		return
	}

	selected := func(i int) bool {
		return len(Columns) == 0 || contains(UseColumns, i+1)
	}

	// Without a column filter the pre-computed maximum applies; with a
	// filter only the headers that are actually shown determine the width.
	width := data.maxwidthHeader
	if len(Columns) > 0 {
		width = 0
		for i, head := range data.headers {
			if selected(i) && len(head) > width {
				width = len(head)
			}
		}
	}

	for _, entry := range data.entries {
		for i, value := range entry {
			if !selected(i) {
				continue
			}
			// Index headers by the column position i, not by a counter
			// of printed columns, so a filtered column keeps its label.
			fmt.Printf("%*s: %s\n", width, data.headers[i], strings.TrimSpace(value))
		}
		fmt.Println()
	}
}
// contains reports whether the integer e occurs anywhere in the slice s.
// A nil or empty slice never contains anything.
func contains(s []int, e int) bool {
	for i := 0; i < len(s); i++ {
		if s[i] == e {
			return true
		}
	}
	return false
}

108
cmd/root.go Normal file
View File

@@ -0,0 +1,108 @@
/*
Copyright © 2022 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cmd
import (
"fmt"
"github.com/alecthomas/repr"
"github.com/spf13/cobra"
"os"
"strconv"
"strings"
)
// version is the program version reported when the --version flag is set.
var version = "v1.0.0"
// rootCmd is the one and only command of tablizer.
//
// Its Run function
//  1. prints the version and returns if the Version flag is set,
//  2. converts the comma separated Columns flag into UseColumns,
//  3. treats the first argument as filter pattern unless it names an
//     existing file system entry (os.Stat succeeds),
//  4. parses and prints every remaining argument as a file, or reads
//     standard input if no file arguments are left.
//
// Any error (bad column number, unreadable file) ends the program via die().
var rootCmd = &cobra.Command{
	Use:   "tablizer [regex] [file, ...]",
	Short: "[Re-]tabularize tabular data",
	Long:  `Manipulate tabular output of other programs`,
	Run: func(cmd *cobra.Command, args []string) {
		if Version {
			fmt.Printf("This is tablizer version %s\n", version)
			return
		}

		var pattern string

		if len(Columns) > 0 {
			for _, use := range strings.Split(Columns, ",") {
				// tolerate blanks around the numbers, e.g. -c "1, 2"
				usenum, err := strconv.Atoi(strings.TrimSpace(use))
				if err != nil {
					die(err)
				}
				UseColumns = append(UseColumns, usenum)
			}
		}

		// A first argument that does not exist on disk is the pattern.
		if len(args) > 0 {
			if _, err := os.Stat(args[0]); err != nil {
				pattern = args[0]
				args = args[1:]
			}
		}

		// no files given, read from stdin
		if len(args) == 0 {
			data := parseFile(os.Stdin, pattern)
			if Debug {
				repr.Print(data)
			}
			printTable(data)
			return
		}

		for _, file := range args {
			fd, err := os.Open(file)
			if err != nil {
				die(err)
			}

			data := parseFile(fd, pattern)

			// The file is fully consumed; release the descriptor now rather
			// than leaking one per argument. The close error of a file that
			// was only read carries no information we could act on.
			_ = fd.Close()

			if Debug {
				repr.Print(data)
			}
			printTable(data)
		}
	},
}
// Command line state. Everything except UseColumns is bound directly to
// a flag of the root command.
var (
	// Debug enables debugging output (--debug).
	Debug bool

	// XtendedOut selects the extended, one-field-per-line output (--extended).
	XtendedOut bool

	// NoNumbering disables the numbering of header fields (--no-numbering).
	NoNumbering bool

	// Version requests printing the program version (--version).
	Version bool

	// Columns is the raw, comma separated column list (--columns).
	Columns string

	// UseColumns holds the column numbers parsed from Columns.
	UseColumns []int

	// Separator is a custom field separator (--separator).
	Separator string
)
// Execute runs the root command and terminates the process with exit
// status 1 if the command returns an error.
func Execute() {
	if err := rootCmd.Execute(); err != nil {
		os.Exit(1)
	}
}
// init registers the persistent command line flags of the root command
// and binds each of them to its package level variable.
func init() {
	flags := rootCmd.PersistentFlags()

	flags.BoolVarP(&Debug, "debug", "d", false, "Enable debugging")
	flags.BoolVarP(&XtendedOut, "extended", "x", false, "Enable extended output")
	flags.BoolVarP(&NoNumbering, "no-numbering", "n", false, "Disable header numbering")
	flags.BoolVarP(&Version, "version", "v", false, "Print program version")
	flags.StringVarP(&Separator, "separator", "s", "", "Custom field separator")
	flags.StringVarP(&Columns, "columns", "c", "", "Only show the specified columns (separated by ,)")
}