Compare commits

..

103 Commits

Author SHA1 Message Date
59911aebb9 added 2024-05-14 12:10:58 +02:00
a9bb79b01c merge corrections 2024-05-07 18:39:38 +02:00
a718fa388d Merge remote-tracking branch 'origin/development' 2024-05-07 18:29:01 +02:00
473feff451 refactored and un-go-criticed 2024-05-07 18:01:12 +02:00
9e2e45715e added -F docs 2024-05-07 18:00:57 +02:00
39609425e5 refactoring and gouncritic, 1st part 2024-05-07 15:19:54 +02:00
ba2a2e8460 add -F filter by column flag (closes #13) 2024-05-07 13:30:07 +02:00
96f7881c16 fix #12: only consider -v if there's a pattern, ignore it otherwise 2024-05-07 13:29:41 +02:00
T.v.Dein
6fccd1287b Feature additions (#11)
* add color table support (using alternating colorization of rows) using new flag `-L`
* add config file support (HCL format) using `~/.config/tablizer/config` or `-f <file>` so the user can customize colors
* removed golang 1.17 support
2023-11-22 14:16:43 +01:00
0f22457961 remove go 1.17 support 2023-11-22 14:09:49 +01:00
ddfbecaa35 +docs, try linter v1.18 2023-11-22 14:08:36 +01:00
3632de10d7 try to fix linter 2023-11-22 13:57:57 +01:00
76b98fb8ad upd mods 2023-11-22 13:48:32 +01:00
f045adf441 added config file support to set custom colors 2023-11-22 13:33:26 +01:00
811173ddb4 fixed alternating highlighting, now looks reasonable 2023-11-22 10:30:40 +01:00
3c910ca08f works but is ugly :( 2023-11-21 17:41:04 +01:00
a8c9ede77e added -L flag to highligh lines in alternating bg color 2023-11-21 11:40:55 +01:00
T.v.Dein
9eadb941da Release v1.0.17 (#9)
* add shortcut -H to --no-headers, it's too cumbersome to type
* added fuzzy search support
* Added basic lisp plugin facilities
* Lisp plugin Addidions:
- added process hook facilities
- added working example lisp plugin for filter hook
- load-path can now be a file as well
- added a couple of lisp helper functions (atoi, split), more may
  follow, see lisplib.go
* linting fixes
2023-10-02 18:15:41 +02:00
T.v.Dein
93800f81c1 release v1.0.16 (#8)
* add shortcut -H to --no-headers, it's too cumbersome to type

* bump version

* add -H to usage

* re-generated

---------

Co-authored-by: Thomas von Dein <tom@vondein.org>
2023-05-03 18:31:28 +02:00
T.v.Dein
a94a4fd5b0 Merge pull request #7 from TLINDEN/development
added --no-headers flag to disable header display in tabular modes
2023-04-21 10:01:19 +02:00
1acbdbc674 added --no-headers flag to disable header display in tabular modes 2023-04-21 09:52:05 +02:00
195f685584 fix release maker 2023-01-23 13:40:50 +01:00
b72a99748f upd Changelog, bump version 2023-01-23 12:38:59 +01:00
3cf9310ef7 -D could not be used together with -a 2023-01-20 13:37:06 +01:00
ceae80c91c fix invalid arg handling (io, stdin) and add tests for this 2023-01-09 12:54:45 +01:00
54add2c801 only upd patch version if any 2022-11-04 20:27:22 +01:00
2d157bf2c0 force uniseg release version, see actions#3396457307/ 2022-11-04 20:22:51 +01:00
6f71a028f0 added licenses 2022-11-04 20:22:40 +01:00
dfc7c2e03e upd dep licenses, upd go modules 2022-11-04 20:10:54 +01:00
c443914222 fix spacing mess 2022-11-03 20:17:02 +01:00
eddd4e4180 add feature request template 2022-11-03 20:12:26 +01:00
0d05505493 Merge branch 'main' into development 2022-11-03 20:09:10 +01:00
T.v.Dein
a461dba10d Merge pull request #6 from TLINDEN/issue-template
Update issue templates
2022-11-03 20:08:42 +01:00
T.v.Dein
ca71f8a572 Update issue templates 2022-11-03 19:59:19 +01:00
60230eb1f6 added show-version target 2022-11-03 19:51:42 +01:00
315e8d5363 fix changelog 2022-11-03 19:30:55 +01:00
88d078a535 fix to be able to run 'make' on systems w/o perl 2022-11-03 19:26:57 +01:00
74ab3a1804 version bump 2022-11-03 13:16:02 +01:00
2d8614fa0f demo gif 2022-11-03 13:11:09 +01:00
c8bad4df1a check completion errors 2022-11-02 14:33:48 +01:00
335b2665f2 turned completion subcommand into option 2022-11-01 11:40:36 +01:00
8552270a68 added completion support 2022-10-31 16:19:12 +01:00
6f49b76607 added demo generator 2022-10-27 19:24:22 +02:00
4653eaca09 added demo generator 2022-10-27 19:23:48 +02:00
722eea7e7b add asciicast demo 2022-10-27 19:22:36 +02:00
304f2182ac js doesnt work, try image 2022-10-27 19:18:09 +02:00
73908b1661 added ascii cast 2022-10-27 19:16:41 +02:00
105ba96757 -A was not implemented, oops! 2022-10-27 18:38:46 +02:00
0681f67bc6 fix release link 2022-10-26 12:38:08 +02:00
066ddd0d98 re-organized pattern matching code 2022-10-25 18:34:28 +02:00
417faf3ff2 fixed #5, colorization now always works as expected 2022-10-25 14:24:05 +02:00
001021dac8 Workaround for issue#3: text containing tag like content is not colorized properly. 2022-10-24 17:55:31 +02:00
5c42f7ab9a check pattern on startup 2022-10-24 14:49:23 +02:00
5e65726cb0 cleanup 2022-10-24 14:05:50 +02:00
138ae51936 added CSV output mode, enhanced parser tests 2022-10-23 16:57:30 +02:00
b5c802403b added CSV parsing support, enabled with -s 2022-10-22 12:27:33 +02:00
e54435c2e4 added support for environment variables 2022-10-22 10:21:39 +02:00
975510c86a using enum modeflags, use my own usage template, generated from manpage so I don't have to maintain it twice, it's also nicer 2022-10-21 10:21:07 +02:00
9dd2a49d9b adapted version generation to cfg module, added and fixed unit tests 2022-10-19 19:32:41 +02:00
90872e0c60 fix linter errors 2022-10-19 12:57:50 +02:00
baac74eb47 yaml fix 2022-10-19 12:51:54 +02:00
360dd28e20 add linter 2022-10-19 12:50:20 +02:00
1e36c148ff get rid of global variables, makes testing way easier 2022-10-19 12:44:19 +02:00
399620de98 Added so we can use struct holding all configuration 2022-10-19 12:43:54 +02:00
5d10875a3f fix pointer bug, mockdata have been overwritten by go test everytime,
now use a const struct via func.
2022-10-18 19:45:09 +02:00
4481f59eda no need to return string when using io.Writer anyway 2022-10-17 23:40:53 +02:00
1b2f51dcaf Changed print funcs to use an io.Writer, reimplemented print tests 2022-10-17 20:04:05 +02:00
0d6de3fe5b add-fixes 2022-10-16 19:48:12 +02:00
ec23ae2e76 fix todo 2022-10-16 19:45:46 +02:00
76930ab45a added yaml output mode support (-o yaml or -Y) 2022-10-16 19:44:26 +02:00
a77e4dbc5a added NumberizeHeaders() unit test 2022-10-16 16:37:47 +02:00
9305f48639 added target to execute a single unit test manually 2022-10-16 16:37:26 +02:00
da276a1b50 replaced github.com/xhit/go-str2duration with my own func + tests 2022-10-16 15:30:34 +02:00
dfd3ab9b77 fixed version generation 2022-10-15 19:46:03 +02:00
d53b32b95e updated Changelog 2022-10-15 19:37:30 +02:00
3edbd53ef8 bump version 2022-10-15 19:33:07 +02:00
9c49b78593 added unit test + docs for the various sort modes. 2022-10-15 19:31:42 +02:00
ca87c339b0 added support to sort by time, duration, numerical 2022-10-15 17:05:15 +02:00
fd74628259 added unit test for descending order, fixed deduplication bug 2022-10-15 16:27:04 +02:00
839f33a7fc unit test missing 2022-10-15 14:25:38 +02:00
ebd391df63 added -D to alter sort order to descending order (default: ascending) 2022-10-15 14:24:43 +02:00
752406815c catch incomplete rows and fill them with empty string[s] 2022-10-15 14:15:36 +02:00
4ec6ccd0fd added support for regexp in -c parameter, added deduplication as well 2022-10-15 14:03:30 +02:00
aef545d51e added open todo items I still had on the list 2022-10-15 14:02:46 +02:00
3249e1719f some rewording of the NCOC and turn TODO into a md file 2022-10-15 14:02:09 +02:00
f830cc6256 updated 2022-10-14 19:56:55 +02:00
7e01d54b08 added. 2022-10-14 19:52:02 +02:00
487ba6253d Introduced changelog. 2022-10-14 19:51:45 +02:00
745d15b459 Made corrections to satisfy linter. 2022-10-14 19:51:19 +02:00
8e2ba58ddb added -k parameter to sort by columns 2022-10-13 18:56:34 +02:00
6eedb60a6a minor update to current state 2022-10-11 18:50:58 +02:00
81fac864f1 using gh for release generation, fixed mkrel.sh to add version 2022-10-11 18:42:10 +02:00
e868b50c0f Merge branch 'development' 2022-10-11 18:34:11 +02:00
b9ed7d8cb7 fixed -X output in combination with -c 2022-10-11 13:47:34 +02:00
6ae4a1b6d9 added test for -X output 2022-10-11 09:11:33 +02:00
f890596b4c added pattern highlighting support 2022-10-10 20:14:51 +02:00
22ee24cfdf Merge branch 'development' 2022-10-06 20:05:20 +02:00
34e2b8d855 fixed issuer #4, version string missing, and added some docs about pattern syntax 2022-10-06 20:02:40 +02:00
196833ed3c Merge branch 'development' 2022-10-05 19:17:34 +02:00
85277bbf5e more refactoring, fixed bug in shell mode output, fixed default
Separator and fixed #3
2022-10-05 16:43:51 +02:00
26e50cf908 fix #1: use scanner.Split() instead of splitting by header position
boundaries, since this splitting cuts utf-8 chars which causes
distorted output.
2022-10-05 12:55:33 +02:00
5be18e27c9 Merge branch 'development' 2022-10-05 09:20:02 +02:00
2c410e1cb3 added -v flag, replace 'help' subcommand wich -m, more tests 2022-10-05 09:12:46 +02:00
691 changed files with 267795 additions and 552 deletions

31
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@@ -0,0 +1,31 @@
---
name: Bug report
about: Create a report to help us improve
title: "[bug-report]"
labels: bug
assignees: TLINDEN
---
**Describtion**
<!-- Please provide a clear and concise description of the issue: -->
**Steps To Reproduce**
<!-- Please detail the steps to reproduce the behavior: -->
**Expected behavior**
<!-- What do you expected to happen instead? -->
**Version information**
<!--
Please provide as much version information as possible:
- if you have just installed a binary, provide the output of: tablizer --version
- if you installed from source, provide the output of: make show-version
- provide additional details: operating system and version and shell environment
-->
**Additional informations**

View File

@@ -0,0 +1,23 @@
---
name: Feature request
about: Suggest a feature
title: "[feature-request]"
labels: feature-request
assignees: TLINDEN
---
**Describtion**
<!-- Please provide a clear and concise description of the feature you desire: -->
**Version information**
<!--
Just in case the feature is already present, please provide as
much version information as possible:
- if you have just installed a binary, provide the output of: tablizer --version
- if you installed from source, provide the output of: make show-version
- provide additional details: operating system and version and shell environment
-->

View File

@@ -4,7 +4,7 @@ jobs:
build:
strategy:
matrix:
version: [1.17, 1.18, 1.19]
version: [1.18, 1.19]
os: [ubuntu-latest, windows-latest, macos-latest]
name: Build
runs-on: ${{ matrix.os }}
@@ -23,3 +23,16 @@ jobs:
- name: test
run: make test
golangci:
name: lint
runs-on: ubuntu-latest
steps:
- uses: actions/setup-go@v3
with:
go-version: 1.18
- uses: actions/checkout@v3
- name: golangci-lint
uses: golangci/golangci-lint-action@v3
with:
skip-cache: true

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
releases
tablizer

278
CHANGELOG.md Normal file
View File

@@ -0,0 +1,278 @@
# Changelog
All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org).
## [v1.0.14](https://github.com/TLINDEN/tablizer/tree/v1.0.14) - 2023-01-23
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.13...v1.0.14)
### Fixed
- The -D parameter could not be used together with -a.
- Fixed invalid argv handling: when the user wanted to read from stdin
but gave an argument which was meant as a pattern, but also existed
as a filename, then tablizer opened the file, ignored stdin.
- Makefile indentation
### Added
- added licens notes about dependencies
- using hard coded uniseq version, see actions#3396457307
- updated dependencies (go module versions)
## [v1.0.13](https://github.com/TLINDEN/tablizer/tree/v1.0.13) - 2022-11-03
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.12...v1.0.13)
### Added
- Added command line flag to generate shell completion code
- Added an animated demo gif to the README to demonstrate the tool
### Fixed
- The `-A` flag wasn't implemented (default output mode).
- Fixed building from source on systems w/o perls pod tools,
which is not requrired anyway since I always commit the latest
manpage.
## [v1.0.12](https://github.com/TLINDEN/tablizer/tree/v1.0.12) - 2022-10-25
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.11...v1.0.12)
### Added
- Added support to parse CSV input
- Added CSV output support
- Added support for environment variables
### Changed
- We do not use the generated help message anymore, instead we use the
usage from the manpage, which we have to maintain anyway. It looks
better and has flag groups, which cobra is still lacking as of this
writing.
- More refactoring and re-organization, runtime configuration now
lives in the cfg module.
### Fixed
- Fixed [Bug #5](https://github.com/TLINDEN/tablizer/issues/5), where
matches have not been highlighted correctly in some rare cases.
## [v1.0.11](https://github.com/TLINDEN/tablizer/tree/v1.0.11) - 2022-10-19
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.10...v1.0.11)
### Added
- Added CI job golinter to regularly check for common mistakes.
- Added YAML output mode.
- Added more unit tests, we're over 95% in the lib module.
### Changed
- do not use any global variables anymore, makes the code easier to
maintain, understand and test
- using io.Writer in print* functions, which is easier to test, also
re-implemented the print tests.
- replaced go-str2duration with my own implementation `duration2int()`.
## [v1.0.10](https://github.com/TLINDEN/tablizer/tree/v1.0.10) - 2022-10-15
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.9...v1.0.10)
### Added
- Added various sort modes: sort by time, by duration, numerical (-a -t -i)
- Added possibility to modify sort order to descending (-D)
- Added support to specify a regexp in column selector -c, which can
also be mixed with numerical column spec
- More unit tests
### Fixed
- Column specification allowed to specify duplicate columns like `-c
1,2,1,2` unchecked. Now this list will be deduplicated before use.
## [v1.0.9](https://github.com/TLINDEN/tablizer/tree/v1.0.9) - 2022-10-14
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.8...v1.0.9)
### Added
- Added Changelog, Contribution guidelines and no COC.
### Changed
- some minor changes to satisfy linter.
## [v1.0.8](https://github.com/TLINDEN/tablizer/tree/v1.0.8) - 2022-10-13
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.7...v1.0.8)
### Added
- Added sort support with the new parameter -k (like sort(1)).
## [v1.0.7](https://github.com/TLINDEN/tablizer/tree/v1.0.7) - 2022-10-11
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.6...v1.0.7)
### Added
- Added pattern highlighting support.
- Added more unit tests.
### Fixed
- Fixed extended more output in combination with -c.
- Fixed issue #4, the version string was missing.
## [v1.0.6](https://github.com/TLINDEN/tablizer/tree/v1.0.6) - 2022-10-05
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.5...v1.0.6)
### Added
- Added documentation about regexp syntax in the manpage.
- Added more unit tests.
### Changed
- Rewrote the input parser.
- Some more refactoring work has been done.
## [v1.0.5](https://github.com/TLINDEN/tablizer/tree/v1.0.5) - 2022-10-05
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.4...v1.0.5)
### Added
- A new option has been added: --invert-match -v which behaves like
the same option in grep(1): it inverts the pattern match.
- A few more unit tests have been added.
### Fixed
- Pattern matching did not work, because the (new) help subcommand
lead to cobra taking care of the first arg to the program
(argv[1]). So now there's a new parameter -m which displays the
manpage and no more subcommands.
## [v1.0.4](https://github.com/TLINDEN/tablizer/tree/v1.0.4) - 2022-10-04
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.3...v1.0.4)
### Added
- Development version of the compiled binary now uses git vars
in addition to program version.
- Added an option to display the manual page (compiled in) as text:
--help, for cases where a user just installed the binary.
### Changed
- Fixed go module namespace.
## [v1.0.3](https://github.com/TLINDEN/tablizer/tree/v1.0.3) - 2022-10-03
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.2...v1.0.3)
### Added
- Added a new output mode: shell mode, which allows the user
to use the output in a shell eval loop to further process
the data.
### Changed
- More refactoring work has been done.
## [v1.0.2](https://github.com/TLINDEN/tablizer/tree/v1.0.2) - 2022-10-02
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.1...v1.0.2)
### Added
- Added some basic unit tests.
### Changed
- Code has been refactored to be more efficient.
- Replaced table generation code with Tablewriter.
## [v1.0.1](https://github.com/TLINDEN/tablizer/tree/v1.0.1) - 2022-09-30
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/v1.0.0...v1.0.1)
### Added
- Added a unix manual page.
- Added release builder to Makefile
### Changed
- Various minor fixes.
## [v1.0.0](https://github.com/TLINDEN/tablizer/tree/v1.0.0) - 2022-09-28
[Full Changelog](https://github.com/TLINDEN/tablizer/compare/02a64a5c3fe4220df2c791ff1421d16ebd428c19...v1.0.0)
Initial release.

113
CODE_OF_CONDUCT.md Normal file
View File

@@ -0,0 +1,113 @@
# No Code of Conduct
*TL;DR:* This project does **NOT** have a so called Code of Conduct,
nor will it ever have one.
## The Rant
The reasons are somewhat complicated and I'll try my best to document
them here.
Ethical codes or rules come along like laws. But how is ethical or
moral behavior defined? And who defines which behavior is ethical and
which is not? Certainly not me.
Unless you live in a dictatorship (and more than half of the
population of planet earth do as of this writing), laws come into
existence by democratic procedures. Laws cover almost every aspect of
live in a society. Laws allow and forbid behavior and laws sanction
infringements.
A software project like this one on the other hand is not a society.
There are not enough people involved to form democratic
structures. And there will always be a minority of users who have the
right to commit or reject code. How could any maintainer of a software
project dare to decree rules upon others? Actually, am I, the current
maintainer of this very project authorized to do so?
I think the anser to this question clearly is NO.
The issue is being complicated by the fact, that open source
development these days happens on planetary scale. And this planet
houses hundreds if not thousands of different cultures, philosophies,
ideologies and worldviews. The answer to many ethical questions will
in most cases vague and nebulous.
Ones joke will always be another ones insult.
Then there is the problem of language. I myself am not an english
native, but I publish everyting using the english language. I am able
to communicate with most people in the open source community because
of that. But I am certainly not able to understand everything and
everyone. There might be nuances to a sentence I don't sense, there
might be sarcastic connotations I don't understand or references to
historical figures, events or traditions I don't know and never have
heard of.
Judging over other peoples online behavior looks like a titanic task
to me. It is just not my job to judge others, I am not legitimized or
authorized to do so and I am not interested in this kind of business.
Another huge problem with ethical rules is that you need to outline
and enforce sanctions on those who violate the rules. But since I am
not an elected authority how would I be able to do this? I don't
know. And what happens if someone complains about myself? Shall I
remove myself from my own project? Come on!
Last but not least there's the law. So, let's say someone in india
says something insulting to some other developer in an issue. Of
course german law does not apply to indian people. More, the insult
might actually not be an insult in india. In the end, nothing would
happen. Under normal circumstances, maintainers would delete the
posting, ban the user or remove push privileges etc.
But then, is there a way for the offending user to defend himself? Of
course not, since neither indian or german law alone applies. I cannot
go to a german court and sue the guy and he cannot do the same in
india. Or - we possibly could but the judges on both countries would
just laugh and close the case.
That being said, I don't have the power nor the tools, nor the
authority to enforce serious sanctions of any meaningful kind against
others. Therefore I cannot outline any rules whatsoever.
And let's not even start talking about there undemocratic "comitees"
many projects are forming to circumvent this problem. Some projects
even include external entities like a lawer or some bureaucrat
somewhere just to have the ability to complain against a comitee
member. What a mess!
## So, which are the ethical rules within this project then?
Well, there are none.
This project is about code, not society. It doesn't matter where you
come from, how you look, how you think, what you believe, who your
friends are, whay you said or did sometime in the past. I don't even
care if you are a human being. You are an alien so bored that you need
to submit code on github? Fine with me. You're a convicted criminal? I
don't give a shit!
**The only thing I am interested here is Code and only Code.**
So if anyhing happens here I don't like or I am obliged by law to act
on, I will decide on a case to case basis what to do. And
unfortunately, since this is the nature of a github project, you
cannot complain, object or protest. I am very sorry!
If you will, let's at least outline these:
- Please - just please - behave towards others as you'd expect others
to behave towards yourself.
- Don't judge others for any reason.
- Only judge the code.
But these are not rules, only a friendly appeal to you as a developer
and user.
Thanks a lot!

94
CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,94 @@
## Project Goals
The goal of this project is to build a small tool which helps in day
to day work with tabular output of various commandline programs. It
should be small, fast and easy to understand. The idea is to replace
multiline shell pipes using awk, sed and grep with just one
binary.
There will be no GUI, no web interface, no public API of some sort, no
builtin interpreter.
The programming language used for this project will always be
[GOLANG](https://go.dev/) with the exception of the documentation
([Perl POD](https://perldoc.perl.org/perlpod)) and the Makefile.
# Contributing
You can contribute to this project in various ways:
## Open an issue
If you encounter a problem or don't understand how the program works
or if you think the documentation is unclear, please don't hesitate to
open an issue.
Please add as much information about the case as possible, such as:
- Your environment (operating system etc)
- tablizer version (`tablizer --version`)
- Input data. Please replace sensitive information with mock data!
- Actual program output.
- Expected program output.
- Error message - if any.
Be aware that I am working on this (and some other) project in my
spare time which is scarce. Therefore please don't expect me to
respond to your query within hours or even days. Be patient, but I
WILL respond.
## Pull Requests
Code and documentation help is always much appreciated! Please follow
thes guidelines to successfully contribute:
- Every pull request shall be based on latest `development`
branch. `main` is only used for releases.
- Execute the unit tests before committing: `make test`. There shall
be no errors.
- Strive to be backwards compatible so that users who are already
using the program don't have to change their habits - unless it is
really neccessary.
- Try to add a unit test for your addition.
- Don't ever change existing unit tests!
- Add a meaningful and comprehensive rationale about your contribution:
- Why do you think it might be useful for others?
- What did you actually change or add?
- Is there an open issue which this PR fixes and if so, please link
to that issue.
- [Re-]format your code with `gofmt -s`.
- Avoid unneccesary dependencies, especially for very small functions.
- **If** a new dependency is being added, it must be compatible with
our [license agreement](LICENSE).
- You need to accept that the code or documentation you contribute
will be redistributed under the terms of said license agreement. If
your contribution is considerably large or if you contribute
regularly, then feel free to add your name and if you want your
email address to the *AUTHORS* section of the
[manpage](tablizer.pod).
- Adhere to the above mentioned project goals.
- If you are unsure if your addition or change will be accepted,
better ask before starting coding. Open an issue about your proposal
and let's discuss it! That way we avoid doing unnessesary work on
both sides.
Each pull request will be carefully reviewed and if it is a useful
addition it will be accepted. However, please be prepared that
sometimes a PR will be rejected. The reasons may vary and will be
documented. Perhaps the above guidelines are not matched, or the
addition seems to be not so useful from my perspective, maybe there
are too much changes or there might be changes I don't even
understand.
But whatever happens: your contribution is always welcome!

View File

@@ -18,33 +18,43 @@
#
# no need to modify anything below
tool = tablizer
version = $(shell egrep "= .v" lib/common.go | cut -d'=' -f2 | cut -d'"' -f 2)
version = $(shell egrep "= .v" cfg/config.go | cut -d'=' -f2 | cut -d'"' -f 2)
archs = android darwin freebsd linux netbsd openbsd windows
PREFIX = /usr/local
UID = root
GID = 0
BRANCH = $(shell git describe --all | cut -d/ -f2)
BRANCH = $(shell git branch --show-current)
COMMIT = $(shell git rev-parse --short=8 HEAD)
BUILD = $(shell date +%Y.%m.%d.%H%M%S)
VERSION:= $(if $(filter $(BRANCH), development),$(version)-$(BRANCH)-$(COMMIT)-$(BUILD))
VERSION := $(if $(filter $(BRANCH), development),$(version)-$(BRANCH)-$(COMMIT)-$(BUILD),$(version))
HAVE_POD := $(shell pod2text -h 2>/dev/null)
all: $(tool).1 cmd/$(tool).go buildlocal
%.1: %.pod
ifdef HAVE_POD
pod2man -c "User Commands" -r 1 -s 1 $*.pod > $*.1
endif
cmd/%.go: %.pod
ifdef HAVE_POD
echo "package cmd" > cmd/$*.go
echo >> cmd/$*.go
echo "var manpage = \`" >> cmd/$*.go
pod2text $*.pod >> cmd/$*.go
echo "\`" >> cmd/$*.go
echo "var usage = \`" >> cmd/$*.go
awk '/SYNOPS/{f=1;next} /DESCR/{f=0} f' $*.pod | sed 's/^ //' >> cmd/$*.go
echo "\`" >> cmd/$*.go
endif
buildlocal:
go build -ldflags "-X 'github.com/tlinden/tablizer/lib.VERSION=$(VERSION)'"
go build -ldflags "-X 'github.com/tlinden/tablizer/cfg.VERSION=$(VERSION)'"
release:
./mkrel.sh $(tool) $(version)
gh release create $(version) --generate-notes releases/*
install: buildlocal
install -d -o $(UID) -g $(GID) $(PREFIX)/bin
@@ -53,7 +63,39 @@ install: buildlocal
install -o $(UID) -g $(GID) -m 444 $(tool).1 $(PREFIX)/man/man1/
clean:
rm -rf $(tool) releases
rm -rf $(tool) releases coverage.out
test:
go test -v ./...
bash t/test.sh
singletest:
@echo "Call like this: ''make singletest TEST=TestPrepareColumns MOD=lib"
go test -run $(TEST) github.com/tlinden/tablizer/$(MOD)
cover-report:
go test ./... -cover -coverprofile=coverage.out
go tool cover -html=coverage.out
show-versions: buildlocal
@echo "### tablizer version:"
@./tablizer --version
@echo
@echo "### go module versions:"
@go list -m all
@echo
@echo "### go version used for building:"
@grep -m 1 go go.mod
goupdate:
go get -t -u=patch ./...
lint:
golangci-lint run
# keep til ireturn
lint-full:
golangci-lint run --enable-all --exclude-use-default --disable exhaustivestruct,exhaustruct,depguard,interfacer,deadcode,golint,structcheck,scopelint,varcheck,ifshort,maligned,nosnakecase,godot,funlen,gofumpt,cyclop,noctx,gochecknoglobals,paralleltest,forbidigo,gci,godox,goimports,ireturn,stylecheck,testpackage,mirror,nestif,revive,goerr113,gomnd
gocritic check -enableAll *.go

View File

@@ -1,5 +1,6 @@
[![Actions](https://github.com/tlinden/tablizer/actions/workflows/ci.yaml/badge.svg)](https://github.com/tlinden/tablizer/actions)
[![License](https://img.shields.io/badge/license-GPL-blue.svg)](https://github.com/tlinden/tablizer/blob/master/LICENSE)
[![Go Report Card](https://goreportcard.com/badge/github.com/tlinden/tablizer)](https://goreportcard.com/report/github.com/tlinden/tablizer)
## tablizer - Manipulate tabular output of other programs
@@ -20,13 +21,13 @@ But you're only interested in the NAME and STATUS columns. Here's how
to do this with tablizer:
```
% kubectl get pods | ./tablizer
% kubectl get pods | tablizer
NAME(1) READY(2) STATUS(3) RESTARTS(4) AGE(5)
repldepl-7bcd8d5b64-7zq4l 1/1 Running 1 (69m ago) 5h26m
repldepl-7bcd8d5b64-m48n8 1/1 Running 1 (69m ago) 5h26m
repldepl-7bcd8d5b64-q2bf4 1/1 Running 1 (69m ago) 5h26m
% kubectl get pods | ./tablizer -c 1,3
% kubectl get pods | tablizer -c 1,3
NAME(1) STATUS(3)
repldepl-7bcd8d5b64-7zq4l Running
repldepl-7bcd8d5b64-m48n8 Running
@@ -35,10 +36,11 @@ repldepl-7bcd8d5b64-q2bf4 Running
Another use case is when the tabular output is so wide that lines are
being broken and the whole output is completely distorted. In such a
case you can use the `-x` flag to get an output similar to `\x` in `psql`:
case you can use the `-o extended | -X` flag to get an output similar
to `\x` in `psql`:
```
% kubectl get pods | ./tablizer -x
% kubectl get pods | tablizer -X
NAME: repldepl-7bcd8d5b64-7zq4l
READY: 1/1
STATUS: Running
@@ -63,17 +65,33 @@ Tablize can read one or more files or - if none specified - from STDIN.
You can also specify a regex pattern to reduce the output:
```
% kubectl get pods | ./tablizer q2bf4
% kubectl get pods | tablizer q2bf4
NAME(1) READY(2) STATUS(3) RESTARTS(4) AGE(5)
repldepl-7bcd8d5b64-q2bf4 1/1 Running 1 (69m ago) 5h26m
```
Sometimes a filter regex is to broad and you wish to filter only on a
particular column. This is possible using `-F`:
```
% kubectl get pods | tablizer -n -Fname=2
NAME READY STATUS RESTARTS AGE
repldepl-7bcd8d5b64-q2bf4 1/1 Running 1 (69m ago) 5h26m
```
Here we filtered the `NAME` column for `2`, which would have matched
otherwise on all rows.
There are more output modes like org-mode (orgtbl) and markdown.
## Demo
[![asciicast](demo/tablizer-demo.gif)](https://asciinema.org/a/9FKc3HPnlg8D2X8otheleEa9t)
## Installation
There are multiple ways to install **tablizer**:
- Go to the [latest release page](https://github.com/muesli/mango/releases/latest),
- Go to the [latest release page](https://github.com/tlinden/tablizer/releases/latest),
locate the binary for your operating system and platform.
Download it and put it into some directory within your `$PATH` variable.
@@ -106,6 +124,10 @@ https://github.com/TLINDEN/tablizer/blob/main/tablizer.pod
Or if you cloned the repository you can read it this way (perl needs
to be installed though): `perldoc tablizer.pod`.
If you have the binary installed, you can also read the man page with
this command:
tablizer --man
## Getting help

1
TODO
View File

@@ -1 +0,0 @@

18
TODO.md Normal file
View File

@@ -0,0 +1,18 @@
## Fixes to be implemented
## Features to be implemented
- add comment support (csf.NewReader().Comment = '#')
- add --no-headers option
### Lisp Plugin Infrastructure using zygo
Hooks:
| Filter | Purpose | Args | Return |
|-----------|-------------------------------------------------------------|---------------------|--------|
| filter | include or exclude lines | row as hash | bool |
| process | do calculations with data, store results in global lisp env | whole dataset | nil |
| transpose | modify a cell | headername and cell | cell |
| append | add one or more rows to the dataset (use this to add stats) | nil | rows |

386
cfg/config.go Normal file
View File

@@ -0,0 +1,386 @@
/*
Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cfg
import (
"errors"
"fmt"
"os"
"regexp"
"strings"
"github.com/glycerine/zygomys/zygo"
"github.com/gookit/color"
"github.com/hashicorp/hcl/v2/hclsimple"
)
const DefaultSeparator string = `(\s\s+|\t)`
const Version string = "v1.2.0"
const MAXPARTS = 2
var DefaultLoadPath = os.Getenv("HOME") + "/.config/tablizer/lisp"
var DefaultConfigfile = os.Getenv("HOME") + "/.config/tablizer/config"
var VERSION string // maintained by -x
// public config, set via config file or using defaults
type Settings struct {
FG string `hcl:"FG"`
BG string `hcl:"BG"`
HighlightFG string `hcl:"HighlightFG"`
HighlightBG string `hcl:"HighlightBG"`
NoHighlightFG string `hcl:"NoHighlightFG"`
NoHighlightBG string `hcl:"NoHighlightBG"`
HighlightHdrFG string `hcl:"HighlightHdrFG"`
HighlightHdrBG string `hcl:"HighlightHdrBG"`
}
// internal config
type Config struct {
Debug bool
NoNumbering bool
NoHeaders bool
Columns string
UseColumns []int
Separator string
OutputMode int
InvertMatch bool
Pattern string
PatternR *regexp.Regexp
UseFuzzySearch bool
UseHighlight bool
SortMode string
SortDescending bool
SortByColumn int
/*
FIXME: make configurable somehow, config file or ENV
see https://github.com/gookit/color.
*/
ColorStyle color.Style
HighlightStyle color.Style
NoHighlightStyle color.Style
HighlightHdrStyle color.Style
NoColor bool
// special case: we use the config struct to transport the lisp
// env trough the program
Lisp *zygo.Zlisp
// a path containing lisp scripts to be loaded on startup
LispLoadPath string
// config file, optional
Configfile string
Settings Settings
// used for field filtering
Rawfilters []string
Filters map[string]*regexp.Regexp
}
// maps outputmode short flags to output mode, ie. -O => -o orgtbl
type Modeflag struct {
X bool
O bool
M bool
S bool
Y bool
A bool
C bool
}
// used for switching printers
const (
Extended = iota + 1
Orgtbl
Markdown
Shell
Yaml
CSV
ASCII
)
// various sort types
type Sortmode struct {
Numeric bool
Time bool
Age bool
}
// valid lisp hooks
var ValidHooks []string
// default color schemes
func (conf *Config) Colors() map[color.Level]map[string]color.Color {
colors := map[color.Level]map[string]color.Color{
color.Level16: {
"bg": color.BgGreen, "fg": color.FgWhite,
"hlbg": color.BgGray, "hlfg": color.FgWhite,
},
color.Level256: {
"bg": color.BgLightGreen, "fg": color.FgWhite,
"hlbg": color.BgLightBlue, "hlfg": color.FgWhite,
},
color.LevelRgb: {
"bg": color.BgLightGreen, "fg": color.FgWhite,
"hlbg": color.BgHiGreen, "hlfg": color.FgWhite,
"nohlbg": color.BgWhite, "nohlfg": color.FgLightGreen,
"hdrbg": color.BgBlue, "hdrfg": color.FgWhite,
},
}
if len(conf.Settings.BG) > 0 {
colors[color.Level16]["bg"] = ColorStringToBGColor(conf.Settings.BG)
colors[color.Level256]["bg"] = ColorStringToBGColor(conf.Settings.BG)
colors[color.LevelRgb]["bg"] = ColorStringToBGColor(conf.Settings.BG)
}
if len(conf.Settings.FG) > 0 {
colors[color.Level16]["fg"] = ColorStringToColor(conf.Settings.FG)
colors[color.Level256]["fg"] = ColorStringToColor(conf.Settings.FG)
colors[color.LevelRgb]["fg"] = ColorStringToColor(conf.Settings.FG)
}
if len(conf.Settings.HighlightBG) > 0 {
colors[color.Level16]["hlbg"] = ColorStringToBGColor(conf.Settings.HighlightBG)
colors[color.Level256]["hlbg"] = ColorStringToBGColor(conf.Settings.HighlightBG)
colors[color.LevelRgb]["hlbg"] = ColorStringToBGColor(conf.Settings.HighlightBG)
}
if len(conf.Settings.HighlightFG) > 0 {
colors[color.Level16]["hlfg"] = ColorStringToColor(conf.Settings.HighlightFG)
colors[color.Level256]["hlfg"] = ColorStringToColor(conf.Settings.HighlightFG)
colors[color.LevelRgb]["hlfg"] = ColorStringToColor(conf.Settings.HighlightFG)
}
if len(conf.Settings.NoHighlightBG) > 0 {
colors[color.Level16]["nohlbg"] = ColorStringToBGColor(conf.Settings.NoHighlightBG)
colors[color.Level256]["nohlbg"] = ColorStringToBGColor(conf.Settings.NoHighlightBG)
colors[color.LevelRgb]["nohlbg"] = ColorStringToBGColor(conf.Settings.NoHighlightBG)
}
if len(conf.Settings.NoHighlightFG) > 0 {
colors[color.Level16]["nohlfg"] = ColorStringToColor(conf.Settings.NoHighlightFG)
colors[color.Level256]["nohlfg"] = ColorStringToColor(conf.Settings.NoHighlightFG)
colors[color.LevelRgb]["nohlfg"] = ColorStringToColor(conf.Settings.NoHighlightFG)
}
if len(conf.Settings.HighlightHdrBG) > 0 {
colors[color.Level16]["hdrbg"] = ColorStringToBGColor(conf.Settings.HighlightHdrBG)
colors[color.Level256]["hdrbg"] = ColorStringToBGColor(conf.Settings.HighlightHdrBG)
colors[color.LevelRgb]["hdrbg"] = ColorStringToBGColor(conf.Settings.HighlightHdrBG)
}
if len(conf.Settings.HighlightHdrFG) > 0 {
colors[color.Level16]["hdrfg"] = ColorStringToColor(conf.Settings.HighlightHdrFG)
colors[color.Level256]["hdrfg"] = ColorStringToColor(conf.Settings.HighlightHdrFG)
colors[color.LevelRgb]["hdrfg"] = ColorStringToColor(conf.Settings.HighlightHdrFG)
}
return colors
}
// find supported color mode, modifies config based on constants
func (conf *Config) DetermineColormode() {
if !isTerminal(os.Stdout) {
color.Disable()
} else {
level := color.TermColorLevel()
colors := conf.Colors()
conf.ColorStyle = color.New(colors[level]["bg"], colors[level]["fg"])
conf.HighlightStyle = color.New(colors[level]["hlbg"], colors[level]["hlfg"])
conf.NoHighlightStyle = color.New(colors[level]["nohlbg"], colors[level]["nohlfg"])
conf.HighlightHdrStyle = color.New(colors[level]["hdrbg"], colors[level]["hdrfg"])
}
}
// Return true if current terminal is interactive
func isTerminal(f *os.File) bool {
o, _ := f.Stat()
return (o.Mode() & os.ModeCharDevice) == os.ModeCharDevice
}
// main program version
// generated version string, used by -v contains lib.Version on
//
// main branch, and lib.Version-$branch-$lastcommit-$date on
//
// development branch
func Getversion() string {
return fmt.Sprintf("This is tablizer version %s", VERSION)
}
func (conf *Config) PrepareSortFlags(flag Sortmode) {
switch {
case flag.Numeric:
conf.SortMode = "numeric"
case flag.Age:
conf.SortMode = "duration"
case flag.Time:
conf.SortMode = "time"
default:
conf.SortMode = "string"
}
}
func (conf *Config) PrepareModeFlags(flag Modeflag) {
switch {
case flag.X:
conf.OutputMode = Extended
case flag.O:
conf.OutputMode = Orgtbl
case flag.M:
conf.OutputMode = Markdown
case flag.S:
conf.OutputMode = Shell
case flag.Y:
conf.OutputMode = Yaml
case flag.C:
conf.OutputMode = CSV
default:
conf.OutputMode = ASCII
}
}
func (conf *Config) PrepareFilters() error {
conf.Filters = make(map[string]*regexp.Regexp, len(conf.Rawfilters))
for _, filter := range conf.Rawfilters {
parts := strings.Split(filter, "=")
if len(parts) != MAXPARTS {
return errors.New("filter field and value must be separated by =")
}
reg, err := regexp.Compile(parts[1])
if err != nil {
return fmt.Errorf("failed to compile filter regex for field %s: %w",
parts[0], err)
}
conf.Filters[strings.ToLower(parts[0])] = reg
}
return nil
}
func (conf *Config) CheckEnv() {
// check for environment vars, command line flags have precedence,
// NO_COLOR is being checked by the color module itself.
if !conf.NoNumbering {
_, set := os.LookupEnv("T_NO_HEADER_NUMBERING")
if set {
conf.NoNumbering = true
}
}
if len(conf.Columns) == 0 {
cols := os.Getenv("T_COLUMNS")
if len(cols) > 1 {
conf.Columns = cols
}
}
}
func (conf *Config) ApplyDefaults() {
// mode specific defaults
if conf.OutputMode == Yaml || conf.OutputMode == CSV {
conf.NoNumbering = true
}
ValidHooks = []string{"filter", "process", "transpose", "append"}
}
func (conf *Config) PreparePattern(pattern string) error {
PatternR, err := regexp.Compile(pattern)
if err != nil {
return fmt.Errorf("regexp pattern %s is invalid: %w", conf.Pattern, err)
}
conf.PatternR = PatternR
conf.Pattern = pattern
return nil
}
// Parse config file. Ignore if the file doesn't exist but return an
// error if it exists but fails to read or parse
func (conf *Config) ParseConfigfile() error {
path, err := os.Stat(conf.Configfile)
if os.IsNotExist(err) || path.IsDir() {
// ignore non-existent or dirs
return nil
}
configstring, err := os.ReadFile(path.Name())
if err != nil {
return fmt.Errorf("failed to read config file %s: %w", path.Name(), err)
}
err = hclsimple.Decode(
path.Name(),
configstring,
nil,
&conf.Settings)
if err != nil {
return fmt.Errorf("failed to load configuration file %s: %w",
path.Name(), err)
}
return nil
}
// translate color string to internal color value
func ColorStringToColor(colorname string) color.Color {
for name, color := range color.FgColors {
if name == colorname {
return color
}
}
for name, color := range color.ExFgColors {
if name == colorname {
return color
}
}
return color.Normal
}
// same, for background colors
func ColorStringToBGColor(colorname string) color.Color {
for name, color := range color.BgColors {
if name == colorname {
return color
}
}
for name, color := range color.ExBgColors {
if name == colorname {
return color
}
}
return color.Normal
}

104
cfg/config_test.go Normal file
View File

@@ -0,0 +1,104 @@
/*
Copyright © 2022 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package cfg
import (
"fmt"
// "reflect"
"testing"
)
func TestPrepareModeFlags(t *testing.T) {
var tests = []struct {
flag Modeflag
expect int // output (constant enum)
}{
// short commandline flags like -M
{Modeflag{X: true}, Extended},
{Modeflag{S: true}, Shell},
{Modeflag{O: true}, Orgtbl},
{Modeflag{Y: true}, Yaml},
{Modeflag{M: true}, Markdown},
{Modeflag{}, ASCII},
}
// FIXME: use a map for easier printing
for _, testdata := range tests {
testname := fmt.Sprintf("PrepareModeFlags-expect-%d", testdata.expect)
t.Run(testname, func(t *testing.T) {
conf := Config{}
conf.PrepareModeFlags(testdata.flag)
if conf.OutputMode != testdata.expect {
t.Errorf("got: %d, expect: %d", conf.OutputMode, testdata.expect)
}
})
}
}
func TestPrepareSortFlags(t *testing.T) {
var tests = []struct {
flag Sortmode
expect string // output
}{
// short commandline flags like -M
{Sortmode{Numeric: true}, "numeric"},
{Sortmode{Age: true}, "duration"},
{Sortmode{Time: true}, "time"},
{Sortmode{}, "string"},
}
for _, testdata := range tests {
testname := fmt.Sprintf("PrepareSortFlags-expect-%s", testdata.expect)
t.Run(testname, func(t *testing.T) {
conf := Config{}
conf.PrepareSortFlags(testdata.flag)
if conf.SortMode != testdata.expect {
t.Errorf("got: %s, expect: %s", conf.SortMode, testdata.expect)
}
})
}
}
func TestPreparePattern(t *testing.T) {
var tests = []struct {
pattern string
wanterr bool
}{
{"[A-Z]+", false},
{"[a-z", true},
}
for _, testdata := range tests {
testname := fmt.Sprintf("PreparePattern-pattern-%s-wanterr-%t",
testdata.pattern, testdata.wanterr)
t.Run(testname, func(t *testing.T) {
conf := Config{}
err := conf.PreparePattern(testdata.pattern)
if err != nil {
if !testdata.wanterr {
t.Errorf("PreparePattern returned error: %s", err)
}
}
})
}
}

View File

@@ -1,5 +1,5 @@
/*
Copyright © 2022 Thomas von Dein
Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,25 +18,27 @@ package cmd
import (
"bytes"
"github.com/tlinden/tablizer/lib"
"errors"
"fmt"
"github.com/spf13/cobra"
"log"
"os"
"os/exec"
"strings"
"github.com/spf13/cobra"
"github.com/tlinden/tablizer/cfg"
"github.com/tlinden/tablizer/lib"
)
var helpCmd = &cobra.Command{
Use: "help",
Short: "Show documentation",
Run: func(cmd *cobra.Command, args []string) {
func man() {
man := exec.Command("less", "-")
var b bytes.Buffer
b.Write([]byte(manpage))
var buffer bytes.Buffer
buffer.Write([]byte(manpage))
man.Stdout = os.Stdout
man.Stdin = &b
man.Stdin = &buffer
man.Stderr = os.Stderr
err := man.Run()
@@ -44,65 +46,157 @@ var helpCmd = &cobra.Command{
if err != nil {
log.Fatal(err)
}
},
}
var rootCmd = &cobra.Command{
func completion(cmd *cobra.Command, mode string) error {
switch mode {
case "bash":
return cmd.Root().GenBashCompletion(os.Stdout)
case "zsh":
return cmd.Root().GenZshCompletion(os.Stdout)
case "fish":
return cmd.Root().GenFishCompletion(os.Stdout, true)
case "powershell":
return cmd.Root().GenPowerShellCompletionWithDesc(os.Stdout)
default:
return errors.New("invalid shell parameter! Valid ones: bash|zsh|fish|powershell")
}
}
func Execute() {
var (
conf cfg.Config
ShowManual bool
ShowVersion bool
ShowCompletion string
modeflag cfg.Modeflag
sortmode cfg.Sortmode
)
var rootCmd = &cobra.Command{
Use: "tablizer [regex] [file, ...]",
Short: "[Re-]tabularize tabular data",
Long: `Manipulate tabular output of other programs`,
RunE: func(cmd *cobra.Command, args []string) error {
if lib.ShowVersion {
fmt.Printf("This is tablizer version %s\n", lib.VERSION)
if ShowVersion {
fmt.Println(cfg.Getversion())
return nil
}
err := lib.PrepareColumns()
if ShowManual {
man()
return nil
}
if len(ShowCompletion) > 0 {
return completion(cmd, ShowCompletion)
}
// Setup
err := conf.ParseConfigfile()
if err != nil {
return err
}
err = lib.PrepareModeFlags()
conf.CheckEnv()
conf.PrepareModeFlags(modeflag)
conf.PrepareSortFlags(sortmode)
if err = conf.PrepareFilters(); err != nil {
return err
}
conf.DetermineColormode()
conf.ApplyDefaults()
// setup lisp env, load plugins etc
err = lib.SetupLisp(&conf)
if err != nil {
return err
}
return lib.ProcessFiles(args)
// actual execution starts here
return lib.ProcessFiles(&conf, args)
},
}
}
// options
rootCmd.PersistentFlags().BoolVarP(&conf.Debug, "debug", "d", false,
"Enable debugging")
rootCmd.PersistentFlags().BoolVarP(&conf.NoNumbering, "no-numbering", "n", false,
"Disable header numbering")
rootCmd.PersistentFlags().BoolVarP(&conf.NoHeaders, "no-headers", "H", false,
"Disable header display")
rootCmd.PersistentFlags().BoolVarP(&conf.NoColor, "no-color", "N", false,
"Disable pattern highlighting")
rootCmd.PersistentFlags().BoolVarP(&ShowVersion, "version", "V", false,
"Print program version")
rootCmd.PersistentFlags().BoolVarP(&conf.InvertMatch, "invert-match", "v", false,
"select non-matching rows")
rootCmd.PersistentFlags().BoolVarP(&ShowManual, "man", "m", false,
"Display manual page")
rootCmd.PersistentFlags().BoolVarP(&conf.UseFuzzySearch, "fuzzy", "z", false,
"Use fuzzy searching")
rootCmd.PersistentFlags().BoolVarP(&conf.UseHighlight, "highlight-lines", "L", false,
"Use alternating background colors")
rootCmd.PersistentFlags().StringVarP(&ShowCompletion, "completion", "", "",
"Display completion code")
rootCmd.PersistentFlags().StringVarP(&conf.Separator, "separator", "s", cfg.DefaultSeparator,
"Custom field separator")
rootCmd.PersistentFlags().StringVarP(&conf.Columns, "columns", "c", "",
"Only show the speficied columns (separated by ,)")
// sort options
rootCmd.PersistentFlags().IntVarP(&conf.SortByColumn, "sort-by", "k", 0,
"Sort by column (default: 1)")
// sort mode, only 1 allowed
rootCmd.PersistentFlags().BoolVarP(&conf.SortDescending, "sort-desc", "D", false,
"Sort in descending order (default: ascending)")
rootCmd.PersistentFlags().BoolVarP(&sortmode.Numeric, "sort-numeric", "i", false,
"sort according to string numerical value")
rootCmd.PersistentFlags().BoolVarP(&sortmode.Time, "sort-time", "t", false,
"sort according to time string")
rootCmd.PersistentFlags().BoolVarP(&sortmode.Age, "sort-age", "a", false,
"sort according to age (duration) string")
rootCmd.MarkFlagsMutuallyExclusive("sort-numeric", "sort-time",
"sort-age")
// output flags, only 1 allowed
rootCmd.PersistentFlags().BoolVarP(&modeflag.X, "extended", "X", false,
"Enable extended output")
rootCmd.PersistentFlags().BoolVarP(&modeflag.M, "markdown", "M", false,
"Enable markdown table output")
rootCmd.PersistentFlags().BoolVarP(&modeflag.O, "orgtbl", "O", false,
"Enable org-mode table output")
rootCmd.PersistentFlags().BoolVarP(&modeflag.S, "shell", "S", false,
"Enable shell mode output")
rootCmd.PersistentFlags().BoolVarP(&modeflag.Y, "yaml", "Y", false,
"Enable yaml output")
rootCmd.PersistentFlags().BoolVarP(&modeflag.C, "csv", "C", false,
"Enable CSV output")
rootCmd.PersistentFlags().BoolVarP(&modeflag.A, "ascii", "A", false,
"Enable ASCII output (default)")
rootCmd.MarkFlagsMutuallyExclusive("extended", "markdown", "orgtbl",
"shell", "yaml", "csv")
// lisp options
rootCmd.PersistentFlags().StringVarP(&conf.LispLoadPath, "load-path", "l", cfg.DefaultLoadPath,
"Load path for lisp plugins (expects *.zy files)")
// config file
rootCmd.PersistentFlags().StringVarP(&conf.Configfile, "config", "f", cfg.DefaultConfigfile,
"config file (default: ~/.config/tablizer/config)")
// filters
rootCmd.PersistentFlags().StringArrayVarP(&conf.Rawfilters, "filter", "F", nil, "Filter by field (field=regexp)")
rootCmd.SetUsageTemplate(strings.TrimSpace(usage) + "\n")
func Execute() {
err := rootCmd.Execute()
if err != nil {
os.Exit(1)
}
}
func init() {
rootCmd.PersistentFlags().BoolVarP(&lib.Debug, "debug", "d", false, "Enable debugging")
rootCmd.PersistentFlags().BoolVarP(&lib.NoNumbering, "no-numbering", "n", false, "Disable header numbering")
rootCmd.PersistentFlags().BoolVarP(&lib.ShowVersion, "version", "v", false, "Print program version")
rootCmd.PersistentFlags().StringVarP(&lib.Separator, "separator", "s", "", "Custom field separator")
rootCmd.PersistentFlags().StringVarP(&lib.Columns, "columns", "c", "", "Only show the speficied columns (separated by ,)")
// output flags, only 1 allowed, hidden, since just short cuts
rootCmd.PersistentFlags().BoolVarP(&lib.OutflagExtended, "extended", "X", false, "Enable extended output")
rootCmd.PersistentFlags().BoolVarP(&lib.OutflagMarkdown, "markdown", "M", false, "Enable markdown table output")
rootCmd.PersistentFlags().BoolVarP(&lib.OutflagOrgtable, "orgtbl", "O", false, "Enable org-mode table output")
rootCmd.PersistentFlags().BoolVarP(&lib.OutflagShell, "shell", "S", false, "Enable shell mode output")
rootCmd.MarkFlagsMutuallyExclusive("extended", "markdown", "orgtbl", "shell")
rootCmd.Flags().MarkHidden("extended")
rootCmd.Flags().MarkHidden("orgtbl")
rootCmd.Flags().MarkHidden("markdown")
rootCmd.Flags().MarkHidden("shell")
// same thing but more common, takes precedence over above group
rootCmd.PersistentFlags().StringVarP(&lib.OutputMode, "output", "o", "", "Output mode - one of: orgtbl, markdown, extended, shell, ascii(default)")
rootCmd.AddCommand(helpCmd)
rootCmd.SetHelpCommand(&cobra.Command{
Use: "no-help",
Hidden: true,
})
}

View File

@@ -1,4 +1,5 @@
package cmd
var manpage = `
NAME
tablizer - Manipulate tabular output of other programs
@@ -7,25 +8,48 @@ SYNOPSIS
Usage:
tablizer [regex] [file, ...] [flags]
Flags:
Operational Flags:
-c, --columns string Only show the speficied columns (separated by ,)
-d, --debug Enable debugging
-h, --help help for tablizer
-v, --invert-match select non-matching rows
-n, --no-numbering Disable header numbering
-o, --output string Output mode - one of: orgtbl, markdown, extended, ascii(default)
-N, --no-color Disable pattern highlighting
-H, --no-headers Disable headers display
-s, --separator string Custom field separator
-k, --sort-by int Sort by column (default: 1)
-z, --fuzzy Use fuzzy search [experimental]
-F, --filter field=reg Filter given field with regex, can be used multiple times
Output Flags (mutually exclusive):
-X, --extended Enable extended output
-M, --markdown Enable markdown table output
-O, --orgtbl Enable org-mode table output
-s, --separator string Custom field separator
-v, --version Print program version
-S, --shell Enable shell evaluable output
-Y, --yaml Enable yaml output
-C, --csv Enable CSV output
-A, --ascii Default output mode, ascii tabular
-L, --hightlight-lines Use alternating background colors for tables
Sort Mode Flags (mutually exclusive):
-a, --sort-age sort according to age (duration) string
-D, --sort-desc Sort in descending order (default: ascending)
-i, --sort-numeric sort according to string numerical value
-t, --sort-time sort according to time string
Other Flags:
--completion <shell> Generate the autocompletion script for <shell>
-f, --config <file> Configuration file (default: ~/.config/tablizer/config)
-d, --debug Enable debugging
-h, --help help for tablizer
-m, --man Display manual page
-V, --version Print program version
DESCRIPTION
Many programs generate tabular output. But sometimes you need to
post-process these tables, you may need to remove one or more columns or
you may want to filter for some pattern or you may need the output in
another program and need to parse it somehow. Standard unix tools such
as awk(1), grep(1) or column(1) may help, but sometimes it's a tedious
business.
you may want to filter for some pattern (See PATTERNS) or you may need
the output in another program and need to parse it somehow. Standard
unix tools such as awk(1), grep(1) or column(1) may help, but sometimes
it's a tedious business.
Let's take the output of the tool kubectl. It contains cells with
withespace and they do not separate columns by TAB characters. This is
@@ -33,12 +57,13 @@ DESCRIPTION
You can use tablizer to do these and more things.
tablizer analyses the header fiels of a table, registers the column
tablizer analyses the header fields of a table, registers the column
positions of each header field and separates columns by those positions.
Without any options it reads its input from "STDIN", but you can also
specify a file as a parameter. If you want to reduce the output by some
regular expression, just specify it as its first parameters. Hence:
regular expression, just specify it as its first parameter. You may also
use the -v option to exclude all rows which match the pattern. Hence:
# read from STDIN
kubectl get pods | tablizer
@@ -58,7 +83,7 @@ DESCRIPTION
NAME(1) READY(2) STATUS(3) RESTARTS(4) AGE(5)
These numbers denote the column and you can use them to specify which
columns you want to have in your output:
columns you want to have in your output (see COLUMNS:
kubectl get pods | tablizer -c1,3
@@ -67,13 +92,103 @@ DESCRIPTION
The numbering can be suppressed by using the -n option.
Finally the -d option enables debugging output which is mostly usefull
By default tablizer shows a header containing the names of each column.
This can be disabled using the -H option. Be aware that this only
affects tabular output modes. Shell, Extended, Yaml and CSV output modes
always use the column names.
By default, if a pattern has been speficied, matches will be
highlighted. You can disable this behavior with the -N option.
Use the -k option to specify by which column to sort the tabular data
(as in GNU sort(1)). The default sort column is the first one. To
disable sorting at all, supply 0 (Zero) to -k. The default sort order is
ascending. You can change this to descending order using the option -D.
The default sort order is by string, but there are other sort modes:
-a --sort-age
Sorts duration strings like "1d4h32m51s".
-i --sort-numeric
Sorts numeric fields.
-t --sort-time
Sorts timestamps.
Finally the -d option enables debugging output which is mostly useful
for the developer.
PATTERNS AND FILTERING
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is PCRE compatible, refer to the syntax cheat sheet
here: <https://github.com/google/re2/wiki/Syntax>. If you want to read a
more comprehensive documentation about the topic and have perl installed
you can read it with:
perldoc perlre
Or read it online: <https://perldoc.perl.org/perlre>.
A note on modifiers: the regexp engine used in tablizer uses another
modifier syntax:
(?MODIFIER)
The most important modifiers are:
"i" ignore case "m" multiline mode "s" single line mode
Example for a case insensitive search:
kubectl get pods -A | tablizer "(?i)account"
You can use the experimental fuzzy search feature by providing the
option -z, in which case the pattern is regarded as a fuzzy search term,
not a regexp.
Sometimes you want to filter by one or more columns. You can do that
using the -F option. The option can be specified multiple times and has
the following format:
fieldname=regexp
Fieldnames (== columns headers) are case insensitive.
If you specify more than one filter, both filters have to match (AND
operation).
If the option -v is specified, the filtering is inverted.
COLUMNS
The parameter -c can be used to specify, which columns to display. By
default tablizer numerizes the header names and these numbers can be
used to specify which header to display, see example above.
However, beside numbers, you can also use regular expressions with -c,
also separated by comma. And you can mix column numbers with regexps.
Lets take this table:
PID TTY TIME CMD
14001 pts/0 00:00:00 bash
42871 pts/0 00:00:00 ps
42872 pts/0 00:00:00 sed
We want to see only the CMD column and use a regex for this:
ps | tablizer -s '\s+' -c C
CMD(4)
bash
ps
tablizer
sed
where "C" is our regexp which matches CMD.
OUTPUT MODES
There might be cases when the tabular output of a program is way too
large for your current terminal but you still need to see every column.
In such cases the -o extended or -X option can be usefull which enables
In such cases the -o extended or -X option can be useful which enables
*extended mode*. In this mode, each row will be printed vertically,
header left, value right, aligned by the field widths. Here's an
example:
@@ -100,7 +215,93 @@ DESCRIPTION
Beside normal ascii mode (the default) and extended mode there are more
output modes available: orgtbl which prints an Emacs org-mode table and
markdown which prints a Markdown table.
markdown which prints a Markdown table, yaml, which prints yaml encoding
and CSV mode, which prints a comma separated value file.
ENVIRONMENT VARIABLES
tablizer supports certain environment variables which use can use to
influence program behavior. Commandline flags have always precedence
over environment variables.
<T_NO_HEADER_NUMBERING> - disable numbering of header fields, like -n.
<T_COLUMNS> - comma separated list of columns to output, like -c
<NO_COLORS> - disable colorization of matches, like -N
COMPLETION
Shell completion for command line options can be enabled by using the
--completion flag. The required parameter is the name of your shell.
Currently supported are: bash, zsh, fish and powershell.
Detailed instructions:
Bash:
source <(tablizer --completion bash)
To load completions for each session, execute once:
# Linux:
$ tablizer --completion bash > /etc/bash_completion.d/tablizer
# macOS:
$ tablizer --completion bash > $(brew --prefix)/etc/bash_completion.d/tablizer
Zsh:
If shell completion is not already enabled in your environment, you
will need to enable it. You can execute the following once:
echo "autoload -U compinit; compinit" >> ~/.zshrc
To load completions for each session, execute once:
$ tablizer --completion zsh > "${fpath[1]}/_tablizer"
You will need to start a new shell for this setup to take effect.
fish:
tablizer --completion fish | source
To load completions for each session, execute once:
tablizer --completion fish > ~/.config/fish/completions/tablizer.fish
PowerShell:
tablizer --completion powershell | Out-String | Invoke-Expression
To load completions for every new session, run:
tablizer --completion powershell > tablizer.ps1
and source this file from your PowerShell profile.
CONFIGURATION AND COLORS
YOu can put certain configuration values into a configuration file in
HCL format. By default tablizer looks for
"$HOME/.config/tablizer/config", but you can provide one using the
parameter "-f".
In the configuration the following variables can be defined:
BG = "lightGreen"
FG = "white"
HighlightBG = "lightGreen"
HighlightFG = "white"
NoHighlightBG = "white"
NoHighlightFG = "lightGreen"
HighlightHdrBG = "red"
HighlightHdrFG = "white"
The following color definitions are available:
black, blue, cyan, darkGray, default, green, lightBlue, lightCyan,
lightGreen, lightMagenta, lightRed, lightWhite, lightYellow, magenta,
red, white, yellow
The Variables FG and BG are being used to highlight matches. The other
*FG and *BG variables are for colored table output (enabled with the
"-L" parameter).
Colorization can be turned off completely either by setting the
parameter "-N" or the environment variable NO_COLOR to a true value.
BUGS
In order to report a bug, unexpected behavior, feature requests or to
@@ -111,9 +312,9 @@ LICENSE
This software is licensed under the GNU GENERAL PUBLIC LICENSE version
3.
Copyright (c) 2022 by Thomas von Dein
Copyright (c) 2022-2024 by Thomas von Dein
This software uses the following GO libraries:
This software uses the following GO modules:
repr (https://github.com/alecthomas/repr)
Released under the MIT License, Copyright (c) 2016 Alec Thomas
@@ -122,7 +323,62 @@ LICENSE
Released under the Apache 2.0 license, Copyright 2013-2022 The Cobra
Authors
dateparse (github.com/araddon/dateparse)
Released under the MIT License, Copyright (c) 2015-2017 Aaron Raddon
color (github.com/gookit/color)
Released under the MIT License, Copyright (c) 2016 inhere
tablewriter (github.com/olekukonko/tablewriter)
Released under the MIT License, Copyright (c) 201 by Oleku Konko
yaml (gopkg.in/yaml.v3)
Released under the MIT License, Copyright (c) 2006-2011 Kirill
Simonov
AUTHORS
Thomas von Dein tom AT vondein DOT org
`
var usage = `
Usage:
tablizer [regex] [file, ...] [flags]
Operational Flags:
-c, --columns string Only show the speficied columns (separated by ,)
-v, --invert-match select non-matching rows
-n, --no-numbering Disable header numbering
-N, --no-color Disable pattern highlighting
-H, --no-headers Disable headers display
-s, --separator string Custom field separator
-k, --sort-by int Sort by column (default: 1)
-z, --fuzzy Use fuzzy search [experimental]
-F, --filter field=reg Filter given field with regex, can be used multiple times
Output Flags (mutually exclusive):
-X, --extended Enable extended output
-M, --markdown Enable markdown table output
-O, --orgtbl Enable org-mode table output
-S, --shell Enable shell evaluable output
-Y, --yaml Enable yaml output
-C, --csv Enable CSV output
-A, --ascii Default output mode, ascii tabular
-L, --hightlight-lines Use alternating background colors for tables
Sort Mode Flags (mutually exclusive):
-a, --sort-age sort according to age (duration) string
-D, --sort-desc Sort in descending order (default: ascending)
-i, --sort-numeric sort according to string numerical value
-t, --sort-time sort according to time string
Other Flags:
--completion <shell> Generate the autocompletion script for <shell>
-f, --config <file> Configuration file (default: ~/.config/tablizer/config)
-d, --debug Enable debugging
-h, --help help for tablizer
-m, --man Display manual page
-V, --version Print program version
`

12
config.hcl Normal file
View File

@@ -0,0 +1,12 @@
# supported colors:
# black, blue, cyan, darkGray, default, green, lightBlue, lightCyan,
# lightGreen, lightMagenta, lightRed, lightWhite, lightYellow,
# magenta, red, white, yellow
BG = "lightGreen"
FG = "white"
HighlightBG = "lightGreen"
HighlightFG = "white"
NoHighlightBG = "white"
NoHighlightFG = "lightGreen"
HighlightHdrBG = "red"
HighlightHdrFG = "white"

3
demo/Makefile Normal file
View File

@@ -0,0 +1,3 @@
all:
LC_ALL=en_US.UTF-8 asciinema rec --cols 50 --row 30 -c ./demo.sh --overwrite tmp.cast
agg tmp.cast tmp.gif

31
demo/demo.sh Executable file
View File

@@ -0,0 +1,31 @@
#!/bin/bash
prompt() {
if test -n "$1"; then
echo
echo -n "% $*"
sleep 1
echo
$*
echo
echo -n "% "
else
echo -n "% "
fi
}
PATH=..:$PATH
clear
while IFS=$'\t' read -r flags table msg source _; do
echo "#"
echo "# source tabular data:"
cat $table
echo
echo "#"
echo "# $msg:"
prompt "tablizer $flags $table"
sleep 4
clear
done < <(yq -r tables.yaml \
| yq -r '.tables[] | [.flags, .table, .msg, .source] | @tsv')

4
demo/table.demo1 Normal file
View File

@@ -0,0 +1,4 @@
NAME DURATION COUNT WHEN
beta 1d10h5m1s 33 3/1/2014
alpha 4h35m 170 2013-Feb-03
ceta 33d12h 9 06/Jan/2008 15:04:05 -0700

3
demo/table.demo2 Normal file
View File

@@ -0,0 +1,3 @@
PID TTY TIME CMD
30912 pts/0 00:00:00 bash
49526 pts/0 00:00:00 ps

54
demo/tables.yaml Normal file
View File

@@ -0,0 +1,54 @@
tables:
# OUTPUTS
- flags: -A
table: table.demo1
msg: default output mode
- flags: -O
table: table.demo1
msg: orgmode output mode
- flags: -M
table: table.demo1
msg: markdown output mode
- flags: -S
table: table.demo1
msg: shell output mode
- flags: -X
table: table.demo1
msg: extended output mode
- flags: -Y
table: table.demo1
msg: yaml output mode
- flags: -C
table: table.demo1
msg: CSV output mode
# SORTS
- flags: -A -k 3
table: table.demo1
msg: sort by column 3
- flags: -A -k 4 -t
table: table.demo1
msg: sort by column 4 and sort type time
- flags: -A -k 2 -a
table: table.demo1
msg: sort by column 2 and sort type duration
# REDUCE
- flags: -A -c 1,3
table: table.demo1
msg: only display column 1 and 3
- flags: -A -c AM,RA
table: table.demo1
msg: only display columns matching /(RA|AM)/
- flags: -X -c 1,3
table: table.demo1
msg: only display column 1 and 3 in extended mode
# SEARCH
- flags: /20 -A
table: table.demo1
msg: only show rows matching /20
- flags: /20 -A -v
table: table.demo1
msg: only show rows NOT matching /20

119
demo/tablizer-demo.cast Normal file
View File

@@ -0,0 +1,119 @@
{"version": 2, "width": 80, "height": 25, "timestamp": 1666890777, "env": {"SHELL": "/bin/bash", "TERM": "xterm-256color"}}
[0.004618, "o", "\u001b[H\u001b[2J\u001b[3J"]
[0.010297, "o", "#\r\n# source tabular data:\r\n"]
[0.010898, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[0.011125, "o", "\r\n#\r\n"]
[0.011177, "o", "# default output mode:\r\n"]
[0.011219, "o", "\r\n% tablizer -A table.demo1"]
[1.011851, "o", "\r\n"]
[1.013635, "o", "NAME(1)\tDURATION(2)\tCOUNT(3)\tWHEN(4) \r\nbeta \t1d10h5m1s \t33 \t3/1/2014 \t\r\nalpha \t4h35m \t170 \t2013-Feb-03 \t\r\nceta \t33d12h \t9 \t06/Jan/2008 15:04:05 -0700\t\r\n"]
[1.014021, "o", "\r\n% "]
[5.015241, "o", "\u001b[H\u001b[2J\u001b[3J"]
[5.015339, "o", "#\r\n# source tabular data:\r\n"]
[5.015688, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[5.015776, "o", "\r\n#\r\n# orgmode output mode:\r\n\r\n% tablizer -O table.demo1"]
[6.016322, "o", "\r\n"]
[6.01823, "o", "+---------+-------------+----------+----------------------------+\r\n| NAME(1) | DURATION(2) | COUNT(3) | WHEN(4) |\r\n+---------+-------------+----------+----------------------------+\r\n| beta | 1d10h5m1s | 33 | 3/1/2014 |\r\n| alpha | 4h35m | 170 | 2013-Feb-03 |\r\n| ceta | 33d12h | 9 | 06/Jan/2008 15:04:05 -0700 |\r\n+---------+-------------+----------+----------------------------+\r\n"]
[6.018497, "o", "\r\n% "]
[10.020014, "o", "\u001b[H\u001b[2J\u001b[3J"]
[10.020112, "o", "#\r\n# source tabular data:\r\n"]
[10.020573, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[10.020643, "o", "\r\n#\r\n"]
[10.02068, "o", "# markdown output mode:\r\n\r\n% tablizer -M table.demo1"]
[11.021559, "o", "\r\n"]
[11.023551, "o", "| NAME(1) | DURATION(2) | COUNT(3) | WHEN(4) |\r\n|---------|-------------|----------|----------------------------|\r\n| beta | 1d10h5m1s | 33 | 3/1/2014 |\r\n| alpha | 4h35m | 170 | 2013-Feb-03 |\r\n| ceta | 33d12h | 9 | 06/Jan/2008 15:04:05 -0700 |\r\n"]
[11.023838, "o", "\r\n% "]
[15.025244, "o", "\u001b[H\u001b[2J\u001b[3J"]
[15.025345, "o", "#\r\n# source tabular data:\r\n"]
[15.025829, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[15.025915, "o", "\r\n#\r\n# shell output mode:\r\n"]
[15.025931, "o", "\r\n"]
[15.025948, "o", "% tablizer -S table.demo1"]
[16.026714, "o", "\r\n"]
[16.028606, "o", "NAME(1)=\"beta\" DURATION(2)=\"1d10h5m1s\" COUNT(3)=\"33\" WHEN(4)=\"3/1/2014\"\r\nNAME(1)=\"alpha\" DURATION(2)=\"4h35m\" COUNT(3)=\"170\" WHEN(4)=\"2013-Feb-03\"\r\nNAME(1)=\"ceta\" DURATION(2)=\"33d12h\" COUNT(3)=\"9\" WHEN(4)=\"06/Jan/2008 15:04:05 -0700\"\r\n"]
[16.029144, "o", "\r\n% "]
[20.030593, "o", "\u001b[H\u001b[2J\u001b[3J"]
[20.030706, "o", "#\r\n# source tabular data:\r\n"]
[20.03121, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[20.031277, "o", "\r\n#\r\n# extended output mode:\r\n"]
[20.031327, "o", "\r\n% tablizer -X table.demo1"]
[21.032053, "o", "\r\n"]
[21.033787, "o", " NAME(1): beta\r\nDURATION(2): 1d10h5m1s\r\n COUNT(3): 33\r\n WHEN(4): 3/1/2014\r\n\r\n NAME(1): alpha\r\nDURATION(2): 4h35m\r\n COUNT(3): 170\r\n WHEN(4): 2013-Feb-03\r\n\r\n NAME(1): ceta\r\nDURATION(2): 33d12h\r\n COUNT(3): 9\r\n WHEN(4): 06/Jan/2008 15:04:05 -0700\r\n\r\n"]
[21.034132, "o", "\r\n% "]
[25.035531, "o", "\u001b[H\u001b[2J\u001b[3J"]
[25.035585, "o", "#\r\n"]
[25.035681, "o", "# source tabular data:\r\n"]
[25.036179, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[25.036232, "o", "\r\n#\r\n"]
[25.036274, "o", "# yaml output mode:\r\n\r\n% tablizer -Y table.demo1"]
[26.036928, "o", "\r\n"]
[26.038674, "o", "entries:\r\n - count: 33\r\n duration: \"1d10h5m1s\"\r\n name: \"beta\"\r\n when: \"3/1/2014\"\r\n - count: 170\r\n duration: \"4h35m\"\r\n name: \"alpha\"\r\n when: \"2013-Feb-03\"\r\n - count: 9\r\n duration: \"33d12h\"\r\n name: \"ceta\"\r\n when: \"06/Jan/2008 15:04:05 -0700\"\r\n"]
[26.038975, "o", "\r\n% "]
[30.040539, "o", "\u001b[H\u001b[2J\u001b[3J"]
[30.040659, "o", "#\r\n# source tabular data:\r\n"]
[30.041167, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[30.041246, "o", "\r\n#\r\n# CSV output mode:\r\n\r\n% tablizer -C table.demo1"]
[31.042088, "o", "\r\n"]
[31.043721, "o", "NAME,DURATION,COUNT,WHEN\r\nbeta,1d10h5m1s,33,3/1/2014\r\nalpha,4h35m,170,2013-Feb-03\r\nceta,33d12h,9,06/Jan/2008 15:04:05 -0700\r\n"]
[31.043997, "o", "\r\n% "]
[35.045523, "o", "\u001b[H\u001b[2J\u001b[3J"]
[35.04563, "o", "#\r\n# source tabular data:\r\n"]
[35.046209, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[35.046275, "o", "\r\n#\r\n# sort by column 3:\r\n\r\n% tablizer -A -k 3 table.demo1"]
[36.047083, "o", "\r\n"]
[36.048793, "o", "NAME(1)\tDURATION(2)\tCOUNT(3)\tWHEN(4) \r\nalpha \t4h35m \t170 \t2013-Feb-03 \t\r\nbeta \t1d10h5m1s \t33 \t3/1/2014 \t\r\nceta \t33d12h \t9 \t06/Jan/2008 15:04:05 -0700\t\r\n"]
[36.049077, "o", "\r\n% "]
[40.050739, "o", "\u001b[H\u001b[2J\u001b[3J"]
[40.050925, "o", "#\r\n# source tabular data:\r\n"]
[40.051481, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[40.051671, "o", "\r\n#\r\n# sort by column 4 and sort type time:\r\n\r\n% tablizer -A -k 4 -t table.demo1"]
[41.052486, "o", "\r\n"]
[41.05454, "o", "NAME(1)\tDURATION(2)\tCOUNT(3)\tWHEN(4) \r\nceta \t33d12h \t9 \t06/Jan/2008 15:04:05 -0700\t\r\nalpha \t4h35m \t170 \t2013-Feb-03 \t\r\nbeta \t1d10h5m1s \t33 \t3/1/2014 \t\r\n"]
[41.054864, "o", "\r\n% "]
[45.056297, "o", "\u001b[H\u001b[2J\u001b[3J"]
[45.056405, "o", "#\r\n# source tabular data:\r\n"]
[45.056895, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[45.056978, "o", "\r\n#\r\n"]
[45.057023, "o", "# sort by column 2 and sort type duration:\r\n"]
[45.057073, "o", "\r\n% tablizer -A -k 2 -a table.demo1"]
[46.057895, "o", "\r\n"]
[46.059684, "o", "NAME(1)\tDURATION(2)\tCOUNT(3)\tWHEN(4) \r\nalpha \t4h35m \t170 \t2013-Feb-03 \t\r\nbeta \t1d10h5m1s \t33 \t3/1/2014 \t\r\nceta \t33d12h \t9 \t06/Jan/2008 15:04:05 -0700\t\r\n"]
[46.059988, "o", "\r\n% "]
[50.061514, "o", "\u001b[H\u001b[2J\u001b[3J"]
[50.061622, "o", "#\r\n# source tabular data:\r\n"]
[50.062091, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[50.062188, "o", "\r\n#\r\n# only display column 1 and 3:\r\n\r\n% tablizer -A -c 1,3 table.demo1"]
[51.062985, "o", "\r\n"]
[51.066293, "o", "NAME(1)\tCOUNT(3) \r\nbeta \t33 \t\r\nalpha \t170 \t\r\nceta \t9 \t\r\n"]
[51.066843, "o", "\r\n% "]
[55.070781, "o", "\u001b[H\u001b[2J\u001b[3J"]
[55.071327, "o", "#\r\n# source tabular data:\r\n"]
[55.073499, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[55.073822, "o", "\r\n#\r\n# only display columns matching /(RA|AM)/:\r\n"]
[55.074188, "o", "\r\n% tablizer -A -c AM,RA table.demo1"]
[56.07636, "o", "\r\n"]
[56.078603, "o", "NAME(1)\tDURATION(2) \r\nbeta \t1d10h5m1s \t\r\nalpha \t4h35m \t\r\nceta \t33d12h \t\r\n"]
[56.078957, "o", "\r\n% "]
[60.080574, "o", "\u001b[H\u001b[2J\u001b[3J"]
[60.080734, "o", "#\r\n# source tabular data:\r\n"]
[60.081286, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[60.081418, "o", "\r\n#\r\n# only display column 1 and 3 in extended mode:\r\n\r\n% tablizer -X -c 1,3 table.demo1"]
[61.082844, "o", "\r\n"]
[61.089822, "o", " NAME(1): beta\r\nCOUNT(3): 33\r\n\r\n NAME(1): alpha\r\nCOUNT(3): 170\r\n\r\n NAME(1): ceta\r\nCOUNT(3): 9\r\n\r\n"]
[61.090969, "o", "\r\n% "]
[65.096092, "o", "\u001b[H\u001b[2J\u001b[3J"]
[65.096571, "o", "#\r\n# source tabular data:\r\n"]
[65.098736, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[65.099085, "o", "\r\n#\r\n# only show rows matching /20:\r\n"]
[65.099283, "o", "\r\n% tablizer /20 -A table.demo1"]
[66.101537, "o", "\r\n"]
[66.109112, "o", "NAME(1)\tDURATION(2)\tCOUNT(3)\tWHEN(4) \r\nbeta \t1d10h5m1s \t33 \t3/1\u001b[102;30m/20\u001b[0m14 \t\r\nceta \t33d12h \t9 \t06/Jan\u001b[102;30m/20\u001b[0m08 15:04:05 -0700\t\r\n"]
[66.109405, "o", "\r\n% "]
[70.11076, "o", "\u001b[H\u001b[2J\u001b[3J"]
[70.110873, "o", "#\r\n# source tabular data:\r\n"]
[70.111365, "o", "NAME DURATION COUNT WHEN\r\nbeta 1d10h5m1s 33 3/1/2014\r\nalpha 4h35m 170 2013-Feb-03\r\nceta 33d12h 9 06/Jan/2008 15:04:05 -0700\r\n"]
[70.111469, "o", "\r\n#\r\n# only show rows NOT matching /20:\r\n\r\n% tablizer /20 -A -v table.demo1"]
[71.112738, "o", "\r\n"]
[71.120032, "o", "NAME(1)\tDURATION(2)\tCOUNT(3)\tWHEN(4) \r\nalpha \t4h35m \t170 \t2013-Feb-03\t\r\n"]
[71.121127, "o", "\r\n% "]
[75.126199, "o", "\u001b[H\u001b[2J\u001b[3J"]

BIN
demo/tablizer-demo.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 243 KiB

37
go.mod
View File

@@ -3,14 +3,41 @@ module github.com/tlinden/tablizer
go 1.18
require (
github.com/alecthomas/repr v0.0.0-20180818092828-117648cd9897
github.com/alecthomas/repr v0.1.1
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de
github.com/glycerine/zygomys v5.1.2+incompatible
github.com/gookit/color v1.5.2
github.com/hashicorp/hcl/v2 v2.19.1
github.com/lithammer/fuzzysearch v1.1.7
github.com/olekukonko/tablewriter v0.0.5
github.com/spf13/cobra v1.5.0
github.com/spf13/cobra v1.6.1
gopkg.in/yaml.v3 v3.0.1
)
require (
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/mattn/go-runewidth v0.0.9 // indirect
github.com/agext/levenshtein v1.2.1 // indirect
github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect
github.com/apparentlymart/go-textseg/v15 v15.0.0 // indirect
github.com/glycerine/blake2b v0.0.0-20151022103502-3c8c640cd7be // indirect
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 // indirect
github.com/glycerine/greenpack v5.1.1+incompatible // indirect
github.com/glycerine/liner v0.0.0-20160121172638-72909af234e0 // indirect
github.com/google/go-cmp v0.5.6 // indirect
github.com/gopherjs/gopherjs v1.17.2 // indirect
github.com/inconshreveable/mousetrap v1.0.1 // indirect
github.com/jtolds/gls v4.20.0+incompatible // indirect
github.com/mattn/go-runewidth v0.0.10 // indirect
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 // indirect
github.com/philhofer/fwd v1.1.2 // indirect
github.com/rivo/uniseg v0.1.0 // indirect
github.com/shurcooL/go v0.0.0-20200502201357-93f07166e636 // indirect
github.com/shurcooL/go-goon v1.0.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/testify v1.8.0 // indirect
github.com/tinylib/msgp v1.1.9 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect
github.com/zclconf/go-cty v1.13.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/text v0.11.0 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
)

111
go.sum
View File

@@ -1,29 +1,124 @@
github.com/alecthomas/repr v0.0.0-20180818092828-117648cd9897 h1:p9Sln00KOTlrYkxI1zYWl1QLnEqAqEARBEYa8FQnQcY=
github.com/alecthomas/repr v0.0.0-20180818092828-117648cd9897/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ=
github.com/agext/levenshtein v1.2.1 h1:QmvMAjj2aEICytGiWzmxoE0x2KZvE0fvmqMOfy2tjT8=
github.com/agext/levenshtein v1.2.1/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
github.com/alecthomas/repr v0.1.1 h1:87P60cSmareLAxMc4Hro0r2RBY4ROm0dYwkJNpS4pPs=
github.com/alecthomas/repr v0.1.1/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4=
github.com/apparentlymart/go-textseg/v13 v13.0.0 h1:Y+KvPE1NYz0xl601PVImeQfFyEy6iT90AvPUL1NNfNw=
github.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo=
github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew1u1fNQOlOtuGxQY=
github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4=
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de h1:FxWPpzIjnTlhPwqqXc4/vE0f7GvRjuAsbW+HOIe8KnA=
github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de/go.mod h1:DCaWoUhZrYW9p1lxo/cm8EmUOOzAPSEZNGF2DK1dJgw=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
github.com/glycerine/blake2b v0.0.0-20151022103502-3c8c640cd7be h1:XBJdPGgA3qqhW+p9CANCAVdF7ZIXdu3pZAkypMkKAjE=
github.com/glycerine/blake2b v0.0.0-20151022103502-3c8c640cd7be/go.mod h1:OSCrScrFAjcBObrulk6BEQlytA462OkG1UGB5NYj9kE=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/glycerine/greenpack v5.1.1+incompatible h1:fDr9i6MkSGZmAy4VXPfJhW+SyK2/LNnzIp5nHyDiaIM=
github.com/glycerine/greenpack v5.1.1+incompatible/go.mod h1:us0jVISAESGjsEuLlAfCd5nkZm6W6WQF18HPuOecIg4=
github.com/glycerine/liner v0.0.0-20160121172638-72909af234e0 h1:4ZegphJXBTc4uFQ08UVoWYmQXorGa+ipXetUj83sMBc=
github.com/glycerine/liner v0.0.0-20160121172638-72909af234e0/go.mod h1:AqJLs6UeoC65dnHxyCQ6MO31P5STpjcmgaANAU+No8Q=
github.com/glycerine/zygomys v5.1.2+incompatible h1:jmcdmA3XPxgfOunAXFpipE9LQoUL6eX6d2mhYyjV4GE=
github.com/glycerine/zygomys v5.1.2+incompatible/go.mod h1:i3SPKZpmy9dwF/3iWrXJ/ZLyzZucegwypwOmqRkUUaQ=
github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68=
github.com/google/go-cmp v0.5.6 h1:BKbKCqvP6I+rmFHt06ZmyQtvB8xAkWdhFyr0ZUNZcxQ=
github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/gookit/color v1.5.2 h1:uLnfXcaFjlrDnQDT+NCBcfhrXqYTx/rcCa6xn01Y8yI=
github.com/gookit/color v1.5.2/go.mod h1:w8h4bGiHeeBpvQVePTutdbERIUf3oJE5lZ8HM0UgXyg=
github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g=
github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k=
github.com/hashicorp/hcl/v2 v2.19.1 h1://i05Jqznmb2EXqa39Nsvyan2o5XyMowW5fnCKW5RPI=
github.com/hashicorp/hcl/v2 v2.19.1/go.mod h1:ThLC89FV4p9MPW804KVbe/cEXoQ8NZEh+JtMeeGErHE=
github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc=
github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo=
github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU=
github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 h1:MtvEpTB6LX3vkb4ax0b5D2DHbNAUsen0Gx5wZoq3lV4=
github.com/lithammer/fuzzysearch v1.1.7 h1:q8rZNmBIUkqxsxb/IlwsXVbCoPIH/0juxjFHY0UIwhU=
github.com/lithammer/fuzzysearch v1.1.7/go.mod h1:ZhIlfRGxnD8qa9car/yplC6GmnM14CS07BYAKJJBK2I=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.10 h1:CoZ3S2P7pvtP45xOtBw+/mDL2z0RKI576gSkzRRpdGg=
github.com/mattn/go-runewidth v0.0.10/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk=
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 h1:DpOJ2HYzCv8LZP15IdmG+YdwD2luVPHITV96TkirNBM=
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo=
github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/philhofer/fwd v1.1.2 h1:bnDivRJ1EWPjUIRXV5KfORO897HTbpFAQddBdE8t7Gw=
github.com/philhofer/fwd v1.1.2/go.mod h1:qkPdfjR2SIEbspLqpe1tO4n5yICnr2DY7mqEx2tUTP0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY=
github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/spf13/cobra v1.5.0 h1:X+jTBEBqF0bHN+9cSMgmfuvv2VHJ9ezmFNf9Y/XstYU=
github.com/spf13/cobra v1.5.0/go.mod h1:dWXEIy2H428czQCjInthrTRUg7yKbok+2Qi/yBIJoUM=
github.com/scylladb/termtables v0.0.0-20191203121021-c4c0b6d42ff4/go.mod h1:C1a7PQSMz9NShzorzCiG2fk9+xuCgLkPeCvMHYR2OWg=
github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/shurcooL/go v0.0.0-20200502201357-93f07166e636 h1:aSISeOcal5irEhJd1M+IrApc0PdcN7e7Aj4yuEnOrfQ=
github.com/shurcooL/go v0.0.0-20200502201357-93f07166e636/go.mod h1:TDJrrUr11Vxrven61rcy3hJMUqaf/CLWYhHNPmT14Lk=
github.com/shurcooL/go-goon v1.0.0 h1:BCQPvxGkHHJ4WpBO4m/9FXbITVIsvAm/T66cCcCGI7E=
github.com/shurcooL/go-goon v1.0.0/go.mod h1:2wTHMsGo7qnpmqA8ADYZtP4I1DD94JpXGQ3Dxq2YQ5w=
github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA=
github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0 h1:pSgiaMZlXftHpm5L7V1+rVB+AZJydKsMxsQBIJw4PKk=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/tinylib/msgp v1.1.9 h1:SHf3yoO2sGA0veCJeCBYLHuttAVFHGm2RHgNodW7wQU=
github.com/tinylib/msgp v1.1.9/go.mod h1:BCXGB54lDD8qUEPmiG0cQQUANC4IUQyB2ItS2UDlO/k=
github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU=
github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg=
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 h1:QldyIu/L63oPpyvQmHgvgickp1Yw510KJOqX7H24mg8=
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778/go.mod h1:2MuV+tbUrU1zIOPMxZ5EncGwgmMJsa+9ucAQZXxsObs=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/zclconf/go-cty v1.13.0 h1:It5dfKTTZHe9aeppbNOda3mN7Ag7sg6QkBNm6TkyFa0=
github.com/zclconf/go-cty v1.13.0/go.mod h1:YKQzy/7pZ7iq2jNFzy5go57xdxdWoLLpaEp4u238AE0=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/text v0.11.0 h1:LAntKIrcmeSKERyiOh0XMV39LXS8IE9UL2yP7+f5ij4=
golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -1,5 +1,5 @@
/*
Copyright © 2022 Thomas von Dein
Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -17,29 +17,20 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package lib
var (
// command line flags
Debug bool
XtendedOut bool
NoNumbering bool
ShowVersion bool
Columns string
UseColumns []int
Separator string
OutflagExtended bool
OutflagMarkdown bool
OutflagOrgtable bool
OutflagShell bool
OutputMode string
// contains a whole parsed table
type Tabdata struct {
maxwidthHeader int // longest header
columns int // count
headers []string // [ "ID", "NAME", ...]
entries [][]string
}
// used for validation
validOutputmodes = "(orgtbl|markdown|extended|ascii)"
func (data *Tabdata) CloneEmpty() Tabdata {
newdata := Tabdata{
maxwidthHeader: data.maxwidthHeader,
columns: data.columns,
headers: data.headers,
}
// main program version
Version = "v1.0.4"
// generated version string, used by -v contains lib.Version on
// main branch, and lib.Version-$branch-$lastcommit-$date on
// development branch
VERSION string
)
return newdata
}

130
lib/filter.go Normal file
View File

@@ -0,0 +1,130 @@
/*
Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"bufio"
"fmt"
"io"
"strings"
"github.com/lithammer/fuzzysearch/fuzzy"
"github.com/tlinden/tablizer/cfg"
)
/*
* [!]Match a line, use fuzzy search for normal pattern strings and
* regexp otherwise.
*/
func matchPattern(conf cfg.Config, line string) bool {
if conf.UseFuzzySearch {
return fuzzy.MatchFold(conf.Pattern, line)
}
return conf.PatternR.MatchString(line)
}
/*
* Filter parsed data by fields. The filter is positive, so if one or
* more filters match on a row, it will be kept, otherwise it will be
* excluded.
*/
func FilterByFields(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
if len(conf.Filters) == 0 {
// no filters, no checking
return Tabdata{}, false, nil
}
newdata := data.CloneEmpty()
for _, row := range data.entries {
keep := true
for idx, header := range data.headers {
if !Exists(conf.Filters, strings.ToLower(header)) {
// do not filter by unspecified field
continue
}
if !conf.Filters[strings.ToLower(header)].MatchString(row[idx]) {
// there IS a filter, but it doesn't match
keep = false
break
}
}
if keep == !conf.InvertMatch {
// also apply -v
newdata.entries = append(newdata.entries, row)
}
}
return newdata, true, nil
}
/* generic map.Exists(key) */
func Exists[K comparable, V any](m map[K]V, v K) bool {
if _, ok := m[v]; ok {
return true
}
return false
}
func FilterByPattern(conf cfg.Config, input io.Reader) (io.Reader, error) {
if conf.Pattern == "" {
return input, nil
}
scanner := bufio.NewScanner(input)
lines := []string{}
hadFirst := false
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
if hadFirst {
// don't match 1st line, it's the header
if conf.Pattern != "" && matchPattern(conf, line) == conf.InvertMatch {
// by default -v is false, so if a line does NOT
// match the pattern, we will ignore it. However,
// if the user specified -v, the matching is inverted,
// so we ignore all lines, which DO match.
continue
}
// apply user defined lisp filters, if any
accept, err := RunFilterHooks(conf, line)
if err != nil {
return input, fmt.Errorf("failed to apply filter hook: %w", err)
}
if !accept {
// IF there are filter hook[s] and IF one of them
// returns false on the current line, reject it
continue
}
}
lines = append(lines, line)
hadFirst = true
}
return strings.NewReader(strings.Join(lines, "\n")), nil
}

162
lib/filter_test.go Normal file
View File

@@ -0,0 +1,162 @@
/*
Copyright © 2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"fmt"
"reflect"
"testing"
"github.com/tlinden/tablizer/cfg"
)
func TestMatchPattern(t *testing.T) {
var input = []struct {
name string
fuzzy bool
pattern string
line string
}{
{
name: "normal",
pattern: "haus",
line: "hausparty",
},
{
name: "fuzzy",
pattern: "hpt",
line: "haus-party-termin",
fuzzy: true,
},
}
for _, inputdata := range input {
testname := fmt.Sprintf("match-pattern-%s", inputdata.name)
t.Run(testname, func(t *testing.T) {
conf := cfg.Config{}
if inputdata.fuzzy {
conf.UseFuzzySearch = true
}
err := conf.PreparePattern(inputdata.pattern)
if err != nil {
t.Errorf("PreparePattern returned error: %s", err)
}
if !matchPattern(conf, inputdata.line) {
t.Errorf("matchPattern() did not match\nExp: true\nGot: false\n")
}
})
}
}
func TestFilterByFields(t *testing.T) {
data := Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
{"19191", "EDD 1", "x"},
{"8d8", "AN 1", "y"},
},
}
var input = []struct {
name string
filter []string
expect Tabdata
invert bool
}{
{
name: "one-field",
filter: []string{"one=19"},
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"19191", "EDD 1", "x"},
},
},
},
{
name: "one-field-inverted",
filter: []string{"one=19"},
invert: true,
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
{"8d8", "AN 1", "y"},
},
},
},
{
name: "many-fields",
filter: []string{"one=19", "two=DD"},
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"19191", "EDD 1", "x"},
},
},
},
{
name: "many-fields-inverted",
filter: []string{"one=19", "two=DD"},
invert: true,
expect: Tabdata{
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
{"8d8", "AN 1", "y"},
},
},
},
}
for _, inputdata := range input {
testname := fmt.Sprintf("filter-by-fields-%s", inputdata.name)
t.Run(testname, func(t *testing.T) {
conf := cfg.Config{Rawfilters: inputdata.filter, InvertMatch: inputdata.invert}
err := conf.PrepareFilters()
if err != nil {
t.Errorf("PrepareFilters returned error: %s", err)
}
data, _, _ := FilterByFields(conf, data)
if !reflect.DeepEqual(data, inputdata.expect) {
t.Errorf("Filtered data does not match expected data:\ngot: %+v\nexp: %+v", data, inputdata.expect)
}
})
}
}

View File

@@ -20,9 +20,14 @@ package lib
import (
"errors"
"fmt"
"os"
"regexp"
"sort"
"strconv"
"strings"
"github.com/gookit/color"
"github.com/tlinden/tablizer/cfg"
)
func contains(s []int, e int) bool {
@@ -31,61 +36,183 @@ func contains(s []int, e int) bool {
return true
}
}
return false
}
func PrepareColumns() error {
if len(Columns) > 0 {
for _, use := range strings.Split(Columns, ",") {
// parse columns list given with -c, modifies config.UseColumns based
// on eventually given regex
func PrepareColumns(conf *cfg.Config, data *Tabdata) error {
if conf.Columns == "" {
return nil
}
for _, use := range strings.Split(conf.Columns, ",") {
if len(use) == 0 {
return fmt.Errorf("could not parse columns list %s: empty column", conf.Columns)
}
usenum, err := strconv.Atoi(use)
if err != nil {
msg := fmt.Sprintf("Could not parse columns list %s: %v", Columns, err)
// might be a regexp
colPattern, err := regexp.Compile(use)
if err != nil {
msg := fmt.Sprintf("Could not parse columns list %s: %v", conf.Columns, err)
return errors.New(msg)
}
UseColumns = append(UseColumns, usenum)
}
}
return nil
}
func PrepareModeFlags() error {
if len(OutputMode) == 0 {
switch {
case OutflagExtended:
OutputMode = "extended"
case OutflagMarkdown:
OutputMode = "markdown"
case OutflagOrgtable:
OutputMode = "orgtbl"
case OutflagShell:
OutputMode = "shell"
NoNumbering = true
default:
OutputMode = "ascii"
// find matching header fields
for i, head := range data.headers {
if colPattern.MatchString(head) {
conf.UseColumns = append(conf.UseColumns, i+1)
}
}
} else {
r, err := regexp.Compile(validOutputmodes)
if err != nil {
return errors.New("Failed to validate output mode spec!")
}
match := r.MatchString(OutputMode)
if !match {
return errors.New("Invalid output mode!")
// we digress from go best practises here, because if
// a colum spec is not a number, we process them above
// inside the err handler for atoi(). so only add the
// number, if it's really just a number.
conf.UseColumns = append(conf.UseColumns, usenum)
}
}
// deduplicate: put all values into a map (value gets map key)
// thereby removing duplicates, extract keys into new slice
// and sort it
imap := make(map[int]int, len(conf.UseColumns))
for _, i := range conf.UseColumns {
imap[i] = 0
}
conf.UseColumns = nil
for k := range imap {
conf.UseColumns = append(conf.UseColumns, k)
}
sort.Ints(conf.UseColumns)
return nil
}
// prepare headers: add numbers to headers
func numberizeAndReduceHeaders(conf cfg.Config, data *Tabdata) {
numberedHeaders := []string{}
maxwidth := 0 // start from scratch, so we only look at displayed column widths
for idx, head := range data.headers {
var headlen int
if len(conf.Columns) > 0 {
// -c specified
if !contains(conf.UseColumns, idx+1) {
// ignore this one
continue
}
}
if conf.NoNumbering {
numberedHeaders = append(numberedHeaders, head)
headlen = len(head)
} else {
numhead := fmt.Sprintf("%s(%d)", head, idx+1)
headlen = len(numhead)
numberedHeaders = append(numberedHeaders, numhead)
}
if headlen > maxwidth {
maxwidth = headlen
}
}
data.headers = numberedHeaders
if data.maxwidthHeader != maxwidth && maxwidth > 0 {
data.maxwidthHeader = maxwidth
}
}
// exclude columns, if any
func reduceColumns(conf cfg.Config, data *Tabdata) {
if len(conf.Columns) > 0 {
reducedEntries := [][]string{}
var reducedEntry []string
for _, entry := range data.entries {
reducedEntry = nil
for i, value := range entry {
if !contains(conf.UseColumns, i+1) {
continue
}
reducedEntry = append(reducedEntry, value)
}
reducedEntries = append(reducedEntries, reducedEntry)
}
data.entries = reducedEntries
}
}
// FIXME: remove this when we only use Tablewriter and strip in ParseFile()!
func trimRow(row []string) []string {
// FIXME: remove this when we only use Tablewriter and strip in ParseFile()!
var fixedrow []string
for _, cell := range row {
fixedrow = append(fixedrow, strings.TrimSpace(cell))
var fixedrow = make([]string, len(row))
for idx, cell := range row {
fixedrow[idx] = strings.TrimSpace(cell)
}
return fixedrow
}
// FIXME: refactor this beast!
func colorizeData(conf cfg.Config, output string) string {
switch {
case conf.UseHighlight && color.IsConsole(os.Stdout):
highlight := true
colorized := ""
first := true
for _, line := range strings.Split(output, "\n") {
if highlight {
if first {
// we need to add two spaces to the header line
// because tablewriter omits them for some reason
// in pprint mode. This doesn't matter as long as
// we don't use colorization. But with colors the
// missing spaces can be seen.
if conf.OutputMode == cfg.ASCII {
line += " "
}
line = conf.HighlightHdrStyle.Sprint(line)
first = false
} else {
line = conf.HighlightStyle.Sprint(line)
}
} else {
line = conf.NoHighlightStyle.Sprint(line)
}
highlight = !highlight
colorized += line + "\n"
}
return colorized
case len(conf.Pattern) > 0 && !conf.NoColor && color.IsConsole(os.Stdout):
r := regexp.MustCompile("(" + conf.Pattern + ")")
return r.ReplaceAllStringFunc(output, func(in string) string {
return conf.ColorStyle.Sprint(in)
})
default:
return output
}
}

View File

@@ -21,9 +21,11 @@ import (
"fmt"
"reflect"
"testing"
"github.com/tlinden/tablizer/cfg"
)
func Testcontains(t *testing.T) {
func TestContains(t *testing.T) {
var tests = []struct {
list []int
search int
@@ -45,6 +47,19 @@ func Testcontains(t *testing.T) {
}
func TestPrepareColumns(t *testing.T) {
data := Tabdata{
maxwidthHeader: 5,
columns: 3,
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{
"2", "3", "4",
},
},
}
var tests = []struct {
input string
exp []int
@@ -52,22 +67,96 @@ func TestPrepareColumns(t *testing.T) {
}{
{"1,2,3", []int{1, 2, 3}, false},
{"1,2,", []int{}, true},
{"T", []int{2, 3}, false},
{"T,2,3", []int{2, 3}, false},
{"[a-z,4,5", []int{4, 5}, true}, // invalid regexp
}
for _, tt := range tests {
testname := fmt.Sprintf("PrepareColumns-%s-%t", tt.input, tt.wanterror)
for _, testdata := range tests {
testname := fmt.Sprintf("PrepareColumns-%s-%t", testdata.input, testdata.wanterror)
t.Run(testname, func(t *testing.T) {
Columns = tt.input
err := PrepareColumns()
conf := cfg.Config{Columns: testdata.input}
err := PrepareColumns(&conf, &data)
if err != nil {
if !tt.wanterror {
if !testdata.wanterror {
t.Errorf("got error: %v", err)
}
} else {
if !reflect.DeepEqual(UseColumns, tt.exp) {
t.Errorf("got: %v, expected: %v", UseColumns, tt.exp)
if !reflect.DeepEqual(conf.UseColumns, testdata.exp) {
t.Errorf("got: %v, expected: %v", conf.UseColumns, testdata.exp)
}
}
})
}
}
func TestReduceColumns(t *testing.T) {
var tests = []struct {
expect [][]string
columns []int
}{
{
expect: [][]string{{"a", "b"}},
columns: []int{1, 2},
},
{
expect: [][]string{{"a", "c"}},
columns: []int{1, 3},
},
{
expect: [][]string{{"a"}},
columns: []int{1},
},
{
expect: [][]string{nil},
columns: []int{4},
},
}
input := [][]string{{"a", "b", "c"}}
for _, testdata := range tests {
testname := fmt.Sprintf("reduce-columns-by-%+v", testdata.columns)
t.Run(testname, func(t *testing.T) {
c := cfg.Config{Columns: "x", UseColumns: testdata.columns}
data := Tabdata{entries: input}
reduceColumns(c, &data)
if !reflect.DeepEqual(data.entries, testdata.expect) {
t.Errorf("reduceColumns returned invalid data:\ngot: %+v\nexp: %+v",
data.entries, testdata.expect)
}
})
}
}
func TestNumberizeHeaders(t *testing.T) {
data := Tabdata{
headers: []string{"ONE", "TWO", "THREE"},
}
var tests = []struct {
expect []string
columns []int
nonum bool
}{
{[]string{"ONE(1)", "TWO(2)", "THREE(3)"}, []int{1, 2, 3}, false},
{[]string{"ONE(1)", "TWO(2)"}, []int{1, 2}, false},
{[]string{"ONE", "TWO"}, []int{1, 2}, true},
}
for _, testdata := range tests {
testname := fmt.Sprintf("numberize-headers-columns-%+v-nonum-%t",
testdata.columns, testdata.nonum)
t.Run(testname, func(t *testing.T) {
conf := cfg.Config{Columns: "x", UseColumns: testdata.columns, NoNumbering: testdata.nonum}
usedata := data
numberizeAndReduceHeaders(conf, &usedata)
if !reflect.DeepEqual(usedata.headers, testdata.expect) {
t.Errorf("numberizeAndReduceHeaders returned invalid data:\ngot: %+v\nexp: %+v",
usedata.headers, testdata.expect)
}
})
}
}

View File

@@ -19,65 +19,96 @@ package lib
import (
"errors"
"fmt"
"io"
"os"
"github.com/tlinden/tablizer/cfg"
)
func ProcessFiles(args []string) error {
fds, pattern, err := determineIO(args)
const RWRR = 0755
func ProcessFiles(conf *cfg.Config, args []string) error {
fds, pattern, err := determineIO(conf, args)
if err != nil {
return err
}
if err := conf.PreparePattern(pattern); err != nil {
return err
}
for _, fd := range fds {
data, err := parseFile(fd, pattern)
data, err := Parse(*conf, fd)
if err != nil {
return err
}
printData(&data)
err = PrepareColumns(conf, &data)
if err != nil {
return err
}
printData(os.Stdout, *conf, &data)
}
return nil
}
func determineIO(args []string) ([]io.Reader, string, error) {
func determineIO(conf *cfg.Config, args []string) ([]io.Reader, string, error) {
var filehandles []io.Reader
var pattern string
var fds []io.Reader
var havefiles bool
var haveio bool
stat, _ := os.Stdin.Stat()
if (stat.Mode() & os.ModeCharDevice) == 0 {
// we're reading from STDIN, which takes precedence over file args
filehandles = append(filehandles, os.Stdin)
if len(args) > 0 {
// ignore any args > 1
pattern = args[0]
conf.Pattern = args[0] // used for colorization by printData()
}
haveio = true
} else if len(args) > 0 {
// threre were args left, take a look
if args[0] == "-" {
// in traditional unix programs a dash denotes STDIN (forced)
filehandles = append(filehandles, os.Stdin)
haveio = true
} else {
if _, err := os.Stat(args[0]); err != nil {
// first one is not a file, consider it as regexp and
// shift arg list
pattern = args[0]
conf.Pattern = args[0] // used for colorization by printData()
args = args[1:]
}
if len(args) > 0 {
// only files
// consider any other args as files
for _, file := range args {
fd, err := os.OpenFile(file, os.O_RDONLY, 0755)
filehandle, err := os.OpenFile(file, os.O_RDONLY, RWRR)
if err != nil {
return nil, "", err
return nil, "", fmt.Errorf("failed to read input file %s: %w", file, err)
}
fds = append(fds, fd)
filehandles = append(filehandles, filehandle)
haveio = true
}
}
havefiles = true
}
}
if !havefiles {
stat, _ := os.Stdin.Stat()
if (stat.Mode() & os.ModeCharDevice) == 0 {
fds = append(fds, os.Stdin)
} else {
return nil, "", errors.New("No file specified and nothing to read on stdin!")
}
if !haveio {
return nil, "", errors.New("no file specified and nothing to read on stdin")
}
return fds, pattern, nil
return filehandles, pattern, nil
}

313
lib/lisp.go Normal file
View File

@@ -0,0 +1,313 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"errors"
"fmt"
"log"
"os"
"strings"
"github.com/glycerine/zygomys/zygo"
"github.com/tlinden/tablizer/cfg"
)
/*
needs to be global because we can't feed an cfg object to AddHook()
which is being called from user lisp code
*/
var Hooks map[string][]*zygo.SexpSymbol
/*
AddHook() (called addhook from lisp code) can be used by the user to
add a function to one of the available hooks provided by tablizer.
*/
func AddHook(env *zygo.Zlisp, name string, args []zygo.Sexp) (zygo.Sexp, error) {
var hookname string
if len(args) < 2 {
return zygo.SexpNull, errors.New("argument of %add-hook should be: %hook-name %your-function")
}
switch sexptype := args[0].(type) {
case *zygo.SexpSymbol:
if !HookExists(sexptype.Name()) {
return zygo.SexpNull, errors.New("Unknown hook " + sexptype.Name())
}
hookname = sexptype.Name()
default:
return zygo.SexpNull, errors.New("hook name must be a symbol ")
}
switch sexptype := args[1].(type) {
case *zygo.SexpSymbol:
_, exists := Hooks[hookname]
if !exists {
Hooks[hookname] = []*zygo.SexpSymbol{sexptype}
} else {
Hooks[hookname] = append(Hooks[hookname], sexptype)
}
default:
return zygo.SexpNull, errors.New("hook function must be a symbol ")
}
return zygo.SexpNull, nil
}
/*
Check if a hook exists
*/
func HookExists(key string) bool {
for _, hook := range cfg.ValidHooks {
if hook == key {
return true
}
}
return false
}
/*
* Basic sanity checks and load lisp file
*/
func LoadAndEvalFile(env *zygo.Zlisp, path string) error {
if strings.HasSuffix(path, `.zy`) {
code, err := os.ReadFile(path)
if err != nil {
return fmt.Errorf("failed to read lisp file %s: %w", path, err)
}
// FIXME: check what res (_ here) could be and mean
_, err = env.EvalString(string(code))
if err != nil {
log.Fatalf(env.GetStackTrace(err))
}
}
return nil
}
/*
* Setup lisp interpreter environment
*/
func SetupLisp(conf *cfg.Config) error {
// iterate over load-path and evaluate all *.zy files there, if any
// we ignore if load-path does not exist, which is the default anyway
path, err := os.Stat(conf.LispLoadPath)
if os.IsNotExist(err) {
return nil
}
// init global hooks
Hooks = make(map[string][]*zygo.SexpSymbol)
// init sandbox
env := zygo.NewZlispSandbox()
env.AddFunction("addhook", AddHook)
if !path.IsDir() {
// load single lisp file
err = LoadAndEvalFile(env, conf.LispLoadPath)
if err != nil {
return err
}
} else {
// load all lisp file in load dir
dir, err := os.ReadDir(conf.LispLoadPath)
if err != nil {
return fmt.Errorf("failed to read lisp dir %s: %w",
conf.LispLoadPath, err)
}
for _, entry := range dir {
if !entry.IsDir() {
err := LoadAndEvalFile(env, conf.LispLoadPath+"/"+entry.Name())
if err != nil {
return err
}
}
}
}
RegisterLib(env)
conf.Lisp = env
return nil
}
/*
Execute every user lisp function registered as filter hook.
Each function is given the current line as argument and is expected to
return a boolean. True indicates to keep the line, false to skip
it.
If there are multiple such functions registered, then the first one
returning false wins, that is if each function returns true the line
will be kept, if at least one of them returns false, it will be
skipped.
*/
func RunFilterHooks(conf cfg.Config, line string) (bool, error) {
for _, hook := range Hooks["filter"] {
var result bool
conf.Lisp.Clear()
res, err := conf.Lisp.EvalString(fmt.Sprintf("(%s `%s`)", hook.Name(), line))
if err != nil {
return false, fmt.Errorf("failed to evaluate hook loader: %w", err)
}
switch sexptype := res.(type) {
case *zygo.SexpBool:
result = sexptype.Val
default:
return false, fmt.Errorf("filter hook shall return bool")
}
if !result {
// the first hook which returns false leads to complete false
return result, nil
}
}
// if no hook returned false, we succeed and accept the given line
return true, nil
}
/*
These hooks get the data (Tabdata) readily processed by tablizer as
argument. They are expected to return a SexpPair containing a boolean
denoting if the data has been modified and the actual modified
data. Columns must be the same, rows may differ. Cells may also have
been modified.
Replaces the internal data structure Tabdata with the user supplied
version.
Only one process hook function is supported.
The somewhat complicated code is being caused by the fact, that we
need to convert our internal structure to a lisp variable and vice
versa afterwards.
*/
func RunProcessHooks(conf cfg.Config, data Tabdata) (Tabdata, bool, error) {
var userdata Tabdata
lisplist := []zygo.Sexp{}
if len(Hooks["process"]) == 0 {
return userdata, false, nil
}
if len(Hooks["process"]) > 1 {
fmt.Println("Warning: only one process hook is allowed!")
}
// there are hook[s] installed, convert the go data structure 'data to lisp
for _, row := range data.entries {
var entry zygo.SexpHash
for idx, cell := range row {
err := entry.HashSet(&zygo.SexpStr{S: data.headers[idx]}, &zygo.SexpStr{S: cell})
if err != nil {
return userdata, false, fmt.Errorf("failed to convert to lisp data: %w", err)
}
}
lisplist = append(lisplist, &entry)
}
// we need to add it to the env so that the function can use the struct directly
conf.Lisp.AddGlobal("data", &zygo.SexpArray{Val: lisplist, Env: conf.Lisp})
// execute the actual hook
hook := Hooks["process"][0]
conf.Lisp.Clear()
var result bool
res, err := conf.Lisp.EvalString(fmt.Sprintf("(%s data)", hook.Name()))
if err != nil {
return userdata, false, fmt.Errorf("failed to eval lisp loader: %w", err)
}
// we expect (bool, array(hash)) as return from the function
switch sexptype := res.(type) {
case *zygo.SexpPair:
switch th := sexptype.Head.(type) {
case *zygo.SexpBool:
result = th.Val
default:
return userdata, false, errors.New("xpect (bool, array(hash)) as return value")
}
switch sexptailtype := sexptype.Tail.(type) {
case *zygo.SexpArray:
lisplist = sexptailtype.Val
default:
return userdata, false, errors.New("expect (bool, array(hash)) as return value ")
}
default:
return userdata, false, errors.New("filter hook shall return array of hashes ")
}
if !result {
// no further processing required
return userdata, result, nil
}
// finally convert lispdata back to Tabdata
for _, item := range lisplist {
row := []string{}
switch hash := item.(type) {
case *zygo.SexpHash:
for _, header := range data.headers {
entry, err := hash.HashGetDefault(
conf.Lisp,
&zygo.SexpStr{S: header},
&zygo.SexpStr{S: ""})
if err != nil {
return userdata, false, fmt.Errorf("failed to get lisp hash entry: %w", err)
}
switch sexptype := entry.(type) {
case *zygo.SexpStr:
row = append(row, sexptype.S)
default:
return userdata, false, errors.New("hsh values should be string ")
}
}
default:
return userdata, false, errors.New("rturned array should contain hashes ")
}
userdata.entries = append(userdata.entries, row)
}
userdata.headers = data.headers
return userdata, result, nil
}

88
lib/lisplib.go Normal file
View File

@@ -0,0 +1,88 @@
/*
Copyright © 2023 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"errors"
"fmt"
"regexp"
"strconv"
"github.com/glycerine/zygomys/zygo"
)
func Splice2SexpList(list []string) zygo.Sexp {
slist := []zygo.Sexp{}
for _, item := range list {
slist = append(slist, &zygo.SexpStr{S: item})
}
return zygo.MakeList(slist)
}
func StringReSplit(env *zygo.Zlisp, name string, args []zygo.Sexp) (zygo.Sexp, error) {
if len(args) < 2 {
return zygo.SexpNull, errors.New("expecting 2 arguments")
}
var separator, input string
switch t := args[0].(type) {
case *zygo.SexpStr:
input = t.S
default:
return zygo.SexpNull, errors.New("second argument must be a string")
}
switch t := args[1].(type) {
case *zygo.SexpStr:
separator = t.S
default:
return zygo.SexpNull, errors.New("first argument must be a string")
}
sep := regexp.MustCompile(separator)
return Splice2SexpList(sep.Split(input, -1)), nil
}
func String2Int(env *zygo.Zlisp, name string, args []zygo.Sexp) (zygo.Sexp, error) {
var number int
switch t := args[0].(type) {
case *zygo.SexpStr:
num, err := strconv.Atoi(t.S)
if err != nil {
return zygo.SexpNull, fmt.Errorf("failed to convert string to number: %w", err)
}
number = num
default:
return zygo.SexpNull, errors.New("argument must be a string")
}
return &zygo.SexpInt{Val: int64(number)}, nil
}
func RegisterLib(env *zygo.Zlisp) {
env.AddFunction("resplit", StringReSplit)
env.AddFunction("atoi", String2Int)
}

View File

@@ -1,5 +1,5 @@
/*
Copyright © 2022 Thomas von Dein
Copyright © 2022-2024 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -19,59 +19,96 @@ package lib
import (
"bufio"
"errors"
"encoding/csv"
"fmt"
"github.com/alecthomas/repr"
"io"
"regexp"
"strings"
"github.com/alecthomas/repr"
"github.com/tlinden/tablizer/cfg"
)
// contains a whole parsed table
type Tabdata struct {
maxwidthHeader int // longest header
maxwidthPerCol []int // max width per column
columns int
headerIndices []map[string]int // [ {beg=>0, end=>17}, ... ]
headers []string // [ "ID", "NAME", ...]
entries [][]string
/*
Parser switch
*/
func Parse(conf cfg.Config, input io.Reader) (Tabdata, error) {
if len(conf.Separator) == 1 {
return parseCSV(conf, input)
}
return parseTabular(conf, input)
}
/*
Parse tabular input. We split the header (first line) by 2 or more
spaces, remember the positions of the header fields. We then split
the data (everything after the first line) by those positions. That
way we can turn "tabular data" (with fields containing whitespaces)
into real tabular data. We re-tabulate our input if you will.
Parse CSV input.
*/
func parseFile(input io.Reader, pattern string) (Tabdata, error) {
func parseCSV(conf cfg.Config, input io.Reader) (Tabdata, error) {
data := Tabdata{}
// apply pattern, if any
content, err := FilterByPattern(conf, input)
if err != nil {
return data, err
}
csvreader := csv.NewReader(content)
csvreader.Comma = rune(conf.Separator[0])
records, err := csvreader.ReadAll()
if err != nil {
return data, fmt.Errorf("could not parse CSV input: %w", err)
}
if len(records) >= 1 {
data.headers = records[0]
data.columns = len(records)
for _, head := range data.headers {
// register widest header field
headerlen := len(head)
if headerlen > data.maxwidthHeader {
data.maxwidthHeader = headerlen
}
}
if len(records) > 1 {
data.entries = records[1:]
}
}
// apply user defined lisp process hooks, if any
userdata, changed, err := RunProcessHooks(conf, data)
if err != nil {
return data, fmt.Errorf("failed to apply filter hook: %w", err)
}
if changed {
data = userdata
}
return data, nil
}
/*
Parse tabular input.
*/
func parseTabular(conf cfg.Config, input io.Reader) (Tabdata, error) {
data := Tabdata{}
var scanner *bufio.Scanner
var spaces = `\s\s+|$`
if len(Separator) > 0 {
spaces = Separator
}
hadFirst := false
spacefinder := regexp.MustCompile(spaces)
beg := 0
separate := regexp.MustCompile(conf.Separator)
scanner = bufio.NewScanner(input)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
values := []string{}
patternR, err := regexp.Compile(pattern)
if err != nil {
return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, err))
}
parts := separate.Split(line, -1)
if !hadFirst {
// header processing
parts := spacefinder.FindAllStringIndex(line, -1)
data.columns = len(parts)
// if Debug {
// fmt.Println(parts)
@@ -79,82 +116,85 @@ func parseFile(input io.Reader, pattern string) (Tabdata, error) {
// process all header fields
for _, part := range parts {
// if Debug {
// fmt.Printf("Part: <%s>\n", string(line[beg:part[0]]))
//}
// current field
head := string(line[beg:part[0]])
// register begin and end of field within line
indices := make(map[string]int)
indices["beg"] = beg
if part[0] == part[1] {
indices["end"] = 0
} else {
indices["end"] = part[1] - 1
}
// register widest header field
headerlen := len(head)
headerlen := len(part)
if headerlen > data.maxwidthHeader {
data.maxwidthHeader = headerlen
}
// register fields data
data.headerIndices = append(data.headerIndices, indices)
data.headers = append(data.headers, head)
// end of current field == begin of next one
beg = part[1]
data.headers = append(data.headers, strings.TrimSpace(part))
// done
hadFirst = true
}
} else {
// data processing
if len(pattern) > 0 {
if !patternR.MatchString(line) {
if conf.Pattern != "" && matchPattern(conf, line) == conf.InvertMatch {
// by default -v is false, so if a line does NOT
// match the pattern, we will ignore it. However,
// if the user specified -v, the matching is inverted,
// so we ignore all lines, which DO match.
continue
}
// apply user defined lisp filters, if any
accept, err := RunFilterHooks(conf, line)
if err != nil {
return data, fmt.Errorf("failed to apply filter hook: %w", err)
}
if !accept {
// IF there are filter hook[s] and IF one of them
// returns false on the current line, reject it
continue
}
idx := 0 // we cannot use the header index, because we could exclude columns
for _, index := range data.headerIndices {
value := ""
if index["end"] == 0 {
value = string(line[index["beg"]:])
} else {
value = string(line[index["beg"]:index["end"]])
}
width := len(strings.TrimSpace(value))
if len(data.maxwidthPerCol)-1 < idx {
data.maxwidthPerCol = append(data.maxwidthPerCol, width)
} else {
if width > data.maxwidthPerCol[idx] {
data.maxwidthPerCol[idx] = width
}
}
values := []string{}
for _, part := range parts {
// if Debug {
// fmt.Printf("<%s> ", value)
// }
values = append(values, strings.TrimSpace(value))
values = append(values, strings.TrimSpace(part))
idx++
}
// fill up missing fields, if any
for i := len(values); i < len(data.headers); i++ {
values = append(values, "")
}
data.entries = append(data.entries, values)
}
}
if scanner.Err() != nil {
return data, errors.Unwrap(fmt.Errorf("Regexp pattern %s is invalid: %w", pattern, scanner.Err()))
return data, fmt.Errorf("failed to read from io.Reader: %w", scanner.Err())
}
if Debug {
// filter by field filters, if any
filtereddata, changed, err := FilterByFields(conf, data)
if err != nil {
return data, fmt.Errorf("failed to filter fields: %w", err)
}
if changed {
data = filtereddata
}
// apply user defined lisp process hooks, if any
userdata, changed, err := RunProcessHooks(conf, data)
if err != nil {
return data, fmt.Errorf("failed to apply filter hook: %w", err)
}
if changed {
data = userdata
}
if conf.Debug {
repr.Print(data)
}

View File

@@ -18,65 +18,149 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package lib
import (
"fmt"
"reflect"
"strings"
"testing"
"github.com/tlinden/tablizer/cfg"
)
var input = []struct {
name string
text string
separator string
}{
{
name: "tabular-data",
separator: cfg.DefaultSeparator,
text: `
ONE TWO THREE
asd igig cxxxncnc
19191 EDD 1 X`,
},
{
name: "csv-data",
separator: ",",
text: `
ONE,TWO,THREE
asd,igig,cxxxncnc
19191,"EDD 1",X`,
},
}
func TestParser(t *testing.T) {
data := Tabdata{
maxwidthHeader: 5,
maxwidthPerCol: []int{
5,
5,
8,
},
columns: 3,
headerIndices: []map[string]int{
map[string]int{
"beg": 0,
"end": 6,
},
map[string]int{
"end": 13,
"beg": 7,
},
map[string]int{
"beg": 14,
"end": 0,
},
},
headers: []string{
"ONE",
"TWO",
"THREE",
"ONE", "TWO", "THREE",
},
entries: [][]string{
[]string{
"asd",
"igig",
"cxxxncnc",
},
[]string{
"19191",
"EDD 1",
"X",
},
{"asd", "igig", "cxxxncnc"},
{"19191", "EDD 1", "X"},
},
}
table := `ONE TWO THREE
asd igig cxxxncnc
19191 EDD 1 X`
readFd := strings.NewReader(table)
gotdata, err := parseFile(readFd, "")
for _, testdata := range input {
testname := fmt.Sprintf("parse-%s", testdata.name)
t.Run(testname, func(t *testing.T) {
readFd := strings.NewReader(strings.TrimSpace(testdata.text))
conf := cfg.Config{Separator: testdata.separator}
gotdata, err := Parse(conf, readFd)
if err != nil {
t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, gotdata)
}
if !reflect.DeepEqual(data, gotdata) {
t.Errorf("Parser returned invalid data\nExp: %+v\nGot: %+v\n", data, gotdata)
t.Errorf("Parser returned invalid data\nExp: %+v\nGot: %+v\n",
data, gotdata)
}
})
}
}
func TestParserPatternmatching(t *testing.T) {
var tests = []struct {
entries [][]string
pattern string
invert bool
want bool
}{
{
entries: [][]string{
{"asd", "igig", "cxxxncnc"},
},
pattern: "ig",
invert: false,
},
{
entries: [][]string{
{"19191", "EDD 1", "X"},
},
pattern: "ig",
invert: true,
},
}
for _, inputdata := range input {
for _, testdata := range tests {
testname := fmt.Sprintf("parse-%s-with-pattern-%s-inverted-%t",
inputdata.name, testdata.pattern, testdata.invert)
t.Run(testname, func(t *testing.T) {
conf := cfg.Config{InvertMatch: testdata.invert, Pattern: testdata.pattern,
Separator: inputdata.separator}
_ = conf.PreparePattern(testdata.pattern)
readFd := strings.NewReader(strings.TrimSpace(inputdata.text))
gotdata, err := Parse(conf, readFd)
if err != nil {
if !testdata.want {
t.Errorf("Parser returned error: %s\nData processed so far: %+v",
err, gotdata)
}
} else {
if !reflect.DeepEqual(testdata.entries, gotdata.entries) {
t.Errorf("Parser returned invalid data (pattern: %s, invert: %t)\nExp: %+v\nGot: %+v\n",
testdata.pattern, testdata.invert, testdata.entries, gotdata.entries)
}
}
})
}
}
}
func TestParserIncompleteRows(t *testing.T) {
data := Tabdata{
maxwidthHeader: 5,
columns: 3,
headers: []string{
"ONE", "TWO", "THREE",
},
entries: [][]string{
{"asd", "igig", ""},
{"19191", "EDD 1", "X"},
},
}
table := `
ONE TWO THREE
asd igig
19191 EDD 1 X`
readFd := strings.NewReader(strings.TrimSpace(table))
conf := cfg.Config{Separator: cfg.DefaultSeparator}
gotdata, err := Parse(conf, readFd)
if err != nil {
t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, gotdata)
}
if !reflect.DeepEqual(data, gotdata) {
t.Errorf("Parser returned invalid data, Regex: %s\nExp: %+v\nGot: %+v\n",
conf.Separator, data, gotdata)
}
}

View File

@@ -18,74 +18,64 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package lib
import (
"encoding/csv"
"fmt"
"github.com/olekukonko/tablewriter"
"os"
"io"
"log"
"regexp"
"strconv"
"strings"
"github.com/gookit/color"
"github.com/olekukonko/tablewriter"
"github.com/tlinden/tablizer/cfg"
"gopkg.in/yaml.v3"
)
func printData(data *Tabdata) {
// prepare headers: add numbers to headers
numberedHeaders := []string{}
for i, head := range data.headers {
if len(Columns) > 0 {
// -c specified
if !contains(UseColumns, i+1) {
// ignore this one
continue
}
}
if NoNumbering {
numberedHeaders = append(numberedHeaders, head)
} else {
numberedHeaders = append(numberedHeaders, fmt.Sprintf("%s(%d)", head, i+1))
}
}
data.headers = numberedHeaders
func printData(writer io.Writer, conf cfg.Config, data *Tabdata) {
// add numbers to headers and remove this we're not interested in
numberizeAndReduceHeaders(conf, data)
// prepare data
if len(Columns) > 0 {
reducedEntries := [][]string{}
reducedEntry := []string{}
for _, entry := range data.entries {
reducedEntry = nil
for i, value := range entry {
if !contains(UseColumns, i+1) {
continue
}
// remove unwanted columns, if any
reduceColumns(conf, data)
reducedEntry = append(reducedEntry, value)
}
reducedEntries = append(reducedEntries, reducedEntry)
}
data.entries = reducedEntries
}
// sort the data
sortTable(conf, data)
switch OutputMode {
case "extended":
printExtendedData(data)
case "ascii":
printAsciiData(data)
case "orgtbl":
printOrgmodeData(data)
case "markdown":
printMarkdownData(data)
case "shell":
printShellData(data)
switch conf.OutputMode {
case cfg.Extended:
printExtendedData(writer, conf, data)
case cfg.ASCII:
printASCIIData(writer, conf, data)
case cfg.Orgtbl:
printOrgmodeData(writer, conf, data)
case cfg.Markdown:
printMarkdownData(writer, conf, data)
case cfg.Shell:
printShellData(writer, data)
case cfg.Yaml:
printYamlData(writer, data)
case cfg.CSV:
printCSVData(writer, data)
default:
printAsciiData(data)
printASCIIData(writer, conf, data)
}
}
func output(writer io.Writer, str string) {
fmt.Fprint(writer, str)
}
/*
Emacs org-mode compatible table (also orgtbl-mode)
Emacs org-mode compatible table (also orgtbl-mode)
*/
func printOrgmodeData(data *Tabdata) {
func printOrgmodeData(writer io.Writer, conf cfg.Config, data *Tabdata) {
tableString := &strings.Builder{}
table := tablewriter.NewWriter(tableString)
if !conf.NoHeaders {
table.SetHeader(data.headers)
}
for _, row := range data.entries {
table.Append(trimRow(row))
@@ -104,19 +94,25 @@ func printOrgmodeData(data *Tabdata) {
| cell | cell |
|------+------|
*/
leftR := regexp.MustCompile("(?m)^\\+")
rightR := regexp.MustCompile("\\+(?m)$")
leftR := regexp.MustCompile(`(?m)^\\+`)
rightR := regexp.MustCompile(`\\+(?m)$`)
fmt.Print(rightR.ReplaceAllString(leftR.ReplaceAllString(tableString.String(), "|"), "|"))
output(writer, color.Sprint(
colorizeData(conf,
rightR.ReplaceAllString(
leftR.ReplaceAllString(tableString.String(), "|"), "|"))))
}
/*
Markdown table
Markdown table
*/
func printMarkdownData(data *Tabdata) {
table := tablewriter.NewWriter(os.Stdout)
func printMarkdownData(writer io.Writer, conf cfg.Config, data *Tabdata) {
tableString := &strings.Builder{}
table := tablewriter.NewWriter(tableString)
if !conf.NoHeaders {
table.SetHeader(data.headers)
}
for _, row := range data.entries {
table.Append(trimRow(row))
@@ -126,20 +122,21 @@ func printMarkdownData(data *Tabdata) {
table.SetCenterSeparator("|")
table.Render()
output(writer, color.Sprint(colorizeData(conf, tableString.String())))
}
/*
Simple ASCII table without any borders etc, just like the input we expect
Simple ASCII table without any borders etc, just like the input we expect
*/
func printAsciiData(data *Tabdata) {
table := tablewriter.NewWriter(os.Stdout)
func printASCIIData(writer io.Writer, conf cfg.Config, data *Tabdata) {
tableString := &strings.Builder{}
table := tablewriter.NewWriter(tableString)
if !conf.NoHeaders {
table.SetHeader(data.headers)
table.AppendBulk(data.entries)
}
// for _, row := range data.entries {
// table.Append(trimRow(row))
// }
table.AppendBulk(data.entries)
table.SetAutoWrapText(false)
table.SetAutoFormatHeaders(true)
@@ -150,57 +147,116 @@ func printAsciiData(data *Tabdata) {
table.SetRowSeparator("")
table.SetHeaderLine(false)
table.SetBorder(false)
table.SetTablePadding("\t") // pad with tabs
table.SetNoWhiteSpace(true)
if !conf.UseHighlight {
// the tabs destroy the highlighting
table.SetTablePadding("\t") // pad with tabs
} else {
table.SetTablePadding(" ")
}
table.Render()
output(writer, color.Sprint(colorizeData(conf, tableString.String())))
}
/*
We simulate the \x command of psql (the PostgreSQL client)
We simulate the \x command of psql (the PostgreSQL client)
*/
func printExtendedData(data *Tabdata) {
func printExtendedData(writer io.Writer, conf cfg.Config, data *Tabdata) {
// needed for data output
format := fmt.Sprintf("%%%ds: %%s\n", data.maxwidthHeader) // FIXME: re-calculate if -c has been set
format := fmt.Sprintf("%%%ds: %%s\n", data.maxwidthHeader)
out := ""
if len(data.entries) > 0 {
var idx int
for _, entry := range data.entries {
idx = 0
for i, value := range entry {
if len(Columns) > 0 {
if !contains(UseColumns, i+1) {
continue
out += color.Sprintf(format, data.headers[i], value)
}
out += "\n"
}
}
fmt.Printf(format, data.headers[idx], value)
idx++
}
fmt.Println()
}
}
output(writer, colorizeData(conf, out))
}
/*
Shell output, ready to be eval'd. Just like FreeBSD stat(1)
Shell output, ready to be eval'd. Just like FreeBSD stat(1)
*/
func printShellData(data *Tabdata) {
func printShellData(writer io.Writer, data *Tabdata) {
out := ""
if len(data.entries) > 0 {
var idx int
for _, entry := range data.entries {
idx = 0
for i, value := range entry {
if len(Columns) > 0 {
if !contains(UseColumns, i+1) {
continue
shentries := []string{}
for idx, value := range entry {
shentries = append(shentries, fmt.Sprintf("%s=\"%s\"",
data.headers[idx], value))
}
out += strings.Join(shentries, " ") + "\n"
}
}
fmt.Printf("%s=\"%s\" ", data.headers[idx], value)
idx++
// no colorization here
output(writer, out)
}
func printYamlData(writer io.Writer, data *Tabdata) {
type Data struct {
Entries []map[string]interface{} `yaml:"entries"`
}
fmt.Println()
yamlout := Data{}
for _, entry := range data.entries {
yamldata := map[string]interface{}{}
for idx, entry := range entry {
style := yaml.TaggedStyle
_, err := strconv.Atoi(entry)
if err != nil {
style = yaml.DoubleQuotedStyle
}
yamldata[strings.ToLower(data.headers[idx])] =
&yaml.Node{
Kind: yaml.ScalarNode,
Style: style,
Value: entry}
}
yamlout.Entries = append(yamlout.Entries, yamldata)
}
yamlstr, err := yaml.Marshal(&yamlout)
if err != nil {
log.Fatal(err)
}
output(writer, string(yamlstr))
}
func printCSVData(writer io.Writer, data *Tabdata) {
csvout := csv.NewWriter(writer)
if err := csvout.Write(data.headers); err != nil {
log.Fatalln("error writing record to csv:", err)
}
for _, entry := range data.entries {
if err := csvout.Write(entry); err != nil {
log.Fatalln("error writing record to csv:", err)
}
}
csvout.Flush()
if err := csvout.Error(); err != nil {
log.Fatal(err)
}
}

View File

@@ -18,64 +18,276 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
package lib
import (
"os"
"bytes"
"fmt"
"strings"
"testing"
"github.com/tlinden/tablizer/cfg"
)
func TestPrinter(t *testing.T) {
table := `ONE TWO THREE
asd igig cxxxncnc
19191 EDD 1 X`
expects := map[string]string{
"ascii": `ONE(1) TWO(2) THREE(3)
asd igig cxxxncnc
19191 EDD 1 X`,
"orgtbl": `|--------+--------+----------|
| ONE(1) | TWO(2) | THREE(3) |
|--------+--------+----------|
| asd | igig | cxxxncnc |
| 19191 | EDD 1 | X |
|--------+--------+----------|`,
"markdown": `| ONE(1) | TWO(2) | THREE(3) |
|--------|--------|----------|
| asd | igig | cxxxncnc |
| 19191 | EDD 1 | X |`,
func newData() Tabdata {
return Tabdata{
maxwidthHeader: 8,
columns: 4,
headers: []string{
"NAME",
"DURATION",
"COUNT",
"WHEN",
},
entries: [][]string{
{
"beta",
"1d10h5m1s",
"33",
"3/1/2014",
},
{
"alpha",
"4h35m",
"170",
"2013-Feb-03",
},
{
"ceta",
"33d12h",
"9",
"06/Jan/2008 15:04:05 -0700",
},
},
}
}
var tests = []struct {
name string // so we can identify which one fails, can be the same
// for multiple tests, because flags will be appended to the name
sortby string // empty == default
column int // sort by this column, 0 == default first or NO Sort
desc bool // sort in descending order, default == ascending
nonum bool // hide numbering
mode int // shell, orgtbl, etc. empty == default: ascii
usecol []int // columns to display, empty == display all
usecolstr string // for testname, must match usecol
expect string // rendered output we expect
}{
// --------------------- Default settings mode tests ``
{
mode: cfg.ASCII,
name: "default",
expect: `
NAME(1) DURATION(2) COUNT(3) WHEN(4)
beta 1d10h5m1s 33 3/1/2014
alpha 4h35m 170 2013-Feb-03
ceta 33d12h 9 06/Jan/2008 15:04:05 -0700`,
},
{
mode: cfg.CSV,
name: "csv",
expect: `
NAME,DURATION,COUNT,WHEN
beta,1d10h5m1s,33,3/1/2014
alpha,4h35m,170,2013-Feb-03
ceta,33d12h,9,06/Jan/2008 15:04:05 -0700`,
},
{
name: "default",
mode: cfg.Orgtbl,
expect: `
+---------+-------------+----------+----------------------------+
| NAME(1) | DURATION(2) | COUNT(3) | WHEN(4) |
+---------+-------------+----------+----------------------------+
| beta | 1d10h5m1s | 33 | 3/1/2014 |
| alpha | 4h35m | 170 | 2013-Feb-03 |
| ceta | 33d12h | 9 | 06/Jan/2008 15:04:05 -0700 |
+---------+-------------+----------+----------------------------+`,
},
{
name: "default",
mode: cfg.Markdown,
expect: `
| NAME(1) | DURATION(2) | COUNT(3) | WHEN(4) |
|---------|-------------|----------|----------------------------|
| beta | 1d10h5m1s | 33 | 3/1/2014 |
| alpha | 4h35m | 170 | 2013-Feb-03 |
| ceta | 33d12h | 9 | 06/Jan/2008 15:04:05 -0700 |`,
},
{
name: "default",
mode: cfg.Shell,
nonum: true,
expect: `
NAME="beta" DURATION="1d10h5m1s" COUNT="33" WHEN="3/1/2014"
NAME="alpha" DURATION="4h35m" COUNT="170" WHEN="2013-Feb-03"
NAME="ceta" DURATION="33d12h" COUNT="9" WHEN="06/Jan/2008 15:04:05 -0700"`,
},
{
name: "default",
mode: cfg.Yaml,
nonum: true,
expect: `
entries:
- count: 33
duration: "1d10h5m1s"
name: "beta"
when: "3/1/2014"
- count: 170
duration: "4h35m"
name: "alpha"
when: "2013-Feb-03"
- count: 9
duration: "33d12h"
name: "ceta"
when: "06/Jan/2008 15:04:05 -0700"`,
},
{
name: "default",
mode: cfg.Extended,
expect: `
NAME(1): beta
DURATION(2): 1d10h5m1s
COUNT(3): 33
WHEN(4): 3/1/2014
NAME(1): alpha
DURATION(2): 4h35m
COUNT(3): 170
WHEN(4): 2013-Feb-03
NAME(1): ceta
DURATION(2): 33d12h
COUNT(3): 9
WHEN(4): 06/Jan/2008 15:04:05 -0700`,
},
//------------------------ SORT TESTS
{
name: "sortbycolumn",
column: 3,
sortby: "numeric",
desc: false,
expect: `
NAME(1) DURATION(2) COUNT(3) WHEN(4)
ceta 33d12h 9 06/Jan/2008 15:04:05 -0700
beta 1d10h5m1s 33 3/1/2014
alpha 4h35m 170 2013-Feb-03`,
},
{
name: "sortbycolumn",
column: 4,
sortby: "time",
desc: false,
expect: `
NAME(1) DURATION(2) COUNT(3) WHEN(4)
ceta 33d12h 9 06/Jan/2008 15:04:05 -0700
alpha 4h35m 170 2013-Feb-03
beta 1d10h5m1s 33 3/1/2014`,
},
{
name: "sortbycolumn",
column: 2,
sortby: "duration",
desc: false,
expect: `
NAME(1) DURATION(2) COUNT(3) WHEN(4)
alpha 4h35m 170 2013-Feb-03
beta 1d10h5m1s 33 3/1/2014
ceta 33d12h 9 06/Jan/2008 15:04:05 -0700`,
},
// ----------------------- UseColumns Tests
{
name: "usecolumns",
usecol: []int{1, 4},
usecolstr: "1,4",
expect: `
NAME(1) WHEN(4)
beta 3/1/2014
alpha 2013-Feb-03
ceta 06/Jan/2008 15:04:05 -0700`,
},
{
name: "usecolumns",
usecol: []int{2},
usecolstr: "2",
expect: `
DURATION(2)
1d10h5m1s
4h35m
33d12h`,
},
{
name: "usecolumns",
usecol: []int{3},
usecolstr: "3",
expect: `
COUNT(3)
33
170
9`,
},
{
name: "usecolumns",
column: 0,
usecol: []int{1, 3},
usecolstr: "1,3",
expect: `
NAME(1) COUNT(3)
beta 33
alpha 170
ceta 9`,
},
{
name: "usecolumns",
usecol: []int{2, 4},
usecolstr: "2,4",
expect: `
DURATION(2) WHEN(4)
1d10h5m1s 3/1/2014
4h35m 2013-Feb-03
33d12h 06/Jan/2008 15:04:05 -0700`,
},
}
func TestPrinter(t *testing.T) {
for _, testdata := range tests {
testname := fmt.Sprintf("print-sortcol-%d-desc-%t-sortby-%s-mode-%d-usecolumns-%s",
testdata.column, testdata.desc, testdata.sortby, testdata.mode, testdata.usecolstr)
t.Run(testname, func(t *testing.T) {
// replaces os.Stdout, but we ignore it
var writer bytes.Buffer
// cmd flags
conf := cfg.Config{
SortByColumn: testdata.column,
SortDescending: testdata.desc,
SortMode: testdata.sortby,
OutputMode: testdata.mode,
NoNumbering: testdata.nonum,
UseColumns: testdata.usecol,
NoColor: true,
}
conf.ApplyDefaults()
// the test checks the len!
if len(testdata.usecol) > 0 {
conf.Columns = "yes"
} else {
conf.Columns = ""
}
data := newData()
exp := strings.TrimSpace(testdata.expect)
printData(&writer, conf, &data)
got := strings.TrimSpace(writer.String())
if got != exp {
t.Errorf("not rendered correctly:\n+++ got:\n%s\n+++ want:\n%s",
got, exp)
}
})
}
r, w, err := os.Pipe()
if err != nil {
t.Fatal(err)
}
origStdout := os.Stdout
os.Stdout = w
for mode, expect := range expects {
OutputMode = mode
fd := strings.NewReader(table)
data, err := parseFile(fd, "")
if err != nil {
t.Errorf("Parser returned error: %s\nData processed so far: %+v", err, data)
}
printData(&data)
buf := make([]byte, 1024)
n, err := r.Read(buf)
if err != nil {
t.Fatal(err)
}
buf = buf[:n]
output := strings.TrimSpace(string(buf))
if output != expect {
t.Errorf("output mode: %s, got:\n%s\nwant:\n%s\n (%d <=> %d)", mode, output, expect, len(output), len(expect))
}
}
// Restore
os.Stdout = origStdout
}

127
lib/sort.go Normal file
View File

@@ -0,0 +1,127 @@
/*
Copyright © 2022 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"regexp"
"sort"
"strconv"
"github.com/araddon/dateparse"
"github.com/tlinden/tablizer/cfg"
)
func sortTable(conf cfg.Config, data *Tabdata) {
if conf.SortByColumn <= 0 {
// no sorting wanted
return
}
// slightly modified here to match internal array indicies
col := conf.SortByColumn
col-- // ui starts counting by 1, but use 0 internally
// sanity checks
if len(data.entries) == 0 {
return
}
if col >= len(data.headers) {
// fall back to default column
col = 0
}
// actual sorting
sort.SliceStable(data.entries, func(i, j int) bool {
return compare(&conf, data.entries[i][col], data.entries[j][col])
})
}
// config is not modified here, but it would be inefficient to copy it every loop
func compare(conf *cfg.Config, left string, right string) bool {
var comp bool
switch conf.SortMode {
case "numeric":
left, err := strconv.Atoi(left)
if err != nil {
left = 0
}
right, err := strconv.Atoi(right)
if err != nil {
right = 0
}
comp = left < right
case "duration":
left := duration2int(left)
right := duration2int(right)
comp = left < right
case "time":
left, _ := dateparse.ParseAny(left)
right, _ := dateparse.ParseAny(right)
comp = left.Unix() < right.Unix()
default:
comp = left < right
}
if conf.SortDescending {
comp = !comp
}
return comp
}
/*
We could use time.ParseDuration(), but this doesn't support days.
We could also use github.com/xhit/go-str2duration/v2, which does
the job, but it's just another dependency, just for this little
gem. And we don't need a time.Time value. And int is good enough
for duration comparison.
Convert a duration into an integer. Valid time units are "s",
"m", "h" and "d".
*/
func duration2int(duration string) int {
re := regexp.MustCompile(`(\d+)([dhms])`)
seconds := 0
for _, match := range re.FindAllStringSubmatch(duration, -1) {
if len(match) == 3 {
durationvalue, _ := strconv.Atoi(match[1])
switch match[2][0] {
case 'd':
seconds += durationvalue * 86400
case 'h':
seconds += durationvalue * 3600
case 'm':
seconds += durationvalue * 60
case 's':
seconds += durationvalue
}
}
}
return seconds
}

82
lib/sort_test.go Normal file
View File

@@ -0,0 +1,82 @@
/*
Copyright © 2022 Thomas von Dein
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package lib
import (
"fmt"
"testing"
"github.com/tlinden/tablizer/cfg"
)
func TestDuration2Seconds(t *testing.T) {
var tests = []struct {
dur string
expect int
}{
{"1d", 60 * 60 * 24},
{"1h", 60 * 60},
{"10m", 60 * 10},
{"2h4m10s", (60 * 120) + (4 * 60) + 10},
{"88u", 0},
{"19t77X what?4s", 4},
}
for _, testdata := range tests {
testname := fmt.Sprintf("duration-%s", testdata.dur)
t.Run(testname, func(t *testing.T) {
seconds := duration2int(testdata.dur)
if seconds != testdata.expect {
t.Errorf("got %d, want %d", seconds, testdata.expect)
}
})
}
}
func TestCompare(t *testing.T) {
var tests = []struct {
mode string
a string
b string
want bool
desc bool
}{
// ascending
{"numeric", "10", "20", true, false},
{"duration", "2d4h5m", "45m", false, false},
{"time", "12/24/2022", "1/1/1970", false, false},
// descending
{"numeric", "10", "20", false, true},
{"duration", "2d4h5m", "45m", true, true},
{"time", "12/24/2022", "1/1/1970", true, true},
}
for _, testdata := range tests {
testname := fmt.Sprintf("compare-mode-%s-a-%s-b-%s-desc-%t",
testdata.mode, testdata.a, testdata.b, testdata.desc)
t.Run(testname, func(t *testing.T) {
c := cfg.Config{SortMode: testdata.mode, SortDescending: testdata.desc}
got := compare(&c, testdata.a, testdata.b)
if got != testdata.want {
t.Errorf("got %t, want %t", got, testdata.want)
}
})
}
}

View File

@@ -43,7 +43,7 @@ for D in $DIST; do
tardir="${tool}-${os}-${arch}-${version}"
tarfile="releases/${tool}-${os}-${arch}-${version}.tar.gz"
set -x
GOOS=${os} GOARCH=${arch} go build -o ${binfile}
GOOS=${os} GOARCH=${arch} go build -o ${binfile} -ldflags "-X 'github.com/tlinden/tablizer/cfg.VERSION=${version}'"
mkdir -p ${tardir}
cp ${binfile} README.md LICENSE ${tardir}/
echo 'tool = tablizer

10
t/plugintest.zy Normal file
View File

@@ -0,0 +1,10 @@
/*
Simple filter hook function. Splits the argument by whitespace,
fetches the 2nd element, converts it to an int and returns true
if it s larger than 5, false otherwise.
*/
(defn uselarge [line]
(cond (> (atoi (second (resplit line `\s+`))) 5) true false))
/* Register the filter hook */
(addhook %filter %uselarge)

45
t/test.sh Executable file
View File

@@ -0,0 +1,45 @@
#!/bin/sh
# simple commandline unit test script
t="../tablizer"
fail=0
ex() {
# execute a test, report+exit on error, stay silent otherwise
log="/tmp/test-tablizer.$$.log"
name=$1
shift
echo -n "TEST $name "
$* > $log 2>&1
if test $? -ne 0; then
echo "failed, see $log"
fail=1
else
echo "ok"
rm -f $log
fi
}
# only use files in test dir
cd $(dirname $0)
echo "Executing commandline tests ..."
# io pattern tests
ex io-pattern-and-file $t bk7 testtable
cat testtable | ex io-pattern-and-stdin $t bk7
cat testtable | ex io-pattern-and-stdin-dash $t bk7 -
# same w/o pattern
ex io-just-file $t testtable
cat testtable | ex io-just-stdin $t
cat testtable | ex io-just-stdin-dash $t -
if test $fail -ne 0; then
echo "!!! Some tests failed !!!"
exit 1
fi

6
t/testtable Normal file
View File

@@ -0,0 +1,6 @@
NAME READY STATUS RESTARTS AGE
alertmanager-kube-prometheus-alertmanager-0 2/2 Running 35 (45m ago) 11d
grafana-fcc54cbc9-bk7s8 1/1 Running 17 (45m ago) 1d
kube-prometheus-blackbox-exporter-5d85b5d8f4-tskh7 1/1 Running 17 (45m ago) 1h44m
kube-prometheus-kube-state-metrics-b4cd9487-75p7f 1/1 Running 20 (45m ago) 45m
kube-prometheus-node-exporter-bfzpl 1/1 Running 17 (45m ago) 54s

6
t/testtable2 Normal file
View File

@@ -0,0 +1,6 @@
NAME DURATION
x 10
a 100
z 0
u 4
k 6

View File

@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "TABLIZER 1"
.TH TABLIZER 1 "2022-10-04" "1" "User Commands"
.TH TABLIZER 1 "2024-05-07" "1" "User Commands"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
@@ -146,26 +146,49 @@ tablizer \- Manipulate tabular output of other programs
\& Usage:
\& tablizer [regex] [file, ...] [flags]
\&
\& Flags:
\& Operational Flags:
\& \-c, \-\-columns string Only show the speficied columns (separated by ,)
\& \-d, \-\-debug Enable debugging
\& \-h, \-\-help help for tablizer
\& \-v, \-\-invert\-match select non\-matching rows
\& \-n, \-\-no\-numbering Disable header numbering
\& \-o, \-\-output string Output mode \- one of: orgtbl, markdown, extended, ascii(default)
\& \-N, \-\-no\-color Disable pattern highlighting
\& \-H, \-\-no\-headers Disable headers display
\& \-s, \-\-separator string Custom field separator
\& \-k, \-\-sort\-by int Sort by column (default: 1)
\& \-z, \-\-fuzzy Use fuzzy search [experimental]
\& \-F, \-\-filter field=reg Filter given field with regex, can be used multiple times
\&
\& Output Flags (mutually exclusive):
\& \-X, \-\-extended Enable extended output
\& \-M, \-\-markdown Enable markdown table output
\& \-O, \-\-orgtbl Enable org\-mode table output
\& \-s, \-\-separator string Custom field separator
\& \-v, \-\-version Print program version
\& \-S, \-\-shell Enable shell evaluable output
\& \-Y, \-\-yaml Enable yaml output
\& \-C, \-\-csv Enable CSV output
\& \-A, \-\-ascii Default output mode, ascii tabular
\& \-L, \-\-hightlight\-lines Use alternating background colors for tables
\&
\& Sort Mode Flags (mutually exclusive):
\& \-a, \-\-sort\-age sort according to age (duration) string
\& \-D, \-\-sort\-desc Sort in descending order (default: ascending)
\& \-i, \-\-sort\-numeric sort according to string numerical value
\& \-t, \-\-sort\-time sort according to time string
\&
\& Other Flags:
\& \-\-completion <shell> Generate the autocompletion script for <shell>
\& \-f, \-\-config <file> Configuration file (default: ~/.config/tablizer/config)
\& \-d, \-\-debug Enable debugging
\& \-h, \-\-help help for tablizer
\& \-m, \-\-man Display manual page
\& \-V, \-\-version Print program version
.Ve
.SH "DESCRIPTION"
.IX Header "DESCRIPTION"
Many programs generate tabular output. But sometimes you need to
post-process these tables, you may need to remove one or more columns
or you may want to filter for some pattern or you may need the output
in another program and need to parse it somehow. Standard unix tools
such as \fBawk\fR\|(1), \fBgrep\fR\|(1) or \fBcolumn\fR\|(1) may help, but sometimes it's a
tedious business.
or you may want to filter for some pattern (See \s-1PATTERNS\s0) or you
may need the output in another program and need to parse it somehow.
Standard unix tools such as \fBawk\fR\|(1), \fBgrep\fR\|(1) or \fBcolumn\fR\|(1) may help, but
sometimes it's a tedious business.
.PP
Let's take the output of the tool kubectl. It contains cells with
withespace and they do not separate columns by \s-1TAB\s0 characters. This is
@@ -173,14 +196,15 @@ not easy to process.
.PP
You can use \fBtablizer\fR to do these and more things.
.PP
\&\fBtablizer\fR analyses the header fiels of a table, registers the column
positions of each header field and separates columns by those
\&\fBtablizer\fR analyses the header fields of a table, registers the
column positions of each header field and separates columns by those
positions.
.PP
Without any options it reads its input from \f(CW\*(C`STDIN\*(C'\fR, but you can also
specify a file as a parameter. If you want to reduce the output by
some regular expression, just specify it as its first
parameters. Hence:
some regular expression, just specify it as its first parameter. You
may also use the \fB\-v\fR option to exclude all rows which match the
pattern. Hence:
.PP
.Vb 2
\& # read from STDIN
@@ -204,7 +228,7 @@ have a numer associated with it, e.g.:
.Ve
.PP
These numbers denote the column and you can use them to specify which
columns you want to have in your output:
columns you want to have in your output (see \s-1COLUMNS\s0:
.PP
.Vb 1
\& kubectl get pods | tablizer \-c1,3
@@ -215,14 +239,121 @@ the original order.
.PP
The numbering can be suppressed by using the \fB\-n\fR option.
.PP
By default tablizer shows a header containing the names of each
column. This can be disabled using the \fB\-H\fR option. Be aware that
this only affects tabular output modes. Shell, Extended, Yaml and \s-1CSV\s0
output modes always use the column names.
.PP
By default, if a \fBpattern\fR has been speficied, matches will be
highlighted. You can disable this behavior with the \fB\-N\fR option.
.PP
Use the \fB\-k\fR option to specify by which column to sort the tabular
data (as in \s-1GNU\s0 \fBsort\fR\|(1)). The default sort column is the first one. To
disable sorting at all, supply 0 (Zero) to \-k. The default sort order
is ascending. You can change this to descending order using the option
\&\fB\-D\fR. The default sort order is by string, but there are other sort
modes:
.IP "\fB\-a \-\-sort\-age\fR" 4
.IX Item "-a --sort-age"
Sorts duration strings like \*(L"1d4h32m51s\*(R".
.IP "\fB\-i \-\-sort\-numeric\fR" 4
.IX Item "-i --sort-numeric"
Sorts numeric fields.
.IP "\fB\-t \-\-sort\-time\fR" 4
.IX Item "-t --sort-time"
Sorts timestamps.
.PP
Finally the \fB\-d\fR option enables debugging output which is mostly
usefull for the developer.
useful for the developer.
.SS "\s-1PATTERNS AND FILTERING\s0"
.IX Subsection "PATTERNS AND FILTERING"
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is \s-1PCRE\s0 compatible, refer to the syntax cheat
sheet here: <https://github.com/google/re2/wiki/Syntax>. If you want
to read a more comprehensive documentation about the topic and have
perl installed you can read it with:
.PP
.Vb 1
\& perldoc perlre
.Ve
.PP
Or read it online: <https://perldoc.perl.org/perlre>.
.PP
A note on modifiers: the regexp engine used in tablizer uses another
modifier syntax:
.PP
.Vb 1
\& (?MODIFIER)
.Ve
.PP
The most important modifiers are:
.PP
\&\f(CW\*(C`i\*(C'\fR ignore case
\&\f(CW\*(C`m\*(C'\fR multiline mode
\&\f(CW\*(C`s\*(C'\fR single line mode
.PP
Example for a case insensitive search:
.PP
.Vb 1
\& kubectl get pods \-A | tablizer "(?i)account"
.Ve
.PP
You can use the experimental fuzzy search feature by providing the
option \fB\-z\fR, in which case the pattern is regarded as a fuzzy search
term, not a regexp.
.PP
Sometimes you want to filter by one or more columns. You can do that
using the \fB\-F\fR option. The option can be specified multiple times and
has the following format:
.PP
.Vb 1
\& fieldname=regexp
.Ve
.PP
Fieldnames (== columns headers) are case insensitive.
.PP
If you specify more than one filter, both filters have to match (\s-1AND\s0
operation).
.PP
If the option \fB\-v\fR is specified, the filtering is inverted.
.SS "\s-1COLUMNS\s0"
.IX Subsection "COLUMNS"
The parameter \fB\-c\fR can be used to specify, which columns to
display. By default tablizer numerizes the header names and these
numbers can be used to specify which header to display, see example
above.
.PP
However, beside numbers, you can also use regular expressions with
\&\fB\-c\fR, also separated by comma. And you can mix column numbers with
regexps.
.PP
Lets take this table:
.PP
.Vb 4
\& PID TTY TIME CMD
\& 14001 pts/0 00:00:00 bash
\& 42871 pts/0 00:00:00 ps
\& 42872 pts/0 00:00:00 sed
.Ve
.PP
We want to see only the \s-1CMD\s0 column and use a regex for this:
.PP
.Vb 6
\& ps | tablizer \-s \*(Aq\es+\*(Aq \-c C
\& CMD(4)
\& bash
\& ps
\& tablizer
\& sed
.Ve
.PP
where \*(L"C\*(R" is our regexp which matches \s-1CMD.\s0
.SS "\s-1OUTPUT MODES\s0"
.IX Subsection "OUTPUT MODES"
There might be cases when the tabular output of a program is way too
large for your current terminal but you still need to see every
column. In such cases the \fB\-o extended\fR or \fB\-X\fR option can be
usefull which enables \fIextended mode\fR. In this mode, each row will be
useful which enables \fIextended mode\fR. In this mode, each row will be
printed vertically, header left, value right, aligned by the field
widths. Here's an example:
.PP
@@ -253,7 +384,116 @@ You can use this in an eval loop.
.PP
Beside normal ascii mode (the default) and extended mode there are
more output modes available: \fBorgtbl\fR which prints an Emacs org-mode
table and \fBmarkdown\fR which prints a Markdown table.
table and \fBmarkdown\fR which prints a Markdown table, \fByaml\fR, which
prints yaml encoding and \s-1CSV\s0 mode, which prints a comma separated
value file.
.SS "\s-1ENVIRONMENT VARIABLES\s0"
.IX Subsection "ENVIRONMENT VARIABLES"
\&\fBtablizer\fR supports certain environment variables which use can use
to influence program behavior. Commandline flags have always
precedence over environment variables.
.IP "<T_NO_HEADER_NUMBERING> \- disable numbering of header fields, like \fB\-n\fR." 4
.IX Item "<T_NO_HEADER_NUMBERING> - disable numbering of header fields, like -n."
.PD 0
.IP "<T_COLUMNS> \- comma separated list of columns to output, like \fB\-c\fR" 4
.IX Item "<T_COLUMNS> - comma separated list of columns to output, like -c"
.IP "<\s-1NO_COLORS\s0> \- disable colorization of matches, like \fB\-N\fR" 4
.IX Item "<NO_COLORS> - disable colorization of matches, like -N"
.PD
.SS "\s-1COMPLETION\s0"
.IX Subsection "COMPLETION"
Shell completion for command line options can be enabled by using the
\&\fB\-\-completion\fR flag. The required parameter is the name of your
shell. Currently supported are: bash, zsh, fish and powershell.
.PP
Detailed instructions:
.IP "Bash:" 4
.IX Item "Bash:"
.Vb 1
\& source <(tablizer \-\-completion bash)
.Ve
.Sp
To load completions for each session, execute once:
.Sp
.Vb 2
\& # Linux:
\& $ tablizer \-\-completion bash > /etc/bash_completion.d/tablizer
\&
\& # macOS:
\& $ tablizer \-\-completion bash > $(brew \-\-prefix)/etc/bash_completion.d/tablizer
.Ve
.IP "Zsh:" 4
.IX Item "Zsh:"
If shell completion is not already enabled in your environment,
you will need to enable it. You can execute the following once:
.Sp
.Vb 1
\& echo "autoload \-U compinit; compinit" >> ~/.zshrc
.Ve
.Sp
To load completions for each session, execute once:
.Sp
.Vb 1
\& $ tablizer \-\-completion zsh > "${fpath[1]}/_tablizer"
.Ve
.Sp
You will need to start a new shell for this setup to take effect.
.IP "fish:" 4
.IX Item "fish:"
.Vb 1
\& tablizer \-\-completion fish | source
.Ve
.Sp
To load completions for each session, execute once:
.Sp
.Vb 1
\& tablizer \-\-completion fish > ~/.config/fish/completions/tablizer.fish
.Ve
.IP "PowerShell:" 4
.IX Item "PowerShell:"
.Vb 1
\& tablizer \-\-completion powershell | Out\-String | Invoke\-Expression
.Ve
.Sp
To load completions for every new session, run:
.Sp
.Vb 1
\& tablizer \-\-completion powershell > tablizer.ps1
.Ve
.Sp
and source this file from your PowerShell profile.
.SH "CONFIGURATION AND COLORS"
.IX Header "CONFIGURATION AND COLORS"
YOu can put certain configuration values into a configuration file in
\&\s-1HCL\s0 format. By default tablizer looks for
\&\f(CW\*(C`$HOME/.config/tablizer/config\*(C'\fR, but you can provide one using the
parameter \f(CW\*(C`\-f\*(C'\fR.
.PP
In the configuration the following variables can be defined:
.PP
.Vb 8
\& BG = "lightGreen"
\& FG = "white"
\& HighlightBG = "lightGreen"
\& HighlightFG = "white"
\& NoHighlightBG = "white"
\& NoHighlightFG = "lightGreen"
\& HighlightHdrBG = "red"
\& HighlightHdrFG = "white"
.Ve
.PP
The following color definitions are available:
.PP
black, blue, cyan, darkGray, default, green, lightBlue, lightCyan,
lightGreen, lightMagenta, lightRed, lightWhite, lightYellow,
magenta, red, white, yellow
.PP
The Variables \fB\s-1FG\s0\fR and \fB\s-1BG\s0\fR are being used to highlight matches. The
other *FG and *BG variables are for colored table output (enabled with
the \f(CW\*(C`\-L\*(C'\fR parameter).
.PP
Colorization can be turned off completely either by setting the
parameter \f(CW\*(C`\-N\*(C'\fR or the environment variable \fB\s-1NO_COLOR\s0\fR to a true value.
.SH "BUGS"
.IX Header "BUGS"
In order to report a bug, unexpected behavior, feature requests
@@ -263,15 +503,27 @@ or to submit a patch, please open an issue on github:
.IX Header "LICENSE"
This software is licensed under the \s-1GNU GENERAL PUBLIC LICENSE\s0 version 3.
.PP
Copyright (c) 2022 by Thomas von Dein
Copyright (c) 2022\-2024 by Thomas von Dein
.PP
This software uses the following \s-1GO\s0 libraries:
This software uses the following \s-1GO\s0 modules:
.IP "repr (https://github.com/alecthomas/repr)" 4
.IX Item "repr (https://github.com/alecthomas/repr)"
Released under the \s-1MIT\s0 License, Copyright (c) 2016 Alec Thomas
.IP "cobra (https://github.com/spf13/cobra)" 4
.IX Item "cobra (https://github.com/spf13/cobra)"
Released under the Apache 2.0 license, Copyright 2013\-2022 The Cobra Authors
.IP "dateparse (github.com/araddon/dateparse)" 4
.IX Item "dateparse (github.com/araddon/dateparse)"
Released under the \s-1MIT\s0 License, Copyright (c) 2015\-2017 Aaron Raddon
.IP "color (github.com/gookit/color)" 4
.IX Item "color (github.com/gookit/color)"
Released under the \s-1MIT\s0 License, Copyright (c) 2016 inhere
.IP "tablewriter (github.com/olekukonko/tablewriter)" 4
.IX Item "tablewriter (github.com/olekukonko/tablewriter)"
Released under the \s-1MIT\s0 License, Copyright (c) 201 by Oleku Konko
.IP "yaml (gopkg.in/yaml.v3)" 4
.IX Item "yaml (gopkg.in/yaml.v3)"
Released under the \s-1MIT\s0 License, Copyright (c) 2006\-2011 Kirill Simonov
.SH "AUTHORS"
.IX Header "AUTHORS"
Thomas von Dein \fBtom \s-1AT\s0 vondein \s-1DOT\s0 org\fR

View File

@@ -7,27 +7,50 @@ tablizer - Manipulate tabular output of other programs
Usage:
tablizer [regex] [file, ...] [flags]
Flags:
Operational Flags:
-c, --columns string Only show the speficied columns (separated by ,)
-d, --debug Enable debugging
-h, --help help for tablizer
-v, --invert-match select non-matching rows
-n, --no-numbering Disable header numbering
-o, --output string Output mode - one of: orgtbl, markdown, extended, ascii(default)
-N, --no-color Disable pattern highlighting
-H, --no-headers Disable headers display
-s, --separator string Custom field separator
-k, --sort-by int Sort by column (default: 1)
-z, --fuzzy Use fuzzy search [experimental]
-F, --filter field=reg Filter given field with regex, can be used multiple times
Output Flags (mutually exclusive):
-X, --extended Enable extended output
-M, --markdown Enable markdown table output
-O, --orgtbl Enable org-mode table output
-s, --separator string Custom field separator
-v, --version Print program version
-S, --shell Enable shell evaluable output
-Y, --yaml Enable yaml output
-C, --csv Enable CSV output
-A, --ascii Default output mode, ascii tabular
-L, --hightlight-lines Use alternating background colors for tables
Sort Mode Flags (mutually exclusive):
-a, --sort-age sort according to age (duration) string
-D, --sort-desc Sort in descending order (default: ascending)
-i, --sort-numeric sort according to string numerical value
-t, --sort-time sort according to time string
Other Flags:
--completion <shell> Generate the autocompletion script for <shell>
-f, --config <file> Configuration file (default: ~/.config/tablizer/config)
-d, --debug Enable debugging
-h, --help help for tablizer
-m, --man Display manual page
-V, --version Print program version
=head1 DESCRIPTION
Many programs generate tabular output. But sometimes you need to
post-process these tables, you may need to remove one or more columns
or you may want to filter for some pattern or you may need the output
in another program and need to parse it somehow. Standard unix tools
such as awk(1), grep(1) or column(1) may help, but sometimes it's a
tedious business.
or you may want to filter for some pattern (See L<PATTERNS>) or you
may need the output in another program and need to parse it somehow.
Standard unix tools such as awk(1), grep(1) or column(1) may help, but
sometimes it's a tedious business.
Let's take the output of the tool kubectl. It contains cells with
withespace and they do not separate columns by TAB characters. This is
@@ -35,14 +58,15 @@ not easy to process.
You can use B<tablizer> to do these and more things.
B<tablizer> analyses the header fiels of a table, registers the column
positions of each header field and separates columns by those
B<tablizer> analyses the header fields of a table, registers the
column positions of each header field and separates columns by those
positions.
Without any options it reads its input from C<STDIN>, but you can also
specify a file as a parameter. If you want to reduce the output by
some regular expression, just specify it as its first
parameters. Hence:
some regular expression, just specify it as its first parameter. You
may also use the B<-v> option to exclude all rows which match the
pattern. Hence:
# read from STDIN
kubectl get pods | tablizer
@@ -62,7 +86,7 @@ have a numer associated with it, e.g.:
NAME(1) READY(2) STATUS(3) RESTARTS(4) AGE(5)
These numbers denote the column and you can use them to specify which
columns you want to have in your output:
columns you want to have in your output (see L<COLUMNS>:
kubectl get pods | tablizer -c1,3
@@ -71,15 +95,120 @@ the original order.
The numbering can be suppressed by using the B<-n> option.
By default tablizer shows a header containing the names of each
column. This can be disabled using the B<-H> option. Be aware that
this only affects tabular output modes. Shell, Extended, Yaml and CSV
output modes always use the column names.
By default, if a B<pattern> has been speficied, matches will be
highlighted. You can disable this behavior with the B<-N> option.
Use the B<-k> option to specify by which column to sort the tabular
data (as in GNU sort(1)). The default sort column is the first one. To
disable sorting at all, supply 0 (Zero) to -k. The default sort order
is ascending. You can change this to descending order using the option
B<-D>. The default sort order is by string, but there are other sort
modes:
=over
=item B<-a --sort-age>
Sorts duration strings like "1d4h32m51s".
=item B<-i --sort-numeric>
Sorts numeric fields.
=item B<-t --sort-time>
Sorts timestamps.
=back
Finally the B<-d> option enables debugging output which is mostly
usefull for the developer.
useful for the developer.
=head2 PATTERNS AND FILTERING
You can reduce the rows being displayed by using a regular expression
pattern. The regexp is PCRE compatible, refer to the syntax cheat
sheet here: L<https://github.com/google/re2/wiki/Syntax>. If you want
to read a more comprehensive documentation about the topic and have
perl installed you can read it with:
perldoc perlre
Or read it online: L<https://perldoc.perl.org/perlre>.
A note on modifiers: the regexp engine used in tablizer uses another
modifier syntax:
(?MODIFIER)
The most important modifiers are:
C<i> ignore case
C<m> multiline mode
C<s> single line mode
Example for a case insensitive search:
kubectl get pods -A | tablizer "(?i)account"
You can use the experimental fuzzy search feature by providing the
option B<-z>, in which case the pattern is regarded as a fuzzy search
term, not a regexp.
Sometimes you want to filter by one or more columns. You can do that
using the B<-F> option. The option can be specified multiple times and
has the following format:
fieldname=regexp
Fieldnames (== columns headers) are case insensitive.
If you specify more than one filter, both filters have to match (AND
operation).
If the option B<-v> is specified, the filtering is inverted.
=head2 COLUMNS
The parameter B<-c> can be used to specify, which columns to
display. By default tablizer numerizes the header names and these
numbers can be used to specify which header to display, see example
above.
However, beside numbers, you can also use regular expressions with
B<-c>, also separated by comma. And you can mix column numbers with
regexps.
Lets take this table:
PID TTY TIME CMD
14001 pts/0 00:00:00 bash
42871 pts/0 00:00:00 ps
42872 pts/0 00:00:00 sed
We want to see only the CMD column and use a regex for this:
ps | tablizer -s '\s+' -c C
CMD(4)
bash
ps
tablizer
sed
where "C" is our regexp which matches CMD.
=head2 OUTPUT MODES
There might be cases when the tabular output of a program is way too
large for your current terminal but you still need to see every
column. In such cases the B<-o extended> or B<-X> option can be
usefull which enables I<extended mode>. In this mode, each row will be
useful which enables I<extended mode>. In this mode, each row will be
printed vertically, header left, value right, aligned by the field
widths. Here's an example:
@@ -106,7 +235,111 @@ You can use this in an eval loop.
Beside normal ascii mode (the default) and extended mode there are
more output modes available: B<orgtbl> which prints an Emacs org-mode
table and B<markdown> which prints a Markdown table.
table and B<markdown> which prints a Markdown table, B<yaml>, which
prints yaml encoding and CSV mode, which prints a comma separated
value file.
=head2 ENVIRONMENT VARIABLES
B<tablizer> supports certain environment variables which use can use
to influence program behavior. Commandline flags have always
precedence over environment variables.
=over
=item <T_NO_HEADER_NUMBERING> - disable numbering of header fields, like B<-n>.
=item <T_COLUMNS> - comma separated list of columns to output, like B<-c>
=item <NO_COLORS> - disable colorization of matches, like B<-N>
=back
=head2 COMPLETION
Shell completion for command line options can be enabled by using the
B<--completion> flag. The required parameter is the name of your
shell. Currently supported are: bash, zsh, fish and powershell.
Detailed instructions:
=over
=item Bash:
source <(tablizer --completion bash)
To load completions for each session, execute once:
# Linux:
$ tablizer --completion bash > /etc/bash_completion.d/tablizer
# macOS:
$ tablizer --completion bash > $(brew --prefix)/etc/bash_completion.d/tablizer
=item Zsh:
If shell completion is not already enabled in your environment,
you will need to enable it. You can execute the following once:
echo "autoload -U compinit; compinit" >> ~/.zshrc
To load completions for each session, execute once:
$ tablizer --completion zsh > "${fpath[1]}/_tablizer"
You will need to start a new shell for this setup to take effect.
=item fish:
tablizer --completion fish | source
To load completions for each session, execute once:
tablizer --completion fish > ~/.config/fish/completions/tablizer.fish
=item PowerShell:
tablizer --completion powershell | Out-String | Invoke-Expression
To load completions for every new session, run:
tablizer --completion powershell > tablizer.ps1
and source this file from your PowerShell profile.
=back
=head1 CONFIGURATION AND COLORS
YOu can put certain configuration values into a configuration file in
HCL format. By default tablizer looks for
C<$HOME/.config/tablizer/config>, but you can provide one using the
parameter C<-f>.
In the configuration the following variables can be defined:
BG = "lightGreen"
FG = "white"
HighlightBG = "lightGreen"
HighlightFG = "white"
NoHighlightBG = "white"
NoHighlightFG = "lightGreen"
HighlightHdrBG = "red"
HighlightHdrFG = "white"
The following color definitions are available:
black, blue, cyan, darkGray, default, green, lightBlue, lightCyan,
lightGreen, lightMagenta, lightRed, lightWhite, lightYellow,
magenta, red, white, yellow
The Variables B<FG> and B<BG> are being used to highlight matches. The
other *FG and *BG variables are for colored table output (enabled with
the C<-L> parameter).
Colorization can be turned off completely either by setting the
parameter C<-N> or the environment variable B<NO_COLOR> to a true value.
=head1 BUGS
@@ -118,9 +351,9 @@ L<https://github.com/TLINDEN/tablizer/issues>.
This software is licensed under the GNU GENERAL PUBLIC LICENSE version 3.
Copyright (c) 2022 by Thomas von Dein
Copyright (c) 2022-2024 by Thomas von Dein
This software uses the following GO libraries:
This software uses the following GO modules:
=over 4
@@ -132,6 +365,22 @@ Released under the MIT License, Copyright (c) 2016 Alec Thomas
Released under the Apache 2.0 license, Copyright 2013-2022 The Cobra Authors
=item dateparse (github.com/araddon/dateparse)
Released under the MIT License, Copyright (c) 2015-2017 Aaron Raddon
=item color (github.com/gookit/color)
Released under the MIT License, Copyright (c) 2016 inhere
=item tablewriter (github.com/olekukonko/tablewriter)
Released under the MIT License, Copyright (c) 201 by Oleku Konko
=item yaml (gopkg.in/yaml.v3)
Released under the MIT License, Copyright (c) 2006-2011 Kirill Simonov
=back
=head1 AUTHORS

2
vendor/github.com/agext/levenshtein/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,2 @@
README.html
coverage.out

70
vendor/github.com/agext/levenshtein/.travis.yml generated vendored Normal file
View File

@@ -0,0 +1,70 @@
language: go
sudo: false
go:
- 1.8
- 1.7.5
- 1.7.4
- 1.7.3
- 1.7.2
- 1.7.1
- 1.7
- tip
- 1.6.4
- 1.6.3
- 1.6.2
- 1.6.1
- 1.6
- 1.5.4
- 1.5.3
- 1.5.2
- 1.5.1
- 1.5
- 1.4.3
- 1.4.2
- 1.4.1
- 1.4
- 1.3.3
- 1.3.2
- 1.3.1
- 1.3
- 1.2.2
- 1.2.1
- 1.2
- 1.1.2
- 1.1.1
- 1.1
before_install:
- go get github.com/mattn/goveralls
script:
- $HOME/gopath/bin/goveralls -service=travis-ci
notifications:
email:
on_success: never
matrix:
fast_finish: true
allow_failures:
- go: tip
- go: 1.6.4
- go: 1.6.3
- go: 1.6.2
- go: 1.6.1
- go: 1.6
- go: 1.5.4
- go: 1.5.3
- go: 1.5.2
- go: 1.5.1
- go: 1.5
- go: 1.4.3
- go: 1.4.2
- go: 1.4.1
- go: 1.4
- go: 1.3.3
- go: 1.3.2
- go: 1.3.1
- go: 1.3
- go: 1.2.2
- go: 1.2.1
- go: 1.2
- go: 1.1.2
- go: 1.1.1
- go: 1.1

36
vendor/github.com/agext/levenshtein/DCO generated vendored Normal file
View File

@@ -0,0 +1,36 @@
Developer Certificate of Origin
Version 1.1
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.

201
vendor/github.com/agext/levenshtein/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

1
vendor/github.com/agext/levenshtein/MAINTAINERS generated vendored Normal file
View File

@@ -0,0 +1 @@
Alex Bucataru <alex@alrux.com> (@AlexBucataru)

5
vendor/github.com/agext/levenshtein/NOTICE generated vendored Normal file
View File

@@ -0,0 +1,5 @@
Alrux Go EXTensions (AGExt) - package levenshtein
Copyright 2016 ALRUX Inc.
This product includes software developed at ALRUX Inc.
(http://www.alrux.com/).

38
vendor/github.com/agext/levenshtein/README.md generated vendored Normal file
View File

@@ -0,0 +1,38 @@
# A Go package for calculating the Levenshtein distance between two strings
[![Release](https://img.shields.io/github/release/agext/levenshtein.svg?style=flat)](https://github.com/agext/levenshtein/releases/latest)
[![GoDoc](https://img.shields.io/badge/godoc-reference-blue.svg?style=flat)](https://godoc.org/github.com/agext/levenshtein) 
[![Build Status](https://travis-ci.org/agext/levenshtein.svg?branch=master&style=flat)](https://travis-ci.org/agext/levenshtein)
[![Coverage Status](https://coveralls.io/repos/github/agext/levenshtein/badge.svg?style=flat)](https://coveralls.io/github/agext/levenshtein)
[![Go Report Card](https://goreportcard.com/badge/github.com/agext/levenshtein?style=flat)](https://goreportcard.com/report/github.com/agext/levenshtein)
This package implements distance and similarity metrics for strings, based on the Levenshtein measure, in [Go](http://golang.org).
## Project Status
v1.2.1 Stable: Guaranteed no breaking changes to the API in future v1.x releases. Probably safe to use in production, though provided on "AS IS" basis.
This package is being actively maintained. If you encounter any problems or have any suggestions for improvement, please [open an issue](https://github.com/agext/levenshtein/issues). Pull requests are welcome.
## Overview
The Levenshtein `Distance` between two strings is the minimum total cost of edits that would convert the first string into the second. The allowed edit operations are insertions, deletions, and substitutions, all at character (one UTF-8 code point) level. Each operation has a default cost of 1, but each can be assigned its own cost equal to or greater than 0.
A `Distance` of 0 means the two strings are identical, and the higher the value the more different the strings. Since in practice we are interested in finding if the two strings are "close enough", it often does not make sense to continue the calculation once the result is mathematically guaranteed to exceed a desired threshold. Providing this value to the `Distance` function allows it to take a shortcut and return a lower bound instead of an exact cost when the threshold is exceeded.
The `Similarity` function calculates the distance, then converts it into a normalized metric within the range 0..1, with 1 meaning the strings are identical, and 0 that they have nothing in common. A minimum similarity threshold can be provided to speed up the calculation of the metric for strings that are far too dissimilar for the purpose at hand. All values under this threshold are rounded down to 0.
The `Match` function provides a similarity metric, with the same range and meaning as `Similarity`, but with a bonus for string pairs that share a common prefix and have a similarity above a "bonus threshold". It uses the same method as proposed by Winkler for the Jaro distance, and the reasoning behind it is that these string pairs are very likely spelling variations or errors, and they are more closely linked than the edit distance alone would suggest.
The underlying `Calculate` function is also exported, to allow the building of other derivative metrics, if needed.
## Installation
```
go get github.com/agext/levenshtein
```
## License
Package levenshtein is released under the Apache 2.0 license. See the [LICENSE](LICENSE) file for details.

290
vendor/github.com/agext/levenshtein/levenshtein.go generated vendored Normal file
View File

@@ -0,0 +1,290 @@
// Copyright 2016 ALRUX Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
Package levenshtein implements distance and similarity metrics for strings, based on the Levenshtein measure.
The Levenshtein `Distance` between two strings is the minimum total cost of edits that would convert the first string into the second. The allowed edit operations are insertions, deletions, and substitutions, all at character (one UTF-8 code point) level. Each operation has a default cost of 1, but each can be assigned its own cost equal to or greater than 0.
A `Distance` of 0 means the two strings are identical, and the higher the value the more different the strings. Since in practice we are interested in finding if the two strings are "close enough", it often does not make sense to continue the calculation once the result is mathematically guaranteed to exceed a desired threshold. Providing this value to the `Distance` function allows it to take a shortcut and return a lower bound instead of an exact cost when the threshold is exceeded.
The `Similarity` function calculates the distance, then converts it into a normalized metric within the range 0..1, with 1 meaning the strings are identical, and 0 that they have nothing in common. A minimum similarity threshold can be provided to speed up the calculation of the metric for strings that are far too dissimilar for the purpose at hand. All values under this threshold are rounded down to 0.
The `Match` function provides a similarity metric, with the same range and meaning as `Similarity`, but with a bonus for string pairs that share a common prefix and have a similarity above a "bonus threshold". It uses the same method as proposed by Winkler for the Jaro distance, and the reasoning behind it is that these string pairs are very likely spelling variations or errors, and they are more closely linked than the edit distance alone would suggest.
The underlying `Calculate` function is also exported, to allow the building of other derivative metrics, if needed.
*/
package levenshtein
// Calculate determines the Levenshtein distance between two strings, using
// the given costs for each edit operation. It returns the distance along with
// the lengths of the longest common prefix and suffix.
//
// If maxCost is non-zero, the calculation stops as soon as the distance is determined
// to be greater than maxCost. Therefore, any return value higher than maxCost is a
// lower bound for the actual distance.
func Calculate(str1, str2 []rune, maxCost, insCost, subCost, delCost int) (dist, prefixLen, suffixLen int) {
l1, l2 := len(str1), len(str2)
// trim common prefix, if any, as it doesn't affect the distance
for ; prefixLen < l1 && prefixLen < l2; prefixLen++ {
if str1[prefixLen] != str2[prefixLen] {
break
}
}
str1, str2 = str1[prefixLen:], str2[prefixLen:]
l1 -= prefixLen
l2 -= prefixLen
// trim common suffix, if any, as it doesn't affect the distance
for 0 < l1 && 0 < l2 {
if str1[l1-1] != str2[l2-1] {
str1, str2 = str1[:l1], str2[:l2]
break
}
l1--
l2--
suffixLen++
}
// if the first string is empty, the distance is the length of the second string times the cost of insertion
if l1 == 0 {
dist = l2 * insCost
return
}
// if the second string is empty, the distance is the length of the first string times the cost of deletion
if l2 == 0 {
dist = l1 * delCost
return
}
// variables used in inner "for" loops
var y, dy, c, l int
// if maxCost is greater than or equal to the maximum possible distance, it's equivalent to 'unlimited'
if maxCost > 0 {
if subCost < delCost+insCost {
if maxCost >= l1*subCost+(l2-l1)*insCost {
maxCost = 0
}
} else {
if maxCost >= l1*delCost+l2*insCost {
maxCost = 0
}
}
}
if maxCost > 0 {
// prefer the longer string first, to minimize time;
// a swap also transposes the meanings of insertion and deletion.
if l1 < l2 {
str1, str2, l1, l2, insCost, delCost = str2, str1, l2, l1, delCost, insCost
}
// the length differential times cost of deletion is a lower bound for the cost;
// if it is higher than the maxCost, there is no point going into the main calculation.
if dist = (l1 - l2) * delCost; dist > maxCost {
return
}
d := make([]int, l1+1)
// offset and length of d in the current row
doff, dlen := 0, 1
for y, dy = 1, delCost; y <= l1 && dy <= maxCost; dlen++ {
d[y] = dy
y++
dy = y * delCost
}
// fmt.Printf("%q -> %q: init doff=%d dlen=%d d[%d:%d]=%v\n", str1, str2, doff, dlen, doff, doff+dlen, d[doff:doff+dlen])
for x := 0; x < l2; x++ {
dy, d[doff] = d[doff], d[doff]+insCost
for d[doff] > maxCost && dlen > 0 {
if str1[doff] != str2[x] {
dy += subCost
}
doff++
dlen--
if c = d[doff] + insCost; c < dy {
dy = c
}
dy, d[doff] = d[doff], dy
}
for y, l = doff, doff+dlen-1; y < l; dy, d[y] = d[y], dy {
if str1[y] != str2[x] {
dy += subCost
}
if c = d[y] + delCost; c < dy {
dy = c
}
y++
if c = d[y] + insCost; c < dy {
dy = c
}
}
if y < l1 {
if str1[y] != str2[x] {
dy += subCost
}
if c = d[y] + delCost; c < dy {
dy = c
}
for ; dy <= maxCost && y < l1; dy, d[y] = dy+delCost, dy {
y++
dlen++
}
}
// fmt.Printf("%q -> %q: x=%d doff=%d dlen=%d d[%d:%d]=%v\n", str1, str2, x, doff, dlen, doff, doff+dlen, d[doff:doff+dlen])
if dlen == 0 {
dist = maxCost + 1
return
}
}
if doff+dlen-1 < l1 {
dist = maxCost + 1
return
}
dist = d[l1]
} else {
// ToDo: This is O(l1*l2) time and O(min(l1,l2)) space; investigate if it is
// worth to implement diagonal approach - O(l1*(1+dist)) time, up to O(l1*l2) space
// http://www.csse.monash.edu.au/~lloyd/tildeStrings/Alignment/92.IPL.html
// prefer the shorter string first, to minimize space; time is O(l1*l2) anyway;
// a swap also transposes the meanings of insertion and deletion.
if l1 > l2 {
str1, str2, l1, l2, insCost, delCost = str2, str1, l2, l1, delCost, insCost
}
d := make([]int, l1+1)
for y = 1; y <= l1; y++ {
d[y] = y * delCost
}
for x := 0; x < l2; x++ {
dy, d[0] = d[0], d[0]+insCost
for y = 0; y < l1; dy, d[y] = d[y], dy {
if str1[y] != str2[x] {
dy += subCost
}
if c = d[y] + delCost; c < dy {
dy = c
}
y++
if c = d[y] + insCost; c < dy {
dy = c
}
}
}
dist = d[l1]
}
return
}
// Distance returns the Levenshtein distance between str1 and str2, using the
// default or provided cost values. Pass nil for the third argument to use the
// default cost of 1 for all three operations, with no maximum.
func Distance(str1, str2 string, p *Params) int {
if p == nil {
p = defaultParams
}
dist, _, _ := Calculate([]rune(str1), []rune(str2), p.maxCost, p.insCost, p.subCost, p.delCost)
return dist
}
// Similarity returns a score in the range of 0..1 for how similar the two strings are.
// A score of 1 means the strings are identical, and 0 means they have nothing in common.
//
// A nil third argument uses the default cost of 1 for all three operations.
//
// If a non-zero MinScore value is provided in the parameters, scores lower than it
// will be returned as 0.
func Similarity(str1, str2 string, p *Params) float64 {
return Match(str1, str2, p.Clone().BonusThreshold(1.1)) // guaranteed no bonus
}
// Match returns a similarity score adjusted by the same method as proposed by Winkler for
// the Jaro distance - giving a bonus to string pairs that share a common prefix, only if their
// similarity score is already over a threshold.
//
// The score is in the range of 0..1, with 1 meaning the strings are identical,
// and 0 meaning they have nothing in common.
//
// A nil third argument uses the default cost of 1 for all three operations, maximum length of
// common prefix to consider for bonus of 4, scaling factor of 0.1, and bonus threshold of 0.7.
//
// If a non-zero MinScore value is provided in the parameters, scores lower than it
// will be returned as 0.
func Match(str1, str2 string, p *Params) float64 {
s1, s2 := []rune(str1), []rune(str2)
l1, l2 := len(s1), len(s2)
// two empty strings are identical; shortcut also avoids divByZero issues later on.
if l1 == 0 && l2 == 0 {
return 1
}
if p == nil {
p = defaultParams
}
// a min over 1 can never be satisfied, so the score is 0.
if p.minScore > 1 {
return 0
}
insCost, delCost, maxDist, max := p.insCost, p.delCost, 0, 0
if l1 > l2 {
l1, l2, insCost, delCost = l2, l1, delCost, insCost
}
if p.subCost < delCost+insCost {
maxDist = l1*p.subCost + (l2-l1)*insCost
} else {
maxDist = l1*delCost + l2*insCost
}
// a zero min is always satisfied, so no need to set a max cost.
if p.minScore > 0 {
// if p.minScore is lower than p.bonusThreshold, we can use a simplified formula
// for the max cost, because a sim score below min cannot receive a bonus.
if p.minScore < p.bonusThreshold {
// round down the max - a cost equal to a rounded up max would already be under min.
max = int((1 - p.minScore) * float64(maxDist))
} else {
// p.minScore <= sim + p.bonusPrefix*p.bonusScale*(1-sim)
// p.minScore <= (1-dist/maxDist) + p.bonusPrefix*p.bonusScale*(1-(1-dist/maxDist))
// p.minScore <= 1 - dist/maxDist + p.bonusPrefix*p.bonusScale*dist/maxDist
// 1 - p.minScore >= dist/maxDist - p.bonusPrefix*p.bonusScale*dist/maxDist
// (1-p.minScore)*maxDist/(1-p.bonusPrefix*p.bonusScale) >= dist
max = int((1 - p.minScore) * float64(maxDist) / (1 - float64(p.bonusPrefix)*p.bonusScale))
}
}
dist, pl, _ := Calculate(s1, s2, max, p.insCost, p.subCost, p.delCost)
if max > 0 && dist > max {
return 0
}
sim := 1 - float64(dist)/float64(maxDist)
if sim >= p.bonusThreshold && sim < 1 && p.bonusPrefix > 0 && p.bonusScale > 0 {
if pl > p.bonusPrefix {
pl = p.bonusPrefix
}
sim += float64(pl) * p.bonusScale * (1 - sim)
}
if sim < p.minScore {
return 0
}
return sim
}

152
vendor/github.com/agext/levenshtein/params.go generated vendored Normal file
View File

@@ -0,0 +1,152 @@
// Copyright 2016 ALRUX Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package levenshtein
// Params represents a set of parameter values for the various formulas involved
// in the calculation of the Levenshtein string metrics.
type Params struct {
insCost int
subCost int
delCost int
maxCost int
minScore float64
bonusPrefix int
bonusScale float64
bonusThreshold float64
}
var (
defaultParams = NewParams()
)
// NewParams creates a new set of parameters and initializes it with the default values.
func NewParams() *Params {
return &Params{
insCost: 1,
subCost: 1,
delCost: 1,
maxCost: 0,
minScore: 0,
bonusPrefix: 4,
bonusScale: .1,
bonusThreshold: .7,
}
}
// Clone returns a pointer to a copy of the receiver parameter set, or of a new
// default parameter set if the receiver is nil.
func (p *Params) Clone() *Params {
if p == nil {
return NewParams()
}
return &Params{
insCost: p.insCost,
subCost: p.subCost,
delCost: p.delCost,
maxCost: p.maxCost,
minScore: p.minScore,
bonusPrefix: p.bonusPrefix,
bonusScale: p.bonusScale,
bonusThreshold: p.bonusThreshold,
}
}
// InsCost overrides the default value of 1 for the cost of insertion.
// The new value must be zero or positive.
func (p *Params) InsCost(v int) *Params {
if v >= 0 {
p.insCost = v
}
return p
}
// SubCost overrides the default value of 1 for the cost of substitution.
// The new value must be zero or positive.
func (p *Params) SubCost(v int) *Params {
if v >= 0 {
p.subCost = v
}
return p
}
// DelCost overrides the default value of 1 for the cost of deletion.
// The new value must be zero or positive.
func (p *Params) DelCost(v int) *Params {
if v >= 0 {
p.delCost = v
}
return p
}
// MaxCost overrides the default value of 0 (meaning unlimited) for the maximum cost.
// The calculation of Distance() stops when the result is guaranteed to exceed
// this maximum, returning a lower-bound rather than exact value.
// The new value must be zero or positive.
func (p *Params) MaxCost(v int) *Params {
if v >= 0 {
p.maxCost = v
}
return p
}
// MinScore overrides the default value of 0 for the minimum similarity score.
// Scores below this threshold are returned as 0 by Similarity() and Match().
// The new value must be zero or positive. Note that a minimum greater than 1
// can never be satisfied, resulting in a score of 0 for any pair of strings.
func (p *Params) MinScore(v float64) *Params {
if v >= 0 {
p.minScore = v
}
return p
}
// BonusPrefix overrides the default value for the maximum length of
// common prefix to be considered for bonus by Match().
// The new value must be zero or positive.
func (p *Params) BonusPrefix(v int) *Params {
if v >= 0 {
p.bonusPrefix = v
}
return p
}
// BonusScale overrides the default value for the scaling factor used by Match()
// in calculating the bonus.
// The new value must be zero or positive. To guarantee that the similarity score
// remains in the interval 0..1, this scaling factor is not allowed to exceed
// 1 / BonusPrefix.
func (p *Params) BonusScale(v float64) *Params {
if v >= 0 {
p.bonusScale = v
}
// the bonus cannot exceed (1-sim), or the score may become greater than 1.
if float64(p.bonusPrefix)*p.bonusScale > 1 {
p.bonusScale = 1 / float64(p.bonusPrefix)
}
return p
}
// BonusThreshold overrides the default value for the minimum similarity score
// for which Match() can assign a bonus.
// The new value must be zero or positive. Note that a threshold greater than 1
// effectively makes Match() become the equivalent of Similarity().
func (p *Params) BonusThreshold(v float64) *Params {
if v >= 0 {
p.bonusThreshold = v
}
return p
}

21
vendor/github.com/alecthomas/repr/COPYING generated vendored Normal file
View File

@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2016 Alec Thomas
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

90
vendor/github.com/alecthomas/repr/README.md generated vendored Normal file
View File

@@ -0,0 +1,90 @@
# Python's repr() for Go [![](https://godoc.org/github.com/alecthomas/repr?status.svg)](http://godoc.org/github.com/alecthomas/repr) [![CircleCI](https://img.shields.io/circleci/project/github/alecthomas/repr.svg)](https://circleci.com/gh/alecthomas/repr)
This package attempts to represent Go values in a form that can be used almost directly in Go source
code.
Unfortunately, some values (such as pointers to basic types) can not be represented directly in Go.
These values will be represented as `&<value>`. eg. `&23`
## Example
```go
type test struct {
S string
I int
A []int
}
func main() {
repr.Print(&test{
S: "String",
I: 123,
A: []int{1, 2, 3},
})
}
```
Outputs
```
&main.test{S: "String", I: 123, A: []int{1, 2, 3}}
```
## Why repr and not [pp](https://github.com/k0kubun/pp)?
pp is designed for printing coloured output to consoles, with (seemingly?) no way to disable this. If you don't want coloured output (eg. for use in diffs, logs, etc.) repr is for you.
## Why repr and not [go-spew](https://github.com/davecgh/go-spew)?
Repr deliberately contains much less metadata about values. It is designed to (generally) be copyable directly into source code.
Compare go-spew:
```go
(parser.expression) (len=1 cap=1) {
(parser.alternative) (len=1 cap=1) {
([]interface {}) (len=1 cap=1) {
(*parser.repitition)(0xc82000b220)({
expression: (parser.expression) (len=2 cap=2) {
(parser.alternative) (len=1 cap=1) {
([]interface {}) (len=1 cap=1) {
(parser.str) (len=1) "a"
}
},
(parser.alternative) (len=1 cap=1) {
([]interface {}) (len=1 cap=1) {
(*parser.self)(0x593ef0)({
})
}
}
}
})
}
}
}
```
To repr:
```go
parser.expression{
parser.alternative{
[]interface {}{
&parser.repitition{
expression: parser.expression{
parser.alternative{
[]interface {}{
parser.str("a"),
},
},
parser.alternative{
[]interface {}{
&parser.self{ },
},
},
},
},
},
},
}
```

353
vendor/github.com/alecthomas/repr/repr.go generated vendored Normal file
View File

@@ -0,0 +1,353 @@
// Package repr attempts to represent Go values in a form that can be copy-and-pasted into source
// code directly.
//
// Some values (such as pointers to basic types) can not be represented directly in
// Go. These values will be output as `&<value>`. eg. `&23`
package repr
import (
"bytes"
"fmt"
"io"
"os"
"reflect"
"sort"
"time"
"unsafe"
)
var (
// "Real" names of basic kinds, used to differentiate type aliases.
realKindName = map[reflect.Kind]string{
reflect.Bool: "bool",
reflect.Int: "int",
reflect.Int8: "int8",
reflect.Int16: "int16",
reflect.Int32: "int32",
reflect.Int64: "int64",
reflect.Uint: "uint",
reflect.Uint8: "uint8",
reflect.Uint16: "uint16",
reflect.Uint32: "uint32",
reflect.Uint64: "uint64",
reflect.Uintptr: "uintptr",
reflect.Float32: "float32",
reflect.Float64: "float64",
reflect.Complex64: "complex64",
reflect.Complex128: "complex128",
reflect.Array: "array",
reflect.Chan: "chan",
reflect.Func: "func",
reflect.Map: "map",
reflect.Slice: "slice",
reflect.String: "string",
}
goStringerType = reflect.TypeOf((*fmt.GoStringer)(nil)).Elem()
byteSliceType = reflect.TypeOf([]byte{})
)
// Default prints to os.Stdout with two space indentation.
var Default = New(os.Stdout, Indent(" "))
// An Option modifies the default behaviour of a Printer.
type Option func(o *Printer)
// Indent output by this much.
func Indent(indent string) Option { return func(o *Printer) { o.indent = indent } }
// NoIndent disables indenting.
func NoIndent() Option { return Indent("") }
// OmitEmpty sets whether empty field members should be omitted from output.
func OmitEmpty(omitEmpty bool) Option { return func(o *Printer) { o.omitEmpty = omitEmpty } }
// ExplicitTypes adds explicit typing to slice and map struct values that would normally be inferred by Go.
func ExplicitTypes(ok bool) Option { return func(o *Printer) { o.explicitTypes = true } }
// IgnoreGoStringer disables use of the .GoString() method.
func IgnoreGoStringer() Option { return func(o *Printer) { o.ignoreGoStringer = true } }
// Hide excludes the given types from representation, instead just printing the name of the type.
func Hide(ts ...interface{}) Option {
return func(o *Printer) {
for _, t := range ts {
rt := reflect.Indirect(reflect.ValueOf(t)).Type()
o.exclude[rt] = true
}
}
}
// AlwaysIncludeType always includes explicit type information for each item.
func AlwaysIncludeType() Option { return func(o *Printer) { o.alwaysIncludeType = true } }
// Printer represents structs in a printable manner.
type Printer struct {
indent string
omitEmpty bool
ignoreGoStringer bool
alwaysIncludeType bool
explicitTypes bool
exclude map[reflect.Type]bool
w io.Writer
}
// New creates a new Printer on w with the given Options.
func New(w io.Writer, options ...Option) *Printer {
p := &Printer{
w: w,
indent: " ",
omitEmpty: true,
exclude: map[reflect.Type]bool{},
}
for _, option := range options {
option(p)
}
return p
}
func (p *Printer) nextIndent(indent string) string {
if p.indent != "" {
return indent + p.indent
}
return ""
}
func (p *Printer) thisIndent(indent string) string {
if p.indent != "" {
return indent
}
return ""
}
// Print the values.
func (p *Printer) Print(vs ...interface{}) {
for i, v := range vs {
if i > 0 {
fmt.Fprint(p.w, " ")
}
p.reprValue(map[reflect.Value]bool{}, reflect.ValueOf(v), "", true)
}
}
// Println prints each value on a new line.
func (p *Printer) Println(vs ...interface{}) {
for i, v := range vs {
if i > 0 {
fmt.Fprint(p.w, " ")
}
p.reprValue(map[reflect.Value]bool{}, reflect.ValueOf(v), "", true)
}
fmt.Fprintln(p.w)
}
func (p *Printer) reprValue(seen map[reflect.Value]bool, v reflect.Value, indent string, showType bool) { // nolint: gocyclo
if seen[v] {
fmt.Fprint(p.w, "...")
return
}
seen[v] = true
defer delete(seen, v)
if v.Kind() == reflect.Invalid || (v.Kind() == reflect.Ptr || v.Kind() == reflect.Map || v.Kind() == reflect.Chan || v.Kind() == reflect.Slice || v.Kind() == reflect.Func || v.Kind() == reflect.Interface) && v.IsNil() {
fmt.Fprint(p.w, "nil")
return
}
if p.exclude[v.Type()] {
fmt.Fprintf(p.w, "%s...", v.Type().Name())
return
}
t := v.Type()
if t == byteSliceType {
fmt.Fprintf(p.w, "[]byte(%q)", v.Bytes())
return
}
// If we can't access a private field directly with reflection, try and do so via unsafe.
if !v.CanInterface() && v.CanAddr() {
uv := reflect.NewAt(t, unsafe.Pointer(v.UnsafeAddr())).Elem()
if uv.CanInterface() {
v = uv
}
}
// Attempt to use fmt.GoStringer interface.
if !p.ignoreGoStringer && t.Implements(goStringerType) {
fmt.Fprint(p.w, v.Interface().(fmt.GoStringer).GoString())
return
}
in := p.thisIndent(indent)
ni := p.nextIndent(indent)
switch v.Kind() {
case reflect.Slice, reflect.Array:
fmt.Fprintf(p.w, "%s{", v.Type())
if v.Len() == 0 {
fmt.Fprint(p.w, "}")
} else {
if p.indent != "" {
fmt.Fprintf(p.w, "\n")
}
for i := 0; i < v.Len(); i++ {
e := v.Index(i)
fmt.Fprintf(p.w, "%s", ni)
p.reprValue(seen, e, ni, p.alwaysIncludeType || p.explicitTypes)
if p.indent != "" {
fmt.Fprintf(p.w, ",\n")
} else if i < v.Len()-1 {
fmt.Fprintf(p.w, ", ")
}
}
fmt.Fprintf(p.w, "%s}", in)
}
case reflect.Chan:
fmt.Fprintf(p.w, "make(")
fmt.Fprintf(p.w, "%s", v.Type())
fmt.Fprintf(p.w, ", %d)", v.Cap())
case reflect.Map:
fmt.Fprintf(p.w, "%s{", v.Type())
if p.indent != "" && v.Len() != 0 {
fmt.Fprintf(p.w, "\n")
}
keys := v.MapKeys()
sort.Slice(keys, func(i, j int) bool {
return fmt.Sprint(keys[i]) < fmt.Sprint(keys[j])
})
for i, k := range keys {
kv := v.MapIndex(k)
fmt.Fprintf(p.w, "%s", ni)
p.reprValue(seen, k, ni, p.alwaysIncludeType || p.explicitTypes)
fmt.Fprintf(p.w, ": ")
p.reprValue(seen, kv, ni, true)
if p.indent != "" {
fmt.Fprintf(p.w, ",\n")
} else if i < v.Len()-1 {
fmt.Fprintf(p.w, ", ")
}
}
fmt.Fprintf(p.w, "%s}", in)
case reflect.Struct:
if td, ok := asTime(v); ok {
timeToGo(p.w, td)
} else {
if showType {
fmt.Fprintf(p.w, "%s{", v.Type())
} else {
fmt.Fprint(p.w, "{")
}
if p.indent != "" && v.NumField() != 0 {
fmt.Fprintf(p.w, "\n")
}
for i := 0; i < v.NumField(); i++ {
t := v.Type().Field(i)
f := v.Field(i)
if p.omitEmpty && f.IsZero() {
continue
}
fmt.Fprintf(p.w, "%s%s: ", ni, t.Name)
p.reprValue(seen, f, ni, true)
if p.indent != "" {
fmt.Fprintf(p.w, ",\n")
} else if i < v.NumField()-1 {
fmt.Fprintf(p.w, ", ")
}
}
fmt.Fprintf(p.w, "%s}", indent)
}
case reflect.Ptr:
if v.IsNil() {
fmt.Fprintf(p.w, "nil")
return
}
if showType {
fmt.Fprintf(p.w, "&")
}
p.reprValue(seen, v.Elem(), indent, showType)
case reflect.String:
if t.Name() != "string" || p.alwaysIncludeType {
fmt.Fprintf(p.w, "%s(%q)", t, v.String())
} else {
fmt.Fprintf(p.w, "%q", v.String())
}
case reflect.Interface:
if v.IsNil() {
fmt.Fprintf(p.w, "interface {}(nil)")
} else {
p.reprValue(seen, v.Elem(), indent, true)
}
default:
if t.Name() != realKindName[t.Kind()] || p.alwaysIncludeType {
fmt.Fprintf(p.w, "%s(%v)", t, v)
} else {
fmt.Fprintf(p.w, "%v", v)
}
}
}
func asTime(v reflect.Value) (time.Time, bool) {
if !v.CanInterface() {
return time.Time{}, false
}
t, ok := v.Interface().(time.Time)
return t, ok
}
// String returns a string representing v.
func String(v interface{}, options ...Option) string {
w := bytes.NewBuffer(nil)
options = append([]Option{NoIndent()}, options...)
p := New(w, options...)
p.Print(v)
return w.String()
}
func extractOptions(vs ...interface{}) (args []interface{}, options []Option) {
for _, v := range vs {
if o, ok := v.(Option); ok {
options = append(options, o)
} else {
args = append(args, v)
}
}
return
}
// Println prints v to os.Stdout, one per line.
func Println(vs ...interface{}) {
args, options := extractOptions(vs...)
New(os.Stdout, options...).Println(args...)
}
// Print writes a representation of v to os.Stdout, separated by spaces.
func Print(vs ...interface{}) {
args, options := extractOptions(vs...)
New(os.Stdout, options...).Print(args...)
}
func timeToGo(w io.Writer, t time.Time) {
if t.IsZero() {
fmt.Fprint(w, "time.Time{}")
return
}
var zone string
switch loc := t.Location(); loc {
case nil:
zone = "nil"
case time.UTC:
zone = "time.UTC"
case time.Local:
zone = "time.Local"
default:
n, off := t.Zone()
zone = fmt.Sprintf("time.FixedZone(%q, %d)", n, off)
}
y, m, d := t.Date()
fmt.Fprintf(w, `time.Date(%d, %d, %d, %d, %d, %d, %d, %s)`, y, m, d, t.Hour(), t.Minute(), t.Second(), t.Nanosecond(), zone)
}

View File

@@ -0,0 +1,95 @@
Copyright (c) 2017 Martin Atkins
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---------
Unicode table generation programs are under a separate copyright and license:
Copyright (c) 2014 Couchbase, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific language governing permissions
and limitations under the License.
---------
Grapheme break data is provided as part of the Unicode character database,
copright 2016 Unicode, Inc, which is provided with the following license:
Unicode Data Files include all data files under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
Unicode Data Files do not include PDF online code charts under the
directory http://www.unicode.org/Public/.
Software includes any source code published in the Unicode Standard
or under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2017 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.

View File

@@ -0,0 +1,30 @@
package textseg
import (
"bufio"
"bytes"
)
// AllTokens is a utility that uses a bufio.SplitFunc to produce a slice of
// all of the recognized tokens in the given buffer.
func AllTokens(buf []byte, splitFunc bufio.SplitFunc) ([][]byte, error) {
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Split(splitFunc)
var ret [][]byte
for scanner.Scan() {
ret = append(ret, scanner.Bytes())
}
return ret, scanner.Err()
}
// TokenCount is a utility that uses a bufio.SplitFunc to count the number of
// recognized tokens in the given buffer.
func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) {
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Split(splitFunc)
var ret int
for scanner.Scan() {
ret++
}
return ret, scanner.Err()
}

View File

@@ -0,0 +1,525 @@
# The following Ragel file was autogenerated with unicode2ragel.rb
# from: https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt
#
# It defines ["Extended_Pictographic"].
#
# To use this, make sure that your alphtype is set to byte,
# and that your input is in utf8.
%%{
machine Emoji;
Extended_Pictographic =
0xC2 0xA9 #E0.6 [1] (©️) copyright
| 0xC2 0xAE #E0.6 [1] (®️) registered
| 0xE2 0x80 0xBC #E0.6 [1] (‼️) double exclamation mark
| 0xE2 0x81 0x89 #E0.6 [1] (⁉️) exclamation question ...
| 0xE2 0x84 0xA2 #E0.6 [1] (™️) trade mark
| 0xE2 0x84 0xB9 #E0.6 [1] () information
| 0xE2 0x86 0x94..0x99 #E0.6 [6] (↔️..↙️) left-right arrow..do...
| 0xE2 0x86 0xA9..0xAA #E0.6 [2] (↩️..↪️) right arrow curving ...
| 0xE2 0x8C 0x9A..0x9B #E0.6 [2] (⌚..⌛) watch..hourglass done
| 0xE2 0x8C 0xA8 #E1.0 [1] (⌨️) keyboard
| 0xE2 0x8E 0x88 #E0.0 [1] (⎈) HELM SYMBOL
| 0xE2 0x8F 0x8F #E1.0 [1] (⏏️) eject button
| 0xE2 0x8F 0xA9..0xAC #E0.6 [4] (⏩..⏬) fast-forward button..f...
| 0xE2 0x8F 0xAD..0xAE #E0.7 [2] (⏭️..⏮️) next track button..l...
| 0xE2 0x8F 0xAF #E1.0 [1] (⏯️) play or pause button
| 0xE2 0x8F 0xB0 #E0.6 [1] (⏰) alarm clock
| 0xE2 0x8F 0xB1..0xB2 #E1.0 [2] (⏱️..⏲️) stopwatch..timer clock
| 0xE2 0x8F 0xB3 #E0.6 [1] (⏳) hourglass not done
| 0xE2 0x8F 0xB8..0xBA #E0.7 [3] (⏸️..⏺️) pause button..record...
| 0xE2 0x93 0x82 #E0.6 [1] (Ⓜ️) circled M
| 0xE2 0x96 0xAA..0xAB #E0.6 [2] (▪️..▫️) black small square.....
| 0xE2 0x96 0xB6 #E0.6 [1] (▶️) play button
| 0xE2 0x97 0x80 #E0.6 [1] (◀️) reverse button
| 0xE2 0x97 0xBB..0xBE #E0.6 [4] (◻️..◾) white medium square.....
| 0xE2 0x98 0x80..0x81 #E0.6 [2] (☀️..☁️) sun..cloud
| 0xE2 0x98 0x82..0x83 #E0.7 [2] (☂️..☃️) umbrella..snowman
| 0xE2 0x98 0x84 #E1.0 [1] (☄️) comet
| 0xE2 0x98 0x85 #E0.0 [1] (★) BLACK STAR
| 0xE2 0x98 0x87..0x8D #E0.0 [7] (☇..☍) LIGHTNING..OPPOSITION
| 0xE2 0x98 0x8E #E0.6 [1] (☎️) telephone
| 0xE2 0x98 0x8F..0x90 #E0.0 [2] (☏..☐) WHITE TELEPHONE..BALLO...
| 0xE2 0x98 0x91 #E0.6 [1] (☑️) check box with check
| 0xE2 0x98 0x92 #E0.0 [1] (☒) BALLOT BOX WITH X
| 0xE2 0x98 0x94..0x95 #E0.6 [2] (☔..☕) umbrella with rain dro...
| 0xE2 0x98 0x96..0x97 #E0.0 [2] (☖..☗) WHITE SHOGI PIECE..BLA...
| 0xE2 0x98 0x98 #E1.0 [1] (☘️) shamrock
| 0xE2 0x98 0x99..0x9C #E0.0 [4] (☙..☜) REVERSED ROTATED FLORA...
| 0xE2 0x98 0x9D #E0.6 [1] (☝️) index pointing up
| 0xE2 0x98 0x9E..0x9F #E0.0 [2] (☞..☟) WHITE RIGHT POINTING I...
| 0xE2 0x98 0xA0 #E1.0 [1] (☠️) skull and crossbones
| 0xE2 0x98 0xA1 #E0.0 [1] (☡) CAUTION SIGN
| 0xE2 0x98 0xA2..0xA3 #E1.0 [2] (☢️..☣️) radioactive..biohazard
| 0xE2 0x98 0xA4..0xA5 #E0.0 [2] (☤..☥) CADUCEUS..ANKH
| 0xE2 0x98 0xA6 #E1.0 [1] (☦️) orthodox cross
| 0xE2 0x98 0xA7..0xA9 #E0.0 [3] (☧..☩) CHI RHO..CROSS OF JERU...
| 0xE2 0x98 0xAA #E0.7 [1] (☪️) star and crescent
| 0xE2 0x98 0xAB..0xAD #E0.0 [3] (☫..☭) FARSI SYMBOL..HAMMER A...
| 0xE2 0x98 0xAE #E1.0 [1] (☮️) peace symbol
| 0xE2 0x98 0xAF #E0.7 [1] (☯️) yin yang
| 0xE2 0x98 0xB0..0xB7 #E0.0 [8] (☰..☷) TRIGRAM FOR HEAVEN..TR...
| 0xE2 0x98 0xB8..0xB9 #E0.7 [2] (☸️..☹️) wheel of dharma..fro...
| 0xE2 0x98 0xBA #E0.6 [1] (☺️) smiling face
| 0xE2 0x98 0xBB..0xBF #E0.0 [5] (☻..☿) BLACK SMILING FACE..ME...
| 0xE2 0x99 0x80 #E4.0 [1] (♀️) female sign
| 0xE2 0x99 0x81 #E0.0 [1] (♁) EARTH
| 0xE2 0x99 0x82 #E4.0 [1] (♂️) male sign
| 0xE2 0x99 0x83..0x87 #E0.0 [5] (♃..♇) JUPITER..PLUTO
| 0xE2 0x99 0x88..0x93 #E0.6 [12] (♈..♓) Aries..Pisces
| 0xE2 0x99 0x94..0x9E #E0.0 [11] (♔..♞) WHITE CHESS KING..BLAC...
| 0xE2 0x99 0x9F #E11.0 [1] (♟️) chess pawn
| 0xE2 0x99 0xA0 #E0.6 [1] (♠️) spade suit
| 0xE2 0x99 0xA1..0xA2 #E0.0 [2] (♡..♢) WHITE HEART SUIT..WHIT...
| 0xE2 0x99 0xA3 #E0.6 [1] (♣️) club suit
| 0xE2 0x99 0xA4 #E0.0 [1] (♤) WHITE SPADE SUIT
| 0xE2 0x99 0xA5..0xA6 #E0.6 [2] (♥️..♦️) heart suit..diamond ...
| 0xE2 0x99 0xA7 #E0.0 [1] (♧) WHITE CLUB SUIT
| 0xE2 0x99 0xA8 #E0.6 [1] (♨️) hot springs
| 0xE2 0x99 0xA9..0xBA #E0.0 [18] (♩..♺) QUARTER NOTE..RECYCLIN...
| 0xE2 0x99 0xBB #E0.6 [1] (♻️) recycling symbol
| 0xE2 0x99 0xBC..0xBD #E0.0 [2] (♼..♽) RECYCLED PAPER SYMBOL....
| 0xE2 0x99 0xBE #E11.0 [1] (♾️) infinity
| 0xE2 0x99 0xBF #E0.6 [1] (♿) wheelchair symbol
| 0xE2 0x9A 0x80..0x85 #E0.0 [6] (⚀..⚅) DIE FACE-1..DIE FACE-6
| 0xE2 0x9A 0x90..0x91 #E0.0 [2] (⚐..⚑) WHITE FLAG..BLACK FLAG
| 0xE2 0x9A 0x92 #E1.0 [1] (⚒️) hammer and pick
| 0xE2 0x9A 0x93 #E0.6 [1] (⚓) anchor
| 0xE2 0x9A 0x94 #E1.0 [1] (⚔️) crossed swords
| 0xE2 0x9A 0x95 #E4.0 [1] (⚕️) medical symbol
| 0xE2 0x9A 0x96..0x97 #E1.0 [2] (⚖️..⚗️) balance scale..alembic
| 0xE2 0x9A 0x98 #E0.0 [1] (⚘) FLOWER
| 0xE2 0x9A 0x99 #E1.0 [1] (⚙️) gear
| 0xE2 0x9A 0x9A #E0.0 [1] (⚚) STAFF OF HERMES
| 0xE2 0x9A 0x9B..0x9C #E1.0 [2] (⚛️..⚜️) atom symbol..fleur-d...
| 0xE2 0x9A 0x9D..0x9F #E0.0 [3] (⚝..⚟) OUTLINED WHITE STAR..T...
| 0xE2 0x9A 0xA0..0xA1 #E0.6 [2] (⚠️..⚡) warning..high voltage
| 0xE2 0x9A 0xA2..0xA6 #E0.0 [5] (⚢..⚦) DOUBLED FEMALE SIGN..M...
| 0xE2 0x9A 0xA7 #E13.0 [1] (⚧️) transgender symbol
| 0xE2 0x9A 0xA8..0xA9 #E0.0 [2] (⚨..⚩) VERTICAL MALE WITH STR...
| 0xE2 0x9A 0xAA..0xAB #E0.6 [2] (⚪..⚫) white circle..black ci...
| 0xE2 0x9A 0xAC..0xAF #E0.0 [4] (⚬..⚯) MEDIUM SMALL WHITE CIR...
| 0xE2 0x9A 0xB0..0xB1 #E1.0 [2] (⚰️..⚱️) coffin..funeral urn
| 0xE2 0x9A 0xB2..0xBC #E0.0 [11] (⚲..⚼) NEUTER..SESQUIQUADRATE
| 0xE2 0x9A 0xBD..0xBE #E0.6 [2] (⚽..⚾) soccer ball..baseball
| 0xE2 0x9A 0xBF..0xFF #E0.0 [5] (⚿..⛃) SQUARED KEY..BLACK DRA...
| 0xE2 0x9B 0x00..0x83 #
| 0xE2 0x9B 0x84..0x85 #E0.6 [2] (⛄..⛅) snowman without snow.....
| 0xE2 0x9B 0x86..0x87 #E0.0 [2] (⛆..⛇) RAIN..BLACK SNOWMAN
| 0xE2 0x9B 0x88 #E0.7 [1] (⛈️) cloud with lightning ...
| 0xE2 0x9B 0x89..0x8D #E0.0 [5] (⛉..⛍) TURNED WHITE SHOGI PIE...
| 0xE2 0x9B 0x8E #E0.6 [1] (⛎) Ophiuchus
| 0xE2 0x9B 0x8F #E0.7 [1] (⛏️) pick
| 0xE2 0x9B 0x90 #E0.0 [1] (⛐) CAR SLIDING
| 0xE2 0x9B 0x91 #E0.7 [1] (⛑️) rescue workers helmet
| 0xE2 0x9B 0x92 #E0.0 [1] (⛒) CIRCLED CROSSING LANES
| 0xE2 0x9B 0x93 #E0.7 [1] (⛓️) chains
| 0xE2 0x9B 0x94 #E0.6 [1] (⛔) no entry
| 0xE2 0x9B 0x95..0xA8 #E0.0 [20] (⛕..⛨) ALTERNATE ONE-WAY LEFT...
| 0xE2 0x9B 0xA9 #E0.7 [1] (⛩️) shinto shrine
| 0xE2 0x9B 0xAA #E0.6 [1] (⛪) church
| 0xE2 0x9B 0xAB..0xAF #E0.0 [5] (⛫..⛯) CASTLE..MAP SYMBOL FOR...
| 0xE2 0x9B 0xB0..0xB1 #E0.7 [2] (⛰️..⛱️) mountain..umbrella o...
| 0xE2 0x9B 0xB2..0xB3 #E0.6 [2] (⛲..⛳) fountain..flag in hole
| 0xE2 0x9B 0xB4 #E0.7 [1] (⛴️) ferry
| 0xE2 0x9B 0xB5 #E0.6 [1] (⛵) sailboat
| 0xE2 0x9B 0xB6 #E0.0 [1] (⛶) SQUARE FOUR CORNERS
| 0xE2 0x9B 0xB7..0xB9 #E0.7 [3] (⛷️..⛹️) skier..person bounci...
| 0xE2 0x9B 0xBA #E0.6 [1] (⛺) tent
| 0xE2 0x9B 0xBB..0xBC #E0.0 [2] (⛻..⛼) JAPANESE BANK SYMBOL.....
| 0xE2 0x9B 0xBD #E0.6 [1] (⛽) fuel pump
| 0xE2 0x9B 0xBE..0xFF #E0.0 [4] (⛾..✁) CUP ON BLACK SQUARE..U...
| 0xE2 0x9C 0x00..0x81 #
| 0xE2 0x9C 0x82 #E0.6 [1] (✂️) scissors
| 0xE2 0x9C 0x83..0x84 #E0.0 [2] (✃..✄) LOWER BLADE SCISSORS.....
| 0xE2 0x9C 0x85 #E0.6 [1] (✅) check mark button
| 0xE2 0x9C 0x88..0x8C #E0.6 [5] (✈️..✌️) airplane..victory hand
| 0xE2 0x9C 0x8D #E0.7 [1] (✍️) writing hand
| 0xE2 0x9C 0x8E #E0.0 [1] (✎) LOWER RIGHT PENCIL
| 0xE2 0x9C 0x8F #E0.6 [1] (✏️) pencil
| 0xE2 0x9C 0x90..0x91 #E0.0 [2] (✐..✑) UPPER RIGHT PENCIL..WH...
| 0xE2 0x9C 0x92 #E0.6 [1] (✒️) black nib
| 0xE2 0x9C 0x94 #E0.6 [1] (✔️) check mark
| 0xE2 0x9C 0x96 #E0.6 [1] (✖️) multiply
| 0xE2 0x9C 0x9D #E0.7 [1] (✝️) latin cross
| 0xE2 0x9C 0xA1 #E0.7 [1] (✡️) star of David
| 0xE2 0x9C 0xA8 #E0.6 [1] (✨) sparkles
| 0xE2 0x9C 0xB3..0xB4 #E0.6 [2] (✳️..✴️) eight-spoked asteris...
| 0xE2 0x9D 0x84 #E0.6 [1] (❄️) snowflake
| 0xE2 0x9D 0x87 #E0.6 [1] (❇️) sparkle
| 0xE2 0x9D 0x8C #E0.6 [1] (❌) cross mark
| 0xE2 0x9D 0x8E #E0.6 [1] (❎) cross mark button
| 0xE2 0x9D 0x93..0x95 #E0.6 [3] (❓..❕) question mark..white e...
| 0xE2 0x9D 0x97 #E0.6 [1] (❗) exclamation mark
| 0xE2 0x9D 0xA3 #E1.0 [1] (❣️) heart exclamation
| 0xE2 0x9D 0xA4 #E0.6 [1] (❤️) red heart
| 0xE2 0x9D 0xA5..0xA7 #E0.0 [3] (❥..❧) ROTATED HEAVY BLACK HE...
| 0xE2 0x9E 0x95..0x97 #E0.6 [3] (..➗) plus..divide
| 0xE2 0x9E 0xA1 #E0.6 [1] (➡️) right arrow
| 0xE2 0x9E 0xB0 #E0.6 [1] (➰) curly loop
| 0xE2 0x9E 0xBF #E1.0 [1] (➿) double curly loop
| 0xE2 0xA4 0xB4..0xB5 #E0.6 [2] (⤴️..⤵️) right arrow curving ...
| 0xE2 0xAC 0x85..0x87 #E0.6 [3] (⬅️..⬇️) left arrow..down arrow
| 0xE2 0xAC 0x9B..0x9C #E0.6 [2] (⬛..⬜) black large square..wh...
| 0xE2 0xAD 0x90 #E0.6 [1] (⭐) star
| 0xE2 0xAD 0x95 #E0.6 [1] (⭕) hollow red circle
| 0xE3 0x80 0xB0 #E0.6 [1] (〰️) wavy dash
| 0xE3 0x80 0xBD #E0.6 [1] (〽️) part alternation mark
| 0xE3 0x8A 0x97 #E0.6 [1] (㊗️) Japanese “congratulat...
| 0xE3 0x8A 0x99 #E0.6 [1] (㊙️) Japanese “secret” button
| 0xF0 0x9F 0x80 0x80..0x83 #E0.0 [4] (🀀..🀃) MAHJONG TILE EAST W...
| 0xF0 0x9F 0x80 0x84 #E0.6 [1] (🀄) mahjong red dragon
| 0xF0 0x9F 0x80 0x85..0xFF #E0.0 [202] (🀅..🃎) MAHJONG TILE ...
| 0xF0 0x9F 0x81..0x82 0x00..0xFF #
| 0xF0 0x9F 0x83 0x00..0x8E #
| 0xF0 0x9F 0x83 0x8F #E0.6 [1] (🃏) joker
| 0xF0 0x9F 0x83 0x90..0xBF #E0.0 [48] (🃐..🃿) <reserved-1F0D0>..<...
| 0xF0 0x9F 0x84 0x8D..0x8F #E0.0 [3] (🄍..🄏) CIRCLED ZERO WITH S...
| 0xF0 0x9F 0x84 0xAF #E0.0 [1] (🄯) COPYLEFT SYMBOL
| 0xF0 0x9F 0x85 0xAC..0xAF #E0.0 [4] (🅬..🅯) RAISED MR SIGN..CIR...
| 0xF0 0x9F 0x85 0xB0..0xB1 #E0.6 [2] (🅰️..🅱️) A button (blood t...
| 0xF0 0x9F 0x85 0xBE..0xBF #E0.6 [2] (🅾️..🅿️) O button (blood t...
| 0xF0 0x9F 0x86 0x8E #E0.6 [1] (🆎) AB button (blood type)
| 0xF0 0x9F 0x86 0x91..0x9A #E0.6 [10] (🆑..🆚) CL button..VS button
| 0xF0 0x9F 0x86 0xAD..0xFF #E0.0 [57] (🆭..🇥) MASK WORK SYMBOL..<...
| 0xF0 0x9F 0x87 0x00..0xA5 #
| 0xF0 0x9F 0x88 0x81..0x82 #E0.6 [2] (🈁..🈂️) Japanese “here” bu...
| 0xF0 0x9F 0x88 0x83..0x8F #E0.0 [13] (🈃..🈏) <reserved-1F203>..<...
| 0xF0 0x9F 0x88 0x9A #E0.6 [1] (🈚) Japanese “free of char...
| 0xF0 0x9F 0x88 0xAF #E0.6 [1] (🈯) Japanese “reserved” bu...
| 0xF0 0x9F 0x88 0xB2..0xBA #E0.6 [9] (🈲..🈺) Japanese “prohibite...
| 0xF0 0x9F 0x88 0xBC..0xBF #E0.0 [4] (🈼..🈿) <reserved-1F23C>..<...
| 0xF0 0x9F 0x89 0x89..0x8F #E0.0 [7] (🉉..🉏) <reserved-1F249>..<...
| 0xF0 0x9F 0x89 0x90..0x91 #E0.6 [2] (🉐..🉑) Japanese “bargain” ...
| 0xF0 0x9F 0x89 0x92..0xFF #E0.0 [174] (🉒..🋿) <reserved-1F2...
| 0xF0 0x9F 0x8A..0x8A 0x00..0xFF #
| 0xF0 0x9F 0x8B 0x00..0xBF #
| 0xF0 0x9F 0x8C 0x80..0x8C #E0.6 [13] (🌀..🌌) cyclone..milky way
| 0xF0 0x9F 0x8C 0x8D..0x8E #E0.7 [2] (🌍..🌎) globe showing Europ...
| 0xF0 0x9F 0x8C 0x8F #E0.6 [1] (🌏) globe showing Asia-Aus...
| 0xF0 0x9F 0x8C 0x90 #E1.0 [1] (🌐) globe with meridians
| 0xF0 0x9F 0x8C 0x91 #E0.6 [1] (🌑) new moon
| 0xF0 0x9F 0x8C 0x92 #E1.0 [1] (🌒) waxing crescent moon
| 0xF0 0x9F 0x8C 0x93..0x95 #E0.6 [3] (🌓..🌕) first quarter moon....
| 0xF0 0x9F 0x8C 0x96..0x98 #E1.0 [3] (🌖..🌘) waning gibbous moon...
| 0xF0 0x9F 0x8C 0x99 #E0.6 [1] (🌙) crescent moon
| 0xF0 0x9F 0x8C 0x9A #E1.0 [1] (🌚) new moon face
| 0xF0 0x9F 0x8C 0x9B #E0.6 [1] (🌛) first quarter moon face
| 0xF0 0x9F 0x8C 0x9C #E0.7 [1] (🌜) last quarter moon face
| 0xF0 0x9F 0x8C 0x9D..0x9E #E1.0 [2] (🌝..🌞) full moon face..sun...
| 0xF0 0x9F 0x8C 0x9F..0xA0 #E0.6 [2] (🌟..🌠) glowing star..shoot...
| 0xF0 0x9F 0x8C 0xA1 #E0.7 [1] (🌡️) thermometer
| 0xF0 0x9F 0x8C 0xA2..0xA3 #E0.0 [2] (🌢..🌣) BLACK DROPLET..WHIT...
| 0xF0 0x9F 0x8C 0xA4..0xAC #E0.7 [9] (🌤️..🌬️) sun behind small ...
| 0xF0 0x9F 0x8C 0xAD..0xAF #E1.0 [3] (🌭..🌯) hot dog..burrito
| 0xF0 0x9F 0x8C 0xB0..0xB1 #E0.6 [2] (🌰..🌱) chestnut..seedling
| 0xF0 0x9F 0x8C 0xB2..0xB3 #E1.0 [2] (🌲..🌳) evergreen tree..dec...
| 0xF0 0x9F 0x8C 0xB4..0xB5 #E0.6 [2] (🌴..🌵) palm tree..cactus
| 0xF0 0x9F 0x8C 0xB6 #E0.7 [1] (🌶️) hot pepper
| 0xF0 0x9F 0x8C 0xB7..0xFF #E0.6 [20] (🌷..🍊) tulip..tangerine
| 0xF0 0x9F 0x8D 0x00..0x8A #
| 0xF0 0x9F 0x8D 0x8B #E1.0 [1] (🍋) lemon
| 0xF0 0x9F 0x8D 0x8C..0x8F #E0.6 [4] (🍌..🍏) banana..green apple
| 0xF0 0x9F 0x8D 0x90 #E1.0 [1] (🍐) pear
| 0xF0 0x9F 0x8D 0x91..0xBB #E0.6 [43] (🍑..🍻) peach..clinking bee...
| 0xF0 0x9F 0x8D 0xBC #E1.0 [1] (🍼) baby bottle
| 0xF0 0x9F 0x8D 0xBD #E0.7 [1] (🍽️) fork and knife with p...
| 0xF0 0x9F 0x8D 0xBE..0xBF #E1.0 [2] (🍾..🍿) bottle with popping...
| 0xF0 0x9F 0x8E 0x80..0x93 #E0.6 [20] (🎀..🎓) ribbon..graduation cap
| 0xF0 0x9F 0x8E 0x94..0x95 #E0.0 [2] (🎔..🎕) HEART WITH TIP ON T...
| 0xF0 0x9F 0x8E 0x96..0x97 #E0.7 [2] (🎖️..🎗️) military medal..r...
| 0xF0 0x9F 0x8E 0x98 #E0.0 [1] (🎘) MUSICAL KEYBOARD WITH ...
| 0xF0 0x9F 0x8E 0x99..0x9B #E0.7 [3] (🎙️..🎛️) studio microphone...
| 0xF0 0x9F 0x8E 0x9C..0x9D #E0.0 [2] (🎜..🎝) BEAMED ASCENDING MU...
| 0xF0 0x9F 0x8E 0x9E..0x9F #E0.7 [2] (🎞️..🎟️) film frames..admi...
| 0xF0 0x9F 0x8E 0xA0..0xFF #E0.6 [37] (🎠..🏄) carousel horse..per...
| 0xF0 0x9F 0x8F 0x00..0x84 #
| 0xF0 0x9F 0x8F 0x85 #E1.0 [1] (🏅) sports medal
| 0xF0 0x9F 0x8F 0x86 #E0.6 [1] (🏆) trophy
| 0xF0 0x9F 0x8F 0x87 #E1.0 [1] (🏇) horse racing
| 0xF0 0x9F 0x8F 0x88 #E0.6 [1] (🏈) american football
| 0xF0 0x9F 0x8F 0x89 #E1.0 [1] (🏉) rugby football
| 0xF0 0x9F 0x8F 0x8A #E0.6 [1] (🏊) person swimming
| 0xF0 0x9F 0x8F 0x8B..0x8E #E0.7 [4] (🏋️..🏎️) person lifting we...
| 0xF0 0x9F 0x8F 0x8F..0x93 #E1.0 [5] (🏏..🏓) cricket game..ping ...
| 0xF0 0x9F 0x8F 0x94..0x9F #E0.7 [12] (🏔️..🏟️) snow-capped mount...
| 0xF0 0x9F 0x8F 0xA0..0xA3 #E0.6 [4] (🏠..🏣) house..Japanese pos...
| 0xF0 0x9F 0x8F 0xA4 #E1.0 [1] (🏤) post office
| 0xF0 0x9F 0x8F 0xA5..0xB0 #E0.6 [12] (🏥..🏰) hospital..castle
| 0xF0 0x9F 0x8F 0xB1..0xB2 #E0.0 [2] (🏱..🏲) WHITE PENNANT..BLAC...
| 0xF0 0x9F 0x8F 0xB3 #E0.7 [1] (🏳️) white flag
| 0xF0 0x9F 0x8F 0xB4 #E1.0 [1] (🏴) black flag
| 0xF0 0x9F 0x8F 0xB5 #E0.7 [1] (🏵️) rosette
| 0xF0 0x9F 0x8F 0xB6 #E0.0 [1] (🏶) BLACK ROSETTE
| 0xF0 0x9F 0x8F 0xB7 #E0.7 [1] (🏷️) label
| 0xF0 0x9F 0x8F 0xB8..0xBA #E1.0 [3] (🏸..🏺) badminton..amphora
| 0xF0 0x9F 0x90 0x80..0x87 #E1.0 [8] (🐀..🐇) rat..rabbit
| 0xF0 0x9F 0x90 0x88 #E0.7 [1] (🐈) cat
| 0xF0 0x9F 0x90 0x89..0x8B #E1.0 [3] (🐉..🐋) dragon..whale
| 0xF0 0x9F 0x90 0x8C..0x8E #E0.6 [3] (🐌..🐎) snail..horse
| 0xF0 0x9F 0x90 0x8F..0x90 #E1.0 [2] (🐏..🐐) ram..goat
| 0xF0 0x9F 0x90 0x91..0x92 #E0.6 [2] (🐑..🐒) ewe..monkey
| 0xF0 0x9F 0x90 0x93 #E1.0 [1] (🐓) rooster
| 0xF0 0x9F 0x90 0x94 #E0.6 [1] (🐔) chicken
| 0xF0 0x9F 0x90 0x95 #E0.7 [1] (🐕) dog
| 0xF0 0x9F 0x90 0x96 #E1.0 [1] (🐖) pig
| 0xF0 0x9F 0x90 0x97..0xA9 #E0.6 [19] (🐗..🐩) boar..poodle
| 0xF0 0x9F 0x90 0xAA #E1.0 [1] (🐪) camel
| 0xF0 0x9F 0x90 0xAB..0xBE #E0.6 [20] (🐫..🐾) two-hump camel..paw...
| 0xF0 0x9F 0x90 0xBF #E0.7 [1] (🐿️) chipmunk
| 0xF0 0x9F 0x91 0x80 #E0.6 [1] (👀) eyes
| 0xF0 0x9F 0x91 0x81 #E0.7 [1] (👁️) eye
| 0xF0 0x9F 0x91 0x82..0xA4 #E0.6 [35] (👂..👤) ear..bust in silhou...
| 0xF0 0x9F 0x91 0xA5 #E1.0 [1] (👥) busts in silhouette
| 0xF0 0x9F 0x91 0xA6..0xAB #E0.6 [6] (👦..👫) boy..woman and man ...
| 0xF0 0x9F 0x91 0xAC..0xAD #E1.0 [2] (👬..👭) men holding hands.....
| 0xF0 0x9F 0x91 0xAE..0xFF #E0.6 [63] (👮..💬) police officer..spe...
| 0xF0 0x9F 0x92 0x00..0xAC #
| 0xF0 0x9F 0x92 0xAD #E1.0 [1] (💭) thought balloon
| 0xF0 0x9F 0x92 0xAE..0xB5 #E0.6 [8] (💮..💵) white flower..dolla...
| 0xF0 0x9F 0x92 0xB6..0xB7 #E1.0 [2] (💶..💷) euro banknote..poun...
| 0xF0 0x9F 0x92 0xB8..0xFF #E0.6 [52] (💸..📫) money with wings..c...
| 0xF0 0x9F 0x93 0x00..0xAB #
| 0xF0 0x9F 0x93 0xAC..0xAD #E0.7 [2] (📬..📭) open mailbox with r...
| 0xF0 0x9F 0x93 0xAE #E0.6 [1] (📮) postbox
| 0xF0 0x9F 0x93 0xAF #E1.0 [1] (📯) postal horn
| 0xF0 0x9F 0x93 0xB0..0xB4 #E0.6 [5] (📰..📴) newspaper..mobile p...
| 0xF0 0x9F 0x93 0xB5 #E1.0 [1] (📵) no mobile phones
| 0xF0 0x9F 0x93 0xB6..0xB7 #E0.6 [2] (📶..📷) antenna bars..camera
| 0xF0 0x9F 0x93 0xB8 #E1.0 [1] (📸) camera with flash
| 0xF0 0x9F 0x93 0xB9..0xBC #E0.6 [4] (📹..📼) video camera..video...
| 0xF0 0x9F 0x93 0xBD #E0.7 [1] (📽️) film projector
| 0xF0 0x9F 0x93 0xBE #E0.0 [1] (📾) PORTABLE STEREO
| 0xF0 0x9F 0x93 0xBF..0xFF #E1.0 [4] (📿..🔂) prayer beads..repea...
| 0xF0 0x9F 0x94 0x00..0x82 #
| 0xF0 0x9F 0x94 0x83 #E0.6 [1] (🔃) clockwise vertical arrows
| 0xF0 0x9F 0x94 0x84..0x87 #E1.0 [4] (🔄..🔇) counterclockwise ar...
| 0xF0 0x9F 0x94 0x88 #E0.7 [1] (🔈) speaker low volume
| 0xF0 0x9F 0x94 0x89 #E1.0 [1] (🔉) speaker medium volume
| 0xF0 0x9F 0x94 0x8A..0x94 #E0.6 [11] (🔊..🔔) speaker high volume...
| 0xF0 0x9F 0x94 0x95 #E1.0 [1] (🔕) bell with slash
| 0xF0 0x9F 0x94 0x96..0xAB #E0.6 [22] (🔖..🔫) bookmark..pistol
| 0xF0 0x9F 0x94 0xAC..0xAD #E1.0 [2] (🔬..🔭) microscope..telescope
| 0xF0 0x9F 0x94 0xAE..0xBD #E0.6 [16] (🔮..🔽) crystal ball..downw...
| 0xF0 0x9F 0x95 0x86..0x88 #E0.0 [3] (🕆..🕈) WHITE LATIN CROSS.....
| 0xF0 0x9F 0x95 0x89..0x8A #E0.7 [2] (🕉️..🕊️) om..dove
| 0xF0 0x9F 0x95 0x8B..0x8E #E1.0 [4] (🕋..🕎) kaaba..menorah
| 0xF0 0x9F 0x95 0x8F #E0.0 [1] (🕏) BOWL OF HYGIEIA
| 0xF0 0x9F 0x95 0x90..0x9B #E0.6 [12] (🕐..🕛) one oclock..twelve...
| 0xF0 0x9F 0x95 0x9C..0xA7 #E0.7 [12] (🕜..🕧) one-thirty..twelve-...
| 0xF0 0x9F 0x95 0xA8..0xAE #E0.0 [7] (🕨..🕮) RIGHT SPEAKER..BOOK
| 0xF0 0x9F 0x95 0xAF..0xB0 #E0.7 [2] (🕯️..🕰️) candle..mantelpie...
| 0xF0 0x9F 0x95 0xB1..0xB2 #E0.0 [2] (🕱..🕲) BLACK SKULL AND CRO...
| 0xF0 0x9F 0x95 0xB3..0xB9 #E0.7 [7] (🕳️..🕹️) hole..joystick
| 0xF0 0x9F 0x95 0xBA #E3.0 [1] (🕺) man dancing
| 0xF0 0x9F 0x95 0xBB..0xFF #E0.0 [12] (🕻..🖆) LEFT HAND TELEPHONE...
| 0xF0 0x9F 0x96 0x00..0x86 #
| 0xF0 0x9F 0x96 0x87 #E0.7 [1] (🖇️) linked paperclips
| 0xF0 0x9F 0x96 0x88..0x89 #E0.0 [2] (🖈..🖉) BLACK PUSHPIN..LOWE...
| 0xF0 0x9F 0x96 0x8A..0x8D #E0.7 [4] (🖊️..🖍️) pen..crayon
| 0xF0 0x9F 0x96 0x8E..0x8F #E0.0 [2] (🖎..🖏) LEFT WRITING HAND.....
| 0xF0 0x9F 0x96 0x90 #E0.7 [1] (🖐️) hand with fingers spl...
| 0xF0 0x9F 0x96 0x91..0x94 #E0.0 [4] (🖑..🖔) REVERSED RAISED HAN...
| 0xF0 0x9F 0x96 0x95..0x96 #E1.0 [2] (🖕..🖖) middle finger..vulc...
| 0xF0 0x9F 0x96 0x97..0xA3 #E0.0 [13] (🖗..🖣) WHITE DOWN POINTING...
| 0xF0 0x9F 0x96 0xA4 #E3.0 [1] (🖤) black heart
| 0xF0 0x9F 0x96 0xA5 #E0.7 [1] (🖥️) desktop computer
| 0xF0 0x9F 0x96 0xA6..0xA7 #E0.0 [2] (🖦..🖧) KEYBOARD AND MOUSE....
| 0xF0 0x9F 0x96 0xA8 #E0.7 [1] (🖨️) printer
| 0xF0 0x9F 0x96 0xA9..0xB0 #E0.0 [8] (🖩..🖰) POCKET CALCULATOR.....
| 0xF0 0x9F 0x96 0xB1..0xB2 #E0.7 [2] (🖱️..🖲️) computer mouse..t...
| 0xF0 0x9F 0x96 0xB3..0xBB #E0.0 [9] (🖳..🖻) OLD PERSONAL COMPUT...
| 0xF0 0x9F 0x96 0xBC #E0.7 [1] (🖼️) framed picture
| 0xF0 0x9F 0x96 0xBD..0xFF #E0.0 [5] (🖽..🗁) FRAME WITH TILES..O...
| 0xF0 0x9F 0x97 0x00..0x81 #
| 0xF0 0x9F 0x97 0x82..0x84 #E0.7 [3] (🗂️..🗄️) card index divide...
| 0xF0 0x9F 0x97 0x85..0x90 #E0.0 [12] (🗅..🗐) EMPTY NOTE..PAGES
| 0xF0 0x9F 0x97 0x91..0x93 #E0.7 [3] (🗑️..🗓️) wastebasket..spir...
| 0xF0 0x9F 0x97 0x94..0x9B #E0.0 [8] (🗔..🗛) DESKTOP WINDOW..DEC...
| 0xF0 0x9F 0x97 0x9C..0x9E #E0.7 [3] (🗜️..🗞️) clamp..rolled-up ...
| 0xF0 0x9F 0x97 0x9F..0xA0 #E0.0 [2] (🗟..🗠) PAGE WITH CIRCLED T...
| 0xF0 0x9F 0x97 0xA1 #E0.7 [1] (🗡️) dagger
| 0xF0 0x9F 0x97 0xA2 #E0.0 [1] (🗢) LIPS
| 0xF0 0x9F 0x97 0xA3 #E0.7 [1] (🗣️) speaking head
| 0xF0 0x9F 0x97 0xA4..0xA7 #E0.0 [4] (🗤..🗧) THREE RAYS ABOVE..T...
| 0xF0 0x9F 0x97 0xA8 #E2.0 [1] (🗨️) left speech bubble
| 0xF0 0x9F 0x97 0xA9..0xAE #E0.0 [6] (🗩..🗮) RIGHT SPEECH BUBBLE...
| 0xF0 0x9F 0x97 0xAF #E0.7 [1] (🗯️) right anger bubble
| 0xF0 0x9F 0x97 0xB0..0xB2 #E0.0 [3] (🗰..🗲) MOOD BUBBLE..LIGHTN...
| 0xF0 0x9F 0x97 0xB3 #E0.7 [1] (🗳️) ballot box with ballot
| 0xF0 0x9F 0x97 0xB4..0xB9 #E0.0 [6] (🗴..🗹) BALLOT SCRIPT X..BA...
| 0xF0 0x9F 0x97 0xBA #E0.7 [1] (🗺️) world map
| 0xF0 0x9F 0x97 0xBB..0xBF #E0.6 [5] (🗻..🗿) mount fuji..moai
| 0xF0 0x9F 0x98 0x80 #E1.0 [1] (😀) grinning face
| 0xF0 0x9F 0x98 0x81..0x86 #E0.6 [6] (😁..😆) beaming face with s...
| 0xF0 0x9F 0x98 0x87..0x88 #E1.0 [2] (😇..😈) smiling face with h...
| 0xF0 0x9F 0x98 0x89..0x8D #E0.6 [5] (😉..😍) winking face..smili...
| 0xF0 0x9F 0x98 0x8E #E1.0 [1] (😎) smiling face with sung...
| 0xF0 0x9F 0x98 0x8F #E0.6 [1] (😏) smirking face
| 0xF0 0x9F 0x98 0x90 #E0.7 [1] (😐) neutral face
| 0xF0 0x9F 0x98 0x91 #E1.0 [1] (😑) expressionless face
| 0xF0 0x9F 0x98 0x92..0x94 #E0.6 [3] (😒..😔) unamused face..pens...
| 0xF0 0x9F 0x98 0x95 #E1.0 [1] (😕) confused face
| 0xF0 0x9F 0x98 0x96 #E0.6 [1] (😖) confounded face
| 0xF0 0x9F 0x98 0x97 #E1.0 [1] (😗) kissing face
| 0xF0 0x9F 0x98 0x98 #E0.6 [1] (😘) face blowing a kiss
| 0xF0 0x9F 0x98 0x99 #E1.0 [1] (😙) kissing face with smil...
| 0xF0 0x9F 0x98 0x9A #E0.6 [1] (😚) kissing face with clos...
| 0xF0 0x9F 0x98 0x9B #E1.0 [1] (😛) face with tongue
| 0xF0 0x9F 0x98 0x9C..0x9E #E0.6 [3] (😜..😞) winking face with t...
| 0xF0 0x9F 0x98 0x9F #E1.0 [1] (😟) worried face
| 0xF0 0x9F 0x98 0xA0..0xA5 #E0.6 [6] (😠..😥) angry face..sad but...
| 0xF0 0x9F 0x98 0xA6..0xA7 #E1.0 [2] (😦..😧) frowning face with ...
| 0xF0 0x9F 0x98 0xA8..0xAB #E0.6 [4] (😨..😫) fearful face..tired...
| 0xF0 0x9F 0x98 0xAC #E1.0 [1] (😬) grimacing face
| 0xF0 0x9F 0x98 0xAD #E0.6 [1] (😭) loudly crying face
| 0xF0 0x9F 0x98 0xAE..0xAF #E1.0 [2] (😮..😯) face with open mout...
| 0xF0 0x9F 0x98 0xB0..0xB3 #E0.6 [4] (😰..😳) anxious face with s...
| 0xF0 0x9F 0x98 0xB4 #E1.0 [1] (😴) sleeping face
| 0xF0 0x9F 0x98 0xB5 #E0.6 [1] (😵) dizzy face
| 0xF0 0x9F 0x98 0xB6 #E1.0 [1] (😶) face without mouth
| 0xF0 0x9F 0x98 0xB7..0xFF #E0.6 [10] (😷..🙀) face with medical m...
| 0xF0 0x9F 0x99 0x00..0x80 #
| 0xF0 0x9F 0x99 0x81..0x84 #E1.0 [4] (🙁..🙄) slightly frowning f...
| 0xF0 0x9F 0x99 0x85..0x8F #E0.6 [11] (🙅..🙏) person gesturing NO...
| 0xF0 0x9F 0x9A 0x80 #E0.6 [1] (🚀) rocket
| 0xF0 0x9F 0x9A 0x81..0x82 #E1.0 [2] (🚁..🚂) helicopter..locomotive
| 0xF0 0x9F 0x9A 0x83..0x85 #E0.6 [3] (🚃..🚅) railway car..bullet...
| 0xF0 0x9F 0x9A 0x86 #E1.0 [1] (🚆) train
| 0xF0 0x9F 0x9A 0x87 #E0.6 [1] (🚇) metro
| 0xF0 0x9F 0x9A 0x88 #E1.0 [1] (🚈) light rail
| 0xF0 0x9F 0x9A 0x89 #E0.6 [1] (🚉) station
| 0xF0 0x9F 0x9A 0x8A..0x8B #E1.0 [2] (🚊..🚋) tram..tram car
| 0xF0 0x9F 0x9A 0x8C #E0.6 [1] (🚌) bus
| 0xF0 0x9F 0x9A 0x8D #E0.7 [1] (🚍) oncoming bus
| 0xF0 0x9F 0x9A 0x8E #E1.0 [1] (🚎) trolleybus
| 0xF0 0x9F 0x9A 0x8F #E0.6 [1] (🚏) bus stop
| 0xF0 0x9F 0x9A 0x90 #E1.0 [1] (🚐) minibus
| 0xF0 0x9F 0x9A 0x91..0x93 #E0.6 [3] (🚑..🚓) ambulance..police car
| 0xF0 0x9F 0x9A 0x94 #E0.7 [1] (🚔) oncoming police car
| 0xF0 0x9F 0x9A 0x95 #E0.6 [1] (🚕) taxi
| 0xF0 0x9F 0x9A 0x96 #E1.0 [1] (🚖) oncoming taxi
| 0xF0 0x9F 0x9A 0x97 #E0.6 [1] (🚗) automobile
| 0xF0 0x9F 0x9A 0x98 #E0.7 [1] (🚘) oncoming automobile
| 0xF0 0x9F 0x9A 0x99..0x9A #E0.6 [2] (🚙..🚚) sport utility vehic...
| 0xF0 0x9F 0x9A 0x9B..0xA1 #E1.0 [7] (🚛..🚡) articulated lorry.....
| 0xF0 0x9F 0x9A 0xA2 #E0.6 [1] (🚢) ship
| 0xF0 0x9F 0x9A 0xA3 #E1.0 [1] (🚣) person rowing boat
| 0xF0 0x9F 0x9A 0xA4..0xA5 #E0.6 [2] (🚤..🚥) speedboat..horizont...
| 0xF0 0x9F 0x9A 0xA6 #E1.0 [1] (🚦) vertical traffic light
| 0xF0 0x9F 0x9A 0xA7..0xAD #E0.6 [7] (🚧..🚭) construction..no sm...
| 0xF0 0x9F 0x9A 0xAE..0xB1 #E1.0 [4] (🚮..🚱) litter in bin sign....
| 0xF0 0x9F 0x9A 0xB2 #E0.6 [1] (🚲) bicycle
| 0xF0 0x9F 0x9A 0xB3..0xB5 #E1.0 [3] (🚳..🚵) no bicycles..person...
| 0xF0 0x9F 0x9A 0xB6 #E0.6 [1] (🚶) person walking
| 0xF0 0x9F 0x9A 0xB7..0xB8 #E1.0 [2] (🚷..🚸) no pedestrians..chi...
| 0xF0 0x9F 0x9A 0xB9..0xBE #E0.6 [6] (🚹..🚾) mens room..water c...
| 0xF0 0x9F 0x9A 0xBF #E1.0 [1] (🚿) shower
| 0xF0 0x9F 0x9B 0x80 #E0.6 [1] (🛀) person taking bath
| 0xF0 0x9F 0x9B 0x81..0x85 #E1.0 [5] (🛁..🛅) bathtub..left luggage
| 0xF0 0x9F 0x9B 0x86..0x8A #E0.0 [5] (🛆..🛊) TRIANGLE WITH ROUND...
| 0xF0 0x9F 0x9B 0x8B #E0.7 [1] (🛋️) couch and lamp
| 0xF0 0x9F 0x9B 0x8C #E1.0 [1] (🛌) person in bed
| 0xF0 0x9F 0x9B 0x8D..0x8F #E0.7 [3] (🛍️..🛏️) shopping bags..bed
| 0xF0 0x9F 0x9B 0x90 #E1.0 [1] (🛐) place of worship
| 0xF0 0x9F 0x9B 0x91..0x92 #E3.0 [2] (🛑..🛒) stop sign..shopping...
| 0xF0 0x9F 0x9B 0x93..0x94 #E0.0 [2] (🛓..🛔) STUPA..PAGODA
| 0xF0 0x9F 0x9B 0x95 #E12.0 [1] (🛕) hindu temple
| 0xF0 0x9F 0x9B 0x96..0x97 #E13.0 [2] (🛖..🛗) hut..elevator
| 0xF0 0x9F 0x9B 0x98..0x9F #E0.0 [8] (🛘..🛟) <reserved-1F6D8>..<...
| 0xF0 0x9F 0x9B 0xA0..0xA5 #E0.7 [6] (🛠️..🛥️) hammer and wrench...
| 0xF0 0x9F 0x9B 0xA6..0xA8 #E0.0 [3] (🛦..🛨) UP-POINTING MILITAR...
| 0xF0 0x9F 0x9B 0xA9 #E0.7 [1] (🛩️) small airplane
| 0xF0 0x9F 0x9B 0xAA #E0.0 [1] (🛪) NORTHEAST-POINTING AIR...
| 0xF0 0x9F 0x9B 0xAB..0xAC #E1.0 [2] (🛫..🛬) airplane departure....
| 0xF0 0x9F 0x9B 0xAD..0xAF #E0.0 [3] (🛭..🛯) <reserved-1F6ED>..<...
| 0xF0 0x9F 0x9B 0xB0 #E0.7 [1] (🛰️) satellite
| 0xF0 0x9F 0x9B 0xB1..0xB2 #E0.0 [2] (🛱..🛲) ONCOMING FIRE ENGIN...
| 0xF0 0x9F 0x9B 0xB3 #E0.7 [1] (🛳️) passenger ship
| 0xF0 0x9F 0x9B 0xB4..0xB6 #E3.0 [3] (🛴..🛶) kick scooter..canoe
| 0xF0 0x9F 0x9B 0xB7..0xB8 #E5.0 [2] (🛷..🛸) sled..flying saucer
| 0xF0 0x9F 0x9B 0xB9 #E11.0 [1] (🛹) skateboard
| 0xF0 0x9F 0x9B 0xBA #E12.0 [1] (🛺) auto rickshaw
| 0xF0 0x9F 0x9B 0xBB..0xBC #E13.0 [2] (🛻..🛼) pickup truck..rolle...
| 0xF0 0x9F 0x9B 0xBD..0xBF #E0.0 [3] (🛽..🛿) <reserved-1F6FD>..<...
| 0xF0 0x9F 0x9D 0xB4..0xBF #E0.0 [12] (🝴..🝿) <reserved-1F774>..<...
| 0xF0 0x9F 0x9F 0x95..0x9F #E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE..<...
| 0xF0 0x9F 0x9F 0xA0..0xAB #E12.0 [12] (🟠..🟫) orange circle..brow...
| 0xF0 0x9F 0x9F 0xAC..0xBF #E0.0 [20] (🟬..🟿) <reserved-1F7EC>..<...
| 0xF0 0x9F 0xA0 0x8C..0x8F #E0.0 [4] (🠌..🠏) <reserved-1F80C>..<...
| 0xF0 0x9F 0xA1 0x88..0x8F #E0.0 [8] (🡈..🡏) <reserved-1F848>..<...
| 0xF0 0x9F 0xA1 0x9A..0x9F #E0.0 [6] (🡚..🡟) <reserved-1F85A>..<...
| 0xF0 0x9F 0xA2 0x88..0x8F #E0.0 [8] (🢈..🢏) <reserved-1F888>..<...
| 0xF0 0x9F 0xA2 0xAE..0xFF #E0.0 [82] (🢮..🣿) <reserved-1F8AE>..<...
| 0xF0 0x9F 0xA3 0x00..0xBF #
| 0xF0 0x9F 0xA4 0x8C #E13.0 [1] (🤌) pinched fingers
| 0xF0 0x9F 0xA4 0x8D..0x8F #E12.0 [3] (🤍..🤏) white heart..pinchi...
| 0xF0 0x9F 0xA4 0x90..0x98 #E1.0 [9] (🤐..🤘) zipper-mouth face.....
| 0xF0 0x9F 0xA4 0x99..0x9E #E3.0 [6] (🤙..🤞) call me hand..cross...
| 0xF0 0x9F 0xA4 0x9F #E5.0 [1] (🤟) love-you gesture
| 0xF0 0x9F 0xA4 0xA0..0xA7 #E3.0 [8] (🤠..🤧) cowboy hat face..sn...
| 0xF0 0x9F 0xA4 0xA8..0xAF #E5.0 [8] (🤨..🤯) face with raised ey...
| 0xF0 0x9F 0xA4 0xB0 #E3.0 [1] (🤰) pregnant woman
| 0xF0 0x9F 0xA4 0xB1..0xB2 #E5.0 [2] (🤱..🤲) breast-feeding..pal...
| 0xF0 0x9F 0xA4 0xB3..0xBA #E3.0 [8] (🤳..🤺) selfie..person fencing
| 0xF0 0x9F 0xA4 0xBC..0xBE #E3.0 [3] (🤼..🤾) people wrestling..p...
| 0xF0 0x9F 0xA4 0xBF #E12.0 [1] (🤿) diving mask
| 0xF0 0x9F 0xA5 0x80..0x85 #E3.0 [6] (🥀..🥅) wilted flower..goal...
| 0xF0 0x9F 0xA5 0x87..0x8B #E3.0 [5] (🥇..🥋) 1st place medal..ma...
| 0xF0 0x9F 0xA5 0x8C #E5.0 [1] (🥌) curling stone
| 0xF0 0x9F 0xA5 0x8D..0x8F #E11.0 [3] (🥍..🥏) lacrosse..flying disc
| 0xF0 0x9F 0xA5 0x90..0x9E #E3.0 [15] (🥐..🥞) croissant..pancakes
| 0xF0 0x9F 0xA5 0x9F..0xAB #E5.0 [13] (🥟..🥫) dumpling..canned food
| 0xF0 0x9F 0xA5 0xAC..0xB0 #E11.0 [5] (🥬..🥰) leafy green..smilin...
| 0xF0 0x9F 0xA5 0xB1 #E12.0 [1] (🥱) yawning face
| 0xF0 0x9F 0xA5 0xB2 #E13.0 [1] (🥲) smiling face with tear
| 0xF0 0x9F 0xA5 0xB3..0xB6 #E11.0 [4] (🥳..🥶) partying face..cold...
| 0xF0 0x9F 0xA5 0xB7..0xB8 #E13.0 [2] (🥷..🥸) ninja..disguised face
| 0xF0 0x9F 0xA5 0xB9 #E0.0 [1] (🥹) <reserved-1F979>
| 0xF0 0x9F 0xA5 0xBA #E11.0 [1] (🥺) pleading face
| 0xF0 0x9F 0xA5 0xBB #E12.0 [1] (🥻) sari
| 0xF0 0x9F 0xA5 0xBC..0xBF #E11.0 [4] (🥼..🥿) lab coat..flat shoe
| 0xF0 0x9F 0xA6 0x80..0x84 #E1.0 [5] (🦀..🦄) crab..unicorn
| 0xF0 0x9F 0xA6 0x85..0x91 #E3.0 [13] (🦅..🦑) eagle..squid
| 0xF0 0x9F 0xA6 0x92..0x97 #E5.0 [6] (🦒..🦗) giraffe..cricket
| 0xF0 0x9F 0xA6 0x98..0xA2 #E11.0 [11] (🦘..🦢) kangaroo..swan
| 0xF0 0x9F 0xA6 0xA3..0xA4 #E13.0 [2] (🦣..🦤) mammoth..dodo
| 0xF0 0x9F 0xA6 0xA5..0xAA #E12.0 [6] (🦥..🦪) sloth..oyster
| 0xF0 0x9F 0xA6 0xAB..0xAD #E13.0 [3] (🦫..🦭) beaver..seal
| 0xF0 0x9F 0xA6 0xAE..0xAF #E12.0 [2] (🦮..🦯) guide dog..white cane
| 0xF0 0x9F 0xA6 0xB0..0xB9 #E11.0 [10] (🦰..🦹) red hair..supervillain
| 0xF0 0x9F 0xA6 0xBA..0xBF #E12.0 [6] (🦺..🦿) safety vest..mechan...
| 0xF0 0x9F 0xA7 0x80 #E1.0 [1] (🧀) cheese wedge
| 0xF0 0x9F 0xA7 0x81..0x82 #E11.0 [2] (🧁..🧂) cupcake..salt
| 0xF0 0x9F 0xA7 0x83..0x8A #E12.0 [8] (🧃..🧊) beverage box..ice
| 0xF0 0x9F 0xA7 0x8B #E13.0 [1] (🧋) bubble tea
| 0xF0 0x9F 0xA7 0x8C #E0.0 [1] (🧌) <reserved-1F9CC>
| 0xF0 0x9F 0xA7 0x8D..0x8F #E12.0 [3] (🧍..🧏) person standing..de...
| 0xF0 0x9F 0xA7 0x90..0xA6 #E5.0 [23] (🧐..🧦) face with monocle.....
| 0xF0 0x9F 0xA7 0xA7..0xBF #E11.0 [25] (🧧..🧿) red envelope..nazar...
| 0xF0 0x9F 0xA8 0x80..0xFF #E0.0 [112] (🨀..🩯) NEUTRAL CHESS KING....
| 0xF0 0x9F 0xA9 0x00..0xAF #
| 0xF0 0x9F 0xA9 0xB0..0xB3 #E12.0 [4] (🩰..🩳) ballet shoes..shorts
| 0xF0 0x9F 0xA9 0xB4 #E13.0 [1] (🩴) thong sandal
| 0xF0 0x9F 0xA9 0xB5..0xB7 #E0.0 [3] (🩵..🩷) <reserved-1FA75>..<...
| 0xF0 0x9F 0xA9 0xB8..0xBA #E12.0 [3] (🩸..🩺) drop of blood..stet...
| 0xF0 0x9F 0xA9 0xBB..0xBF #E0.0 [5] (🩻..🩿) <reserved-1FA7B>..<...
| 0xF0 0x9F 0xAA 0x80..0x82 #E12.0 [3] (🪀..🪂) yo-yo..parachute
| 0xF0 0x9F 0xAA 0x83..0x86 #E13.0 [4] (🪃..🪆) boomerang..nesting ...
| 0xF0 0x9F 0xAA 0x87..0x8F #E0.0 [9] (🪇..🪏) <reserved-1FA87>..<...
| 0xF0 0x9F 0xAA 0x90..0x95 #E12.0 [6] (🪐..🪕) ringed planet..banjo
| 0xF0 0x9F 0xAA 0x96..0xA8 #E13.0 [19] (🪖..🪨) military helmet..rock
| 0xF0 0x9F 0xAA 0xA9..0xAF #E0.0 [7] (🪩..🪯) <reserved-1FAA9>..<...
| 0xF0 0x9F 0xAA 0xB0..0xB6 #E13.0 [7] (🪰..🪶) fly..feather
| 0xF0 0x9F 0xAA 0xB7..0xBF #E0.0 [9] (🪷..🪿) <reserved-1FAB7>..<...
| 0xF0 0x9F 0xAB 0x80..0x82 #E13.0 [3] (🫀..🫂) anatomical heart..p...
| 0xF0 0x9F 0xAB 0x83..0x8F #E0.0 [13] (🫃..🫏) <reserved-1FAC3>..<...
| 0xF0 0x9F 0xAB 0x90..0x96 #E13.0 [7] (🫐..🫖) blueberries..teapot
| 0xF0 0x9F 0xAB 0x97..0xBF #E0.0 [41] (🫗..🫿) <reserved-1FAD7>..<...
| 0xF0 0x9F 0xB0 0x80..0xFF #E0.0[1022] (🰀..🿽) <reserved-1FC...
| 0xF0 0x9F 0xB1..0xBE 0x00..0xFF #
| 0xF0 0x9F 0xBF 0x00..0xBD #
;
}%%

View File

@@ -0,0 +1,8 @@
package textseg
//go:generate go run make_tables.go -output tables.go
//go:generate go run make_test_tables.go -output tables_test.go
//go:generate ruby unicode2ragel.rb --url=https://www.unicode.org/Public/13.0.0/ucd/auxiliary/GraphemeBreakProperty.txt -m GraphemeCluster -p "Prepend,CR,LF,Control,Extend,Regional_Indicator,SpacingMark,L,V,T,LV,LVT,ZWJ" -o grapheme_clusters_table.rl
//go:generate ruby unicode2ragel.rb --url=https://www.unicode.org/Public/13.0.0/ucd/emoji/emoji-data.txt -m Emoji -p "Extended_Pictographic" -o emoji_table.rl
//go:generate ragel -Z grapheme_clusters.rl
//go:generate gofmt -w grapheme_clusters.go

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,133 @@
package textseg
import (
"errors"
"unicode/utf8"
)
// Generated from grapheme_clusters.rl. DO NOT EDIT
%%{
# (except you are actually in grapheme_clusters.rl here, so edit away!)
machine graphclust;
write data;
}%%
var Error = errors.New("invalid UTF8 text")
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on grapheme cluster boundaries.
func ScanGraphemeClusters(data []byte, atEOF bool) (int, []byte, error) {
if len(data) == 0 {
return 0, nil, nil
}
// Ragel state
cs := 0 // Current State
p := 0 // "Pointer" into data
pe := len(data) // End-of-data "pointer"
ts := 0
te := 0
act := 0
eof := pe
// Make Go compiler happy
_ = ts
_ = te
_ = act
_ = eof
startPos := 0
endPos := 0
%%{
include GraphemeCluster "grapheme_clusters_table.rl";
include Emoji "emoji_table.rl";
action start {
startPos = p
}
action end {
endPos = p
}
action emit {
return endPos+1, data[startPos:endPos+1], nil
}
ZWJGlue = ZWJ (Extended_Pictographic Extend*)?;
AnyExtender = Extend | ZWJGlue | SpacingMark;
Extension = AnyExtender*;
ReplacementChar = (0xEF 0xBF 0xBD);
CRLFSeq = CR LF;
ControlSeq = Control | ReplacementChar;
HangulSeq = (
L+ (((LV? V+ | LVT) T*)?|LV?) |
LV V* T* |
V+ T* |
LVT T* |
T+
) Extension;
EmojiSeq = Extended_Pictographic Extend* Extension;
ZWJSeq = ZWJ (ZWJ | Extend | SpacingMark)*;
EmojiFlagSeq = Regional_Indicator Regional_Indicator? Extension;
UTF8Cont = 0x80 .. 0xBF;
AnyUTF8 = (
0x00..0x7F |
0xC0..0xDF . UTF8Cont |
0xE0..0xEF . UTF8Cont . UTF8Cont |
0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
);
# OtherSeq is any character that isn't at the start of one of the extended sequences above, followed by extension
OtherSeq = (AnyUTF8 - (CR|LF|Control|ReplacementChar|L|LV|V|LVT|T|Extended_Pictographic|ZWJ|Regional_Indicator|Prepend)) (Extend | ZWJ | SpacingMark)*;
# PrependSeq is prepend followed by any of the other patterns above, except control characters which explicitly break
PrependSeq = Prepend+ (HangulSeq|EmojiSeq|ZWJSeq|EmojiFlagSeq|OtherSeq)?;
CRLFTok = CRLFSeq >start @end;
ControlTok = ControlSeq >start @end;
HangulTok = HangulSeq >start @end;
EmojiTok = EmojiSeq >start @end;
ZWJTok = ZWJSeq >start @end;
EmojiFlagTok = EmojiFlagSeq >start @end;
OtherTok = OtherSeq >start @end;
PrependTok = PrependSeq >start @end;
main := |*
CRLFTok => emit;
ControlTok => emit;
HangulTok => emit;
EmojiTok => emit;
ZWJTok => emit;
EmojiFlagTok => emit;
PrependTok => emit;
OtherTok => emit;
# any single valid UTF-8 character would also be valid per spec,
# but we'll handle that separately after the loop so we can deal
# with requesting more bytes if we're not at EOF.
*|;
write init;
write exec;
}%%
// If we fall out here then we were unable to complete a sequence.
// If we weren't able to complete a sequence then either we've
// reached the end of a partial buffer (so there's more data to come)
// or we have an isolated symbol that would normally be part of a
// grapheme cluster but has appeared in isolation here.
if !atEOF {
// Request more
return 0, nil, nil
}
// Just take the first UTF-8 sequence and return that.
_, seqLen := utf8.DecodeRune(data)
return seqLen, data[:seqLen], nil
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,335 @@
#!/usr/bin/env ruby
#
# This scripted has been updated to accept more command-line arguments:
#
# -u, --url URL to process
# -m, --machine Machine name
# -p, --properties Properties to add to the machine
# -o, --output Write output to file
#
# Updated by: Marty Schoch <marty.schoch@gmail.com>
#
# This script uses the unicode spec to generate a Ragel state machine
# that recognizes unicode alphanumeric characters. It generates 5
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
# Currently supported encodings are UTF-8 [default] and UCS-4.
#
# Usage: unicode2ragel.rb [options]
# -e, --encoding [ucs4 | utf8] Data encoding
# -h, --help Show this message
#
# This script was originally written as part of the Ferret search
# engine library.
#
# Author: Rakan El-Khalil <rakan@well.com>
require 'optparse'
require 'open-uri'
ENCODINGS = [ :utf8, :ucs4 ]
ALPHTYPES = { :utf8 => "byte", :ucs4 => "rune" }
DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
DEFAULT_MACHINE_NAME= "WChar"
###
# Display vars & default option
TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8
@chart_url = DEFAULT_CHART_URL
machine_name = DEFAULT_MACHINE_NAME
properties = []
@output = $stdout
###
# Option parsing
cli_opts = OptionParser.new do |opts|
opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
@encoding = o.downcase.to_sym
end
opts.on("-h", "--help", "Show this message") do
puts opts
exit
end
opts.on("-u", "--url URL", "URL to process") do |o|
@chart_url = o
end
opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o|
machine_name = o
end
opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o|
properties = o
end
opts.on("-o", "--output FILE", "output file") do |o|
@output = File.new(o, "w+")
end
end
cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
puts "Invalid encoding: #{@encoding}"
puts cli_opts
exit
end
##
# Downloads the document at url and yields every alpha line's hex
# range and description.
def each_alpha( url, property )
URI.open( url ) do |file|
file.each_line do |line|
next if line =~ /^#/;
next if line !~ /; #{property} *#/;
range, description = line.split(/;/)
range.strip!
description.gsub!(/.*#/, '').strip!
if range =~ /\.\./
start, stop = range.split '..'
else start = stop = range
end
yield start.hex .. stop.hex, description
end
end
end
###
# Formats to hex at minimum width
def to_hex( n )
r = "%0X" % n
r = "0#{r}" unless (r.length % 2).zero?
r
end
###
# UCS4 is just a straight hex conversion of the unicode codepoint.
def to_ucs4( range )
rangestr = "0x" + to_hex(range.begin)
rangestr << "..0x" + to_hex(range.end) if range.begin != range.end
[ rangestr ]
end
##
# 0x00 - 0x7f -> 0zzzzzzz[7]
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
def to_utf8_enc( n )
r = 0
if n <= 0x7f
r = n
elsif n <= 0x7ff
y = 0xc0 | (n >> 6)
z = 0x80 | (n & 0x3f)
r = y << 8 | z
elsif n <= 0xffff
x = 0xe0 | (n >> 12)
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = x << 16 | y << 8 | z
elsif n <= 0x10ffff
w = 0xf0 | (n >> 18)
x = 0x80 | (n >> 12) & 0x3f
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = w << 24 | x << 16 | y << 8 | z
end
to_hex(r)
end
def from_utf8_enc( n )
n = n.hex
r = 0
if n <= 0x7f
r = n
elsif n <= 0xdfff
y = (n >> 8) & 0x1f
z = n & 0x3f
r = y << 6 | z
elsif n <= 0xefffff
x = (n >> 16) & 0x0f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = x << 10 | y << 6 | z
elsif n <= 0xf7ffffff
w = (n >> 24) & 0x07
x = (n >> 16) & 0x3f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = w << 18 | x << 12 | y << 6 | z
end
r
end
###
# Given a range, splits it up into ranges that can be continuously
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
# This is not strictly needed since the current [5.1] unicode standard
# doesn't have ranges that straddle utf8 boundaries. This is included
# for completeness as there is no telling if that will ever change.
def utf8_ranges( range )
ranges = []
UTF8_BOUNDARIES.each do |max|
if range.begin <= max
if range.end <= max
ranges << range
return ranges
end
ranges << (range.begin .. max)
range = (max + 1) .. range.end
end
end
ranges
end
def build_range( start, stop )
size = start.size/2
left = size - 1
return [""] if size < 1
a = start[0..1]
b = stop[0..1]
###
# Shared prefix
if a == b
return build_range(start[2..-1], stop[2..-1]).map do |elt|
"0x#{a} " + elt
end
end
###
# Unshared prefix, end of run
return ["0x#{a}..0x#{b} "] if left.zero?
###
# Unshared prefix, not end of run
# Range can be 0x123456..0x56789A
# Which is equivalent to:
# 0x123456 .. 0x12FFFF
# 0x130000 .. 0x55FFFF
# 0x560000 .. 0x56789A
ret = []
ret << build_range(start, a + "FF" * left)
###
# Only generate middle range if need be.
if a.hex+1 != b.hex
max = to_hex(b.hex - 1)
max = "FF" if b == "FF"
ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
end
###
# Don't generate last range if it is covered by first range
ret << build_range(b + "00" * left, stop) unless b == "FF"
ret.flatten!
end
def to_utf8( range )
utf8_ranges( range ).map do |r|
begin_enc = to_utf8_enc(r.begin)
end_enc = to_utf8_enc(r.end)
build_range begin_enc, end_enc
end.flatten!
end
##
# Perform a 3-way comparison of the number of codepoints advertised by
# the unicode spec for the given range, the originally parsed range,
# and the resulting utf8 encoded range.
def count_codepoints( code )
code.split(' ').inject(1) do |acc, elt|
if elt =~ /0x(.+)\.\.0x(.+)/
if @encoding == :utf8
acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1)
else
acc * ($2.hex - $1.hex + 1)
end
else
acc
end
end
end
def is_valid?( range, desc, codes )
spec_count = 1
spec_count = $1.to_i if desc =~ /\[(\d+)\]/
range_count = range.end - range.begin + 1
sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) }
sum == spec_count and sum == range_count
end
##
# Generate the state maching to stdout
def generate_machine( name, property )
pipe = " "
@output.puts " #{name} = "
each_alpha( @chart_url, property ) do |range, desc|
codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)
#raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
# is_valid? range, desc, codes
range_width = codes.map { |a| a.size }.max
range_width = RANGE_WIDTH if range_width < RANGE_WIDTH
desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11
desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH
if desc.size > desc_width
desc = desc[0..desc_width - 4] + "..."
end
codes.each_with_index do |r, idx|
desc = "" unless idx.zero?
code = "%-#{range_width}s" % r
@output.puts " #{pipe} #{code} ##{desc}"
pipe = "|"
end
end
@output.puts " ;"
@output.puts ""
end
@output.puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{@chart_url}
#
# It defines #{properties}.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.
%%{
machine #{machine_name};
EOF
properties.each { |x| generate_machine( x, x ) }
@output.puts <<EOF
}%%
EOF

View File

@@ -0,0 +1,19 @@
package textseg
import "unicode/utf8"
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on UTF8 sequence boundaries.
//
// This is included largely for completeness, since this behavior is already
// built in to Go when ranging over a string.
func ScanUTF8Sequences(data []byte, atEOF bool) (int, []byte, error) {
if len(data) == 0 {
return 0, nil, nil
}
r, seqLen := utf8.DecodeRune(data)
if r == utf8.RuneError && !atEOF {
return 0, nil, nil
}
return seqLen, data[:seqLen], nil
}

View File

@@ -0,0 +1,95 @@
Copyright (c) 2017 Martin Atkins
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---------
Unicode table generation programs are under a separate copyright and license:
Copyright (c) 2014 Couchbase, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific language governing permissions
and limitations under the License.
---------
Grapheme break data is provided as part of the Unicode character database,
copright 2016 Unicode, Inc, which is provided with the following license:
Unicode Data Files include all data files under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
Unicode Data Files do not include PDF online code charts under the
directory http://www.unicode.org/Public/.
Software includes any source code published in the Unicode Standard
or under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2017 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.

View File

@@ -0,0 +1,30 @@
package textseg
import (
"bufio"
"bytes"
)
// AllTokens is a utility that uses a bufio.SplitFunc to produce a slice of
// all of the recognized tokens in the given buffer.
func AllTokens(buf []byte, splitFunc bufio.SplitFunc) ([][]byte, error) {
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Split(splitFunc)
var ret [][]byte
for scanner.Scan() {
ret = append(ret, scanner.Bytes())
}
return ret, scanner.Err()
}
// TokenCount is a utility that uses a bufio.SplitFunc to count the number of
// recognized tokens in the given buffer.
func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) {
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Split(splitFunc)
var ret int
for scanner.Scan() {
ret++
}
return ret, scanner.Err()
}

View File

@@ -0,0 +1,545 @@
# The following Ragel file was autogenerated with unicode2ragel.rb
# from: https://www.unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
#
# It defines ["Extended_Pictographic"].
#
# To use this, make sure that your alphtype is set to byte,
# and that your input is in utf8.
%%{
machine Emoji;
Extended_Pictographic =
0xC2 0xA9 #E0.6 [1] (©️) copyright
| 0xC2 0xAE #E0.6 [1] (®️) registered
| 0xE2 0x80 0xBC #E0.6 [1] (‼️) double exclamation mark
| 0xE2 0x81 0x89 #E0.6 [1] (⁉️) exclamation question ...
| 0xE2 0x84 0xA2 #E0.6 [1] (™️) trade mark
| 0xE2 0x84 0xB9 #E0.6 [1] () information
| 0xE2 0x86 0x94..0x99 #E0.6 [6] (↔️..↙️) left-right arrow..do...
| 0xE2 0x86 0xA9..0xAA #E0.6 [2] (↩️..↪️) right arrow curving ...
| 0xE2 0x8C 0x9A..0x9B #E0.6 [2] (⌚..⌛) watch..hourglass done
| 0xE2 0x8C 0xA8 #E1.0 [1] (⌨️) keyboard
| 0xE2 0x8E 0x88 #E0.0 [1] (⎈) HELM SYMBOL
| 0xE2 0x8F 0x8F #E1.0 [1] (⏏️) eject button
| 0xE2 0x8F 0xA9..0xAC #E0.6 [4] (⏩..⏬) fast-forward button..f...
| 0xE2 0x8F 0xAD..0xAE #E0.7 [2] (⏭️..⏮️) next track button..l...
| 0xE2 0x8F 0xAF #E1.0 [1] (⏯️) play or pause button
| 0xE2 0x8F 0xB0 #E0.6 [1] (⏰) alarm clock
| 0xE2 0x8F 0xB1..0xB2 #E1.0 [2] (⏱️..⏲️) stopwatch..timer clock
| 0xE2 0x8F 0xB3 #E0.6 [1] (⏳) hourglass not done
| 0xE2 0x8F 0xB8..0xBA #E0.7 [3] (⏸️..⏺️) pause button..record...
| 0xE2 0x93 0x82 #E0.6 [1] (Ⓜ️) circled M
| 0xE2 0x96 0xAA..0xAB #E0.6 [2] (▪️..▫️) black small square.....
| 0xE2 0x96 0xB6 #E0.6 [1] (▶️) play button
| 0xE2 0x97 0x80 #E0.6 [1] (◀️) reverse button
| 0xE2 0x97 0xBB..0xBE #E0.6 [4] (◻️..◾) white medium square.....
| 0xE2 0x98 0x80..0x81 #E0.6 [2] (☀️..☁️) sun..cloud
| 0xE2 0x98 0x82..0x83 #E0.7 [2] (☂️..☃️) umbrella..snowman
| 0xE2 0x98 0x84 #E1.0 [1] (☄️) comet
| 0xE2 0x98 0x85 #E0.0 [1] (★) BLACK STAR
| 0xE2 0x98 0x87..0x8D #E0.0 [7] (☇..☍) LIGHTNING..OPPOSITION
| 0xE2 0x98 0x8E #E0.6 [1] (☎️) telephone
| 0xE2 0x98 0x8F..0x90 #E0.0 [2] (☏..☐) WHITE TELEPHONE..BALLO...
| 0xE2 0x98 0x91 #E0.6 [1] (☑️) check box with check
| 0xE2 0x98 0x92 #E0.0 [1] (☒) BALLOT BOX WITH X
| 0xE2 0x98 0x94..0x95 #E0.6 [2] (☔..☕) umbrella with rain dro...
| 0xE2 0x98 0x96..0x97 #E0.0 [2] (☖..☗) WHITE SHOGI PIECE..BLA...
| 0xE2 0x98 0x98 #E1.0 [1] (☘️) shamrock
| 0xE2 0x98 0x99..0x9C #E0.0 [4] (☙..☜) REVERSED ROTATED FLORA...
| 0xE2 0x98 0x9D #E0.6 [1] (☝️) index pointing up
| 0xE2 0x98 0x9E..0x9F #E0.0 [2] (☞..☟) WHITE RIGHT POINTING I...
| 0xE2 0x98 0xA0 #E1.0 [1] (☠️) skull and crossbones
| 0xE2 0x98 0xA1 #E0.0 [1] (☡) CAUTION SIGN
| 0xE2 0x98 0xA2..0xA3 #E1.0 [2] (☢️..☣️) radioactive..biohazard
| 0xE2 0x98 0xA4..0xA5 #E0.0 [2] (☤..☥) CADUCEUS..ANKH
| 0xE2 0x98 0xA6 #E1.0 [1] (☦️) orthodox cross
| 0xE2 0x98 0xA7..0xA9 #E0.0 [3] (☧..☩) CHI RHO..CROSS OF JERU...
| 0xE2 0x98 0xAA #E0.7 [1] (☪️) star and crescent
| 0xE2 0x98 0xAB..0xAD #E0.0 [3] (☫..☭) FARSI SYMBOL..HAMMER A...
| 0xE2 0x98 0xAE #E1.0 [1] (☮️) peace symbol
| 0xE2 0x98 0xAF #E0.7 [1] (☯️) yin yang
| 0xE2 0x98 0xB0..0xB7 #E0.0 [8] (☰..☷) TRIGRAM FOR HEAVEN..TR...
| 0xE2 0x98 0xB8..0xB9 #E0.7 [2] (☸️..☹️) wheel of dharma..fro...
| 0xE2 0x98 0xBA #E0.6 [1] (☺️) smiling face
| 0xE2 0x98 0xBB..0xBF #E0.0 [5] (☻..☿) BLACK SMILING FACE..ME...
| 0xE2 0x99 0x80 #E4.0 [1] (♀️) female sign
| 0xE2 0x99 0x81 #E0.0 [1] (♁) EARTH
| 0xE2 0x99 0x82 #E4.0 [1] (♂️) male sign
| 0xE2 0x99 0x83..0x87 #E0.0 [5] (♃..♇) JUPITER..PLUTO
| 0xE2 0x99 0x88..0x93 #E0.6 [12] (♈..♓) Aries..Pisces
| 0xE2 0x99 0x94..0x9E #E0.0 [11] (♔..♞) WHITE CHESS KING..BLAC...
| 0xE2 0x99 0x9F #E11.0 [1] (♟️) chess pawn
| 0xE2 0x99 0xA0 #E0.6 [1] (♠️) spade suit
| 0xE2 0x99 0xA1..0xA2 #E0.0 [2] (♡..♢) WHITE HEART SUIT..WHIT...
| 0xE2 0x99 0xA3 #E0.6 [1] (♣️) club suit
| 0xE2 0x99 0xA4 #E0.0 [1] (♤) WHITE SPADE SUIT
| 0xE2 0x99 0xA5..0xA6 #E0.6 [2] (♥️..♦️) heart suit..diamond ...
| 0xE2 0x99 0xA7 #E0.0 [1] (♧) WHITE CLUB SUIT
| 0xE2 0x99 0xA8 #E0.6 [1] (♨️) hot springs
| 0xE2 0x99 0xA9..0xBA #E0.0 [18] (♩..♺) QUARTER NOTE..RECYCLIN...
| 0xE2 0x99 0xBB #E0.6 [1] (♻️) recycling symbol
| 0xE2 0x99 0xBC..0xBD #E0.0 [2] (♼..♽) RECYCLED PAPER SYMBOL....
| 0xE2 0x99 0xBE #E11.0 [1] (♾️) infinity
| 0xE2 0x99 0xBF #E0.6 [1] (♿) wheelchair symbol
| 0xE2 0x9A 0x80..0x85 #E0.0 [6] (⚀..⚅) DIE FACE-1..DIE FACE-6
| 0xE2 0x9A 0x90..0x91 #E0.0 [2] (⚐..⚑) WHITE FLAG..BLACK FLAG
| 0xE2 0x9A 0x92 #E1.0 [1] (⚒️) hammer and pick
| 0xE2 0x9A 0x93 #E0.6 [1] (⚓) anchor
| 0xE2 0x9A 0x94 #E1.0 [1] (⚔️) crossed swords
| 0xE2 0x9A 0x95 #E4.0 [1] (⚕️) medical symbol
| 0xE2 0x9A 0x96..0x97 #E1.0 [2] (⚖️..⚗️) balance scale..alembic
| 0xE2 0x9A 0x98 #E0.0 [1] (⚘) FLOWER
| 0xE2 0x9A 0x99 #E1.0 [1] (⚙️) gear
| 0xE2 0x9A 0x9A #E0.0 [1] (⚚) STAFF OF HERMES
| 0xE2 0x9A 0x9B..0x9C #E1.0 [2] (⚛️..⚜️) atom symbol..fleur-d...
| 0xE2 0x9A 0x9D..0x9F #E0.0 [3] (⚝..⚟) OUTLINED WHITE STAR..T...
| 0xE2 0x9A 0xA0..0xA1 #E0.6 [2] (⚠️..⚡) warning..high voltage
| 0xE2 0x9A 0xA2..0xA6 #E0.0 [5] (⚢..⚦) DOUBLED FEMALE SIGN..M...
| 0xE2 0x9A 0xA7 #E13.0 [1] (⚧️) transgender symbol
| 0xE2 0x9A 0xA8..0xA9 #E0.0 [2] (⚨..⚩) VERTICAL MALE WITH STR...
| 0xE2 0x9A 0xAA..0xAB #E0.6 [2] (⚪..⚫) white circle..black ci...
| 0xE2 0x9A 0xAC..0xAF #E0.0 [4] (⚬..⚯) MEDIUM SMALL WHITE CIR...
| 0xE2 0x9A 0xB0..0xB1 #E1.0 [2] (⚰️..⚱️) coffin..funeral urn
| 0xE2 0x9A 0xB2..0xBC #E0.0 [11] (⚲..⚼) NEUTER..SESQUIQUADRATE
| 0xE2 0x9A 0xBD..0xBE #E0.6 [2] (⚽..⚾) soccer ball..baseball
| 0xE2 0x9A 0xBF..0xFF #E0.0 [5] (⚿..⛃) SQUARED KEY..BLACK DRA...
| 0xE2 0x9B 0x00..0x83 #
| 0xE2 0x9B 0x84..0x85 #E0.6 [2] (⛄..⛅) snowman without snow.....
| 0xE2 0x9B 0x86..0x87 #E0.0 [2] (⛆..⛇) RAIN..BLACK SNOWMAN
| 0xE2 0x9B 0x88 #E0.7 [1] (⛈️) cloud with lightning ...
| 0xE2 0x9B 0x89..0x8D #E0.0 [5] (⛉..⛍) TURNED WHITE SHOGI PIE...
| 0xE2 0x9B 0x8E #E0.6 [1] (⛎) Ophiuchus
| 0xE2 0x9B 0x8F #E0.7 [1] (⛏️) pick
| 0xE2 0x9B 0x90 #E0.0 [1] (⛐) CAR SLIDING
| 0xE2 0x9B 0x91 #E0.7 [1] (⛑️) rescue workers helmet
| 0xE2 0x9B 0x92 #E0.0 [1] (⛒) CIRCLED CROSSING LANES
| 0xE2 0x9B 0x93 #E0.7 [1] (⛓️) chains
| 0xE2 0x9B 0x94 #E0.6 [1] (⛔) no entry
| 0xE2 0x9B 0x95..0xA8 #E0.0 [20] (⛕..⛨) ALTERNATE ONE-WAY LEFT...
| 0xE2 0x9B 0xA9 #E0.7 [1] (⛩️) shinto shrine
| 0xE2 0x9B 0xAA #E0.6 [1] (⛪) church
| 0xE2 0x9B 0xAB..0xAF #E0.0 [5] (⛫..⛯) CASTLE..MAP SYMBOL FOR...
| 0xE2 0x9B 0xB0..0xB1 #E0.7 [2] (⛰️..⛱️) mountain..umbrella o...
| 0xE2 0x9B 0xB2..0xB3 #E0.6 [2] (⛲..⛳) fountain..flag in hole
| 0xE2 0x9B 0xB4 #E0.7 [1] (⛴️) ferry
| 0xE2 0x9B 0xB5 #E0.6 [1] (⛵) sailboat
| 0xE2 0x9B 0xB6 #E0.0 [1] (⛶) SQUARE FOUR CORNERS
| 0xE2 0x9B 0xB7..0xB9 #E0.7 [3] (⛷️..⛹️) skier..person bounci...
| 0xE2 0x9B 0xBA #E0.6 [1] (⛺) tent
| 0xE2 0x9B 0xBB..0xBC #E0.0 [2] (⛻..⛼) JAPANESE BANK SYMBOL.....
| 0xE2 0x9B 0xBD #E0.6 [1] (⛽) fuel pump
| 0xE2 0x9B 0xBE..0xFF #E0.0 [4] (⛾..✁) CUP ON BLACK SQUARE..U...
| 0xE2 0x9C 0x00..0x81 #
| 0xE2 0x9C 0x82 #E0.6 [1] (✂️) scissors
| 0xE2 0x9C 0x83..0x84 #E0.0 [2] (✃..✄) LOWER BLADE SCISSORS.....
| 0xE2 0x9C 0x85 #E0.6 [1] (✅) check mark button
| 0xE2 0x9C 0x88..0x8C #E0.6 [5] (✈️..✌️) airplane..victory hand
| 0xE2 0x9C 0x8D #E0.7 [1] (✍️) writing hand
| 0xE2 0x9C 0x8E #E0.0 [1] (✎) LOWER RIGHT PENCIL
| 0xE2 0x9C 0x8F #E0.6 [1] (✏️) pencil
| 0xE2 0x9C 0x90..0x91 #E0.0 [2] (✐..✑) UPPER RIGHT PENCIL..WH...
| 0xE2 0x9C 0x92 #E0.6 [1] (✒️) black nib
| 0xE2 0x9C 0x94 #E0.6 [1] (✔️) check mark
| 0xE2 0x9C 0x96 #E0.6 [1] (✖️) multiply
| 0xE2 0x9C 0x9D #E0.7 [1] (✝️) latin cross
| 0xE2 0x9C 0xA1 #E0.7 [1] (✡️) star of David
| 0xE2 0x9C 0xA8 #E0.6 [1] (✨) sparkles
| 0xE2 0x9C 0xB3..0xB4 #E0.6 [2] (✳️..✴️) eight-spoked asteris...
| 0xE2 0x9D 0x84 #E0.6 [1] (❄️) snowflake
| 0xE2 0x9D 0x87 #E0.6 [1] (❇️) sparkle
| 0xE2 0x9D 0x8C #E0.6 [1] (❌) cross mark
| 0xE2 0x9D 0x8E #E0.6 [1] (❎) cross mark button
| 0xE2 0x9D 0x93..0x95 #E0.6 [3] (❓..❕) red question mark..whi...
| 0xE2 0x9D 0x97 #E0.6 [1] (❗) red exclamation mark
| 0xE2 0x9D 0xA3 #E1.0 [1] (❣️) heart exclamation
| 0xE2 0x9D 0xA4 #E0.6 [1] (❤️) red heart
| 0xE2 0x9D 0xA5..0xA7 #E0.0 [3] (❥..❧) ROTATED HEAVY BLACK HE...
| 0xE2 0x9E 0x95..0x97 #E0.6 [3] (..➗) plus..divide
| 0xE2 0x9E 0xA1 #E0.6 [1] (➡️) right arrow
| 0xE2 0x9E 0xB0 #E0.6 [1] (➰) curly loop
| 0xE2 0x9E 0xBF #E1.0 [1] (➿) double curly loop
| 0xE2 0xA4 0xB4..0xB5 #E0.6 [2] (⤴️..⤵️) right arrow curving ...
| 0xE2 0xAC 0x85..0x87 #E0.6 [3] (⬅️..⬇️) left arrow..down arrow
| 0xE2 0xAC 0x9B..0x9C #E0.6 [2] (⬛..⬜) black large square..wh...
| 0xE2 0xAD 0x90 #E0.6 [1] (⭐) star
| 0xE2 0xAD 0x95 #E0.6 [1] (⭕) hollow red circle
| 0xE3 0x80 0xB0 #E0.6 [1] (〰️) wavy dash
| 0xE3 0x80 0xBD #E0.6 [1] (〽️) part alternation mark
| 0xE3 0x8A 0x97 #E0.6 [1] (㊗️) Japanese “congratulat...
| 0xE3 0x8A 0x99 #E0.6 [1] (㊙️) Japanese “secret” button
| 0xF0 0x9F 0x80 0x80..0x83 #E0.0 [4] (🀀..🀃) MAHJONG TILE EAST W...
| 0xF0 0x9F 0x80 0x84 #E0.6 [1] (🀄) mahjong red dragon
| 0xF0 0x9F 0x80 0x85..0xFF #E0.0 [202] (🀅..🃎) MAHJONG TILE ...
| 0xF0 0x9F 0x81..0x82 0x00..0xFF #
| 0xF0 0x9F 0x83 0x00..0x8E #
| 0xF0 0x9F 0x83 0x8F #E0.6 [1] (🃏) joker
| 0xF0 0x9F 0x83 0x90..0xBF #E0.0 [48] (🃐..🃿) <reserved-1F0D0>..<...
| 0xF0 0x9F 0x84 0x8D..0x8F #E0.0 [3] (🄍..🄏) CIRCLED ZERO WITH S...
| 0xF0 0x9F 0x84 0xAF #E0.0 [1] (🄯) COPYLEFT SYMBOL
| 0xF0 0x9F 0x85 0xAC..0xAF #E0.0 [4] (🅬..🅯) RAISED MR SIGN..CIR...
| 0xF0 0x9F 0x85 0xB0..0xB1 #E0.6 [2] (🅰️..🅱️) A button (blood t...
| 0xF0 0x9F 0x85 0xBE..0xBF #E0.6 [2] (🅾️..🅿️) O button (blood t...
| 0xF0 0x9F 0x86 0x8E #E0.6 [1] (🆎) AB button (blood type)
| 0xF0 0x9F 0x86 0x91..0x9A #E0.6 [10] (🆑..🆚) CL button..VS button
| 0xF0 0x9F 0x86 0xAD..0xFF #E0.0 [57] (🆭..🇥) MASK WORK SYMBOL..<...
| 0xF0 0x9F 0x87 0x00..0xA5 #
| 0xF0 0x9F 0x88 0x81..0x82 #E0.6 [2] (🈁..🈂️) Japanese “here” bu...
| 0xF0 0x9F 0x88 0x83..0x8F #E0.0 [13] (🈃..🈏) <reserved-1F203>..<...
| 0xF0 0x9F 0x88 0x9A #E0.6 [1] (🈚) Japanese “free of char...
| 0xF0 0x9F 0x88 0xAF #E0.6 [1] (🈯) Japanese “reserved” bu...
| 0xF0 0x9F 0x88 0xB2..0xBA #E0.6 [9] (🈲..🈺) Japanese “prohibite...
| 0xF0 0x9F 0x88 0xBC..0xBF #E0.0 [4] (🈼..🈿) <reserved-1F23C>..<...
| 0xF0 0x9F 0x89 0x89..0x8F #E0.0 [7] (🉉..🉏) <reserved-1F249>..<...
| 0xF0 0x9F 0x89 0x90..0x91 #E0.6 [2] (🉐..🉑) Japanese “bargain” ...
| 0xF0 0x9F 0x89 0x92..0xFF #E0.0 [174] (🉒..🋿) <reserved-1F2...
| 0xF0 0x9F 0x8A..0x8A 0x00..0xFF #
| 0xF0 0x9F 0x8B 0x00..0xBF #
| 0xF0 0x9F 0x8C 0x80..0x8C #E0.6 [13] (🌀..🌌) cyclone..milky way
| 0xF0 0x9F 0x8C 0x8D..0x8E #E0.7 [2] (🌍..🌎) globe showing Europ...
| 0xF0 0x9F 0x8C 0x8F #E0.6 [1] (🌏) globe showing Asia-Aus...
| 0xF0 0x9F 0x8C 0x90 #E1.0 [1] (🌐) globe with meridians
| 0xF0 0x9F 0x8C 0x91 #E0.6 [1] (🌑) new moon
| 0xF0 0x9F 0x8C 0x92 #E1.0 [1] (🌒) waxing crescent moon
| 0xF0 0x9F 0x8C 0x93..0x95 #E0.6 [3] (🌓..🌕) first quarter moon....
| 0xF0 0x9F 0x8C 0x96..0x98 #E1.0 [3] (🌖..🌘) waning gibbous moon...
| 0xF0 0x9F 0x8C 0x99 #E0.6 [1] (🌙) crescent moon
| 0xF0 0x9F 0x8C 0x9A #E1.0 [1] (🌚) new moon face
| 0xF0 0x9F 0x8C 0x9B #E0.6 [1] (🌛) first quarter moon face
| 0xF0 0x9F 0x8C 0x9C #E0.7 [1] (🌜) last quarter moon face
| 0xF0 0x9F 0x8C 0x9D..0x9E #E1.0 [2] (🌝..🌞) full moon face..sun...
| 0xF0 0x9F 0x8C 0x9F..0xA0 #E0.6 [2] (🌟..🌠) glowing star..shoot...
| 0xF0 0x9F 0x8C 0xA1 #E0.7 [1] (🌡️) thermometer
| 0xF0 0x9F 0x8C 0xA2..0xA3 #E0.0 [2] (🌢..🌣) BLACK DROPLET..WHIT...
| 0xF0 0x9F 0x8C 0xA4..0xAC #E0.7 [9] (🌤️..🌬️) sun behind small ...
| 0xF0 0x9F 0x8C 0xAD..0xAF #E1.0 [3] (🌭..🌯) hot dog..burrito
| 0xF0 0x9F 0x8C 0xB0..0xB1 #E0.6 [2] (🌰..🌱) chestnut..seedling
| 0xF0 0x9F 0x8C 0xB2..0xB3 #E1.0 [2] (🌲..🌳) evergreen tree..dec...
| 0xF0 0x9F 0x8C 0xB4..0xB5 #E0.6 [2] (🌴..🌵) palm tree..cactus
| 0xF0 0x9F 0x8C 0xB6 #E0.7 [1] (🌶️) hot pepper
| 0xF0 0x9F 0x8C 0xB7..0xFF #E0.6 [20] (🌷..🍊) tulip..tangerine
| 0xF0 0x9F 0x8D 0x00..0x8A #
| 0xF0 0x9F 0x8D 0x8B #E1.0 [1] (🍋) lemon
| 0xF0 0x9F 0x8D 0x8C..0x8F #E0.6 [4] (🍌..🍏) banana..green apple
| 0xF0 0x9F 0x8D 0x90 #E1.0 [1] (🍐) pear
| 0xF0 0x9F 0x8D 0x91..0xBB #E0.6 [43] (🍑..🍻) peach..clinking bee...
| 0xF0 0x9F 0x8D 0xBC #E1.0 [1] (🍼) baby bottle
| 0xF0 0x9F 0x8D 0xBD #E0.7 [1] (🍽️) fork and knife with p...
| 0xF0 0x9F 0x8D 0xBE..0xBF #E1.0 [2] (🍾..🍿) bottle with popping...
| 0xF0 0x9F 0x8E 0x80..0x93 #E0.6 [20] (🎀..🎓) ribbon..graduation cap
| 0xF0 0x9F 0x8E 0x94..0x95 #E0.0 [2] (🎔..🎕) HEART WITH TIP ON T...
| 0xF0 0x9F 0x8E 0x96..0x97 #E0.7 [2] (🎖️..🎗️) military medal..r...
| 0xF0 0x9F 0x8E 0x98 #E0.0 [1] (🎘) MUSICAL KEYBOARD WITH ...
| 0xF0 0x9F 0x8E 0x99..0x9B #E0.7 [3] (🎙️..🎛️) studio microphone...
| 0xF0 0x9F 0x8E 0x9C..0x9D #E0.0 [2] (🎜..🎝) BEAMED ASCENDING MU...
| 0xF0 0x9F 0x8E 0x9E..0x9F #E0.7 [2] (🎞️..🎟️) film frames..admi...
| 0xF0 0x9F 0x8E 0xA0..0xFF #E0.6 [37] (🎠..🏄) carousel horse..per...
| 0xF0 0x9F 0x8F 0x00..0x84 #
| 0xF0 0x9F 0x8F 0x85 #E1.0 [1] (🏅) sports medal
| 0xF0 0x9F 0x8F 0x86 #E0.6 [1] (🏆) trophy
| 0xF0 0x9F 0x8F 0x87 #E1.0 [1] (🏇) horse racing
| 0xF0 0x9F 0x8F 0x88 #E0.6 [1] (🏈) american football
| 0xF0 0x9F 0x8F 0x89 #E1.0 [1] (🏉) rugby football
| 0xF0 0x9F 0x8F 0x8A #E0.6 [1] (🏊) person swimming
| 0xF0 0x9F 0x8F 0x8B..0x8E #E0.7 [4] (🏋️..🏎️) person lifting we...
| 0xF0 0x9F 0x8F 0x8F..0x93 #E1.0 [5] (🏏..🏓) cricket game..ping ...
| 0xF0 0x9F 0x8F 0x94..0x9F #E0.7 [12] (🏔️..🏟️) snow-capped mount...
| 0xF0 0x9F 0x8F 0xA0..0xA3 #E0.6 [4] (🏠..🏣) house..Japanese pos...
| 0xF0 0x9F 0x8F 0xA4 #E1.0 [1] (🏤) post office
| 0xF0 0x9F 0x8F 0xA5..0xB0 #E0.6 [12] (🏥..🏰) hospital..castle
| 0xF0 0x9F 0x8F 0xB1..0xB2 #E0.0 [2] (🏱..🏲) WHITE PENNANT..BLAC...
| 0xF0 0x9F 0x8F 0xB3 #E0.7 [1] (🏳️) white flag
| 0xF0 0x9F 0x8F 0xB4 #E1.0 [1] (🏴) black flag
| 0xF0 0x9F 0x8F 0xB5 #E0.7 [1] (🏵️) rosette
| 0xF0 0x9F 0x8F 0xB6 #E0.0 [1] (🏶) BLACK ROSETTE
| 0xF0 0x9F 0x8F 0xB7 #E0.7 [1] (🏷️) label
| 0xF0 0x9F 0x8F 0xB8..0xBA #E1.0 [3] (🏸..🏺) badminton..amphora
| 0xF0 0x9F 0x90 0x80..0x87 #E1.0 [8] (🐀..🐇) rat..rabbit
| 0xF0 0x9F 0x90 0x88 #E0.7 [1] (🐈) cat
| 0xF0 0x9F 0x90 0x89..0x8B #E1.0 [3] (🐉..🐋) dragon..whale
| 0xF0 0x9F 0x90 0x8C..0x8E #E0.6 [3] (🐌..🐎) snail..horse
| 0xF0 0x9F 0x90 0x8F..0x90 #E1.0 [2] (🐏..🐐) ram..goat
| 0xF0 0x9F 0x90 0x91..0x92 #E0.6 [2] (🐑..🐒) ewe..monkey
| 0xF0 0x9F 0x90 0x93 #E1.0 [1] (🐓) rooster
| 0xF0 0x9F 0x90 0x94 #E0.6 [1] (🐔) chicken
| 0xF0 0x9F 0x90 0x95 #E0.7 [1] (🐕) dog
| 0xF0 0x9F 0x90 0x96 #E1.0 [1] (🐖) pig
| 0xF0 0x9F 0x90 0x97..0xA9 #E0.6 [19] (🐗..🐩) boar..poodle
| 0xF0 0x9F 0x90 0xAA #E1.0 [1] (🐪) camel
| 0xF0 0x9F 0x90 0xAB..0xBE #E0.6 [20] (🐫..🐾) two-hump camel..paw...
| 0xF0 0x9F 0x90 0xBF #E0.7 [1] (🐿️) chipmunk
| 0xF0 0x9F 0x91 0x80 #E0.6 [1] (👀) eyes
| 0xF0 0x9F 0x91 0x81 #E0.7 [1] (👁️) eye
| 0xF0 0x9F 0x91 0x82..0xA4 #E0.6 [35] (👂..👤) ear..bust in silhou...
| 0xF0 0x9F 0x91 0xA5 #E1.0 [1] (👥) busts in silhouette
| 0xF0 0x9F 0x91 0xA6..0xAB #E0.6 [6] (👦..👫) boy..woman and man ...
| 0xF0 0x9F 0x91 0xAC..0xAD #E1.0 [2] (👬..👭) men holding hands.....
| 0xF0 0x9F 0x91 0xAE..0xFF #E0.6 [63] (👮..💬) police officer..spe...
| 0xF0 0x9F 0x92 0x00..0xAC #
| 0xF0 0x9F 0x92 0xAD #E1.0 [1] (💭) thought balloon
| 0xF0 0x9F 0x92 0xAE..0xB5 #E0.6 [8] (💮..💵) white flower..dolla...
| 0xF0 0x9F 0x92 0xB6..0xB7 #E1.0 [2] (💶..💷) euro banknote..poun...
| 0xF0 0x9F 0x92 0xB8..0xFF #E0.6 [52] (💸..📫) money with wings..c...
| 0xF0 0x9F 0x93 0x00..0xAB #
| 0xF0 0x9F 0x93 0xAC..0xAD #E0.7 [2] (📬..📭) open mailbox with r...
| 0xF0 0x9F 0x93 0xAE #E0.6 [1] (📮) postbox
| 0xF0 0x9F 0x93 0xAF #E1.0 [1] (📯) postal horn
| 0xF0 0x9F 0x93 0xB0..0xB4 #E0.6 [5] (📰..📴) newspaper..mobile p...
| 0xF0 0x9F 0x93 0xB5 #E1.0 [1] (📵) no mobile phones
| 0xF0 0x9F 0x93 0xB6..0xB7 #E0.6 [2] (📶..📷) antenna bars..camera
| 0xF0 0x9F 0x93 0xB8 #E1.0 [1] (📸) camera with flash
| 0xF0 0x9F 0x93 0xB9..0xBC #E0.6 [4] (📹..📼) video camera..video...
| 0xF0 0x9F 0x93 0xBD #E0.7 [1] (📽️) film projector
| 0xF0 0x9F 0x93 0xBE #E0.0 [1] (📾) PORTABLE STEREO
| 0xF0 0x9F 0x93 0xBF..0xFF #E1.0 [4] (📿..🔂) prayer beads..repea...
| 0xF0 0x9F 0x94 0x00..0x82 #
| 0xF0 0x9F 0x94 0x83 #E0.6 [1] (🔃) clockwise vertical arrows
| 0xF0 0x9F 0x94 0x84..0x87 #E1.0 [4] (🔄..🔇) counterclockwise ar...
| 0xF0 0x9F 0x94 0x88 #E0.7 [1] (🔈) speaker low volume
| 0xF0 0x9F 0x94 0x89 #E1.0 [1] (🔉) speaker medium volume
| 0xF0 0x9F 0x94 0x8A..0x94 #E0.6 [11] (🔊..🔔) speaker high volume...
| 0xF0 0x9F 0x94 0x95 #E1.0 [1] (🔕) bell with slash
| 0xF0 0x9F 0x94 0x96..0xAB #E0.6 [22] (🔖..🔫) bookmark..water pistol
| 0xF0 0x9F 0x94 0xAC..0xAD #E1.0 [2] (🔬..🔭) microscope..telescope
| 0xF0 0x9F 0x94 0xAE..0xBD #E0.6 [16] (🔮..🔽) crystal ball..downw...
| 0xF0 0x9F 0x95 0x86..0x88 #E0.0 [3] (🕆..🕈) WHITE LATIN CROSS.....
| 0xF0 0x9F 0x95 0x89..0x8A #E0.7 [2] (🕉️..🕊️) om..dove
| 0xF0 0x9F 0x95 0x8B..0x8E #E1.0 [4] (🕋..🕎) kaaba..menorah
| 0xF0 0x9F 0x95 0x8F #E0.0 [1] (🕏) BOWL OF HYGIEIA
| 0xF0 0x9F 0x95 0x90..0x9B #E0.6 [12] (🕐..🕛) one oclock..twelve...
| 0xF0 0x9F 0x95 0x9C..0xA7 #E0.7 [12] (🕜..🕧) one-thirty..twelve-...
| 0xF0 0x9F 0x95 0xA8..0xAE #E0.0 [7] (🕨..🕮) RIGHT SPEAKER..BOOK
| 0xF0 0x9F 0x95 0xAF..0xB0 #E0.7 [2] (🕯️..🕰️) candle..mantelpie...
| 0xF0 0x9F 0x95 0xB1..0xB2 #E0.0 [2] (🕱..🕲) BLACK SKULL AND CRO...
| 0xF0 0x9F 0x95 0xB3..0xB9 #E0.7 [7] (🕳️..🕹️) hole..joystick
| 0xF0 0x9F 0x95 0xBA #E3.0 [1] (🕺) man dancing
| 0xF0 0x9F 0x95 0xBB..0xFF #E0.0 [12] (🕻..🖆) LEFT HAND TELEPHONE...
| 0xF0 0x9F 0x96 0x00..0x86 #
| 0xF0 0x9F 0x96 0x87 #E0.7 [1] (🖇️) linked paperclips
| 0xF0 0x9F 0x96 0x88..0x89 #E0.0 [2] (🖈..🖉) BLACK PUSHPIN..LOWE...
| 0xF0 0x9F 0x96 0x8A..0x8D #E0.7 [4] (🖊️..🖍️) pen..crayon
| 0xF0 0x9F 0x96 0x8E..0x8F #E0.0 [2] (🖎..🖏) LEFT WRITING HAND.....
| 0xF0 0x9F 0x96 0x90 #E0.7 [1] (🖐️) hand with fingers spl...
| 0xF0 0x9F 0x96 0x91..0x94 #E0.0 [4] (🖑..🖔) REVERSED RAISED HAN...
| 0xF0 0x9F 0x96 0x95..0x96 #E1.0 [2] (🖕..🖖) middle finger..vulc...
| 0xF0 0x9F 0x96 0x97..0xA3 #E0.0 [13] (🖗..🖣) WHITE DOWN POINTING...
| 0xF0 0x9F 0x96 0xA4 #E3.0 [1] (🖤) black heart
| 0xF0 0x9F 0x96 0xA5 #E0.7 [1] (🖥️) desktop computer
| 0xF0 0x9F 0x96 0xA6..0xA7 #E0.0 [2] (🖦..🖧) KEYBOARD AND MOUSE....
| 0xF0 0x9F 0x96 0xA8 #E0.7 [1] (🖨️) printer
| 0xF0 0x9F 0x96 0xA9..0xB0 #E0.0 [8] (🖩..🖰) POCKET CALCULATOR.....
| 0xF0 0x9F 0x96 0xB1..0xB2 #E0.7 [2] (🖱️..🖲️) computer mouse..t...
| 0xF0 0x9F 0x96 0xB3..0xBB #E0.0 [9] (🖳..🖻) OLD PERSONAL COMPUT...
| 0xF0 0x9F 0x96 0xBC #E0.7 [1] (🖼️) framed picture
| 0xF0 0x9F 0x96 0xBD..0xFF #E0.0 [5] (🖽..🗁) FRAME WITH TILES..O...
| 0xF0 0x9F 0x97 0x00..0x81 #
| 0xF0 0x9F 0x97 0x82..0x84 #E0.7 [3] (🗂️..🗄️) card index divide...
| 0xF0 0x9F 0x97 0x85..0x90 #E0.0 [12] (🗅..🗐) EMPTY NOTE..PAGES
| 0xF0 0x9F 0x97 0x91..0x93 #E0.7 [3] (🗑️..🗓️) wastebasket..spir...
| 0xF0 0x9F 0x97 0x94..0x9B #E0.0 [8] (🗔..🗛) DESKTOP WINDOW..DEC...
| 0xF0 0x9F 0x97 0x9C..0x9E #E0.7 [3] (🗜️..🗞️) clamp..rolled-up ...
| 0xF0 0x9F 0x97 0x9F..0xA0 #E0.0 [2] (🗟..🗠) PAGE WITH CIRCLED T...
| 0xF0 0x9F 0x97 0xA1 #E0.7 [1] (🗡️) dagger
| 0xF0 0x9F 0x97 0xA2 #E0.0 [1] (🗢) LIPS
| 0xF0 0x9F 0x97 0xA3 #E0.7 [1] (🗣️) speaking head
| 0xF0 0x9F 0x97 0xA4..0xA7 #E0.0 [4] (🗤..🗧) THREE RAYS ABOVE..T...
| 0xF0 0x9F 0x97 0xA8 #E2.0 [1] (🗨️) left speech bubble
| 0xF0 0x9F 0x97 0xA9..0xAE #E0.0 [6] (🗩..🗮) RIGHT SPEECH BUBBLE...
| 0xF0 0x9F 0x97 0xAF #E0.7 [1] (🗯️) right anger bubble
| 0xF0 0x9F 0x97 0xB0..0xB2 #E0.0 [3] (🗰..🗲) MOOD BUBBLE..LIGHTN...
| 0xF0 0x9F 0x97 0xB3 #E0.7 [1] (🗳️) ballot box with ballot
| 0xF0 0x9F 0x97 0xB4..0xB9 #E0.0 [6] (🗴..🗹) BALLOT SCRIPT X..BA...
| 0xF0 0x9F 0x97 0xBA #E0.7 [1] (🗺️) world map
| 0xF0 0x9F 0x97 0xBB..0xBF #E0.6 [5] (🗻..🗿) mount fuji..moai
| 0xF0 0x9F 0x98 0x80 #E1.0 [1] (😀) grinning face
| 0xF0 0x9F 0x98 0x81..0x86 #E0.6 [6] (😁..😆) beaming face with s...
| 0xF0 0x9F 0x98 0x87..0x88 #E1.0 [2] (😇..😈) smiling face with h...
| 0xF0 0x9F 0x98 0x89..0x8D #E0.6 [5] (😉..😍) winking face..smili...
| 0xF0 0x9F 0x98 0x8E #E1.0 [1] (😎) smiling face with sung...
| 0xF0 0x9F 0x98 0x8F #E0.6 [1] (😏) smirking face
| 0xF0 0x9F 0x98 0x90 #E0.7 [1] (😐) neutral face
| 0xF0 0x9F 0x98 0x91 #E1.0 [1] (😑) expressionless face
| 0xF0 0x9F 0x98 0x92..0x94 #E0.6 [3] (😒..😔) unamused face..pens...
| 0xF0 0x9F 0x98 0x95 #E1.0 [1] (😕) confused face
| 0xF0 0x9F 0x98 0x96 #E0.6 [1] (😖) confounded face
| 0xF0 0x9F 0x98 0x97 #E1.0 [1] (😗) kissing face
| 0xF0 0x9F 0x98 0x98 #E0.6 [1] (😘) face blowing a kiss
| 0xF0 0x9F 0x98 0x99 #E1.0 [1] (😙) kissing face with smil...
| 0xF0 0x9F 0x98 0x9A #E0.6 [1] (😚) kissing face with clos...
| 0xF0 0x9F 0x98 0x9B #E1.0 [1] (😛) face with tongue
| 0xF0 0x9F 0x98 0x9C..0x9E #E0.6 [3] (😜..😞) winking face with t...
| 0xF0 0x9F 0x98 0x9F #E1.0 [1] (😟) worried face
| 0xF0 0x9F 0x98 0xA0..0xA5 #E0.6 [6] (😠..😥) angry face..sad but...
| 0xF0 0x9F 0x98 0xA6..0xA7 #E1.0 [2] (😦..😧) frowning face with ...
| 0xF0 0x9F 0x98 0xA8..0xAB #E0.6 [4] (😨..😫) fearful face..tired...
| 0xF0 0x9F 0x98 0xAC #E1.0 [1] (😬) grimacing face
| 0xF0 0x9F 0x98 0xAD #E0.6 [1] (😭) loudly crying face
| 0xF0 0x9F 0x98 0xAE..0xAF #E1.0 [2] (😮..😯) face with open mout...
| 0xF0 0x9F 0x98 0xB0..0xB3 #E0.6 [4] (😰..😳) anxious face with s...
| 0xF0 0x9F 0x98 0xB4 #E1.0 [1] (😴) sleeping face
| 0xF0 0x9F 0x98 0xB5 #E0.6 [1] (😵) face with crossed-out ...
| 0xF0 0x9F 0x98 0xB6 #E1.0 [1] (😶) face without mouth
| 0xF0 0x9F 0x98 0xB7..0xFF #E0.6 [10] (😷..🙀) face with medical m...
| 0xF0 0x9F 0x99 0x00..0x80 #
| 0xF0 0x9F 0x99 0x81..0x84 #E1.0 [4] (🙁..🙄) slightly frowning f...
| 0xF0 0x9F 0x99 0x85..0x8F #E0.6 [11] (🙅..🙏) person gesturing NO...
| 0xF0 0x9F 0x9A 0x80 #E0.6 [1] (🚀) rocket
| 0xF0 0x9F 0x9A 0x81..0x82 #E1.0 [2] (🚁..🚂) helicopter..locomotive
| 0xF0 0x9F 0x9A 0x83..0x85 #E0.6 [3] (🚃..🚅) railway car..bullet...
| 0xF0 0x9F 0x9A 0x86 #E1.0 [1] (🚆) train
| 0xF0 0x9F 0x9A 0x87 #E0.6 [1] (🚇) metro
| 0xF0 0x9F 0x9A 0x88 #E1.0 [1] (🚈) light rail
| 0xF0 0x9F 0x9A 0x89 #E0.6 [1] (🚉) station
| 0xF0 0x9F 0x9A 0x8A..0x8B #E1.0 [2] (🚊..🚋) tram..tram car
| 0xF0 0x9F 0x9A 0x8C #E0.6 [1] (🚌) bus
| 0xF0 0x9F 0x9A 0x8D #E0.7 [1] (🚍) oncoming bus
| 0xF0 0x9F 0x9A 0x8E #E1.0 [1] (🚎) trolleybus
| 0xF0 0x9F 0x9A 0x8F #E0.6 [1] (🚏) bus stop
| 0xF0 0x9F 0x9A 0x90 #E1.0 [1] (🚐) minibus
| 0xF0 0x9F 0x9A 0x91..0x93 #E0.6 [3] (🚑..🚓) ambulance..police car
| 0xF0 0x9F 0x9A 0x94 #E0.7 [1] (🚔) oncoming police car
| 0xF0 0x9F 0x9A 0x95 #E0.6 [1] (🚕) taxi
| 0xF0 0x9F 0x9A 0x96 #E1.0 [1] (🚖) oncoming taxi
| 0xF0 0x9F 0x9A 0x97 #E0.6 [1] (🚗) automobile
| 0xF0 0x9F 0x9A 0x98 #E0.7 [1] (🚘) oncoming automobile
| 0xF0 0x9F 0x9A 0x99..0x9A #E0.6 [2] (🚙..🚚) sport utility vehic...
| 0xF0 0x9F 0x9A 0x9B..0xA1 #E1.0 [7] (🚛..🚡) articulated lorry.....
| 0xF0 0x9F 0x9A 0xA2 #E0.6 [1] (🚢) ship
| 0xF0 0x9F 0x9A 0xA3 #E1.0 [1] (🚣) person rowing boat
| 0xF0 0x9F 0x9A 0xA4..0xA5 #E0.6 [2] (🚤..🚥) speedboat..horizont...
| 0xF0 0x9F 0x9A 0xA6 #E1.0 [1] (🚦) vertical traffic light
| 0xF0 0x9F 0x9A 0xA7..0xAD #E0.6 [7] (🚧..🚭) construction..no sm...
| 0xF0 0x9F 0x9A 0xAE..0xB1 #E1.0 [4] (🚮..🚱) litter in bin sign....
| 0xF0 0x9F 0x9A 0xB2 #E0.6 [1] (🚲) bicycle
| 0xF0 0x9F 0x9A 0xB3..0xB5 #E1.0 [3] (🚳..🚵) no bicycles..person...
| 0xF0 0x9F 0x9A 0xB6 #E0.6 [1] (🚶) person walking
| 0xF0 0x9F 0x9A 0xB7..0xB8 #E1.0 [2] (🚷..🚸) no pedestrians..chi...
| 0xF0 0x9F 0x9A 0xB9..0xBE #E0.6 [6] (🚹..🚾) mens room..water c...
| 0xF0 0x9F 0x9A 0xBF #E1.0 [1] (🚿) shower
| 0xF0 0x9F 0x9B 0x80 #E0.6 [1] (🛀) person taking bath
| 0xF0 0x9F 0x9B 0x81..0x85 #E1.0 [5] (🛁..🛅) bathtub..left luggage
| 0xF0 0x9F 0x9B 0x86..0x8A #E0.0 [5] (🛆..🛊) TRIANGLE WITH ROUND...
| 0xF0 0x9F 0x9B 0x8B #E0.7 [1] (🛋️) couch and lamp
| 0xF0 0x9F 0x9B 0x8C #E1.0 [1] (🛌) person in bed
| 0xF0 0x9F 0x9B 0x8D..0x8F #E0.7 [3] (🛍️..🛏️) shopping bags..bed
| 0xF0 0x9F 0x9B 0x90 #E1.0 [1] (🛐) place of worship
| 0xF0 0x9F 0x9B 0x91..0x92 #E3.0 [2] (🛑..🛒) stop sign..shopping...
| 0xF0 0x9F 0x9B 0x93..0x94 #E0.0 [2] (🛓..🛔) STUPA..PAGODA
| 0xF0 0x9F 0x9B 0x95 #E12.0 [1] (🛕) hindu temple
| 0xF0 0x9F 0x9B 0x96..0x97 #E13.0 [2] (🛖..🛗) hut..elevator
| 0xF0 0x9F 0x9B 0x98..0x9B #E0.0 [4] (🛘..🛛) <reserved-1F6D8>..<...
| 0xF0 0x9F 0x9B 0x9C #E15.0 [1] (🛜) wireless
| 0xF0 0x9F 0x9B 0x9D..0x9F #E14.0 [3] (🛝..🛟) playground slide..r...
| 0xF0 0x9F 0x9B 0xA0..0xA5 #E0.7 [6] (🛠️..🛥️) hammer and wrench...
| 0xF0 0x9F 0x9B 0xA6..0xA8 #E0.0 [3] (🛦..🛨) UP-POINTING MILITAR...
| 0xF0 0x9F 0x9B 0xA9 #E0.7 [1] (🛩️) small airplane
| 0xF0 0x9F 0x9B 0xAA #E0.0 [1] (🛪) NORTHEAST-POINTING AIR...
| 0xF0 0x9F 0x9B 0xAB..0xAC #E1.0 [2] (🛫..🛬) airplane departure....
| 0xF0 0x9F 0x9B 0xAD..0xAF #E0.0 [3] (🛭..🛯) <reserved-1F6ED>..<...
| 0xF0 0x9F 0x9B 0xB0 #E0.7 [1] (🛰️) satellite
| 0xF0 0x9F 0x9B 0xB1..0xB2 #E0.0 [2] (🛱..🛲) ONCOMING FIRE ENGIN...
| 0xF0 0x9F 0x9B 0xB3 #E0.7 [1] (🛳️) passenger ship
| 0xF0 0x9F 0x9B 0xB4..0xB6 #E3.0 [3] (🛴..🛶) kick scooter..canoe
| 0xF0 0x9F 0x9B 0xB7..0xB8 #E5.0 [2] (🛷..🛸) sled..flying saucer
| 0xF0 0x9F 0x9B 0xB9 #E11.0 [1] (🛹) skateboard
| 0xF0 0x9F 0x9B 0xBA #E12.0 [1] (🛺) auto rickshaw
| 0xF0 0x9F 0x9B 0xBB..0xBC #E13.0 [2] (🛻..🛼) pickup truck..rolle...
| 0xF0 0x9F 0x9B 0xBD..0xBF #E0.0 [3] (🛽..🛿) <reserved-1F6FD>..<...
| 0xF0 0x9F 0x9D 0xB4..0xBF #E0.0 [12] (🝴..🝿) LOT OF FORTUNE..ORCUS
| 0xF0 0x9F 0x9F 0x95..0x9F #E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE..<...
| 0xF0 0x9F 0x9F 0xA0..0xAB #E12.0 [12] (🟠..🟫) orange circle..brow...
| 0xF0 0x9F 0x9F 0xAC..0xAF #E0.0 [4] (🟬..🟯) <reserved-1F7EC>..<...
| 0xF0 0x9F 0x9F 0xB0 #E14.0 [1] (🟰) heavy equals sign
| 0xF0 0x9F 0x9F 0xB1..0xBF #E0.0 [15] (🟱..🟿) <reserved-1F7F1>..<...
| 0xF0 0x9F 0xA0 0x8C..0x8F #E0.0 [4] (🠌..🠏) <reserved-1F80C>..<...
| 0xF0 0x9F 0xA1 0x88..0x8F #E0.0 [8] (🡈..🡏) <reserved-1F848>..<...
| 0xF0 0x9F 0xA1 0x9A..0x9F #E0.0 [6] (🡚..🡟) <reserved-1F85A>..<...
| 0xF0 0x9F 0xA2 0x88..0x8F #E0.0 [8] (🢈..🢏) <reserved-1F888>..<...
| 0xF0 0x9F 0xA2 0xAE..0xFF #E0.0 [82] (🢮..🣿) <reserved-1F8AE>..<...
| 0xF0 0x9F 0xA3 0x00..0xBF #
| 0xF0 0x9F 0xA4 0x8C #E13.0 [1] (🤌) pinched fingers
| 0xF0 0x9F 0xA4 0x8D..0x8F #E12.0 [3] (🤍..🤏) white heart..pinchi...
| 0xF0 0x9F 0xA4 0x90..0x98 #E1.0 [9] (🤐..🤘) zipper-mouth face.....
| 0xF0 0x9F 0xA4 0x99..0x9E #E3.0 [6] (🤙..🤞) call me hand..cross...
| 0xF0 0x9F 0xA4 0x9F #E5.0 [1] (🤟) love-you gesture
| 0xF0 0x9F 0xA4 0xA0..0xA7 #E3.0 [8] (🤠..🤧) cowboy hat face..sn...
| 0xF0 0x9F 0xA4 0xA8..0xAF #E5.0 [8] (🤨..🤯) face with raised ey...
| 0xF0 0x9F 0xA4 0xB0 #E3.0 [1] (🤰) pregnant woman
| 0xF0 0x9F 0xA4 0xB1..0xB2 #E5.0 [2] (🤱..🤲) breast-feeding..pal...
| 0xF0 0x9F 0xA4 0xB3..0xBA #E3.0 [8] (🤳..🤺) selfie..person fencing
| 0xF0 0x9F 0xA4 0xBC..0xBE #E3.0 [3] (🤼..🤾) people wrestling..p...
| 0xF0 0x9F 0xA4 0xBF #E12.0 [1] (🤿) diving mask
| 0xF0 0x9F 0xA5 0x80..0x85 #E3.0 [6] (🥀..🥅) wilted flower..goal...
| 0xF0 0x9F 0xA5 0x87..0x8B #E3.0 [5] (🥇..🥋) 1st place medal..ma...
| 0xF0 0x9F 0xA5 0x8C #E5.0 [1] (🥌) curling stone
| 0xF0 0x9F 0xA5 0x8D..0x8F #E11.0 [3] (🥍..🥏) lacrosse..flying disc
| 0xF0 0x9F 0xA5 0x90..0x9E #E3.0 [15] (🥐..🥞) croissant..pancakes
| 0xF0 0x9F 0xA5 0x9F..0xAB #E5.0 [13] (🥟..🥫) dumpling..canned food
| 0xF0 0x9F 0xA5 0xAC..0xB0 #E11.0 [5] (🥬..🥰) leafy green..smilin...
| 0xF0 0x9F 0xA5 0xB1 #E12.0 [1] (🥱) yawning face
| 0xF0 0x9F 0xA5 0xB2 #E13.0 [1] (🥲) smiling face with tear
| 0xF0 0x9F 0xA5 0xB3..0xB6 #E11.0 [4] (🥳..🥶) partying face..cold...
| 0xF0 0x9F 0xA5 0xB7..0xB8 #E13.0 [2] (🥷..🥸) ninja..disguised face
| 0xF0 0x9F 0xA5 0xB9 #E14.0 [1] (🥹) face holding back tears
| 0xF0 0x9F 0xA5 0xBA #E11.0 [1] (🥺) pleading face
| 0xF0 0x9F 0xA5 0xBB #E12.0 [1] (🥻) sari
| 0xF0 0x9F 0xA5 0xBC..0xBF #E11.0 [4] (🥼..🥿) lab coat..flat shoe
| 0xF0 0x9F 0xA6 0x80..0x84 #E1.0 [5] (🦀..🦄) crab..unicorn
| 0xF0 0x9F 0xA6 0x85..0x91 #E3.0 [13] (🦅..🦑) eagle..squid
| 0xF0 0x9F 0xA6 0x92..0x97 #E5.0 [6] (🦒..🦗) giraffe..cricket
| 0xF0 0x9F 0xA6 0x98..0xA2 #E11.0 [11] (🦘..🦢) kangaroo..swan
| 0xF0 0x9F 0xA6 0xA3..0xA4 #E13.0 [2] (🦣..🦤) mammoth..dodo
| 0xF0 0x9F 0xA6 0xA5..0xAA #E12.0 [6] (🦥..🦪) sloth..oyster
| 0xF0 0x9F 0xA6 0xAB..0xAD #E13.0 [3] (🦫..🦭) beaver..seal
| 0xF0 0x9F 0xA6 0xAE..0xAF #E12.0 [2] (🦮..🦯) guide dog..white cane
| 0xF0 0x9F 0xA6 0xB0..0xB9 #E11.0 [10] (🦰..🦹) red hair..supervillain
| 0xF0 0x9F 0xA6 0xBA..0xBF #E12.0 [6] (🦺..🦿) safety vest..mechan...
| 0xF0 0x9F 0xA7 0x80 #E1.0 [1] (🧀) cheese wedge
| 0xF0 0x9F 0xA7 0x81..0x82 #E11.0 [2] (🧁..🧂) cupcake..salt
| 0xF0 0x9F 0xA7 0x83..0x8A #E12.0 [8] (🧃..🧊) beverage box..ice
| 0xF0 0x9F 0xA7 0x8B #E13.0 [1] (🧋) bubble tea
| 0xF0 0x9F 0xA7 0x8C #E14.0 [1] (🧌) troll
| 0xF0 0x9F 0xA7 0x8D..0x8F #E12.0 [3] (🧍..🧏) person standing..de...
| 0xF0 0x9F 0xA7 0x90..0xA6 #E5.0 [23] (🧐..🧦) face with monocle.....
| 0xF0 0x9F 0xA7 0xA7..0xBF #E11.0 [25] (🧧..🧿) red envelope..nazar...
| 0xF0 0x9F 0xA8 0x80..0xFF #E0.0 [112] (🨀..🩯) NEUTRAL CHESS KING....
| 0xF0 0x9F 0xA9 0x00..0xAF #
| 0xF0 0x9F 0xA9 0xB0..0xB3 #E12.0 [4] (🩰..🩳) ballet shoes..shorts
| 0xF0 0x9F 0xA9 0xB4 #E13.0 [1] (🩴) thong sandal
| 0xF0 0x9F 0xA9 0xB5..0xB7 #E15.0 [3] (🩵..🩷) light blue heart..p...
| 0xF0 0x9F 0xA9 0xB8..0xBA #E12.0 [3] (🩸..🩺) drop of blood..stet...
| 0xF0 0x9F 0xA9 0xBB..0xBC #E14.0 [2] (🩻..🩼) x-ray..crutch
| 0xF0 0x9F 0xA9 0xBD..0xBF #E0.0 [3] (🩽..🩿) <reserved-1FA7D>..<...
| 0xF0 0x9F 0xAA 0x80..0x82 #E12.0 [3] (🪀..🪂) yo-yo..parachute
| 0xF0 0x9F 0xAA 0x83..0x86 #E13.0 [4] (🪃..🪆) boomerang..nesting ...
| 0xF0 0x9F 0xAA 0x87..0x88 #E15.0 [2] (🪇..🪈) maracas..flute
| 0xF0 0x9F 0xAA 0x89..0x8F #E0.0 [7] (🪉..🪏) <reserved-1FA89>..<...
| 0xF0 0x9F 0xAA 0x90..0x95 #E12.0 [6] (🪐..🪕) ringed planet..banjo
| 0xF0 0x9F 0xAA 0x96..0xA8 #E13.0 [19] (🪖..🪨) military helmet..rock
| 0xF0 0x9F 0xAA 0xA9..0xAC #E14.0 [4] (🪩..🪬) mirror ball..hamsa
| 0xF0 0x9F 0xAA 0xAD..0xAF #E15.0 [3] (🪭..🪯) folding hand fan..k...
| 0xF0 0x9F 0xAA 0xB0..0xB6 #E13.0 [7] (🪰..🪶) fly..feather
| 0xF0 0x9F 0xAA 0xB7..0xBA #E14.0 [4] (🪷..🪺) lotus..nest with eggs
| 0xF0 0x9F 0xAA 0xBB..0xBD #E15.0 [3] (🪻..🪽) hyacinth..wing
| 0xF0 0x9F 0xAA 0xBE #E0.0 [1] (🪾) <reserved-1FABE>
| 0xF0 0x9F 0xAA 0xBF #E15.0 [1] (🪿) goose
| 0xF0 0x9F 0xAB 0x80..0x82 #E13.0 [3] (🫀..🫂) anatomical heart..p...
| 0xF0 0x9F 0xAB 0x83..0x85 #E14.0 [3] (🫃..🫅) pregnant man..perso...
| 0xF0 0x9F 0xAB 0x86..0x8D #E0.0 [8] (🫆..🫍) <reserved-1FAC6>..<...
| 0xF0 0x9F 0xAB 0x8E..0x8F #E15.0 [2] (🫎..🫏) moose..donkey
| 0xF0 0x9F 0xAB 0x90..0x96 #E13.0 [7] (🫐..🫖) blueberries..teapot
| 0xF0 0x9F 0xAB 0x97..0x99 #E14.0 [3] (🫗..🫙) pouring liquid..jar
| 0xF0 0x9F 0xAB 0x9A..0x9B #E15.0 [2] (🫚..🫛) ginger root..pea pod
| 0xF0 0x9F 0xAB 0x9C..0x9F #E0.0 [4] (🫜..🫟) <reserved-1FADC>..<...
| 0xF0 0x9F 0xAB 0xA0..0xA7 #E14.0 [8] (🫠..🫧) melting face..bubbles
| 0xF0 0x9F 0xAB 0xA8 #E15.0 [1] (🫨) shaking face
| 0xF0 0x9F 0xAB 0xA9..0xAF #E0.0 [7] (🫩..🫯) <reserved-1FAE9>..<...
| 0xF0 0x9F 0xAB 0xB0..0xB6 #E14.0 [7] (🫰..🫶) hand with index fin...
| 0xF0 0x9F 0xAB 0xB7..0xB8 #E15.0 [2] (🫷..🫸) leftwards pushing h...
| 0xF0 0x9F 0xAB 0xB9..0xBF #E0.0 [7] (🫹..🫿) <reserved-1FAF9>..<...
| 0xF0 0x9F 0xB0 0x80..0xFF #E0.0[1022] (🰀..🿽) <reserved-1FC...
| 0xF0 0x9F 0xB1..0xBE 0x00..0xFF #
| 0xF0 0x9F 0xBF 0x00..0xBD #
;
}%%

View File

@@ -0,0 +1,8 @@
package textseg
//go:generate go run make_tables.go -output tables.go
//go:generate go run make_test_tables.go -output tables_test.go
//go:generate ruby unicode2ragel.rb --url=https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt -m GraphemeCluster -p "Prepend,CR,LF,Control,Extend,Regional_Indicator,SpacingMark,L,V,T,LV,LVT,ZWJ" -o grapheme_clusters_table.rl
//go:generate ruby unicode2ragel.rb --url=https://www.unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt -m Emoji -p "Extended_Pictographic" -o emoji_table.rl
//go:generate ragel -Z grapheme_clusters.rl
//go:generate gofmt -w grapheme_clusters.go

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,133 @@
package textseg
import (
"errors"
"unicode/utf8"
)
// Generated from grapheme_clusters.rl. DO NOT EDIT
%%{
# (except you are actually in grapheme_clusters.rl here, so edit away!)
machine graphclust;
write data;
}%%
var Error = errors.New("invalid UTF8 text")
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on grapheme cluster boundaries.
func ScanGraphemeClusters(data []byte, atEOF bool) (int, []byte, error) {
if len(data) == 0 {
return 0, nil, nil
}
// Ragel state
cs := 0 // Current State
p := 0 // "Pointer" into data
pe := len(data) // End-of-data "pointer"
ts := 0
te := 0
act := 0
eof := pe
// Make Go compiler happy
_ = ts
_ = te
_ = act
_ = eof
startPos := 0
endPos := 0
%%{
include GraphemeCluster "grapheme_clusters_table.rl";
include Emoji "emoji_table.rl";
action start {
startPos = p
}
action end {
endPos = p
}
action emit {
return endPos+1, data[startPos:endPos+1], nil
}
ZWJGlue = ZWJ (Extended_Pictographic Extend*)?;
AnyExtender = Extend | ZWJGlue | SpacingMark;
Extension = AnyExtender*;
ReplacementChar = (0xEF 0xBF 0xBD);
CRLFSeq = CR LF;
ControlSeq = Control | ReplacementChar;
HangulSeq = (
L+ (((LV? V+ | LVT) T*)?|LV?) |
LV V* T* |
V+ T* |
LVT T* |
T+
) Extension;
EmojiSeq = Extended_Pictographic Extend* Extension;
ZWJSeq = ZWJ (ZWJ | Extend | SpacingMark)*;
EmojiFlagSeq = Regional_Indicator Regional_Indicator? Extension;
UTF8Cont = 0x80 .. 0xBF;
AnyUTF8 = (
0x00..0x7F |
0xC0..0xDF . UTF8Cont |
0xE0..0xEF . UTF8Cont . UTF8Cont |
0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
);
# OtherSeq is any character that isn't at the start of one of the extended sequences above, followed by extension
OtherSeq = (AnyUTF8 - (CR|LF|Control|ReplacementChar|L|LV|V|LVT|T|Extended_Pictographic|ZWJ|Regional_Indicator|Prepend)) (Extend | ZWJ | SpacingMark)*;
# PrependSeq is prepend followed by any of the other patterns above, except control characters which explicitly break
PrependSeq = Prepend+ (HangulSeq|EmojiSeq|ZWJSeq|EmojiFlagSeq|OtherSeq)?;
CRLFTok = CRLFSeq >start @end;
ControlTok = ControlSeq >start @end;
HangulTok = HangulSeq >start @end;
EmojiTok = EmojiSeq >start @end;
ZWJTok = ZWJSeq >start @end;
EmojiFlagTok = EmojiFlagSeq >start @end;
OtherTok = OtherSeq >start @end;
PrependTok = PrependSeq >start @end;
main := |*
CRLFTok => emit;
ControlTok => emit;
HangulTok => emit;
EmojiTok => emit;
ZWJTok => emit;
EmojiFlagTok => emit;
PrependTok => emit;
OtherTok => emit;
# any single valid UTF-8 character would also be valid per spec,
# but we'll handle that separately after the loop so we can deal
# with requesting more bytes if we're not at EOF.
*|;
write init;
write exec;
}%%
// If we fall out here then we were unable to complete a sequence.
// If we weren't able to complete a sequence then either we've
// reached the end of a partial buffer (so there's more data to come)
// or we have an isolated symbol that would normally be part of a
// grapheme cluster but has appeared in isolation here.
if !atEOF {
// Request more
return 0, nil, nil
}
// Just take the first UTF-8 sequence and return that.
_, seqLen := utf8.DecodeRune(data)
return seqLen, data[:seqLen], nil
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,335 @@
#!/usr/bin/env ruby
#
# This scripted has been updated to accept more command-line arguments:
#
# -u, --url URL to process
# -m, --machine Machine name
# -p, --properties Properties to add to the machine
# -o, --output Write output to file
#
# Updated by: Marty Schoch <marty.schoch@gmail.com>
#
# This script uses the unicode spec to generate a Ragel state machine
# that recognizes unicode alphanumeric characters. It generates 5
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
# Currently supported encodings are UTF-8 [default] and UCS-4.
#
# Usage: unicode2ragel.rb [options]
# -e, --encoding [ucs4 | utf8] Data encoding
# -h, --help Show this message
#
# This script was originally written as part of the Ferret search
# engine library.
#
# Author: Rakan El-Khalil <rakan@well.com>
require 'optparse'
require 'open-uri'
ENCODINGS = [ :utf8, :ucs4 ]
ALPHTYPES = { :utf8 => "byte", :ucs4 => "rune" }
DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
DEFAULT_MACHINE_NAME= "WChar"
###
# Display vars & default option
TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8
@chart_url = DEFAULT_CHART_URL
machine_name = DEFAULT_MACHINE_NAME
properties = []
@output = $stdout
###
# Option parsing
cli_opts = OptionParser.new do |opts|
opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
@encoding = o.downcase.to_sym
end
opts.on("-h", "--help", "Show this message") do
puts opts
exit
end
opts.on("-u", "--url URL", "URL to process") do |o|
@chart_url = o
end
opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o|
machine_name = o
end
opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o|
properties = o
end
opts.on("-o", "--output FILE", "output file") do |o|
@output = File.new(o, "w+")
end
end
cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
puts "Invalid encoding: #{@encoding}"
puts cli_opts
exit
end
##
# Downloads the document at url and yields every alpha line's hex
# range and description.
def each_alpha( url, property )
URI.open( url ) do |file|
file.each_line do |line|
next if line =~ /^#/;
next if line !~ /; #{property} *#/;
range, description = line.split(/;/)
range.strip!
description.gsub!(/.*#/, '').strip!
if range =~ /\.\./
start, stop = range.split '..'
else start = stop = range
end
yield start.hex .. stop.hex, description
end
end
end
###
# Formats to hex at minimum width
def to_hex( n )
r = "%0X" % n
r = "0#{r}" unless (r.length % 2).zero?
r
end
###
# UCS4 is just a straight hex conversion of the unicode codepoint.
def to_ucs4( range )
rangestr = "0x" + to_hex(range.begin)
rangestr << "..0x" + to_hex(range.end) if range.begin != range.end
[ rangestr ]
end
##
# 0x00 - 0x7f -> 0zzzzzzz[7]
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
def to_utf8_enc( n )
r = 0
if n <= 0x7f
r = n
elsif n <= 0x7ff
y = 0xc0 | (n >> 6)
z = 0x80 | (n & 0x3f)
r = y << 8 | z
elsif n <= 0xffff
x = 0xe0 | (n >> 12)
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = x << 16 | y << 8 | z
elsif n <= 0x10ffff
w = 0xf0 | (n >> 18)
x = 0x80 | (n >> 12) & 0x3f
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = w << 24 | x << 16 | y << 8 | z
end
to_hex(r)
end
def from_utf8_enc( n )
n = n.hex
r = 0
if n <= 0x7f
r = n
elsif n <= 0xdfff
y = (n >> 8) & 0x1f
z = n & 0x3f
r = y << 6 | z
elsif n <= 0xefffff
x = (n >> 16) & 0x0f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = x << 10 | y << 6 | z
elsif n <= 0xf7ffffff
w = (n >> 24) & 0x07
x = (n >> 16) & 0x3f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = w << 18 | x << 12 | y << 6 | z
end
r
end
###
# Given a range, splits it up into ranges that can be continuously
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
# This is not strictly needed since the current [5.1] unicode standard
# doesn't have ranges that straddle utf8 boundaries. This is included
# for completeness as there is no telling if that will ever change.
def utf8_ranges( range )
ranges = []
UTF8_BOUNDARIES.each do |max|
if range.begin <= max
if range.end <= max
ranges << range
return ranges
end
ranges << (range.begin .. max)
range = (max + 1) .. range.end
end
end
ranges
end
def build_range( start, stop )
size = start.size/2
left = size - 1
return [""] if size < 1
a = start[0..1]
b = stop[0..1]
###
# Shared prefix
if a == b
return build_range(start[2..-1], stop[2..-1]).map do |elt|
"0x#{a} " + elt
end
end
###
# Unshared prefix, end of run
return ["0x#{a}..0x#{b} "] if left.zero?
###
# Unshared prefix, not end of run
# Range can be 0x123456..0x56789A
# Which is equivalent to:
# 0x123456 .. 0x12FFFF
# 0x130000 .. 0x55FFFF
# 0x560000 .. 0x56789A
ret = []
ret << build_range(start, a + "FF" * left)
###
# Only generate middle range if need be.
if a.hex+1 != b.hex
max = to_hex(b.hex - 1)
max = "FF" if b == "FF"
ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
end
###
# Don't generate last range if it is covered by first range
ret << build_range(b + "00" * left, stop) unless b == "FF"
ret.flatten!
end
def to_utf8( range )
utf8_ranges( range ).map do |r|
begin_enc = to_utf8_enc(r.begin)
end_enc = to_utf8_enc(r.end)
build_range begin_enc, end_enc
end.flatten!
end
##
# Perform a 3-way comparison of the number of codepoints advertised by
# the unicode spec for the given range, the originally parsed range,
# and the resulting utf8 encoded range.
def count_codepoints( code )
code.split(' ').inject(1) do |acc, elt|
if elt =~ /0x(.+)\.\.0x(.+)/
if @encoding == :utf8
acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1)
else
acc * ($2.hex - $1.hex + 1)
end
else
acc
end
end
end
def is_valid?( range, desc, codes )
spec_count = 1
spec_count = $1.to_i if desc =~ /\[(\d+)\]/
range_count = range.end - range.begin + 1
sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) }
sum == spec_count and sum == range_count
end
##
# Generate the state maching to stdout
def generate_machine( name, property )
pipe = " "
@output.puts " #{name} = "
each_alpha( @chart_url, property ) do |range, desc|
codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)
#raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
# is_valid? range, desc, codes
range_width = codes.map { |a| a.size }.max
range_width = RANGE_WIDTH if range_width < RANGE_WIDTH
desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11
desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH
if desc.size > desc_width
desc = desc[0..desc_width - 4] + "..."
end
codes.each_with_index do |r, idx|
desc = "" unless idx.zero?
code = "%-#{range_width}s" % r
@output.puts " #{pipe} #{code} ##{desc}"
pipe = "|"
end
end
@output.puts " ;"
@output.puts ""
end
@output.puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{@chart_url}
#
# It defines #{properties}.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.
%%{
machine #{machine_name};
EOF
properties.each { |x| generate_machine( x, x ) }
@output.puts <<EOF
}%%
EOF

View File

@@ -0,0 +1,19 @@
package textseg
import "unicode/utf8"
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on UTF8 sequence boundaries.
//
// This is included largely for completeness, since this behavior is already
// built in to Go when ranging over a string.
func ScanUTF8Sequences(data []byte, atEOF bool) (int, []byte, error) {
if len(data) == 0 {
return 0, nil, nil
}
r, seqLen := utf8.DecodeRune(data)
if r == utf8.RuneError && !atEOF {
return 0, nil, nil
}
return seqLen, data[:seqLen], nil
}

13
vendor/github.com/araddon/dateparse/.travis.yml generated vendored Normal file
View File

@@ -0,0 +1,13 @@
language: go
go:
- 1.13.x
before_install:
- go get -t -v ./...
script:
- go test -race -coverprofile=coverage.txt -covermode=atomic
after_success:
- bash <(curl -s https://codecov.io/bash)

21
vendor/github.com/araddon/dateparse/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2015-2017 Aaron Raddon
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

323
vendor/github.com/araddon/dateparse/README.md generated vendored Normal file
View File

@@ -0,0 +1,323 @@
Go Date Parser
---------------------------
Parse many date strings without knowing format in advance. Uses a scanner to read bytes and use a state machine to find format. Much faster than shotgun based parse methods. See [bench_test.go](https://github.com/araddon/dateparse/blob/master/bench_test.go) for performance comparison.
[![Code Coverage](https://codecov.io/gh/araddon/dateparse/branch/master/graph/badge.svg)](https://codecov.io/gh/araddon/dateparse)
[![GoDoc](https://godoc.org/github.com/araddon/dateparse?status.svg)](http://godoc.org/github.com/araddon/dateparse)
[![Build Status](https://travis-ci.org/araddon/dateparse.svg?branch=master)](https://travis-ci.org/araddon/dateparse)
[![Go ReportCard](https://goreportcard.com/badge/araddon/dateparse)](https://goreportcard.com/report/araddon/dateparse)
**MM/DD/YYYY VS DD/MM/YYYY** Right now this uses mm/dd/yyyy WHEN ambiguous if this is not desired behavior, use `ParseStrict` which will fail on ambiguous date strings.
**Timezones** The location your server is configured affects the results! See example or https://play.golang.org/p/IDHRalIyXh and last paragraph here https://golang.org/pkg/time/#Parse.
```go
// Normal parse. Equivalent Timezone rules as time.Parse()
t, err := dateparse.ParseAny("3/1/2014")
// Parse Strict, error on ambigous mm/dd vs dd/mm dates
t, err := dateparse.ParseStrict("3/1/2014")
> returns error
// Return a string that represents the layout to parse the given date-time.
layout, err := dateparse.ParseFormat("May 8, 2009 5:57:51 PM")
> "Jan 2, 2006 3:04:05 PM"
```
cli tool for testing dateformats
----------------------------------
[Date Parse CLI](https://github.com/araddon/dateparse/blob/master/dateparse)
Extended example
-------------------
https://github.com/araddon/dateparse/blob/master/example/main.go
```go
package main
import (
"flag"
"fmt"
"time"
"github.com/scylladb/termtables"
"github.com/araddon/dateparse"
)
var examples = []string{
"May 8, 2009 5:57:51 PM",
"oct 7, 1970",
"oct 7, '70",
"oct. 7, 1970",
"oct. 7, 70",
"Mon Jan 2 15:04:05 2006",
"Mon Jan 2 15:04:05 MST 2006",
"Mon Jan 02 15:04:05 -0700 2006",
"Monday, 02-Jan-06 15:04:05 MST",
"Mon, 02 Jan 2006 15:04:05 MST",
"Tue, 11 Jul 2017 16:28:13 +0200 (CEST)",
"Mon, 02 Jan 2006 15:04:05 -0700",
"Mon 30 Sep 2018 09:09:09 PM UTC",
"Mon Aug 10 15:44:11 UTC+0100 2015",
"Thu, 4 Jan 2018 17:53:36 +0000",
"Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time)",
"Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00)",
"September 17, 2012 10:09am",
"September 17, 2012 at 10:09am PST-08",
"September 17, 2012, 10:10:09",
"October 7, 1970",
"October 7th, 1970",
"12 Feb 2006, 19:17",
"12 Feb 2006 19:17",
"14 May 2019 19:11:40.164",
"7 oct 70",
"7 oct 1970",
"03 February 2013",
"1 July 2013",
"2013-Feb-03",
// dd/Mon/yyy alpha Months
"06/Jan/2008:15:04:05 -0700",
"06/Jan/2008 15:04:05 -0700",
// mm/dd/yy
"3/31/2014",
"03/31/2014",
"08/21/71",
"8/1/71",
"4/8/2014 22:05",
"04/08/2014 22:05",
"4/8/14 22:05",
"04/2/2014 03:00:51",
"8/8/1965 12:00:00 AM",
"8/8/1965 01:00:01 PM",
"8/8/1965 01:00 PM",
"8/8/1965 1:00 PM",
"8/8/1965 12:00 AM",
"4/02/2014 03:00:51",
"03/19/2012 10:11:59",
"03/19/2012 10:11:59.3186369",
// yyyy/mm/dd
"2014/3/31",
"2014/03/31",
"2014/4/8 22:05",
"2014/04/08 22:05",
"2014/04/2 03:00:51",
"2014/4/02 03:00:51",
"2012/03/19 10:11:59",
"2012/03/19 10:11:59.3186369",
// yyyy:mm:dd
"2014:3:31",
"2014:03:31",
"2014:4:8 22:05",
"2014:04:08 22:05",
"2014:04:2 03:00:51",
"2014:4:02 03:00:51",
"2012:03:19 10:11:59",
"2012:03:19 10:11:59.3186369",
// Chinese
"2014年04月08日",
// yyyy-mm-ddThh
"2006-01-02T15:04:05+0000",
"2009-08-12T22:15:09-07:00",
"2009-08-12T22:15:09",
"2009-08-12T22:15:09.988",
"2009-08-12T22:15:09Z",
"2017-07-19T03:21:51:897+0100",
"2019-05-29T08:41-04", // no seconds, 2 digit TZ offset
// yyyy-mm-dd hh:mm:ss
"2014-04-26 17:24:37.3186369",
"2012-08-03 18:31:59.257000000",
"2014-04-26 17:24:37.123",
"2013-04-01 22:43",
"2013-04-01 22:43:22",
"2014-12-16 06:20:00 UTC",
"2014-12-16 06:20:00 GMT",
"2014-04-26 05:24:37 PM",
"2014-04-26 13:13:43 +0800",
"2014-04-26 13:13:43 +0800 +08",
"2014-04-26 13:13:44 +09:00",
"2012-08-03 18:31:59.257000000 +0000 UTC",
"2015-09-30 18:48:56.35272715 +0000 UTC",
"2015-02-18 00:12:00 +0000 GMT",
"2015-02-18 00:12:00 +0000 UTC",
"2015-02-08 03:02:00 +0300 MSK m=+0.000000001",
"2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001",
"2017-07-19 03:21:51+00:00",
"2014-04-26",
"2014-04",
"2014",
"2014-05-11 08:20:13,787",
// yyyy-mm-dd-07:00
"2020-07-20+08:00",
// mm.dd.yy
"3.31.2014",
"03.31.2014",
"08.21.71",
"2014.03",
"2014.03.30",
// yyyymmdd and similar
"20140601",
"20140722105203",
// yymmdd hh:mm:yy mysql log
// 080313 05:21:55 mysqld started
"171113 14:14:20",
// unix seconds, ms, micro, nano
"1332151919",
"1384216367189",
"1384216367111222",
"1384216367111222333",
}
var (
timezone = ""
)
func main() {
flag.StringVar(&timezone, "timezone", "UTC", "Timezone aka `America/Los_Angeles` formatted time-zone")
flag.Parse()
if timezone != "" {
// NOTE: This is very, very important to understand
// time-parsing in go
loc, err := time.LoadLocation(timezone)
if err != nil {
panic(err.Error())
}
time.Local = loc
}
table := termtables.CreateTable()
table.AddHeaders("Input", "Parsed, and Output as %v")
for _, dateExample := range examples {
t, err := dateparse.ParseLocal(dateExample)
if err != nil {
panic(err.Error())
}
table.AddRow(dateExample, fmt.Sprintf("%v", t))
}
fmt.Println(table.Render())
}
/*
+-------------------------------------------------------+-----------------------------------------+
| Input | Parsed, and Output as %v |
+-------------------------------------------------------+-----------------------------------------+
| May 8, 2009 5:57:51 PM | 2009-05-08 17:57:51 +0000 UTC |
| oct 7, 1970 | 1970-10-07 00:00:00 +0000 UTC |
| oct 7, '70 | 1970-10-07 00:00:00 +0000 UTC |
| oct. 7, 1970 | 1970-10-07 00:00:00 +0000 UTC |
| oct. 7, 70 | 1970-10-07 00:00:00 +0000 UTC |
| Mon Jan 2 15:04:05 2006 | 2006-01-02 15:04:05 +0000 UTC |
| Mon Jan 2 15:04:05 MST 2006 | 2006-01-02 15:04:05 +0000 MST |
| Mon Jan 02 15:04:05 -0700 2006 | 2006-01-02 15:04:05 -0700 -0700 |
| Monday, 02-Jan-06 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST |
| Mon, 02 Jan 2006 15:04:05 MST | 2006-01-02 15:04:05 +0000 MST |
| Tue, 11 Jul 2017 16:28:13 +0200 (CEST) | 2017-07-11 16:28:13 +0200 +0200 |
| Mon, 02 Jan 2006 15:04:05 -0700 | 2006-01-02 15:04:05 -0700 -0700 |
| Mon 30 Sep 2018 09:09:09 PM UTC | 2018-09-30 21:09:09 +0000 UTC |
| Mon Aug 10 15:44:11 UTC+0100 2015 | 2015-08-10 15:44:11 +0000 UTC |
| Thu, 4 Jan 2018 17:53:36 +0000 | 2018-01-04 17:53:36 +0000 UTC |
| Fri Jul 03 2015 18:04:07 GMT+0100 (GMT Daylight Time) | 2015-07-03 18:04:07 +0100 GMT |
| Sun, 3 Jan 2021 00:12:23 +0800 (GMT+08:00) | 2021-01-03 00:12:23 +0800 +0800 |
| September 17, 2012 10:09am | 2012-09-17 10:09:00 +0000 UTC |
| September 17, 2012 at 10:09am PST-08 | 2012-09-17 10:09:00 -0800 PST |
| September 17, 2012, 10:10:09 | 2012-09-17 10:10:09 +0000 UTC |
| October 7, 1970 | 1970-10-07 00:00:00 +0000 UTC |
| October 7th, 1970 | 1970-10-07 00:00:00 +0000 UTC |
| 12 Feb 2006, 19:17 | 2006-02-12 19:17:00 +0000 UTC |
| 12 Feb 2006 19:17 | 2006-02-12 19:17:00 +0000 UTC |
| 14 May 2019 19:11:40.164 | 2019-05-14 19:11:40.164 +0000 UTC |
| 7 oct 70 | 1970-10-07 00:00:00 +0000 UTC |
| 7 oct 1970 | 1970-10-07 00:00:00 +0000 UTC |
| 03 February 2013 | 2013-02-03 00:00:00 +0000 UTC |
| 1 July 2013 | 2013-07-01 00:00:00 +0000 UTC |
| 2013-Feb-03 | 2013-02-03 00:00:00 +0000 UTC |
| 06/Jan/2008:15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 |
| 06/Jan/2008 15:04:05 -0700 | 2008-01-06 15:04:05 -0700 -0700 |
| 3/31/2014 | 2014-03-31 00:00:00 +0000 UTC |
| 03/31/2014 | 2014-03-31 00:00:00 +0000 UTC |
| 08/21/71 | 1971-08-21 00:00:00 +0000 UTC |
| 8/1/71 | 1971-08-01 00:00:00 +0000 UTC |
| 4/8/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC |
| 04/08/2014 22:05 | 2014-04-08 22:05:00 +0000 UTC |
| 4/8/14 22:05 | 2014-04-08 22:05:00 +0000 UTC |
| 04/2/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
| 8/8/1965 12:00:00 AM | 1965-08-08 00:00:00 +0000 UTC |
| 8/8/1965 01:00:01 PM | 1965-08-08 13:00:01 +0000 UTC |
| 8/8/1965 01:00 PM | 1965-08-08 13:00:00 +0000 UTC |
| 8/8/1965 1:00 PM | 1965-08-08 13:00:00 +0000 UTC |
| 8/8/1965 12:00 AM | 1965-08-08 00:00:00 +0000 UTC |
| 4/02/2014 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
| 03/19/2012 10:11:59 | 2012-03-19 10:11:59 +0000 UTC |
| 03/19/2012 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC |
| 2014/3/31 | 2014-03-31 00:00:00 +0000 UTC |
| 2014/03/31 | 2014-03-31 00:00:00 +0000 UTC |
| 2014/4/8 22:05 | 2014-04-08 22:05:00 +0000 UTC |
| 2014/04/08 22:05 | 2014-04-08 22:05:00 +0000 UTC |
| 2014/04/2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
| 2014/4/02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
| 2012/03/19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC |
| 2012/03/19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC |
| 2014:3:31 | 2014-03-31 00:00:00 +0000 UTC |
| 2014:03:31 | 2014-03-31 00:00:00 +0000 UTC |
| 2014:4:8 22:05 | 2014-04-08 22:05:00 +0000 UTC |
| 2014:04:08 22:05 | 2014-04-08 22:05:00 +0000 UTC |
| 2014:04:2 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
| 2014:4:02 03:00:51 | 2014-04-02 03:00:51 +0000 UTC |
| 2012:03:19 10:11:59 | 2012-03-19 10:11:59 +0000 UTC |
| 2012:03:19 10:11:59.3186369 | 2012-03-19 10:11:59.3186369 +0000 UTC |
| 2014年04月08日 | 2014-04-08 00:00:00 +0000 UTC |
| 2006-01-02T15:04:05+0000 | 2006-01-02 15:04:05 +0000 UTC |
| 2009-08-12T22:15:09-07:00 | 2009-08-12 22:15:09 -0700 -0700 |
| 2009-08-12T22:15:09 | 2009-08-12 22:15:09 +0000 UTC |
| 2009-08-12T22:15:09.988 | 2009-08-12 22:15:09.988 +0000 UTC |
| 2009-08-12T22:15:09Z | 2009-08-12 22:15:09 +0000 UTC |
| 2017-07-19T03:21:51:897+0100 | 2017-07-19 03:21:51.897 +0100 +0100 |
| 2019-05-29T08:41-04 | 2019-05-29 08:41:00 -0400 -0400 |
| 2014-04-26 17:24:37.3186369 | 2014-04-26 17:24:37.3186369 +0000 UTC |
| 2012-08-03 18:31:59.257000000 | 2012-08-03 18:31:59.257 +0000 UTC |
| 2014-04-26 17:24:37.123 | 2014-04-26 17:24:37.123 +0000 UTC |
| 2013-04-01 22:43 | 2013-04-01 22:43:00 +0000 UTC |
| 2013-04-01 22:43:22 | 2013-04-01 22:43:22 +0000 UTC |
| 2014-12-16 06:20:00 UTC | 2014-12-16 06:20:00 +0000 UTC |
| 2014-12-16 06:20:00 GMT | 2014-12-16 06:20:00 +0000 UTC |
| 2014-04-26 05:24:37 PM | 2014-04-26 17:24:37 +0000 UTC |
| 2014-04-26 13:13:43 +0800 | 2014-04-26 13:13:43 +0800 +0800 |
| 2014-04-26 13:13:43 +0800 +08 | 2014-04-26 13:13:43 +0800 +0800 |
| 2014-04-26 13:13:44 +09:00 | 2014-04-26 13:13:44 +0900 +0900 |
| 2012-08-03 18:31:59.257000000 +0000 UTC | 2012-08-03 18:31:59.257 +0000 UTC |
| 2015-09-30 18:48:56.35272715 +0000 UTC | 2015-09-30 18:48:56.35272715 +0000 UTC |
| 2015-02-18 00:12:00 +0000 GMT | 2015-02-18 00:12:00 +0000 UTC |
| 2015-02-18 00:12:00 +0000 UTC | 2015-02-18 00:12:00 +0000 UTC |
| 2015-02-08 03:02:00 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00 +0300 +0300 |
| 2015-02-08 03:02:00.001 +0300 MSK m=+0.000000001 | 2015-02-08 03:02:00.001 +0300 +0300 |
| 2017-07-19 03:21:51+00:00 | 2017-07-19 03:21:51 +0000 UTC |
| 2014-04-26 | 2014-04-26 00:00:00 +0000 UTC |
| 2014-04 | 2014-04-01 00:00:00 +0000 UTC |
| 2014 | 2014-01-01 00:00:00 +0000 UTC |
| 2014-05-11 08:20:13,787 | 2014-05-11 08:20:13.787 +0000 UTC |
| 2020-07-20+08:00 | 2020-07-20 00:00:00 +0800 +0800 |
| 3.31.2014 | 2014-03-31 00:00:00 +0000 UTC |
| 03.31.2014 | 2014-03-31 00:00:00 +0000 UTC |
| 08.21.71 | 1971-08-21 00:00:00 +0000 UTC |
| 2014.03 | 2014-03-01 00:00:00 +0000 UTC |
| 2014.03.30 | 2014-03-30 00:00:00 +0000 UTC |
| 20140601 | 2014-06-01 00:00:00 +0000 UTC |
| 20140722105203 | 2014-07-22 10:52:03 +0000 UTC |
| 171113 14:14:20 | 2017-11-13 14:14:20 +0000 UTC |
| 1332151919 | 2012-03-19 10:11:59 +0000 UTC |
| 1384216367189 | 2013-11-12 00:32:47.189 +0000 UTC |
| 1384216367111222 | 2013-11-12 00:32:47.111222 +0000 UTC |
| 1384216367111222333 | 2013-11-12 00:32:47.111222333 +0000 UTC |
+-------------------------------------------------------+-----------------------------------------+
*/
```

2189
vendor/github.com/araddon/dateparse/parseany.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

23
vendor/github.com/glycerine/blake2b/README generated vendored Normal file
View File

@@ -0,0 +1,23 @@
Go implementation of BLAKE2b collision-resistant cryptographic hash function
created by Jean-Philippe Aumasson, Samuel Neves, Zooko Wilcox-O'Hearn, and
Christian Winnerlein (https://blake2.net).
INSTALLATION
$ go get github.com/dchest/blake2b
DOCUMENTATION
See http://godoc.org/github.com/dchest/blake2b
PUBLIC DOMAIN DEDICATION
Written in 2012 by Dmitry Chestnykh.
To the extent possible under law, the author have dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
http://creativecommons.org/publicdomain/zero/1.0/

299
vendor/github.com/glycerine/blake2b/blake2b.go generated vendored Normal file
View File

@@ -0,0 +1,299 @@
// Written in 2012 by Dmitry Chestnykh.
//
// To the extent possible under law, the author have dedicated all copyright
// and related and neighboring rights to this software to the public domain
// worldwide. This software is distributed without any warranty.
// http://creativecommons.org/publicdomain/zero/1.0/
// Package blake2b implements BLAKE2b cryptographic hash function.
package blake2b
import (
"encoding/binary"
"errors"
"hash"
)
const (
BlockSize = 128 // block size of algorithm
Size = 64 // maximum digest size
SaltSize = 16 // maximum salt size
PersonSize = 16 // maximum personalization string size
KeySize = 64 // maximum size of key
)
type digest struct {
h [8]uint64 // current chain value
t [2]uint64 // message bytes counter
f [2]uint64 // finalization flags
x [BlockSize]byte // buffer for data not yet compressed
nx int // number of bytes in buffer
ih [8]uint64 // initial chain value (after config)
paddedKey [BlockSize]byte // copy of key, padded with zeros
isKeyed bool // indicates whether hash was keyed
size uint8 // digest size in bytes
isLastNode bool // indicates processing of the last node in tree hashing
}
// Initialization values.
var iv = [8]uint64{
0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
0x510e527fade682d1, 0x9b05688c2b3e6c1f,
0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
}
// Config is used to configure hash function parameters and keying.
// All parameters are optional.
type Config struct {
Size uint8 // digest size (if zero, default size of 64 bytes is used)
Key []byte // key for prefix-MAC
Salt []byte // salt (if < 16 bytes, padded with zeros)
Person []byte // personalization (if < 16 bytes, padded with zeros)
Tree *Tree // parameters for tree hashing
}
// Tree represents parameters for tree hashing.
type Tree struct {
Fanout uint8 // fanout
MaxDepth uint8 // maximal depth
LeafSize uint32 // leaf maximal byte length (0 for unlimited)
NodeOffset uint64 // node offset (0 for first, leftmost or leaf)
NodeDepth uint8 // node depth (0 for leaves)
InnerHashSize uint8 // inner hash byte length
IsLastNode bool // indicates processing of the last node of layer
}
var (
defaultConfig = &Config{Size: Size}
config256 = &Config{Size: 32}
)
func verifyConfig(c *Config) error {
if c.Size > Size {
return errors.New("digest size is too large")
}
if len(c.Key) > KeySize {
return errors.New("key is too large")
}
if len(c.Salt) > SaltSize {
// Smaller salt is okay: it will be padded with zeros.
return errors.New("salt is too large")
}
if len(c.Person) > PersonSize {
// Smaller personalization is okay: it will be padded with zeros.
return errors.New("personalization is too large")
}
if c.Tree != nil {
if c.Tree.Fanout == 1 {
return errors.New("fanout of 1 is not allowed in tree mode")
}
if c.Tree.MaxDepth < 2 {
return errors.New("incorrect tree depth")
}
if c.Tree.InnerHashSize < 1 || c.Tree.InnerHashSize > Size {
return errors.New("incorrect tree inner hash size")
}
}
return nil
}
// New returns a new hash.Hash configured with the given Config.
// Config can be nil, in which case the default one is used, calculating 64-byte digest.
// Returns non-nil error if Config contains invalid parameters.
func New(c *Config) (hash.Hash, error) {
if c == nil {
c = defaultConfig
} else {
if c.Size == 0 {
// Set default size if it's zero.
c.Size = Size
}
if err := verifyConfig(c); err != nil {
return nil, err
}
}
d := new(digest)
d.initialize(c)
return d, nil
}
// initialize initializes digest with the given
// config, which must be non-nil and verified.
func (d *digest) initialize(c *Config) {
// Create parameter block.
var p [BlockSize]byte
p[0] = c.Size
p[1] = uint8(len(c.Key))
if c.Salt != nil {
copy(p[32:], c.Salt)
}
if c.Person != nil {
copy(p[48:], c.Person)
}
if c.Tree != nil {
p[2] = c.Tree.Fanout
p[3] = c.Tree.MaxDepth
binary.LittleEndian.PutUint32(p[4:], c.Tree.LeafSize)
binary.LittleEndian.PutUint64(p[8:], c.Tree.NodeOffset)
p[16] = c.Tree.NodeDepth
p[17] = c.Tree.InnerHashSize
} else {
p[2] = 1
p[3] = 1
}
// Initialize.
d.size = c.Size
for i := 0; i < 8; i++ {
d.h[i] = iv[i] ^ binary.LittleEndian.Uint64(p[i*8:])
}
if c.Tree != nil && c.Tree.IsLastNode {
d.isLastNode = true
}
// Process key.
if c.Key != nil {
copy(d.paddedKey[:], c.Key)
d.Write(d.paddedKey[:])
d.isKeyed = true
}
// Save a copy of initialized state.
copy(d.ih[:], d.h[:])
}
// New512 returns a new hash.Hash computing the BLAKE2b 64-byte checksum.
func New512() hash.Hash {
d := new(digest)
d.initialize(defaultConfig)
return d
}
// New256 returns a new hash.Hash computing the BLAKE2b 32-byte checksum.
func New256() hash.Hash {
d := new(digest)
d.initialize(config256)
return d
}
// NewMAC returns a new hash.Hash computing BLAKE2b prefix-
// Message Authentication Code of the given size in bytes
// (up to 64) with the given key (up to 64 bytes in length).
func NewMAC(outBytes uint8, key []byte) hash.Hash {
d, err := New(&Config{Size: outBytes, Key: key})
if err != nil {
panic(err.Error())
}
return d
}
// Reset resets the state of digest to the initial state
// after configuration and keying.
func (d *digest) Reset() {
copy(d.h[:], d.ih[:])
d.t[0] = 0
d.t[1] = 0
d.f[0] = 0
d.f[1] = 0
d.nx = 0
if d.isKeyed {
d.Write(d.paddedKey[:])
}
}
// Size returns the digest size in bytes.
func (d *digest) Size() int { return int(d.size) }
// BlockSize returns the algorithm block size in bytes.
func (d *digest) BlockSize() int { return BlockSize }
func (d *digest) Write(p []byte) (nn int, err error) {
nn = len(p)
left := BlockSize - d.nx
if len(p) > left {
// Process buffer.
copy(d.x[d.nx:], p[:left])
p = p[left:]
blocks(d, d.x[:])
d.nx = 0
}
// Process full blocks except for the last one.
if len(p) > BlockSize {
n := len(p) &^ (BlockSize - 1)
if n == len(p) {
n -= BlockSize
}
blocks(d, p[:n])
p = p[n:]
}
// Fill buffer.
d.nx += copy(d.x[d.nx:], p)
return
}
// Sum returns the calculated checksum.
func (d0 *digest) Sum(in []byte) []byte {
// Make a copy of d0 so that caller can keep writing and summing.
d := *d0
hash := d.checkSum()
return append(in, hash[:d.size]...)
}
func (d *digest) checkSum() [Size]byte {
// Do not create unnecessary copies of the key.
if d.isKeyed {
for i := 0; i < len(d.paddedKey); i++ {
d.paddedKey[i] = 0
}
}
dec := BlockSize - uint64(d.nx)
if d.t[0] < dec {
d.t[1]--
}
d.t[0] -= dec
// Pad buffer with zeros.
for i := d.nx; i < len(d.x); i++ {
d.x[i] = 0
}
// Set last block flag.
d.f[0] = 0xffffffffffffffff
if d.isLastNode {
d.f[1] = 0xffffffffffffffff
}
// Compress last block.
blocks(d, d.x[:])
var out [Size]byte
j := 0
for _, s := range d.h[:(d.size-1)/8+1] {
out[j+0] = byte(s >> 0)
out[j+1] = byte(s >> 8)
out[j+2] = byte(s >> 16)
out[j+3] = byte(s >> 24)
out[j+4] = byte(s >> 32)
out[j+5] = byte(s >> 40)
out[j+6] = byte(s >> 48)
out[j+7] = byte(s >> 56)
j += 8
}
return out
}
// Sum512 returns a 64-byte BLAKE2b hash of data.
func Sum512(data []byte) [64]byte {
var d digest
d.initialize(defaultConfig)
d.Write(data)
return d.checkSum()
}
// Sum256 returns a 32-byte BLAKE2b hash of data.
func Sum256(data []byte) (out [32]byte) {
var d digest
d.initialize(config256)
d.Write(data)
sum := d.checkSum()
copy(out[:], sum[:32])
return
}

1420
vendor/github.com/glycerine/blake2b/block.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

25
vendor/github.com/glycerine/greenpack/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,25 @@
MIT License
Portions Copyright (c) 2016 Jason E. Aten
Portions Copyright (c) 2014 Philip Hofer
Portions Copyright (c) 2009 The Go Authors (license at http://golang.org) where indicated
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the
following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.

View File

@@ -0,0 +1,24 @@
// +build linux,!appengine
package msgp
import (
"os"
"syscall"
)
func adviseRead(mem []byte) {
syscall.Madvise(mem, syscall.MADV_SEQUENTIAL|syscall.MADV_WILLNEED)
}
func adviseWrite(mem []byte) {
syscall.Madvise(mem, syscall.MADV_SEQUENTIAL)
}
func fallocate(f *os.File, sz int64) error {
err := syscall.Fallocate(int(f.Fd()), 0, 0, sz)
if err == syscall.ENOTSUP {
return f.Truncate(sz)
}
return err
}

View File

@@ -0,0 +1,17 @@
// +build !linux appengine
package msgp
import (
"os"
)
// TODO: darwin, BSD support
func adviseRead(mem []byte) {}
func adviseWrite(mem []byte) {}
func fallocate(f *os.File, sz int64) error {
return f.Truncate(sz)
}

View File

@@ -0,0 +1,15 @@
// +build appengine
package msgp
// let's just assume appengine
// uses 64-bit hardware...
const smallint = false
func UnsafeString(b []byte) string {
return string(b)
}
func UnsafeBytes(s string) []byte {
return []byte(s)
}

39
vendor/github.com/glycerine/greenpack/msgp/circular.go generated vendored Normal file
View File

@@ -0,0 +1,39 @@
package msgp
type timer interface {
StartTimer()
StopTimer()
}
// EndlessReader is an io.Reader
// that loops over the same data
// endlessly. It is used for benchmarking.
type EndlessReader struct {
tb timer
data []byte
offset int
}
// NewEndlessReader returns a new endless reader
func NewEndlessReader(b []byte, tb timer) *EndlessReader {
return &EndlessReader{tb: tb, data: b, offset: 0}
}
// Read implements io.Reader. In practice, it
// always returns (len(p), nil), although it
// fills the supplied slice while the benchmark
// timer is stopped.
func (c *EndlessReader) Read(p []byte) (int, error) {
c.tb.StopTimer()
var n int
l := len(p)
m := len(c.data)
for n < l {
nn := copy(p[n:], c.data[c.offset:])
n += nn
c.offset += nn
c.offset %= m
}
c.tb.StartTimer()
return n, nil
}

37
vendor/github.com/glycerine/greenpack/msgp/clue.go generated vendored Normal file
View File

@@ -0,0 +1,37 @@
package msgp
import (
"fmt"
"strconv"
"strings"
)
func Clue2Field(name string, clue string, zid int64) string {
if zid >= 0 {
return fmt.Sprintf("%s_zid%02d_%s", name, zid, clue)
}
// handle the missing zid, and don't write -1 as the zid.
return fmt.Sprintf("%s__%s", name, clue)
}
func Field2Clue(fieldname string) (name string, clue string, zid int64, err error) {
parts := strings.Split(fieldname, "_")
n := len(parts)
if n < 3 {
err = fmt.Errorf("too few underscore (expect at least two) in fieldname '%s'", fieldname)
return
}
clue = parts[n-1]
if strings.HasPrefix(parts[n-2], "zid") {
tmp, err2 := strconv.Atoi(parts[n-2][3:])
if err2 == nil {
zid = int64(tmp)
} else {
err = fmt.Errorf("problem parsing out _zid field in fieldname '%s': '%v'", fieldname, err2)
return
}
}
used := len(parts[n-1]) + len(parts[n-2]) + 2
name = fieldname[:len(fieldname)-used]
return
}

146
vendor/github.com/glycerine/greenpack/msgp/dedup.go generated vendored Normal file
View File

@@ -0,0 +1,146 @@
package msgp
import (
"fmt"
"reflect"
)
var _ = fmt.Printf
// Methods for deduplicating repeated occurances of the same pointer.
//
// When writing, we track the sequence of pointers written.
// When we see a duplicate pointer, we write the special
// extension "duplicate" value along with the pointer's
// occurance order in the serialization.
//
// As we read back, we keep a count that increments for every
// pointer we read, and we save a map from the count to the pointer.
// When we encounter a value that is the special value indicating reuse, then
// we refer back to the pointer k (k being found within the special extension value)
// and we plug in the k-th pointer instead.
// writer then reader methods
// ===============================
// ===============================
// Writer methods
// ===============================
// ===============================
func (mw *Writer) DedupReset() {
mw.ptrWrit = make(map[interface{}]int)
mw.ptrCountNext = 0
}
// diagnostic
func (mw *Writer) DedupPointerCount() int {
return len(mw.ptrWrit)
}
// upon writing each pointer, first check if it is a duplicate;
// i.e. appears more than once, pointing to the same object.
func (mw *Writer) DedupWriteIsDup(v interface{}) (res bool, err error) {
defer func() {
// This recover allows test 911 (_generated/gen_test.go:67) to run green.
// It turns indexing by []byte msgp.Raw into a no-op. Which it
// should be.
if recover() != nil {
return
}
}()
if v == nil || reflect.ValueOf(v).IsNil() {
return false, nil
}
k, dup := mw.ptrWrit[v]
if !dup {
mw.ptrWrit[v] = mw.ptrCountNext
//fmt.Printf("\n\n $$$ NOT dup write %p -> k=%v / %#v\n\n", v, mw.ptrCountNext, v)
mw.ptrCountNext++
return false, nil
} else {
//fmt.Printf("\n\n $$$ DUP write %p -> k=%v / %#v\n\n", v, k, v)
}
return true, mw.DedupWriteExt(k)
}
// write DedupExtension with k integer count
// of the pointer that is duplicated here. k is
// runtime appearance order.
func (mw *Writer) DedupWriteExt(k int) error {
var by [8]byte
kby := AppendInt(by[:0], k)
ext := RawExtension{
Data: kby,
Type: DedupExtension,
}
return mw.WriteExtension(&ext)
}
// =============================
// =============================
// Reader side
// =============================
// =============================
func (m *Reader) DedupReadExt() (int, error) {
ext := RawExtension{
Type: DedupExtension,
}
err := m.ReadExtension(&ext)
if err != nil {
return -1, err
}
var nbs NilBitsStack
k, _, err := nbs.ReadIntBytes(ext.Data)
if err != nil {
return -1, err
}
return k, nil
}
func (r *Reader) DedupReset() {
r.dedupPointers = r.dedupPointers[:0]
}
func (r *Reader) DedupPtr(k int) interface{} {
if k >= 0 && k < len(r.dedupPointers) {
return r.dedupPointers[k]
}
panic(fmt.Sprintf("Reader.DedupPtr requested for k=%v but that was out of range! (avail=%v)", k, len(r.dedupPointers)))
return nil
}
func (m *Reader) DedupIndexEachPtr(ptr interface{}) {
//fmt.Printf("\n DedupIndexEachPtr called for ptr=%p/%T/val='%#v'\n", ptr, ptr, ptr)
if ptr == nil {
return
}
va := reflect.ValueOf(ptr)
if va.IsNil() {
return
}
m.dedupPointers = append(m.dedupPointers, ptr)
//fmt.Printf("\n\n *** Reader.DedupIndexEachPtr stored ptr '%#v', as sequence k=%v\n\n", ptr, len(m.dedupPointers)-1)
}
func (m *Reader) DedupReadIsDup(field, typeTarget string) (iface interface{}, res bool) {
//fmt.Printf("\n+++ Reader.DedupReadIsDup(field:'%s', type:'%s') starting.\n", field, typeTarget)
//defer func() {
// fmt.Printf("\n^^^ Reader.DedupReadIsDup() returning res=%v\n", res)
//}()
typ, err := m.peekExtensionType()
if err != nil {
return nil, false
}
if typ != DedupExtension {
return nil, false
}
k, err := m.DedupReadExt()
if err != nil {
return nil, false
}
ptr := m.DedupPtr(k)
//fmt.Printf("\n m.DedupReadIsDup() substituting, ptr= %p b/c read k=%v\n", ptr, k)
return ptr, true
}

142
vendor/github.com/glycerine/greenpack/msgp/defs.go generated vendored Normal file
View File

@@ -0,0 +1,142 @@
// This package is the support library for the greenpack code generator (http://github.com/glycerine/greenpack).
//
// This package defines the utilites used by the greenpack code generator for encoding and decoding MessagePack
// from []byte and io.Reader/io.Writer types. Much of this package is devoted to helping the greenpack code
// generator implement the Marshaler/Unmarshaler and Encodable/Decodable interfaces.
//
// This package defines four "families" of functions:
// - AppendXxxx() appends an object to a []byte in MessagePack encoding.
// - ReadXxxxBytes() reads an object from a []byte and returns the remaining bytes.
// - (*Writer).WriteXxxx() writes an object to the buffered *Writer type.
// - (*Reader).ReadXxxx() reads an object from a buffered *Reader type.
//
// Once a type has satisfied the `Encodable` and `Decodable` interfaces,
// it can be written and read from arbitrary `io.Writer`s and `io.Reader`s using
// msgp.Encode(io.Writer, msgp.Encodable)
// and
// msgp.Decode(io.Reader, msgp.Decodable)
//
// There are also methods for converting MessagePack to JSON without
// an explicit de-serialization step.
//
// For additional tips, tricks, and gotchas, please visit
// the wiki at http://github.com/glycerine/greenpack
package msgp
const last4 = 0x0f
const first4 = 0xf0
const last5 = 0x1f
const first3 = 0xe0
const last7 = 0x7f
func isfixint(b byte) bool {
return b>>7 == 0
}
func isnfixint(b byte) bool {
return b&first3 == mnfixint
}
func isfixmap(b byte) bool {
return b&first4 == mfixmap
}
func isfixarray(b byte) bool {
return b&first4 == mfixarray
}
func isfixstr(b byte) bool {
return b&first3 == mfixstr
}
func wfixint(u uint8) byte {
return u & last7
}
func rfixint(b byte) uint8 {
return b
}
func wnfixint(i int8) byte {
return byte(i) | mnfixint
}
func rnfixint(b byte) int8 {
return int8(b)
}
func rfixmap(b byte) uint8 {
return b & last4
}
func wfixmap(u uint8) byte {
return mfixmap | (u & last4)
}
func rfixstr(b byte) uint8 {
return b & last5
}
func wfixstr(u uint8) byte {
return (u & last5) | mfixstr
}
func rfixarray(b byte) uint8 {
return (b & last4)
}
func wfixarray(u uint8) byte {
return (u & last4) | mfixarray
}
// These are all the byte
// prefixes defined by the
// msgpack standard
const (
// 0XXXXXXX
mfixint uint8 = 0x00
// 111XXXXX
mnfixint uint8 = 0xe0
// 1000XXXX
mfixmap uint8 = 0x80
// 1001XXXX
mfixarray uint8 = 0x90
// 101XXXXX
mfixstr uint8 = 0xa0
mnil uint8 = 0xc0
mfalse uint8 = 0xc2
mtrue uint8 = 0xc3
mbin8 uint8 = 0xc4
mbin16 uint8 = 0xc5
mbin32 uint8 = 0xc6
mext8 uint8 = 0xc7
mext16 uint8 = 0xc8
mext32 uint8 = 0xc9
mfloat32 uint8 = 0xca
mfloat64 uint8 = 0xcb
muint8 uint8 = 0xcc
muint16 uint8 = 0xcd
muint32 uint8 = 0xce
muint64 uint8 = 0xcf
mint8 uint8 = 0xd0
mint16 uint8 = 0xd1
mint32 uint8 = 0xd2
mint64 uint8 = 0xd3
mfixext1 uint8 = 0xd4
mfixext2 uint8 = 0xd5
mfixext4 uint8 = 0xd6
mfixext8 uint8 = 0xd7
mfixext16 uint8 = 0xd8
mstr8 uint8 = 0xd9
mstr16 uint8 = 0xda
mstr32 uint8 = 0xdb
marray16 uint8 = 0xdc
marray32 uint8 = 0xdd
mmap16 uint8 = 0xde
mmap32 uint8 = 0xdf
)

245
vendor/github.com/glycerine/greenpack/msgp/edit.go generated vendored Normal file
View File

@@ -0,0 +1,245 @@
package msgp
import (
"math"
)
// Locate returns a []byte pointing to the field
// in a messagepack map with the provided key. (The returned []byte
// points to a sub-slice of 'raw'; Locate does no allocations.) If the
// key doesn't exist in the map, a zero-length []byte will be returned.
func Locate(key string, raw []byte) []byte {
s, n := locate(raw, key)
return raw[s:n]
}
// Replace takes a key ("key") in a messagepack map ("raw")
// and replaces its value with the one provided and returns
// the new []byte. The returned []byte may point to the same
// memory as "raw". Replace makes no effort to evaluate the validity
// of the contents of 'val'. It may use up to the full capacity of 'raw.'
// Replace returns 'nil' if the field doesn't exist or if the object in 'raw'
// is not a map.
func Replace(key string, raw []byte, val []byte) []byte {
start, end := locate(raw, key)
if start == end {
return nil
}
return replace(raw, start, end, val, true)
}
// CopyReplace works similarly to Replace except that the returned
// byte slice does not point to the same memory as 'raw'. CopyReplace
// returns 'nil' if the field doesn't exist or 'raw' isn't a map.
func CopyReplace(key string, raw []byte, val []byte) []byte {
start, end := locate(raw, key)
if start == end {
return nil
}
return replace(raw, start, end, val, false)
}
// Remove removes a key-value pair from 'raw'. It returns
// 'raw' unchanged if the key didn't exist.
func Remove(key string, raw []byte) []byte {
start, end := locateKV(raw, key)
if start == end {
return raw
}
raw = raw[:start+copy(raw[start:], raw[end:])]
return resizeMap(raw, -1)
}
// HasKey returns whether the map in 'raw' has
// a field with key 'key'
func HasKey(key string, raw []byte) bool {
var nbs *NilBitsStack
sz, bts, err := nbs.ReadMapHeaderBytes(raw)
if err != nil {
return false
}
var field []byte
for i := uint32(0); i < sz; i++ {
field, bts, err = nbs.ReadStringZC(bts)
if err != nil {
return false
}
if UnsafeString(field) == key {
return true
}
}
return false
}
func replace(raw []byte, start int, end int, val []byte, inplace bool) []byte {
ll := end - start // length of segment to replace
lv := len(val)
if inplace {
extra := lv - ll
// fastest case: we're doing
// a 1:1 replacement
if extra == 0 {
copy(raw[start:], val)
return raw
} else if extra < 0 {
// 'val' smaller than replaced value
// copy in place and shift back
x := copy(raw[start:], val)
y := copy(raw[start+x:], raw[end:])
return raw[:start+x+y]
} else if extra < cap(raw)-len(raw) {
// 'val' less than (cap-len) extra bytes
// copy in place and shift forward
raw = raw[0 : len(raw)+extra]
// shift end forward
copy(raw[end+extra:], raw[end:])
copy(raw[start:], val)
return raw
}
}
// we have to allocate new space
out := make([]byte, len(raw)+len(val)-ll)
x := copy(out, raw[:start])
y := copy(out[x:], val)
copy(out[x+y:], raw[end:])
return out
}
// locate does a naive O(n) search for the map key; returns start, end
// (returns 0,0 on error)
func locate(raw []byte, key string) (start int, end int) {
var (
sz uint32
bts []byte
field []byte
err error
)
var nbs *NilBitsStack
sz, bts, err = nbs.ReadMapHeaderBytes(raw)
if err != nil {
return
}
// loop and locate field
for i := uint32(0); i < sz; i++ {
field, bts, err = nbs.ReadStringZC(bts)
if err != nil {
return 0, 0
}
if UnsafeString(field) == key {
// start location
l := len(raw)
start = l - len(bts)
bts, err = Skip(bts)
if err != nil {
return 0, 0
}
end = l - len(bts)
return
}
bts, err = Skip(bts)
if err != nil {
return 0, 0
}
}
return 0, 0
}
// locate key AND value
func locateKV(raw []byte, key string) (start int, end int) {
var nbs *NilBitsStack
var (
sz uint32
bts []byte
field []byte
err error
)
sz, bts, err = nbs.ReadMapHeaderBytes(raw)
if err != nil {
return 0, 0
}
for i := uint32(0); i < sz; i++ {
tmp := len(bts)
field, bts, err = nbs.ReadStringZC(bts)
if err != nil {
return 0, 0
}
if UnsafeString(field) == key {
start = len(raw) - tmp
bts, err = Skip(bts)
if err != nil {
return 0, 0
}
end = len(raw) - len(bts)
return
}
bts, err = Skip(bts)
if err != nil {
return 0, 0
}
}
return 0, 0
}
// delta is delta on map size
func resizeMap(raw []byte, delta int64) []byte {
var sz int64
switch raw[0] {
case mmap16:
sz = int64(big.Uint16(raw[1:]))
if sz+delta <= math.MaxUint16 {
big.PutUint16(raw[1:], uint16(sz+delta))
return raw
}
if cap(raw)-len(raw) >= 2 {
raw = raw[0 : len(raw)+2]
copy(raw[5:], raw[3:])
big.PutUint32(raw[1:], uint32(sz+delta))
return raw
}
n := make([]byte, 0, len(raw)+5)
n = AppendMapHeader(n, uint32(sz+delta))
return append(n, raw[3:]...)
case mmap32:
sz = int64(big.Uint32(raw[1:]))
big.PutUint32(raw[1:], uint32(sz+delta))
return raw
default:
sz = int64(rfixmap(raw[0]))
if sz+delta < 16 {
raw[0] = wfixmap(uint8(sz + delta))
return raw
} else if sz+delta <= math.MaxUint16 {
if cap(raw)-len(raw) >= 2 {
raw = raw[0 : len(raw)+2]
copy(raw[3:], raw[1:])
raw[0] = mmap16
big.PutUint16(raw[1:], uint16(sz+delta))
return raw
}
n := make([]byte, 0, len(raw)+5)
n = AppendMapHeader(n, uint32(sz+delta))
return append(n, raw[1:]...)
}
if cap(raw)-len(raw) >= 4 {
raw = raw[0 : len(raw)+4]
copy(raw[5:], raw[1:])
raw[0] = mmap32
big.PutUint32(raw[1:], uint32(sz+delta))
return raw
}
n := make([]byte, 0, len(raw)+5)
n = AppendMapHeader(n, uint32(sz+delta))
return append(n, raw[1:]...)
}
}

99
vendor/github.com/glycerine/greenpack/msgp/elsize.go generated vendored Normal file
View File

@@ -0,0 +1,99 @@
package msgp
// size of every object on the wire,
// plus type information. gives us
// constant-time type information
// for traversing composite objects.
//
var sizes = [256]bytespec{
mnil: {size: 1, extra: constsize, typ: NilType},
mfalse: {size: 1, extra: constsize, typ: BoolType},
mtrue: {size: 1, extra: constsize, typ: BoolType},
mbin8: {size: 2, extra: extra8, typ: BinType},
mbin16: {size: 3, extra: extra16, typ: BinType},
mbin32: {size: 5, extra: extra32, typ: BinType},
mext8: {size: 3, extra: extra8, typ: ExtensionType},
mext16: {size: 4, extra: extra16, typ: ExtensionType},
mext32: {size: 6, extra: extra32, typ: ExtensionType},
mfloat32: {size: 5, extra: constsize, typ: Float32Type},
mfloat64: {size: 9, extra: constsize, typ: Float64Type},
muint8: {size: 2, extra: constsize, typ: UintType},
muint16: {size: 3, extra: constsize, typ: UintType},
muint32: {size: 5, extra: constsize, typ: UintType},
muint64: {size: 9, extra: constsize, typ: UintType},
mint8: {size: 2, extra: constsize, typ: IntType},
mint16: {size: 3, extra: constsize, typ: IntType},
mint32: {size: 5, extra: constsize, typ: IntType},
mint64: {size: 9, extra: constsize, typ: IntType},
mfixext1: {size: 3, extra: constsize, typ: ExtensionType},
mfixext2: {size: 4, extra: constsize, typ: ExtensionType},
mfixext4: {size: 6, extra: constsize, typ: ExtensionType},
mfixext8: {size: 10, extra: constsize, typ: ExtensionType},
mfixext16: {size: 18, extra: constsize, typ: ExtensionType},
mstr8: {size: 2, extra: extra8, typ: StrType},
mstr16: {size: 3, extra: extra16, typ: StrType},
mstr32: {size: 5, extra: extra32, typ: StrType},
marray16: {size: 3, extra: array16v, typ: ArrayType},
marray32: {size: 5, extra: array32v, typ: ArrayType},
mmap16: {size: 3, extra: map16v, typ: MapType},
mmap32: {size: 5, extra: map32v, typ: MapType},
}
func init() {
// set up fixed fields
// fixint
for i := mfixint; i < 0x80; i++ {
sizes[i] = bytespec{size: 1, extra: constsize, typ: IntType}
}
// nfixint
for i := uint16(mnfixint); i < 0x100; i++ {
sizes[uint8(i)] = bytespec{size: 1, extra: constsize, typ: IntType}
}
// fixstr gets constsize,
// since the prefix yields the size
for i := mfixstr; i < 0xc0; i++ {
sizes[i] = bytespec{size: 1 + rfixstr(i), extra: constsize, typ: StrType}
}
// fixmap
for i := mfixmap; i < 0x90; i++ {
sizes[i] = bytespec{size: 1, extra: varmode(2 * rfixmap(i)), typ: MapType}
}
// fixarray
for i := mfixarray; i < 0xa0; i++ {
sizes[i] = bytespec{size: 1, extra: varmode(rfixarray(i)), typ: ArrayType}
}
}
// a valid bytespsec has
// non-zero 'size' and
// non-zero 'typ'
type bytespec struct {
size uint8 // prefix size information
extra varmode // extra size information
typ Type // type
_ byte // makes bytespec 4 bytes (yes, this matters)
}
// size mode
// if positive, # elements for composites
type varmode int8
const (
constsize varmode = 0 // constant size (size bytes + uint8(varmode) objects)
extra8 = -1 // has uint8(p[1]) extra bytes
extra16 = -2 // has be16(p[1:]) extra bytes
extra32 = -3 // has be32(p[1:]) extra bytes
map16v = -4 // use map16
map32v = -5 // use map32
array16v = -6 // use array16
array32v = -7 // use array32
)
func getType(v byte) Type {
return sizes[v].typ
}

142
vendor/github.com/glycerine/greenpack/msgp/errors.go generated vendored Normal file
View File

@@ -0,0 +1,142 @@
package msgp
import (
"fmt"
"reflect"
)
var (
// ErrShortBytes is returned when the
// slice being decoded is too short to
// contain the contents of the message
ErrShortBytes error = errShort{}
// this error is only returned
// if we reach code that should
// be unreachable
fatal error = errFatal{}
)
// Error is the interface satisfied
// by all of the errors that originate
// from this package.
type Error interface {
error
// Resumable returns whether
// or not the error means that
// the stream of data is malformed
// and the information is unrecoverable.
Resumable() bool
}
type errShort struct{}
func (e errShort) Error() string { return "msgp: too few bytes left to read object" }
func (e errShort) Resumable() bool { return false }
type errFatal struct{}
func (f errFatal) Error() string { return "msgp: fatal decoding error (unreachable code)" }
func (f errFatal) Resumable() bool { return false }
// ArrayError is an error returned
// when decoding a fix-sized array
// of the wrong size
type ArrayError struct {
Wanted uint32
Got uint32
}
// Error implements the error interface
func (a ArrayError) Error() string {
return fmt.Sprintf("msgp: wanted array of size %d; got %d", a.Wanted, a.Got)
}
// Resumable is always 'true' for ArrayErrors
func (a ArrayError) Resumable() bool { return true }
// IntOverflow is returned when a call
// would downcast an integer to a type
// with too few bits to hold its value.
type IntOverflow struct {
Value int64 // the value of the integer
FailedBitsize int // the bit size that the int64 could not fit into
}
// Error implements the error interface
func (i IntOverflow) Error() string {
return fmt.Sprintf("msgp: %d overflows int%d", i.Value, i.FailedBitsize)
}
// Resumable is always 'true' for overflows
func (i IntOverflow) Resumable() bool { return true }
// UintOverflow is returned when a call
// would downcast an unsigned integer to a type
// with too few bits to hold its value
type UintOverflow struct {
Value uint64 // value of the uint
FailedBitsize int // the bit size that couldn't fit the value
}
// Error implements the error interface
func (u UintOverflow) Error() string {
return fmt.Sprintf("msgp: %d overflows uint%d", u.Value, u.FailedBitsize)
}
// Resumable is always 'true' for overflows
func (u UintOverflow) Resumable() bool { return true }
// A TypeError is returned when a particular
// decoding method is unsuitable for decoding
// a particular MessagePack value.
type TypeError struct {
Method Type // Type expected by method
Encoded Type // Type actually encoded
}
// Error implements the error interface
func (t TypeError) Error() string {
return fmt.Sprintf("msgp: attempted to decode type %q with method for %q", t.Encoded, t.Method)
}
// Resumable returns 'true' for TypeErrors
func (t TypeError) Resumable() bool { return true }
// returns either InvalidPrefixError or
// TypeError depending on whether or not
// the prefix is recognized
func badPrefix(want Type, lead byte) error {
t := sizes[lead].typ
if t == InvalidType {
return InvalidPrefixError(lead)
}
return TypeError{Method: want, Encoded: t}
}
// InvalidPrefixError is returned when a bad encoding
// uses a prefix that is not recognized in the MessagePack standard.
// This kind of error is unrecoverable.
type InvalidPrefixError byte
// Error implements the error interface
func (i InvalidPrefixError) Error() string {
return fmt.Sprintf("msgp: unrecognized type prefix 0x%x", byte(i))
}
// Resumable returns 'false' for InvalidPrefixErrors
func (i InvalidPrefixError) Resumable() bool { return false }
// ErrUnsupportedType is returned
// when a bad argument is supplied
// to a function that takes `interface{}`.
type ErrUnsupportedType struct {
T reflect.Type
}
// Error implements error
func (e *ErrUnsupportedType) Error() string { return fmt.Sprintf("msgp: type %q not supported(4)", e.T) }
// Resumable returns 'true' for ErrUnsupportedType
func (e *ErrUnsupportedType) Resumable() bool { return true }

559
vendor/github.com/glycerine/greenpack/msgp/extension.go generated vendored Normal file
View File

@@ -0,0 +1,559 @@
package msgp
import (
"fmt"
"math"
)
const (
// Complex64Extension is the extension number used for complex64
Complex64Extension = 3
// Complex128Extension is the extension number used for complex128
Complex128Extension = 4
// TimeExtension is the extension number used for time.Time
TimeExtension = 5
// DedupExtension allows us to avoid cycles and avoid excess
// space consumption for graphs that are not strictly trees.
DedupExtension = 6
// DurationExtension is used for time.Duration
DurationExtension = 7
)
// our extensions live here
var extensionReg = make(map[int8]func() Extension)
// RegisterExtension registers extensions so that they
// can be initialized and returned by methods that
// decode `interface{}` values. This should only
// be called during initialization. f() should return
// a newly-initialized zero value of the extension. Keep in
// mind that extensions 3, 4, and 5 are reserved for
// complex64, complex128, and time.Time, respectively,
// and that MessagePack reserves extension types from -127 to -1.
//
// For example, if you wanted to register a user-defined struct:
//
// msgp.RegisterExtension(10, func() msgp.Extension { &MyExtension{} })
//
// RegisterExtension will panic if you call it multiple times
// with the same 'typ' argument, or if you use a reserved
// type (3, 4, 5, 6, 7).
func RegisterExtension(typ int8, f func() Extension) {
switch typ {
case Complex64Extension, Complex128Extension, TimeExtension, DedupExtension, DurationExtension:
panic(fmt.Sprint("msgp: forbidden extension type:", typ))
}
if _, ok := extensionReg[typ]; ok {
panic(fmt.Sprint("msgp: RegisterExtension() called with typ", typ, "more than once"))
}
extensionReg[typ] = f
}
// ExtensionTypeError is an error type returned
// when there is a mis-match between an extension type
// and the type encoded on the wire
type ExtensionTypeError struct {
Got int8
Want int8
}
// Error implements the error interface
func (e ExtensionTypeError) Error() string {
return fmt.Sprintf("msgp: error decoding extension: wanted type %d; got type %d", e.Want, e.Got)
}
// Resumable returns 'true' for ExtensionTypeErrors
func (e ExtensionTypeError) Resumable() bool { return true }
func errExt(got int8, wanted int8) error {
return ExtensionTypeError{Got: got, Want: wanted}
}
// Extension is the interface fulfilled
// by types that want to define their
// own binary encoding.
type Extension interface {
// ExtensionType should return
// a int8 that identifies the concrete
// type of the extension. (Types <0 are
// officially reserved by the MessagePack
// specifications.)
ExtensionType() int8
// Len should return the length
// of the data to be encoded
Len() int
// MarshalBinaryTo should copy
// the data into the supplied slice,
// assuming that the slice has length Len()
MarshalBinaryTo([]byte) error
UnmarshalBinary([]byte) error
}
// RawExtension implements the Extension interface
type RawExtension struct {
Data []byte
Type int8
}
// ExtensionType implements Extension.ExtensionType, and returns r.Type
func (r *RawExtension) ExtensionType() int8 { return r.Type }
// Len implements Extension.Len, and returns len(r.Data)
func (r *RawExtension) Len() int { return len(r.Data) }
// MarshalBinaryTo implements Extension.MarshalBinaryTo,
// and returns a copy of r.Data
func (r *RawExtension) MarshalBinaryTo(d []byte) error {
copy(d, r.Data)
return nil
}
// UnmarshalBinary implements Extension.UnmarshalBinary,
// and sets r.Data to the contents of the provided slice
func (r *RawExtension) UnmarshalBinary(b []byte) error {
if cap(r.Data) >= len(b) {
r.Data = r.Data[0:len(b)]
} else {
r.Data = make([]byte, len(b))
}
copy(r.Data, b)
return nil
}
// WriteExtension writes an extension type to the writer
func (mw *Writer) WriteExtension(e Extension) error {
l := e.Len()
var err error
switch l {
case 0:
o, err := mw.require(3)
if err != nil {
return err
}
mw.buf[o] = mext8
mw.buf[o+1] = 0
mw.buf[o+2] = byte(e.ExtensionType())
case 1:
o, err := mw.require(2)
if err != nil {
return err
}
mw.buf[o] = mfixext1
mw.buf[o+1] = byte(e.ExtensionType())
case 2:
o, err := mw.require(2)
if err != nil {
return err
}
mw.buf[o] = mfixext2
mw.buf[o+1] = byte(e.ExtensionType())
case 4:
o, err := mw.require(2)
if err != nil {
return err
}
mw.buf[o] = mfixext4
mw.buf[o+1] = byte(e.ExtensionType())
case 8:
o, err := mw.require(2)
if err != nil {
return err
}
mw.buf[o] = mfixext8
mw.buf[o+1] = byte(e.ExtensionType())
case 16:
o, err := mw.require(2)
if err != nil {
return err
}
mw.buf[o] = mfixext16
mw.buf[o+1] = byte(e.ExtensionType())
default:
switch {
case l < math.MaxUint8:
o, err := mw.require(3)
if err != nil {
return err
}
mw.buf[o] = mext8
mw.buf[o+1] = byte(uint8(l))
mw.buf[o+2] = byte(e.ExtensionType())
case l < math.MaxUint16:
o, err := mw.require(4)
if err != nil {
return err
}
mw.buf[o] = mext16
big.PutUint16(mw.buf[o+1:], uint16(l))
mw.buf[o+3] = byte(e.ExtensionType())
default:
o, err := mw.require(6)
if err != nil {
return err
}
mw.buf[o] = mext32
big.PutUint32(mw.buf[o+1:], uint32(l))
mw.buf[o+5] = byte(e.ExtensionType())
}
}
// we can only write directly to the
// buffer if we're sure that it
// fits the object
if l <= mw.bufsize() {
o, err := mw.require(l)
if err != nil {
return err
}
return e.MarshalBinaryTo(mw.buf[o:])
}
// here we create a new buffer
// just large enough for the body
// and save it as the write buffer
err = mw.flush()
if err != nil {
return err
}
buf := make([]byte, l)
err = e.MarshalBinaryTo(buf)
if err != nil {
return err
}
mw.buf = buf
mw.wloc = l
return nil
}
// peek at the extension type, assuming the next
// kind to be read is Extension
func (m *Reader) peekExtensionType() (int8, error) {
p, err := m.R.Peek(2)
if err != nil {
return 0, err
}
spec := sizes[p[0]]
if spec.typ != ExtensionType {
return 0, badPrefix(ExtensionType, p[0])
}
if spec.extra == constsize {
return int8(p[1]), nil
}
size := spec.size
p, err = m.R.Peek(int(size))
if err != nil {
return 0, err
}
return int8(p[size-1]), nil
}
// peekExtension peeks at the extension encoding type
// (must guarantee at least 1 byte in 'b')
func peekExtension(b []byte) (int8, error) {
spec := sizes[b[0]]
size := spec.size
if spec.typ != ExtensionType {
return 0, badPrefix(ExtensionType, b[0])
}
if len(b) < int(size) {
return 0, ErrShortBytes
}
// for fixed extensions,
// the type information is in
// the second byte
if spec.extra == constsize {
return int8(b[1]), nil
}
// otherwise, it's in the last
// part of the prefix
return int8(b[size-1]), nil
}
// ReadExtension reads the next object from the reader
// as an extension. ReadExtension will fail if the next
// object in the stream is not an extension, or if
// e.Type() is not the same as the wire type.
func (m *Reader) ReadExtension(e Extension) (err error) {
var p []byte
p, err = m.R.Peek(2)
if err != nil {
return
}
lead := p[0]
var read int
var off int
switch lead {
case mfixext1:
if int8(p[1]) != e.ExtensionType() {
err = errExt(int8(p[1]), e.ExtensionType())
return
}
p, err = m.R.Peek(3)
if err != nil {
return
}
err = e.UnmarshalBinary(p[2:])
if err == nil {
_, err = m.R.Skip(3)
}
return
case mfixext2:
if int8(p[1]) != e.ExtensionType() {
err = errExt(int8(p[1]), e.ExtensionType())
return
}
p, err = m.R.Peek(4)
if err != nil {
return
}
err = e.UnmarshalBinary(p[2:])
if err == nil {
_, err = m.R.Skip(4)
}
return
case mfixext4:
if int8(p[1]) != e.ExtensionType() {
err = errExt(int8(p[1]), e.ExtensionType())
return
}
p, err = m.R.Peek(6)
if err != nil {
return
}
err = e.UnmarshalBinary(p[2:])
if err == nil {
_, err = m.R.Skip(6)
}
return
case mfixext8:
if int8(p[1]) != e.ExtensionType() {
err = errExt(int8(p[1]), e.ExtensionType())
return
}
p, err = m.R.Peek(10)
if err != nil {
return
}
err = e.UnmarshalBinary(p[2:])
if err == nil {
_, err = m.R.Skip(10)
}
return
case mfixext16:
if int8(p[1]) != e.ExtensionType() {
err = errExt(int8(p[1]), e.ExtensionType())
return
}
p, err = m.R.Peek(18)
if err != nil {
return
}
err = e.UnmarshalBinary(p[2:])
if err == nil {
_, err = m.R.Skip(18)
}
return
case mext8:
p, err = m.R.Peek(3)
if err != nil {
return
}
if int8(p[2]) != e.ExtensionType() {
err = errExt(int8(p[2]), e.ExtensionType())
return
}
read = int(uint8(p[1]))
off = 3
case mext16:
p, err = m.R.Peek(4)
if err != nil {
return
}
if int8(p[3]) != e.ExtensionType() {
err = errExt(int8(p[3]), e.ExtensionType())
return
}
read = int(big.Uint16(p[1:]))
off = 4
case mext32:
p, err = m.R.Peek(6)
if err != nil {
return
}
if int8(p[5]) != e.ExtensionType() {
err = errExt(int8(p[5]), e.ExtensionType())
return
}
read = int(big.Uint32(p[1:]))
off = 6
default:
err = badPrefix(ExtensionType, lead)
return
}
p, err = m.R.Peek(read + off)
if err != nil {
return
}
err = e.UnmarshalBinary(p[off:])
if err == nil {
_, err = m.R.Skip(read + off)
}
return
}
// AppendExtension appends a MessagePack extension to the provided slice
func AppendExtension(b []byte, e Extension) ([]byte, error) {
l := e.Len()
var o []byte
var n int
switch l {
case 0:
o, n = ensure(b, 3)
o[n] = mext8
o[n+1] = 0
o[n+2] = byte(e.ExtensionType())
return o[:n+3], nil
case 1:
o, n = ensure(b, 3)
o[n] = mfixext1
o[n+1] = byte(e.ExtensionType())
n += 2
case 2:
o, n = ensure(b, 4)
o[n] = mfixext2
o[n+1] = byte(e.ExtensionType())
n += 2
case 4:
o, n = ensure(b, 6)
o[n] = mfixext4
o[n+1] = byte(e.ExtensionType())
n += 2
case 8:
o, n = ensure(b, 10)
o[n] = mfixext8
o[n+1] = byte(e.ExtensionType())
n += 2
case 16:
o, n = ensure(b, 18)
o[n] = mfixext16
o[n+1] = byte(e.ExtensionType())
n += 2
}
switch {
case l < math.MaxUint8:
o, n = ensure(b, l+3)
o[n] = mext8
o[n+1] = byte(uint8(l))
o[n+2] = byte(e.ExtensionType())
n += 3
case l < math.MaxUint16:
o, n = ensure(b, l+4)
o[n] = mext16
big.PutUint16(o[n+1:], uint16(l))
o[n+3] = byte(e.ExtensionType())
n += 4
default:
o, n = ensure(b, l+6)
o[n] = mext32
big.PutUint32(o[n+1:], uint32(l))
o[n+5] = byte(e.ExtensionType())
n += 6
}
return o, e.MarshalBinaryTo(o[n:])
}
// ReadExtensionBytes reads an extension from 'b' into 'e'
// and returns any remaining bytes.
// Possible errors:
// - ErrShortBytes ('b' not long enough)
// - ExtensionTypeErorr{} (wire type not the same as e.Type())
// - TypeErorr{} (next object not an extension)
// - InvalidPrefixError
// - An umarshal error returned from e.UnmarshalBinary
func (nbs *NilBitsStack) ReadExtensionBytes(b []byte, e Extension) ([]byte, error) {
if nbs != nil && nbs.AlwaysNil {
return b, nil
}
l := len(b)
if l < 3 {
return b, ErrShortBytes
}
lead := b[0]
var (
sz int // size of 'data'
off int // offset of 'data'
typ int8
)
switch lead {
case mfixext1:
typ = int8(b[1])
sz = 1
off = 2
case mfixext2:
typ = int8(b[1])
sz = 2
off = 2
case mfixext4:
typ = int8(b[1])
sz = 4
off = 2
case mfixext8:
typ = int8(b[1])
sz = 8
off = 2
case mfixext16:
typ = int8(b[1])
sz = 16
off = 2
case mext8:
sz = int(uint8(b[1]))
typ = int8(b[2])
off = 3
if sz == 0 {
return b[3:], e.UnmarshalBinary(b[3:3])
}
case mext16:
if l < 4 {
return b, ErrShortBytes
}
sz = int(big.Uint16(b[1:]))
typ = int8(b[3])
off = 4
case mext32:
if l < 6 {
return b, ErrShortBytes
}
sz = int(big.Uint32(b[1:]))
typ = int8(b[5])
off = 6
default:
return b, badPrefix(ExtensionType, lead)
}
if typ != e.ExtensionType() {
return b, errExt(typ, e.ExtensionType())
}
// the data of the extension starts
// at 'off' and is 'sz' bytes long
if len(b[off:]) < sz {
return b, ErrShortBytes
}
tot := off + sz
return b[tot:], e.UnmarshalBinary(b[off:tot])
}

91
vendor/github.com/glycerine/greenpack/msgp/file.go generated vendored Normal file
View File

@@ -0,0 +1,91 @@
// +build linux,!appengine darwin dragonfly freebsd netbsd openbsd
package msgp
import (
"os"
"syscall"
)
// ReadFile reads a file into 'dst' using
// a read-only memory mapping. Consequently,
// the file must be mmap-able, and the
// Unmarshaler should never write to
// the source memory. (Methods generated
// by the msgp tool obey that constraint, but
// user-defined implementations may not.)
//
// Reading and writing through file mappings
// is only efficient for large files; small
// files are best read and written using
// the ordinary streaming interfaces.
//
func ReadFile(dst Unmarshaler, file *os.File) error {
stat, err := file.Stat()
if err != nil {
return err
}
data, err := syscall.Mmap(int(file.Fd()), 0, int(stat.Size()), syscall.PROT_READ, syscall.MAP_SHARED)
if err != nil {
return err
}
adviseRead(data)
_, err = dst.UnmarshalMsg(data)
uerr := syscall.Munmap(data)
if err == nil {
err = uerr
}
return err
}
// MarshalSizer is the combination
// of the Marshaler and Sizer
// interfaces.
type MarshalSizer interface {
Marshaler
Sizer
}
// WriteFile writes a file from 'src' using
// memory mapping. It overwrites the entire
// contents of the previous file.
// The mapping size is calculated
// using the `Msgsize()` method
// of 'src', so it must produce a result
// equal to or greater than the actual encoded
// size of the object. Otherwise,
// a fault (SIGBUS) will occur.
//
// Reading and writing through file mappings
// is only efficient for large files; small
// files are best read and written using
// the ordinary streaming interfaces.
//
// NOTE: The performance of this call
// is highly OS- and filesystem-dependent.
// Users should take care to test that this
// performs as expected in a production environment.
// (Linux users should run a kernel and filesystem
// that support fallocate(2) for the best results.)
func WriteFile(src MarshalSizer, file *os.File) error {
sz := src.Msgsize()
err := fallocate(file, int64(sz))
if err != nil {
return err
}
data, err := syscall.Mmap(int(file.Fd()), 0, sz, syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_SHARED)
if err != nil {
return err
}
adviseWrite(data)
chunk := data[:0]
chunk, err = src.MarshalMsg(chunk)
if err != nil {
return err
}
uerr := syscall.Munmap(data)
if uerr != nil {
return uerr
}
return file.Truncate(int64(len(chunk)))
}

View File

@@ -0,0 +1,47 @@
// +build windows appengine
package msgp
import (
"io/ioutil"
"os"
)
// MarshalSizer is the combination
// of the Marshaler and Sizer
// interfaces.
type MarshalSizer interface {
Marshaler
Sizer
}
func ReadFile(dst Unmarshaler, file *os.File) error {
if u, ok := dst.(Decodable); ok {
return u.DecodeMsg(NewReader(file))
}
data, err := ioutil.ReadAll(file)
if err != nil {
return err
}
_, err = dst.UnmarshalMsg(data)
return err
}
func WriteFile(src MarshalSizer, file *os.File) error {
if e, ok := src.(Encodable); ok {
w := NewWriter(file)
err := e.EncodeMsg(w)
if err == nil {
err = w.Flush()
}
return err
}
raw, err := src.MarshalMsg(nil)
if err != nil {
return err
}
_, err = file.Write(raw)
return err
}

174
vendor/github.com/glycerine/greenpack/msgp/integers.go generated vendored Normal file
View File

@@ -0,0 +1,174 @@
package msgp
/* ----------------------------------
integer encoding utilities
(inline-able)
TODO(tinylib): there are faster,
albeit non-portable solutions
to the code below. implement
byteswap?
---------------------------------- */
func putMint64(b []byte, i int64) {
b[0] = mint64
b[1] = byte(i >> 56)
b[2] = byte(i >> 48)
b[3] = byte(i >> 40)
b[4] = byte(i >> 32)
b[5] = byte(i >> 24)
b[6] = byte(i >> 16)
b[7] = byte(i >> 8)
b[8] = byte(i)
}
func getMint64(b []byte) int64 {
return (int64(b[1]) << 56) | (int64(b[2]) << 48) |
(int64(b[3]) << 40) | (int64(b[4]) << 32) |
(int64(b[5]) << 24) | (int64(b[6]) << 16) |
(int64(b[7]) << 8) | (int64(b[8]))
}
func putMint32(b []byte, i int32) {
b[0] = mint32
b[1] = byte(i >> 24)
b[2] = byte(i >> 16)
b[3] = byte(i >> 8)
b[4] = byte(i)
}
func getMint32(b []byte) int32 {
return (int32(b[1]) << 24) | (int32(b[2]) << 16) | (int32(b[3]) << 8) | (int32(b[4]))
}
func putMint16(b []byte, i int16) {
b[0] = mint16
b[1] = byte(i >> 8)
b[2] = byte(i)
}
func getMint16(b []byte) (i int16) {
return (int16(b[1]) << 8) | int16(b[2])
}
func putMint8(b []byte, i int8) {
b[0] = mint8
b[1] = byte(i)
}
func getMint8(b []byte) (i int8) {
return int8(b[1])
}
func putMuint64(b []byte, u uint64) {
b[0] = muint64
b[1] = byte(u >> 56)
b[2] = byte(u >> 48)
b[3] = byte(u >> 40)
b[4] = byte(u >> 32)
b[5] = byte(u >> 24)
b[6] = byte(u >> 16)
b[7] = byte(u >> 8)
b[8] = byte(u)
}
func getMuint64(b []byte) uint64 {
return (uint64(b[1]) << 56) | (uint64(b[2]) << 48) |
(uint64(b[3]) << 40) | (uint64(b[4]) << 32) |
(uint64(b[5]) << 24) | (uint64(b[6]) << 16) |
(uint64(b[7]) << 8) | (uint64(b[8]))
}
func putMuint32(b []byte, u uint32) {
b[0] = muint32
b[1] = byte(u >> 24)
b[2] = byte(u >> 16)
b[3] = byte(u >> 8)
b[4] = byte(u)
}
func getMuint32(b []byte) uint32 {
return (uint32(b[1]) << 24) | (uint32(b[2]) << 16) | (uint32(b[3]) << 8) | (uint32(b[4]))
}
func putMuint16(b []byte, u uint16) {
b[0] = muint16
b[1] = byte(u >> 8)
b[2] = byte(u)
}
func getMuint16(b []byte) uint16 {
return (uint16(b[1]) << 8) | uint16(b[2])
}
func putMuint8(b []byte, u uint8) {
b[0] = muint8
b[1] = byte(u)
}
func getMuint8(b []byte) uint8 {
return uint8(b[1])
}
func getUnix(b []byte) (sec int64, nsec int32) {
sec = (int64(b[0]) << 56) | (int64(b[1]) << 48) |
(int64(b[2]) << 40) | (int64(b[3]) << 32) |
(int64(b[4]) << 24) | (int64(b[5]) << 16) |
(int64(b[6]) << 8) | (int64(b[7]))
nsec = (int32(b[8]) << 24) | (int32(b[9]) << 16) | (int32(b[10]) << 8) | (int32(b[11]))
return
}
func putUnix(b []byte, sec int64, nsec int32) {
b[0] = byte(sec >> 56)
b[1] = byte(sec >> 48)
b[2] = byte(sec >> 40)
b[3] = byte(sec >> 32)
b[4] = byte(sec >> 24)
b[5] = byte(sec >> 16)
b[6] = byte(sec >> 8)
b[7] = byte(sec)
b[8] = byte(nsec >> 24)
b[9] = byte(nsec >> 16)
b[10] = byte(nsec >> 8)
b[11] = byte(nsec)
}
/* -----------------------------
prefix utilities
----------------------------- */
// write prefix and uint8
func prefixu8(b []byte, pre byte, sz uint8) {
b[0] = pre
b[1] = byte(sz)
}
// write prefix and big-endian uint16
func prefixu16(b []byte, pre byte, sz uint16) {
b[0] = pre
b[1] = byte(sz >> 8)
b[2] = byte(sz)
}
// write prefix and big-endian uint32
func prefixu32(b []byte, pre byte, sz uint32) {
b[0] = pre
b[1] = byte(sz >> 24)
b[2] = byte(sz >> 16)
b[3] = byte(sz >> 8)
b[4] = byte(sz)
}
func prefixu64(b []byte, pre byte, sz uint64) {
b[0] = pre
b[1] = byte(sz >> 56)
b[2] = byte(sz >> 48)
b[3] = byte(sz >> 40)
b[4] = byte(sz >> 32)
b[5] = byte(sz >> 24)
b[6] = byte(sz >> 16)
b[7] = byte(sz >> 8)
b[8] = byte(sz)
}

Some files were not shown because too many files have changed in this diff Show More