// tablizer/vendor/github.com/glycerine/zygomys/zygo/parser.go

package zygo

import (
	"errors"
	"fmt"
	"io"
	"math"
	"strconv"
	"strings"
	"sync"
)

var NaN float64

func init() {
	NaN = math.NaN()
}

// Parser drives the lexer and assembles Sexp parse trees. It runs an
// infinite parse loop in its own goroutine (see Start) and talks to
// callers over the channels below.
type Parser struct {
	lexer *Lexer
	env   *Zlisp

	Done         chan bool
	reqStop      chan bool
	AddInput     chan io.RuneScanner
	ReqReset     chan io.RuneScanner
	ParsedOutput chan []ParserReply

	mut               sync.Mutex
	stopped           bool
	sendMe            []ParserReply
	FlagSendNeedInput bool
	inBacktick        bool
}

// ParserReply is one batch of parsed expressions (and/or an error),
// delivered on the Parser.ParsedOutput channel.
type ParserReply struct {
	Expr []Sexp
	Err  error
}

// NewParser constructs a Parser bound to env. Call Start on the
// result to launch its parsing goroutine.
func (env *Zlisp) NewParser() *Parser {
	p := &Parser{
		env:          env,
		Done:         make(chan bool),
		reqStop:      make(chan bool),
		ReqReset:     make(chan io.RuneScanner),
		AddInput:     make(chan io.RuneScanner),
		ParsedOutput: make(chan []ParserReply),
		sendMe:       make([]ParserReply, 0, 1),
	}
	p.lexer = NewLexer(p)
	return p
}

// Stop shuts down the parsing goroutine and waits for it to exit.
// It is safe to call Stop more than once.
func (p *Parser) Stop() error {
	p.mut.Lock()
	defer p.mut.Unlock()
	if p.stopped {
		return nil
	}
	p.stopped = true
	close(p.reqStop)
	<-p.Done
	return nil
}

// Start launches a background goroutine that runs an
// infinite parsing loop.
func (p *Parser) Start() {
	go func() {
		defer close(p.Done)
		expressions := make([]Sexp, 0, SliceDefaultCap)
		// maybe we already have input, be optimistic!
		// no need to call p.GetMoreInput() before starting
		// our loop.
		for {
			expr, err := p.ParseExpression(0)
			if err != nil || expr == SexpEnd {
				if err == ParserHaltRequested {
					return
				}
				err = p.GetMoreInput(expressions, err)
				if err == ParserHaltRequested {
					return
				}
				// GetMoreInput will have delivered what we gave it. Reset,
				// since we don't own that memory any more.
				expressions = make([]Sexp, 0, SliceDefaultCap)
			} else {
				// INVAR: err == nil && expr is not SexpEnd
				expressions = append(expressions, expr)
			}
		}
	}()
}

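// Editor's sketch of typical use; this example is not from upstream
// docs, and the input string is illustrative:
//
//	p := env.NewParser()
//	p.Start()
//	defer p.Stop()
//	p.ResetAddNewInput(strings.NewReader(`(+ 1 2) (* 3 4)`))
//	exprs, err := p.ParseTokens()
//	if err != nil {
//		// exprs still holds whatever parsed cleanly before the error.
//	}
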
var ParserHaltRequested = fmt.Errorf("parser halt requested")
var ResetRequested = fmt.Errorf("parser reset requested")
var ErrMoreInputNeeded = fmt.Errorf("parser needs more input")
// GetMoreInput is called by the parser/lexer when they get wedged;
// it should *return* only once it has more input for them.
//
// Listeners on p.ParsedOutput should know the convention: receiving a
// length-0 []ParserReply on the p.ParsedOutput channel means "we need
// more input!" They should then send some in on the p.AddInput channel,
// or request a reset and simultaneously supply new input via the
// p.ReqReset channel.
func (p *Parser) GetMoreInput(deliverThese []Sexp, errorToReport error) error {
	if len(deliverThese) == 0 && errorToReport == nil {
		p.FlagSendNeedInput = true
	} else {
		p.sendMe = append(p.sendMe,
			ParserReply{
				Expr: deliverThese,
				Err:  errorToReport,
			})
	}
	for {
		select {
		case <-p.reqStop:
			return ParserHaltRequested
		case input := <-p.AddInput:
			p.lexer.AddNextStream(input)
			p.FlagSendNeedInput = false
			return nil
		case input := <-p.ReqReset:
			p.lexer.Reset()
			p.lexer.AddNextStream(input)
			p.FlagSendNeedInput = false
			return ResetRequested
		case p.HaveStuffToSend() <- p.sendMe:
			p.sendMe = make([]ParserReply, 0, 1)
			p.FlagSendNeedInput = false
		}
	}
}

// HaveStuffToSend returns p.ParsedOutput when there are replies (or a
// need-more-input signal) to deliver, and nil otherwise; sending on a
// nil channel blocks forever, which disables that case in the select
// above.
func (p *Parser) HaveStuffToSend() chan []ParserReply {
	if len(p.sendMe) > 0 || p.FlagSendNeedInput {
		return p.ParsedOutput
	}
	return nil
}

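// Editor's sketch of the consumer side of that convention. The channel
// names are from this file; the loop and the `moreSource` variable are
// hypothetical:
//
//	for {
//		replies := <-p.ParsedOutput
//		if len(replies) == 0 {
//			// zero-length reply: the parser is stalled and
//			// wants more input.
//			p.AddInput <- strings.NewReader(moreSource)
//			continue
//		}
//		for _, reply := range replies {
//			// use reply.Expr, check reply.Err
//		}
//	}
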
// Reset clears the lexer state without supplying any new input.
func (p *Parser) Reset() {
	select {
	case p.ReqReset <- nil:
	case <-p.reqStop:
	}
}

// NewInput hands the parser a fresh stream to consume.
func (p *Parser) NewInput(s io.RuneScanner) {
	select {
	case p.AddInput <- s:
	case <-p.reqStop:
	}
}

// ResetAddNewInput clears the lexer state and supplies s as new input.
func (p *Parser) ResetAddNewInput(s io.RuneScanner) {
	select {
	case p.ReqReset <- s:
	case <-p.reqStop:
	}
}

var UnexpectedEnd error = errors.New("Unexpected end of input")
const SliceDefaultCap = 10
// ParseList parses everything after an opening '(' up to and
// including the matching ')'.
func (parser *Parser) ParseList(depth int) (sx Sexp, err error) {
	lexer := parser.lexer
	var tok Token
tokFilled:
	for {
		tok, err = lexer.PeekNextToken()
		//Q("\n ParseList(depth=%d) got lexer.PeekNextToken() -> tok='%v' err='%v'\n", depth, tok, err)
		if err != nil {
			return SexpNull, err
		}
		if tok.typ != TokenEnd {
			break tokFilled
		}
		// instead of returning UnexpectedEnd, we:
		err = parser.GetMoreInput(nil, ErrMoreInputNeeded)
		//Q("\n ParseList(depth=%d) got back from parser.GetMoreInput(): '%v'\n", depth, err)
		switch err {
		case ParserHaltRequested:
			return SexpNull, err
		case ResetRequested:
			return SexpEnd, err
		}
		// we still have to fill tok, so
		// loop to the top to PeekNextToken
	}
	if tok.typ == TokenRParen {
		_, _ = lexer.GetNextToken()
		return SexpNull, nil
	}
	var start = &SexpPair{}
	expr, err := parser.ParseExpression(depth + 1)
	if err != nil {
		return SexpNull, err
	}
	start.Head = expr
	tok, err = lexer.PeekNextToken()
	if err != nil {
		return SexpNull, err
	}
	// backslash '\' replaces dot '.' in zygo
	if tok.typ == TokenBackslash {
		// eat up the backslash
		_, _ = lexer.GetNextToken()
		expr, err = parser.ParseExpression(depth + 1)
		if err != nil {
			return SexpNull, err
		}
		// eat up the end paren
		tok, err = lexer.GetNextToken()
		if err != nil {
			return SexpNull, err
		}
		// make sure it was actually an end paren
		if tok.typ != TokenRParen {
			return SexpNull, errors.New("extra value in dotted pair")
		}
		start.Tail = expr
		return start, nil
	}
	expr, err = parser.ParseList(depth + 1)
	if err != nil {
		return start, err
	}
	start.Tail = expr
	return start, nil
}

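// Editor's note on the TokenBackslash branch above: zygo writes the
// classic dotted pair `(a . b)` as `(a \ b)`, so `(1 \ 2)` parses to a
// single SexpPair with Head 1 and Tail 2.
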
// ParseArray parses everything after an opening '[' up to and
// including the matching ']'; commas between elements are skipped.
func (parser *Parser) ParseArray(depth int) (Sexp, error) {
	lexer := parser.lexer
	arr := make([]Sexp, 0, SliceDefaultCap)
	var tok Token
	var err error
	for {
	getTok:
		for {
			tok, err = lexer.PeekNextToken()
			if err != nil {
				return SexpEnd, err
			}
			if tok.typ == TokenComma {
				// pop off the ,
				_, _ = lexer.GetNextToken()
				continue getTok
			}
			if tok.typ != TokenEnd {
				break getTok
			} else {
				// instead of returning (SexpEnd, UnexpectedEnd),
				// we ask for more input, and then loop
				err = parser.GetMoreInput(nil, ErrMoreInputNeeded)
				switch err {
				case ParserHaltRequested:
					return SexpNull, err
				case ResetRequested:
					return SexpEnd, err
				}
			}
		}
		if tok.typ == TokenRSquare {
			// pop off the ]
			_, _ = lexer.GetNextToken()
			break
		}
		expr, err := parser.ParseExpression(depth + 1)
		if err != nil {
			return SexpNull, err
		}
		arr = append(arr, expr)
	}
	return &SexpArray{Val: arr, Env: parser.env}, nil
}

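// Editor's note: because ParseArray discards TokenComma, `[1, 2, 3]`
// and `[1 2 3]` parse to the same three-element SexpArray.
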
// ParseExpression reads the next token and dispatches on its type to
// parse one complete expression.
func (parser *Parser) ParseExpression(depth int) (res Sexp, err error) {
	defer func() {
		if res != nil {
			//Q("returning from ParseExpression at depth=%v with res='%s'\n", depth, res.SexpString(nil))
		} else {
			//Q("returning from ParseExpression at depth=%v, res = nil", depth)
		}
	}()
	lexer := parser.lexer
	env := parser.env
	//getAnother:
	tok, err := lexer.GetNextToken()
	if err != nil {
		return SexpEnd, err
	}
	switch tok.typ {
	case TokenLParen:
		exp, err := parser.ParseList(depth + 1)
		return exp, err
	case TokenLSquare:
		exp, err := parser.ParseArray(depth + 1)
		return exp, err
	case TokenLCurly:
		exp, err := parser.ParseInfix(depth + 1)
		return exp, err
	case TokenQuote:
		expr, err := parser.ParseExpression(depth + 1)
		if err != nil {
			return SexpNull, err
		}
		return MakeList([]Sexp{env.MakeSymbol("quote"), expr}), nil
	case TokenCaret:
		// '^' is now our syntax-quote symbol, not TokenBacktick, to allow go-style `string literals`.
		expr, err := parser.ParseExpression(depth + 1)
		if err != nil {
			return SexpNull, err
		}
		return MakeList([]Sexp{env.MakeSymbol("syntaxQuote"), expr}), nil
	case TokenTilde:
		expr, err := parser.ParseExpression(depth + 1)
		if err != nil {
			return SexpNull, err
		}
		return MakeList([]Sexp{env.MakeSymbol("unquote"), expr}), nil
	case TokenTildeAt:
		expr, err := parser.ParseExpression(depth + 1)
		if err != nil {
			return SexpNull, err
		}
		return MakeList([]Sexp{env.MakeSymbol("unquote-splicing"), expr}), nil
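
	// Editor's summary of the four reader-macro cases above; the
	// expansions are read directly from the MakeList calls:
	//
	//	'x   => (quote x)
	//	^x   => (syntaxQuote x)
	//	~x   => (unquote x)
	//	~@x  => (unquote-splicing x)
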
	case TokenFreshAssign:
		return env.MakeSymbol(tok.str), nil
	case TokenColonOperator:
		return env.MakeSymbol(tok.str), nil
	case TokenDollar:
		return env.MakeSymbol(tok.str), nil
	case TokenBool:
		return &SexpBool{Val: tok.str == "true"}, nil
	case TokenUint64:
		// truncate off the "ULL" suffix
		inp := tok.str[:len(tok.str)-3]
		// handle hex 0x and octal 0o prefixes
		n := len(inp)
		base := 10
		if n > 2 {
			switch inp[:2] {
			case "0o":
				base = 8
				inp = inp[2:]
			case "0x":
				base = 16
				inp = inp[2:]
			}
		}
		u, err := strconv.ParseUint(inp, base, 64)
		//fmt.Printf("debug: parsed inp='%s' into u=%v\n", inp, u)
		if err != nil {
			return SexpNull, err
		}
		return &SexpUint64{Val: u}, nil
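
	// Editor's note: from the suffix truncation and base sniffing in the
	// TokenUint64 case above, uint64 literals look like `255ULL`,
	// `0xffULL`, or `0o377ULL`; these example spellings are inferred from
	// this code path, not from upstream docs.
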
	case TokenDecimal:
		i, err := strconv.ParseInt(tok.str, 10, SexpIntSize)
		if err != nil {
			return SexpNull, err
		}
		return &SexpInt{Val: i}, nil
	case TokenHex:
		i, err := strconv.ParseInt(tok.str, 16, SexpIntSize)
		if err != nil {
			return SexpNull, err
		}
		return &SexpInt{Val: i}, nil
	case TokenOct:
		i, err := strconv.ParseInt(tok.str, 8, SexpIntSize)
		if err != nil {
			return SexpNull, err
		}
		return &SexpInt{Val: i}, nil
	case TokenBinary:
		i, err := strconv.ParseInt(tok.str, 2, SexpIntSize)
		if err != nil {
			return SexpNull, err
		}
		return &SexpInt{Val: i}, nil
	case TokenChar:
		return &SexpChar{Val: rune(tok.str[0])}, nil
	case TokenString:
		return &SexpStr{S: tok.str}, nil
	case TokenBeginBacktickString:
		parser.inBacktick = true
		return parser.ParseBacktickString(&tok)
	case TokenBacktickString:
		parser.inBacktick = false
		return &SexpStr{S: tok.str, backtick: true}, nil
	case TokenFloat:
		var f float64
		if tok.str == "NaN" {
			f = NaN
		} else {
			f, err = strconv.ParseFloat(tok.str, SexpFloatSize)
			if err != nil {
				return SexpNull, err
			}
		}
		r := &SexpFloat{Val: f}
		if strings.Contains(tok.str, "e") || strings.Contains(tok.str, "E") {
			r.Scientific = true
		}
		return r, nil
	case TokenEnd:
		return SexpEnd, nil
	case TokenSymbol:
		return env.MakeSymbol(tok.str), nil
	case TokenSymbolColon:
		sym := env.MakeSymbol(tok.str)
		sym.colonTail = true
		return sym, nil
	case TokenDot:
		sym := env.MakeSymbol(tok.str)
		sym.isDot = true
		return sym, nil
	case TokenDotSymbol:
		sym := env.MakeSymbol(tok.str)
		sym.isDot = true
		return sym, nil
	case TokenComment:
		//Q("parser making SexpComment from '%s'", tok.str)
		return &SexpComment{Comment: tok.str}, nil
		// parser skips comments
		//goto getAnother
	case TokenBeginBlockComment:
		// parser skips comments
		return parser.ParseBlockComment(&tok)
		//parser.ParseBlockComment(&tok)
		//goto getAnother
	case TokenComma:
		return &SexpComma{}, nil
	case TokenSemicolon:
		return &SexpSemicolon{}, nil
	}
	return SexpNull, fmt.Errorf("Invalid syntax, don't know what to do with %v '%v'", tok.typ, tok)
}

// ParseTokens is the main service the Parser provides. It currently
// returns on the first error encountered, ignoring any expressions
// after that.
func (p *Parser) ParseTokens() ([]Sexp, error) {
	select {
	case out := <-p.ParsedOutput:
		Q("ParseTokens got p.ParsedOutput out: '%#v'", out)
		r := make([]Sexp, 0)
		for _, k := range out {
			r = append(r, k.Expr...)
			Q("\n ParseTokens k.Expr = '%v'\n\n", (&SexpArray{Val: k.Expr, Env: p.env}).SexpString(nil))
			if k.Err != nil {
				return r, k.Err
			}
		}
		return r, nil
	case <-p.reqStop:
		return nil, ErrShuttingDown
	}
}

var ErrShuttingDown error = fmt.Errorf("lexer shutting down")
// ParseBlockComment accumulates comment tokens into one block-style
// SexpComment, consuming input until the end-of-block-comment token.
func (parser *Parser) ParseBlockComment(start *Token) (sx Sexp, err error) {
	defer func() {
		if sx != nil {
			//Q("returning from ParseBlockComment with sx ='%v', err='%v'",
			//	sx.SexpString(), err)
		}
	}()
	lexer := parser.lexer
	var tok Token
	var block = &SexpComment{Block: true, Comment: start.str}
	for {
	tokFilled:
		for {
			tok, err = lexer.PeekNextToken()
			if err != nil {
				return SexpNull, err
			}
			if tok.typ != TokenEnd {
				break tokFilled
			}
			err = parser.GetMoreInput(nil, ErrMoreInputNeeded)
			switch err {
			case ParserHaltRequested:
				return SexpNull, err
			case ResetRequested:
				return SexpEnd, err
			}
			// we still have to fill tok, so
			// loop to the top to PeekNextToken
		}
		// consume it
		//cons, err := lexer.GetNextToken()
		_, err := lexer.GetNextToken()
		if err != nil {
			return nil, err
		}
		//Q("parse block comment is consuming '%v'", cons)
		switch tok.typ {
		case TokenEndBlockComment:
			block.Comment += tok.str
			return block, nil
		case TokenComment:
			block.Comment += tok.str
		default:
			panic("internal error: inside a block comment, we should only see TokenComment and TokenEndBlockComment tokens")
		}
	}
	//return block, nil
}

// ParseBacktickString consumes tokens until it sees the token that
// closes a go-style `backtick string literal`.
func (parser *Parser) ParseBacktickString(start *Token) (sx Sexp, err error) {
	defer func() {
		if sx != nil {
			//Q("returning from ParseBacktickString with sx ='%v', err='%v'",
			//	sx.SexpString(), err)
		}
	}()
	lexer := parser.lexer
	var tok Token
	for {
	tokFilled:
		for {
			tok, err = lexer.PeekNextToken()
			if err != nil {
				return SexpNull, err
			}
			if tok.typ != TokenEnd {
				break tokFilled
			}
			err = parser.GetMoreInput(nil, ErrMoreInputNeeded)
			switch err {
			case ParserHaltRequested:
				return SexpNull, err
			case ResetRequested:
				return SexpEnd, err
			}
			// we still have to fill tok, so
			// loop to the top to PeekNextToken
		}
		// consume it
		//cons, err := lexer.GetNextToken()
		_, err := lexer.GetNextToken()
		if err != nil {
			return nil, err
		}
		//P("parse backtick string is consuming '%v'", cons)
		switch tok.typ {
		case TokenBacktickString:
			return &SexpStr{S: tok.str, backtick: true}, nil
		default:
			panic("internal error: inside a backtick string, we should only see a TokenBacktickString token")
		}
	}
}

// ParseInfix parses everything after an opening '{' up to and
// including the matching '}', wrapping the contents as an (infix ...)
// form.
func (parser *Parser) ParseInfix(depth int) (Sexp, error) {
	lexer := parser.lexer
	arr := make([]Sexp, 0, SliceDefaultCap)
	var err error
	var tok Token
	for {
	getTok:
		for {
			tok, err = lexer.PeekNextToken()
			if err != nil {
				return SexpEnd, err
			}
			if tok.typ != TokenEnd {
				break getTok
			} else {
				// instead of returning (SexpEnd, UnexpectedEnd),
				// we ask for more input, and then loop
				err = parser.GetMoreInput(nil, ErrMoreInputNeeded)
				switch err {
				case ParserHaltRequested:
					return SexpNull, err
				case ResetRequested:
					return SexpEnd, err
				}
			}
		}
		if tok.typ == TokenRCurly {
			// pop off the }
			_, _ = lexer.GetNextToken()
			break
		}
		Q("debug: ParseInfix(depth=%v) calling ParseExpression", depth)
		expr, err := parser.ParseExpression(depth + 1)
		if err != nil {
			return SexpNull, err
		}
		Q("debug2: ParseInfix(depth=%v) appending expr = '%v'", depth, expr.SexpString(nil))
		arr = append(arr, expr)
	}
	var list SexpPair
	list.Head = parser.env.MakeSymbol("infix")
	list.Tail = SexpNull
	if len(arr) > 0 {
		list.Tail = Cons(&SexpArray{Val: arr, Infix: true, Env: parser.env}, SexpNull)
	}
	return &list, nil
	//return &SexpArray{Val: arr, Infix: true, Env: env}, nil
}
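
// Editor's note: per the construction above, an infix block such as
// `{1 + 2}` parses to the two-element list `(infix [1 + 2])`; actually
// evaluating the infix expression is the job of the `infix` form at
// eval time, not of the parser.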