Files
tablizer/vendor/github.com/glycerine/zygomys/zygo/pratt.go
2024-05-14 12:10:58 +02:00

671 lines
18 KiB
Go

package zygo
import (
"fmt"
"io"
)
// Pratt parsing. see http://javascript.crockford.com/tdop/tdop.html
// Also nice writeup: http://journal.stuffwithstuff.com/2011/03/19/pratt-parsers-expression-parsing-made-easy/
// precedence levels (smaller == lower priority,
// so smaller => goes towards top of tree)
//
// Borrowing from the tdop.html precedence list mostly:
//
// 0 non-binding operators like ;
// 10 assignment operators like = :=
// 20 ?
// 30 or and
// 40 relational operators like ==
// 50 + -
// 60 * /
// 65 **
// 70 unary operators like 'not'
// 80 . [ (
//
// InfixOp lets us attach led (MunchLeft) and nud (MunchRight)
// Pratt parsing methods, along with a binding power, to a symbol.
type InfixOp struct {
Sym *SexpSymbol
Bp int // binding power, aka precedence level.
MunchRight RightMuncher // aka nud
MunchLeft LeftMuncher // aka led
MunchStmt StmtMuncher // aka std. Used only at the beginning of a statement.
IsAssign bool
}
// Infix creates a new infix operator
func (env *Zlisp) Infix(op string, bp int) *InfixOp {
oper := env.MakeSymbol(op)
iop := &InfixOp{
Sym: oper,
Bp: bp,
MunchLeft: func(env *Zlisp, pr *Pratt, left Sexp) (Sexp, error) {
right, err := pr.Expression(env, bp)
if err != nil {
return SexpNull, err
}
list := MakeList([]Sexp{
oper, left, right,
})
Q("in Infix(), MunchLeft() call, pr.NextToken = %v. list returned = '%v'",
pr.NextToken.SexpString(nil), list.SexpString(nil))
return list, nil
},
}
env.infixOps[op] = iop
return iop
}
func (env *Zlisp) InfixF(op string, bp int, f func(env *Zlisp, op string, bp int) *InfixOp) *InfixOp {
return f(env, op, bp)
}
// Infix creates a new (right-associative) short-circuiting
// infix operator, used for `and` and `or` in infix processing.
func (env *Zlisp) Infixr(op string, bp int) *InfixOp {
oper := env.MakeSymbol(op)
iop := &InfixOp{
Sym: oper,
Bp: bp,
MunchLeft: func(env *Zlisp, pr *Pratt, left Sexp) (Sexp, error) {
right, err := pr.Expression(env, bp-1)
if err != nil {
return SexpNull, err
}
list := MakeList([]Sexp{
oper, left, right,
})
return list, nil
},
}
env.infixOps[op] = iop
return iop
}
// Prefix creates a new prefix operator, like `not`, for
// infix processing.
func (env *Zlisp) Prefix(op string, bp int) *InfixOp {
oper := env.MakeSymbol(op)
iop := &InfixOp{
Sym: oper,
Bp: bp,
MunchRight: func(env *Zlisp, pr *Pratt) (Sexp, error) {
right, err := pr.Expression(env, bp)
if err != nil {
return SexpNull, err
}
list := MakeList([]Sexp{
oper, right,
})
return list, nil
},
}
env.infixOps[op] = iop
return iop
}
// Assignment creates a new assignment operator for infix
// processing.
func (env *Zlisp) Assignment(op string, bp int) *InfixOp {
oper := env.MakeSymbol(op)
operSet := env.MakeSymbol("set")
iop := &InfixOp{
Sym: oper,
Bp: bp,
MunchLeft: func(env *Zlisp, pr *Pratt, left Sexp) (Sexp, error) {
// TODO: check that left is okay as an LVALUE.
right, err := pr.Expression(env, bp-1)
if err != nil {
return SexpNull, err
}
if op == "=" || op == ":=" {
oper = operSet
}
list := MakeList([]Sexp{
oper, left, right,
})
Q("assignment returning list: '%v'", list.SexpString(nil))
return list, nil
},
IsAssign: true,
}
env.infixOps[op] = iop
return iop
}
// PostfixAssign creates a new postfix assignment operator for infix
// processing.
func (env *Zlisp) PostfixAssign(op string, bp int) *InfixOp {
oper := env.MakeSymbol(op)
iop := &InfixOp{
Sym: oper,
Bp: bp,
MunchLeft: func(env *Zlisp, pr *Pratt, left Sexp) (Sexp, error) {
// TODO: check that left is okay as an LVALUE
list := MakeList([]Sexp{
oper, left,
})
Q("postfix assignment returning list: '%v'", list.SexpString(nil))
return list, nil
},
}
env.infixOps[op] = iop
return iop
}
func arrayOpMunchLeft(env *Zlisp, pr *Pratt, left Sexp) (Sexp, error) {
oper := env.MakeSymbol("arrayidx")
Q("pr.NextToken = '%v', left = %#v", pr.NextToken.SexpString(nil), left)
if len(pr.CnodeStack) > 0 {
Q("pr.CnodeStack[0] = '%v'", pr.CnodeStack[0])
}
right := pr.NextToken
Q("right = %#v", right)
list := MakeList([]Sexp{
oper, left, pr.CnodeStack[0],
})
return list, nil
}
func dotOpMunchLeft(env *Zlisp, pr *Pratt, left Sexp) (Sexp, error) {
//Q("dotOp MunchLeft, left = '%v'. NextToken='%v'. pr.CnodeStack[0]='%v'", left.SexpString(nil), pr.NextToken.SexpString(nil), pr.CnodeStack[0].SexpString(nil))
list := MakeList([]Sexp{
env.MakeSymbol("hashidx"), left, pr.CnodeStack[0],
})
return list, nil
}
func starOpMunchRight(env *Zlisp, pr *Pratt) (Sexp, error) {
right, err := pr.Expression(env, 70)
if err != nil {
return SexpNull, err
}
list := MakeList([]Sexp{
env.MakeSymbol("*"), right,
})
return list, nil
}
var arrayOp *InfixOp
// InitInfixOps establishes the env.infixOps definitions
// required for infix parsing using the Pratt parser.
func (env *Zlisp) InitInfixOps() {
env.Infix("+", 50)
env.Infix("-", 50)
star := env.Infix("*", 60)
star.MunchRight = starOpMunchRight
env.Infix("/", 60)
env.Infix("mod", 60)
env.Infixr("**", 65)
env.Infixr("and", 30)
env.Infixr("or", 30)
env.Prefix("not", 70)
env.Assignment("=", 10)
env.Assignment(":=", 10)
env.Assignment("+=", 10)
env.Assignment("-=", 10)
env.PostfixAssign("++", 10)
env.PostfixAssign("--", 10)
env.Infix("==", 40)
env.Infix("!=", 40)
env.Infix(">", 40)
env.Infix(">=", 40)
env.Infix("<", 40)
env.Infix("<=", 40)
// set the global arrayOp
arrayOp = &InfixOp{
Bp: 80,
MunchLeft: arrayOpMunchLeft,
}
dotOp := env.Infix(".", 80)
dotOp.MunchLeft = dotOpMunchLeft
ifOp := env.Prefix("if", 5)
//Q("ifOp = %#v", ifOp.SexpString(nil))
ifOp.MunchRight = func(env *Zlisp, pr *Pratt) (Sexp, error) {
Q("ifOp.MunchRight(): NextToken='%v'. pr.CnodeStack[0]='%v'", pr.NextToken.SexpString(nil), pr.CnodeStack[0].SexpString(nil))
right, err := pr.Expression(env, 5)
Q("ifOp.MunchRight: back from Expression-1st-call, err = %#v, right = '%v'", err, right.SexpString(nil))
if err != nil {
return SexpNull, err
}
Q("in ifOpMunchRight, got from p.Expression(env, 0) the right = '%v', err = %#v, pr.CnodeStack[0] = %#v, ifOp.Sym = '%v'", right.SexpString(nil), err, pr.CnodeStack[0], ifOp.Sym.SexpString(nil))
thenExpr, err := pr.Expression(env, 0)
Q("ifOp.MunchRight: back from Expression-2nd-call, err = %#v, thenExpr = '%v'", err, thenExpr.SexpString(nil))
if err != nil {
return SexpNull, err
}
Q("ifOp.MunchRight(), after Expression-2nd-call: . NextToken='%v'. pr.CnodeStack[0]='%v'", pr.NextToken.SexpString(nil), pr.CnodeStack[0].SexpString(nil))
var elseExpr Sexp = SexpNull
switch sym := pr.NextToken.(type) {
case *SexpSymbol:
if sym.name == "else" {
Q("detected else, advancing past it")
pr.Advance()
elseExpr, err = pr.Expression(env, 0)
Q("ifOp.MunchRight: back from Expression-3rd-call, err = %#v, elseExpr = '%v'", err, elseExpr.SexpString(nil))
if err != nil {
return SexpNull, err
}
}
}
list := MakeList([]Sexp{
env.MakeSymbol("cond"), right, thenExpr, elseExpr,
})
return list, nil
}
env.Infix("comma", 15)
}
type RightMuncher func(env *Zlisp, pr *Pratt) (Sexp, error)
type LeftMuncher func(env *Zlisp, pr *Pratt, left Sexp) (Sexp, error)
type StmtMuncher func(env *Zlisp, pr *Pratt) (Sexp, error)
func InfixBuilder(env *Zlisp, name string, args []Sexp) (Sexp, error) {
Q("InfixBuilder top, name='%s', len(args)==%v ", name, len(args))
if name != "infixExpand" && len(args) != 1 {
// let {} mean nil
return SexpNull, nil
}
var arr *SexpArray
Q("InfixBuilder after top, args[0] has type ='%T' ", args[0])
switch v := args[0].(type) {
case *SexpArray:
arr = v
case *SexpPair:
if name == "infixExpand" {
_, isSent := v.Tail.(*SexpSentinel)
if isSent {
// expansion of {} is nil
return SexpNull, nil
}
pair, isPair := v.Tail.(*SexpPair)
if !isPair {
return SexpNull, fmt.Errorf("infixExpand expects (infix []) as its argument; instead we saw '%T' [err 3]", v.Tail)
}
switch ar2 := pair.Head.(type) {
case *SexpArray:
Q("infixExpand, doing recursive call to InfixBuilder, ar2 = '%v'", ar2.SexpString(nil))
return InfixBuilder(env, name, []Sexp{ar2})
default:
return SexpNull, fmt.Errorf("infixExpand expects (infix []) as its argument; instead we saw '%T'", v.Tail)
}
}
return SexpNull, fmt.Errorf("InfixBuilder must receive an SexpArray")
default:
return SexpNull, fmt.Errorf("InfixBuilder must receive an SexpArray")
}
Q("InfixBuilder, name='%s', arr = ", name)
for i := range arr.Val {
Q("arr[%v] = '%v', of type %T", i, arr.Val[i].SexpString(nil), arr.Val[i])
}
pr := NewPratt(arr.Val)
xs := []Sexp{}
if name == "infixExpand" {
xs = append(xs, env.MakeSymbol("quote"))
}
for {
x, err := pr.Expression(env, 0)
if err != nil {
return SexpNull, err
}
if x == nil {
Q("x was nil")
} else {
Q("x back is not nil and is of type %T/val = '%v', err = %v", x, x.SexpString(nil), err)
}
_, isSemi := x.(*SexpSemicolon)
if !isSemi {
xs = append(xs, x)
}
Q("end of infix builder loop, pr.NextToken = '%v'", pr.NextToken.SexpString(nil))
if pr.IsEOF() {
break
}
_, nextIsSemi := pr.NextToken.(*SexpSemicolon)
if nextIsSemi {
pr.Advance() // skip over the semicolon
}
}
Q("infix builder loop done, here are my expressions:")
for i, ele := range xs {
Q("xs[%v] = %v", i, ele.SexpString(nil))
}
if name == "infixExpand" {
ret := MakeList(xs)
Q("infixExpand: returning ret = '%v'", ret.SexpString(nil))
return ret, nil
}
ev, err := EvalFunction(env, "infixEval", xs)
if err != nil {
return SexpNull, err
}
return ev, nil
}
type Pratt struct {
NextToken Sexp
CnodeStack []Sexp
AccumTree Sexp
Pos int
Stream []Sexp
}
func NewPratt(stream []Sexp) *Pratt {
p := &Pratt{
NextToken: SexpNull,
AccumTree: SexpNull,
CnodeStack: make([]Sexp, 0),
Stream: stream,
}
if len(stream) > 0 {
p.NextToken = stream[0]
}
return p
}
// Expression():
//
// From Douglas Crockford's article on Pratt parsing:
// "Top Down Operator Precedence"
// http://javascript.crockford.com/tdop/tdop.html
//
// The heart of Pratt's technique is the expression
// function. It takes a right binding power that
// controls how aggressively it binds to tokens on its right.
// expression calls the nud method of the token.
//
// The nud is used to process literals, variables,
// and prefix operators.
//
// Then as long as the right binding
// power is less than the left binding power of the next
// token, the led method is invoked on the following
// token. The led is used to process infix and
// suffix operators. This process can be recursive
// because the nud and led
// methods can call expression.
//
// In pseudo Java script:
//
// var expression = function (rbp) {
// var left;
// var t = token;
// advance();
// left = t.nud();
// while (rbp < token.lbp) {
// t = token;
// advance();
// left = t.led(left);
// }
// return left;
// }
//
// jea: Below is a working expression() parsing routine. Reproduces the
// original Pratt and Crockford formulation.
//
// AccumTree holds the accumulated parse tree at any point in time.
// "The parse Tree Up to this point, by consuming the tokens
// to the left" would be a better-but-too-long name.
//
// and AccumTree is the stuff to the left of the
// current operator in the parse stream.
//
// data flows from NextToken -> cnode -> (possibly on the stack of t
// recursive MunchLeft calls) -> into the AccumTree tree.
//
// better names: _left -> AccumTree (to be returned)
// t -> cnode; as it is the current token's qtree
// node to be processed. Once we grab this
// we always advance() past it
// before processing it, so that
// NextToken contains the
// following token.
//
//
// meaning of rbp parameter: if you hit a token with
// a NextToken.Lbp < rbp, then don't bother calling MunchLeft,
// stop and return what you have.
//
// better explanation: rbp = a lower bound on descendant nodes
// precedence level, so we can
// guarantee the precedence-hill-climbing property (small precedence
// at the top) in the resulting parse tree.
//
func (p *Pratt) Expression(env *Zlisp, rbp int) (ret Sexp, err error) {
defer func() {
if ret == nil {
Q("Expression is returning Sexp ret = nil")
} else {
Q("Expression is returning Sexp ret = '%v'", ret.SexpString(nil))
}
}()
cnode := p.NextToken
if cnode != nil {
Q("top of Expression, rbp = %v, cnode = '%v'", rbp, cnode.SexpString(nil))
} else {
Q("top of Expression, rbp = %v, cnode is nil", rbp)
}
if p.IsEOF() {
Q("Expression sees IsEOF, returning cnode = %v", cnode.SexpString(nil))
return cnode, nil
}
p.CnodeStack = append([]Sexp{p.NextToken}, p.CnodeStack...)
//p.ShowCnodeStack()
p.Advance()
var curOp *InfixOp
switch x := cnode.(type) {
case *SexpSymbol:
op, found := env.infixOps[x.name]
if found {
Q("Expression lookup of op.Sym=%v/op='%#v' succeeded", op.Sym.SexpString(nil), op)
curOp = op
} else {
Q("Expression lookup of x.name == '%v' failed", x.name)
}
case *SexpArray:
Q("in pratt parsing, got array x = '%v'", x.SexpString(nil))
}
if curOp != nil && curOp.MunchRight != nil {
// munch_right() of atoms returns this/itself, in which
// case: p.AccumTree = t; is the result.
Q("about to MunchRight on cnode = %v", cnode.SexpString(nil))
p.AccumTree, err = curOp.MunchRight(env, p)
if err != nil {
Q("Expression(%v) MunchRight saw err = %v", rbp, err)
return SexpNull, err
}
Q("after MunchRight on cnode = %v, p.AccumTree = '%v'",
cnode.SexpString(nil), p.AccumTree.SexpString(nil))
} else {
// do this, or have the default MunchRight return itself.
p.AccumTree = cnode
}
for !p.IsEOF() {
nextLbp, err := env.LeftBindingPower(p.NextToken)
if err != nil {
Q("env.LeftBindingPower('%s') saw err = %v",
p.NextToken.SexpString(nil), err)
return SexpNull, err
}
Q("nextLbp = %v, and rbp = %v, so rpb >= nextLbp == %v", nextLbp, rbp, rbp >= nextLbp)
if rbp >= nextLbp {
Q("found rbp >= nextLbp so breaking out of left-binding loop")
break
}
cnode = p.NextToken
curOp = nil
switch x := cnode.(type) {
case *SexpSymbol:
op, found := env.infixOps[x.name]
if found {
Q("assigning curOp <- cnode '%s'", x.name)
curOp = op
} else {
if x.isDot {
curOp = env.infixOps["."]
Q("assigning curOp <- dotInfixOp; then curOp = %#v", curOp)
}
}
case *SexpArray:
Q("assigning curOp <- arrayOp")
curOp = arrayOp
case *SexpComma:
curOp = env.infixOps["comma"]
Q("assigning curOp <- infixOps[`comma`]; then curOp = %#v", curOp)
case *SexpPair:
// sexp-call, treat like function call with rbp 80
Q("Expression sees an SexpPair")
// leaving curOp nil seems to work just fine here.
default:
panic(fmt.Errorf("how to handle cnode type = %#v", cnode))
}
Q("curOp = %#v", curOp)
p.CnodeStack[0] = p.NextToken
//_cnode_stack.front() = NextToken;
Q("in MunchLeft loop, before Advance, p.NextToken = %v",
p.NextToken.SexpString(nil))
p.Advance()
if p.Pos < len(p.Stream) {
Q("in MunchLeft loop, after Advance, p.NextToken = %v",
p.NextToken.SexpString(nil))
}
// if cnode->munch_left() returns this/itself, then
// the net effect is: p.AccumTree = cnode;
if curOp != nil && curOp.MunchLeft != nil {
Q("about to MunchLeft, cnode = %v, p.AccumTree = %v", cnode.SexpString(nil), p.AccumTree.SexpString(nil))
p.AccumTree, err = curOp.MunchLeft(env, p, p.AccumTree)
if err != nil {
Q("curOp.MunchLeft saw err = %v", err)
return SexpNull, err
}
} else {
Q("curOp has not MunchLeft, setting AccumTree <- cnode. here cnode = %v", cnode.SexpString(nil))
// do this, or have the default MunchLeft return itself.
p.AccumTree = cnode
}
} // end for !p.IsEOF()
p.CnodeStack = p.CnodeStack[1:]
//_cnode_stack.pop_front()
Q("at end of Expression(%v), returning p.AccumTree=%v, err=nil", rbp, p.AccumTree.SexpString(nil))
return p.AccumTree, nil
}
// Advance sets p.NextToken
func (p *Pratt) Advance() error {
p.Pos++
if p.Pos >= len(p.Stream) {
return io.EOF
}
p.NextToken = p.Stream[p.Pos]
Q("end of Advance, p.NextToken = '%v'", p.NextToken.SexpString(nil))
return nil
}
func (p *Pratt) IsEOF() bool {
if p.Pos >= len(p.Stream) {
return true
}
return false
}
func (env *Zlisp) LeftBindingPower(sx Sexp) (int, error) {
Q("LeftBindingPower: sx is '%v'", sx.SexpString(nil))
switch x := sx.(type) {
case *SexpInt:
return 0, nil
case *SexpBool:
return 0, nil
case *SexpStr:
return 0, nil
case *SexpSymbol:
op, found := env.infixOps[x.name]
if x.name == "if" {
// we don't want if to be doing any binding to the left,
// so we enforce that it has zero left-binding power. It
// gets a right-binding power of 5 since it is a prefix operator.
Q("LeftBindingPower: found if, return 0 left-binding-power")
return 0, nil
}
if found {
Q("LeftBindingPower: found op '%#v', returning op.Bp = %v", op, op.Bp)
return op.Bp, nil
}
if x.isDot {
Q("LeftBindingPower: dot symbol '%v', "+
"giving it binding-power 80", x.name)
return 80, nil
}
Q("LeftBindingPower: no entry in env.infixOps for operation '%s'",
x.name)
return 0, nil
case *SexpArray:
return 80, nil
case *SexpComma:
return 15, nil
case *SexpSemicolon:
return 0, nil
case *SexpComment:
return 0, nil
case *SexpPair:
if x.Head != nil {
switch sym := x.Head.(type) {
case *SexpSymbol:
if sym.name == "infix" {
Q("detected infix!!! -- setting binding power to 0")
return 0, nil
}
}
}
return 0, nil
}
return 0, fmt.Errorf("LeftBindingPower: unhandled sx :%#v", sx)
}
func (p *Pratt) ShowCnodeStack() {
if len(p.CnodeStack) == 0 {
fmt.Println("CnodeStack is: empty")
return
}
fmt.Println("CnodeStack is:")
for i := range p.CnodeStack {
fmt.Printf("CnodeStack[%v] = %v\n", i, p.CnodeStack[i].SexpString(nil))
}
}