295 lines
8.8 KiB
Julia
295 lines
8.8 KiB
Julia
## Parser for (ESR) expressions in infix format
|
|
|
|
"""
    Parser(str, varnames, paramnames; numbers_as_parameters=false, integers_as_constants=false)

Mutable state of the recursive descent parser for infix expressions.

The input string as well as the variable and parameter names are lowercased, so
identifier matching is case-insensitive. The constructor reads the first symbol,
so `sy` already holds the first lookahead symbol when it returns.

# Keywords
- `numbers_as_parameters`: numeric literals in the expression become entries of the
  coefficient vector. In this mode the suffix `f` marks a literal as a constant,
  e.g. `3 * x ^ 2f` creates the parameterized expression `a0*x^2` with `2` a constant.
  Cannot be combined with a non-empty `paramnames`.
- `integers_as_constants`: integer-valued literals stay constants; requires
  `numbers_as_parameters=true`.
"""
mutable struct Parser
    const str::AbstractString # lowercased string to be parsed
    pos::Int64 # position in `str` just past the current lookahead symbol
    sy::Union{AbstractString,Nothing} # current lookahead symbol (`nothing` at end of input)
    const pSy::Symbol # symbol used for the parameter vector in generated expressions
    const xSy::Symbol # symbol used for the variable vector in generated expressions
    const varnames::Vector{<:AbstractString}
    const paramnames::Vector{<:AbstractString}
    const coeff::Vector{Float64} # coefficient values collected while parsing
    const numbers_as_parameters::Bool
    const integers_as_constants::Bool # TODO rename and implement as rationals_as_constants

    function Parser(str::AbstractString, varnames::Vector{<:AbstractString}, paramnames::Vector{<:AbstractString}; numbers_as_parameters=false, integers_as_constants=false)
        if numbers_as_parameters && length(paramnames) > 0
            error("the parser does not support paramnames when numbers_as_parameters=true")
        end
        if !numbers_as_parameters && integers_as_constants
            error("Set numbers_as_parameters=true to parse integers_as_constants")
        end

        # The expression is lowercased before scanning, so the names have to be
        # lowercased as well; otherwise a name containing an uppercase letter could
        # never match a scanned identifier. The typed comprehensions also copy the
        # vectors, so the caller's arrays are not aliased.
        lcvarnames = String[lowercase(name) for name in varnames]
        lcparamnames = String[lowercase(name) for name in paramnames]

        p = new(lowercase(str), 1, nothing, :p, :x, lcvarnames, lcparamnames, Float64[], numbers_as_parameters, integers_as_constants)
        next_symbol!(p) # load the first lookahead symbol
        return p
    end
end
|
|
|
|
# recursive descent parser
|
|
# scanner is also defined in this file
|
|
|
|
# LL(1) grammar:
|
|
# G(Expr):
|
|
# Expr = Term { ('+' | '-') Term }
|
|
# Term = Fact { ('*' | '/') Fact }
|
|
# Fact = { '+' | '-' }
|
|
# (ident | number | parameter
|
|
# | '(' Expr ')'
|
|
# | ident ParamList // function call
|
|
# ) [ ('**' | '^') Fact ]
|
|
# ParamList = '(' Expr { ',' Expr } ')'
|
|
|
|
|
|
|
|
# parser entry point (the scanner `next_symbol!` is defined further below)
|
|
|
|
|
|
"""
    parse_infix(exprStr, varnames, paramnames; numbers_as_parameters=false, integers_as_constants=false)

Parse the infix expression `exprStr` and return the tuple `(expr, coeff)`.

`expr` is the Julia expression of an anonymous function `(x, p) -> body`, and
`coeff` is the coefficient vector collected by the parser. The keyword arguments
are forwarded unchanged to the `Parser` constructor.

Throws an error when the expression cannot be parsed or when input remains after
a complete expression has been read.
"""
function parse_infix(exprStr::AbstractString, varnames::Vector{<:AbstractString}, paramnames::Vector{<:AbstractString};
                     numbers_as_parameters = false, integers_as_constants = false)::Tuple{Expr, Vector{Float64}}
    parser = Parser(exprStr, varnames, paramnames;
                    numbers_as_parameters = numbers_as_parameters, integers_as_constants = integers_as_constants)
    body = parse_expr!(parser)

    # parse_expr! stops at the first symbol that cannot continue the expression.
    # Anything left over means the input is malformed; do not silently drop it.
    if !isnothing(parser.sy)
        error("unexpected symbol '$(parser.sy)' before column $(parser.pos)")
    end

    expr = Expr(:->, Expr(:tuple, parser.xSy, parser.pSy), body) # :((x,p) -> $body)
    return (expr, parser.coeff)
end
|
|
|
|
"""
    parse_expr!(p::Parser)

Parse `Expr = Term { ('+' | '-') Term }` and return the resulting expression.
Terms are combined left to right into nested binary `+` / `-` calls.
"""
function parse_expr!(p::Parser)
    acc = parse_term!(p)
    while p.sy == "+" || p.sy == "-"
        op = p.sy == "+" ? :+ : :-
        next_symbol!(p)
        rhs = parse_term!(p)
        acc = Expr(:call, op, acc, rhs) # same shape as :(\$acc + \$rhs) / :(\$acc - \$rhs)
    end
    return acc
end
|
|
|
|
"""
    parse_term!(p::Parser)

Parse `Term = Fact { ('*' | '/') Fact }` and return the resulting expression.
Factors are combined left to right into nested binary `*` / `/` calls.
"""
function parse_term!(p::Parser)
    acc = parse_factor!(p)
    while p.sy == "*" || p.sy == "/"
        op = p.sy == "*" ? :* : :/
        next_symbol!(p)
        rhs = parse_factor!(p)
        acc = Expr(:call, op, acc, rhs) # same shape as :(\$acc * \$rhs) / :(\$acc / \$rhs)
    end
    return acc
end
|
|
|
|
# Fact = { '+' | '-' }
|
|
# (constant | parameter
|
|
# | '(' Expr ')'
|
|
# | ident [ ParamList ] // variable or function call
|
|
# ) [ ('**' | '^') Fact ]
|
|
# ParamList = '(' Expr { ',' Expr } ')'
|
|
|
|
"""
    parse_factor!(p::Parser)

Parse a factor: any number of leading `+`/`-` signs followed by a variable, a named
parameter, a function call, a numeric literal or a parenthesized expression, with an
optional trailing power (`**` or `^`) whose exponent is itself a factor.

Returns an `Expr` or a numeric constant. Named parameters become `p[idx]`, and the
coefficient vector is grown with zeros so that `idx` is a valid index. With
`numbers_as_parameters`, literals become new coefficients unless they carry the
suffix `f` or are integer-valued and `integers_as_constants` is set.

Throws an error on undefined identifiers, on an unexpected symbol and on premature
end of input.
"""
function parse_factor!(p::Parser)
    # collapse all leading unary signs into a single factor of +1.0 / -1.0
    sign = 1.0
    while p.sy == "+" || p.sy == "-"
        if p.sy == "-"
            sign = -sign
        end
        next_symbol!(p)
    end

    # At end of input the lookahead is `nothing`; isident/isnumber only accept
    # strings, so report a parse error instead of failing with a MethodError.
    if isnothing(p.sy)
        error("cannot parse expression: unexpected end of input")
    end

    factor = 1.0

    if isident(p.sy)
        ident = p.sy
        next_symbol!(p)
        if p.sy == "("
            parameters = parse_paramlist!(p)

            if ident == "sqr"
                # convert sqr(x) call to x**2 (so that we don't have to update the interpreters)
                factor = Expr(:call, func_symbol("pow"), parameters..., 2.0)
            else
                factor = Expr(:call, func_symbol(ident), parameters...)
            end
        else
            idx = findfirst(==(ident), p.varnames)
            if !isnothing(idx)
                factor = Expr(:ref, p.xSy, idx)
            elseif !p.numbers_as_parameters # only if paramnames are given
                idx = findfirst(==(ident), p.paramnames)

                # replace parameter variables with access to coefficient vector (initialized to zero)
                if !isnothing(idx)
                    factor = Expr(:ref, p.pSy, idx)
                    # Make sure p[idx] exists, whatever the order or number of
                    # occurrences of the parameter in the expression.
                    while length(p.coeff) < idx
                        push!(p.coeff, 0.0)
                    end
                else
                    error("undefined symbol $ident")
                end
            else
                error("undefined variable $ident")
            end
        end

    elseif isnumber(p.sy)
        # NOTE(review): in all branches below the sign is folded into the literal
        # before a trailing power is applied, so `-2^3` is built as `(-2.0)^3`,
        # whereas `-x^3` is built as `-(x^3)`. Confirm that this is intended.
        if p.numbers_as_parameters
            numStr = p.sy
            val = parse(Float64, numStr)
            next_symbol!(p)
            if p.sy == "f"
                # constant
                factor = sign * val # numbers are parsed without sign, so a sign parsed above is included here
                sign = 1.0
                next_symbol!(p)
            elseif p.integers_as_constants && isinteger(val)
                # integers are parsed as constants
                factor = sign * val
                sign = 1.0
            else
                # parameter
                factor = new_param!(p, sign * val)
                sign = 1.0
            end
        else
            # otherwise all numbers are parsed as constants
            numStr = p.sy
            next_symbol!(p)

            if p.sy == "//"
                num = parse(Int64, numStr)
                next_symbol!(p)
                if isnothing(p.sy)
                    error("cannot parse expression: expected denominator after //")
                end
                denom = parse(Int64, p.sy)
                val = num // denom
                next_symbol!(p)
            else
                val = parse(Float64, numStr)
                if p.sy == "f"
                    # every number is a constant in this mode; accept the constant
                    # marker so it is not left behind as trailing input
                    next_symbol!(p)
                end
            end

            # NOTE(review): `sign` is a Float64, so a Rational `val` is promoted
            # to Float64 by this product.
            factor = sign * val
            sign = 1.0
        end

    elseif p.sy == "("
        next_symbol!(p)
        factor = parse_expr!(p)
        expect_and_next!(p, ")")

    else
        error("cannot parse expression")
    end

    if p.sy == "**" || p.sy == "^"
        next_symbol!(p)
        exponent = parse_factor!(p)
        factor = :($factor ^ $exponent) # pow_simpl(factor, exponent)
    end

    # a sign that was not folded into a literal negates the whole factor
    if sign == -1
        return :(-$factor)
    else
        return factor
    end
end
|
|
|
|
"""
    parse_paramlist!(p::Parser)

Parse `ParamList = '(' Expr { ',' Expr } ')'` and return the parsed argument
expressions as a `Vector{Any}`. At least one argument is required.
"""
function parse_paramlist!(p::Parser)::Vector
    expect_and_next!(p, "(")
    args = Any[parse_expr!(p)]
    while p.sy == ","
        next_symbol!(p) # skip the separator
        push!(args, parse_expr!(p))
    end
    expect_and_next!(p, ")")
    return args
end
|
|
|
|
"""
    expect_and_next!(p::Parser, expectedSy)

Check that the current lookahead symbol equals `expectedSy` and advance to the next
symbol; throw an error naming the expected symbol otherwise.
"""
function expect_and_next!(p::Parser, expectedSy::AbstractString)
    p.sy == expectedSy || error("expected: $(expectedSy) at column $(p.pos)")
    return next_symbol!(p)
end
|
|
|
|
"""
    new_param!(p::Parser, val::Float64)

Append `val` to the coefficient vector of `p` and return the expression that
indexes the parameter vector at the position of the new entry.
"""
function new_param!(p::Parser, val::Float64)::Expr
    coeffs = p.coeff
    push!(coeffs, val)
    return Expr(:ref, p.pSy, lastindex(coeffs))
end
|
|
|
|
|
|
"""
    isident(s::AbstractString)

Return `true` if `s` is an identifier: a letter or underscore followed by letters,
digits or underscores. The words `nan` and `inf` are not identifiers.
"""
function isident(s::AbstractString)::Bool
    if s == "nan" || s == "inf"
        return false
    end
    return occursin(r"^[_a-zA-Z][_a-zA-Z0-9]*$", s)
end
|
|
|
|
"""
    isnumber(s::AbstractString)

Return `true` if `s` can be parsed as a `Float64`.
"""
function isnumber(s::AbstractString)::Bool
    parsed = tryparse(Float64, s)
    return parsed !== nothing
end
|
|
|
|
"""
    variable_index(p::Parser, str)

Return the index of the first entry of `p.varnames` equal to `str`, or `nothing`
if there is none.
"""
function variable_index(p::Parser, str::AbstractString)
    # the field is `varnames`; `varNames` does not exist on Parser
    return findfirst(==(str), p.varnames)
end
|
|
|
|
"""
    func_symbol(id::AbstractString)

Return the symbol used to call the function named `id`: `pow` maps to `:^`, any
other name maps to the symbol of the same name.
"""
function func_symbol(id::AbstractString)
    return id == "pow" ? :^ : Symbol(id)
end
|
|
|
|
"""
    next_symbol!(p::Parser)

Scanner: skip whitespace, read the next symbol of `p.str` starting at `p.pos`,
store it in `p.sy` and move `p.pos` just past it. At end of input `p.sy` is set
to `nothing`. Always returns `nothing`.

Recognized symbols are floating point numbers (also with a leading decimal point),
identifiers (a letter followed by letters, digits or underscores), the operators
`*`, `**`, `/`, `//`, and any other single character.
"""
function next_symbol!(p::Parser)
    s = p.str
    pos = p.pos
    stop = lastindex(s)

    # skip whitespace; positions are advanced with nextind so that multi-byte
    # characters do not produce invalid string indices
    while pos <= stop && isspace(s[pos])
        pos = nextind(s, pos)
    end

    if pos > stop
        p.sy = nothing
        p.pos = pos
        return nothing
    end

    c = s[pos]
    nxt = nextind(s, pos)

    if isdigit(c) || (c == '.' && nxt <= stop && isdigit(s[nxt])) # numbers
        # The first character starts a number, so the leftmost match begins at pos.
        m = match(r"(\d+([.]\d*)?([eE][+-]?\d+)?|[.]\d+([eE][+-]?\d+)?)", s, pos) # match floating point number
        p.sy = m[1]
        pos += ncodeunits(m[1]) # skip the whole match
    elseif isletter(c) # identifiers
        start = pos
        pos = nxt
        while pos <= stop && (isdigit(s[pos]) || isletter(s[pos]) || s[pos] == '_')
            pos = nextind(s, pos)
        end
        p.sy = String(SubString(s, start, prevind(s, pos)))
    elseif c == '*' || c == '/'
        # a doubled operator character forms the two-character symbols ** and //;
        # the bounds check covers an operator at the very end of the input
        if nxt <= stop && s[nxt] == c
            p.sy = string(c, c)
            pos = nextind(s, nxt)
        else
            p.sy = string(c)
            pos = nxt
        end
    else
        p.sy = string(c) # single character symbol
        pos = nxt
    end

    p.pos = pos
    return nothing
end
|