master-thesis/package/test/parser.jl
Gabriel Kronberger bc49b33149 Bugfix
2025-04-18 11:41:11 +02:00

295 lines
8.8 KiB
Julia

## Parser for (ESR) expressions in infix format
# Parser state for the recursive descent parser below.
#
# The input string is lowercased on construction, so parsing is case-insensitive.
# Variable and parameter names are lowercased as well; otherwise a name containing
# upper-case letters could never match an identifier read from the lowercased input.
mutable struct Parser
    const str::AbstractString # string to be parsed (stored lowercased)
    pos::Int64 # current scanner position in str (just past the lookahead symbol)
    sy::Union{AbstractString,Nothing} # current lookahead symbol, nothing at end of input
    const pSy::Symbol # symbol of the parameter vector in generated expressions (:p)
    const xSy::Symbol # symbol of the variable vector in generated expressions (:x)
    const varnames::Vector{<:AbstractString} # lowercased variable names
    const paramnames::Vector{<:AbstractString} # lowercased parameter names
    const coeff::Vector{Float64} # coefficient values collected while parsing
    const numbers_as_parameters::Bool
    const integers_as_constants::Bool # TODO rename and implement as rationals_as_constants

    # The keyword numbers_as_parameters allows coefficient values to be written directly
    # in the expression; these values are then parsed as parameters.
    # In this mode the suffix 'f' marks a number as a constant. E.g. 3 * x ^ 2f creates
    # the parameterized expression a0*x^2 where 2 is a constant value.
    # With integers_as_constants=true, integer-valued numbers are kept as constants too
    # (only valid together with numbers_as_parameters=true).
    #
    # Throws an error when paramnames are given together with numbers_as_parameters=true,
    # or when integers_as_constants=true is set without numbers_as_parameters=true.
    function Parser(str::AbstractString, varnames::Vector{<:AbstractString}, paramnames::Vector{<:AbstractString}; numbers_as_parameters=false, integers_as_constants=false)
        if numbers_as_parameters && length(paramnames) > 0
            error("the parser does not support paramnames when numbers_as_parameters=true")
        end
        if !numbers_as_parameters && integers_as_constants
            error("Set numbers_as_parameters=true to parse integers_as_constants")
        end
        # The scanner only ever sees lowercase text, so the lookup tables must be
        # lowercase too (copies are made; the caller's vectors are not modified).
        lc_varnames = String[lowercase(v) for v in varnames]
        lc_paramnames = String[lowercase(n) for n in paramnames]
        p = new(lowercase(str), 1, nothing, :p, :x, lc_varnames, lc_paramnames,
                Float64[], numbers_as_parameters, integers_as_constants)
        next_symbol!(p) # load the first lookahead symbol
        return p
    end
end
# Recursive descent parser.
# The scanner (next_symbol!) is also defined in this file, further below.
# LL(1) grammar:
# G(Expr):
# Expr      = Term { ('+' | '-') Term }
# Term      = Fact { ('*' | '/') Fact }
# Fact      = { '+' | '-' }
#             ( number | parameter
#             | '(' Expr ')'
#             | ident [ ParamList ]   // variable or function call
#             ) [ ('**' | '^') Fact ]
# ParamList = '(' Expr { ',' Expr } ')'
# Parser entry point
# Parse the infix expression exprStr and return a tuple (expr, coeff):
#   expr  - an anonymous-function expression of the form (x, p) -> body
#   coeff - the coefficient vector collected by the parser while parsing
# varnames / paramnames list the identifiers that are accepted as variables / parameters.
# The keywords are forwarded unchanged to the Parser constructor.
# NOTE(review): the lookahead symbol is not checked after parse_expr! returns, so
# trailing input that is not part of the expression appears to be ignored silently.
function parse_infix(exprStr::AbstractString, varnames::Vector{<:AbstractString}, paramnames::Vector{<:AbstractString};
                     numbers_as_parameters = false, integers_as_constants = false)::Tuple{Expr, Vector{Float64}}
    state = Parser(exprStr, varnames, paramnames; numbers_as_parameters, integers_as_constants)
    body = parse_expr!(state)
    arguments = Expr(:tuple, :x, :p)
    lambda = Expr(:->, arguments, body) # same as :((x,p) -> $body) without the block wrapper
    return (lambda, state.coeff)
end
# Expr = Term { ('+' | '-') Term }
# Parses a left-associative chain of additions / subtractions and returns the
# resulting expression tree.
function parse_expr!(p::Parser)
    acc = parse_term!(p)
    while p.sy == "+" || p.sy == "-"
        op = Symbol(p.sy) # :+ or :-
        next_symbol!(p)
        rhs = parse_term!(p)
        acc = Expr(:call, op, acc, rhs)
    end
    return acc
end
# Term = Fact { ('*' | '/') Fact }
# Parses a left-associative chain of multiplications / divisions and returns the
# resulting expression tree.
function parse_term!(p::Parser)
    acc = parse_factor!(p)
    while p.sy == "*" || p.sy == "/"
        op = Symbol(p.sy) # :* or :/
        next_symbol!(p)
        rhs = parse_factor!(p)
        acc = Expr(:call, op, acc, rhs)
    end
    return acc
end
# Fact      = { '+' | '-' }
#             ( number | parameter
#             | '(' Expr ')'
#             | ident [ ParamList ]   // variable or function call
#             ) [ ('**' | '^') Fact ]
# ParamList = '(' Expr { ',' Expr } ')'
# Parse a single factor: optional unary signs, then an identifier (variable,
# parameter or function call), a number, or a parenthesized expression, optionally
# followed by a power operator ('**' or '^') with a factor as exponent.
# Returns the expression tree (or a plain number for constants).
# Throws an error on undefined identifiers, on malformed input and when the input
# ends where a factor is expected.
function parse_factor!(p::Parser)
    # collapse a run of unary '+' / '-' into a single sign
    sign = 1.0
    while p.sy == "+" || p.sy == "-"
        if p.sy == "-"
            sign = -sign
        end
        next_symbol!(p)
    end

    if isnothing(p.sy)
        # end of input: isident / isnumber only accept strings, so report a parse
        # error here instead of failing with a MethodError
        error("cannot parse expression: unexpected end of input")
    elseif isident(p.sy)
        factor = _parse_ident_factor!(p)
    elseif isnumber(p.sy)
        # numbers are scanned without sign; a sign parsed above is folded into the value
        # NOTE(review): the sign is folded in before a following power operator is
        # applied, so "-2^2" yields (-2.0)^2 rather than -(2.0^2) — confirm this is intended.
        factor = _parse_number_factor!(p, sign)
        sign = 1.0
    elseif p.sy == "("
        next_symbol!(p)
        factor = parse_expr!(p)
        expect_and_next!(p, ")")
    else
        error("cannot parse expression")
    end

    if p.sy == "**" || p.sy == "^"
        next_symbol!(p)
        exponent = parse_factor!(p) # recursion makes the power operator right-associative
        factor = :($factor ^ $exponent) # pow_simpl(factor, exponent)
    end

    return sign == -1 ? :(-$factor) : factor
end

# Helper of parse_factor!: the lookahead is an identifier. Parses a function call
# (identifier followed by a parameter list), a variable reference or a parameter reference.
function _parse_ident_factor!(p::Parser)
    ident = p.sy
    next_symbol!(p)
    if p.sy == "("
        parameters = parse_paramlist!(p)
        if ident == "sqr"
            # convert sqr(x) call to x**2 (so that we don't have to update the interpreters)
            return Expr(:call, func_symbol("pow"), parameters..., 2.0)
        end
        return Expr(:call, func_symbol(ident), parameters...)
    end

    idx = findfirst(==(ident), p.varnames)
    if !isnothing(idx)
        return Expr(:ref, p.xSy, idx)
    end
    if p.numbers_as_parameters
        # no paramnames exist in this mode, so the identifier can only be a variable
        error("undefined variable $ident")
    end
    idx = findfirst(==(ident), p.paramnames)
    if isnothing(idx)
        error("undefined symbol $ident")
    end
    # replace parameter variables with access to coefficient vector (initialized to zero)
    # NOTE(review): one entry is pushed per occurrence while the reference uses the
    # index into paramnames; verify that callers expect this coeff layout.
    push!(p.coeff, 0.0)
    return Expr(:ref, p.pSy, idx)
end

# Helper of parse_factor!: the lookahead is a number. Returns the signed constant, or a
# parameter reference when numbers are parsed as parameters.
function _parse_number_factor!(p::Parser, sign::Float64)
    numStr = p.sy
    if p.numbers_as_parameters
        val = parse(Float64, numStr)
        next_symbol!(p)
        if p.sy == "f"
            # suffix 'f' marks a constant
            next_symbol!(p)
            return sign * val
        elseif p.integers_as_constants && isinteger(val)
            # integers are parsed as constants
            return sign * val
        else
            # everything else becomes a parameter
            return new_param!(p, sign * val)
        end
    end

    # otherwise all numbers are parsed as constants
    next_symbol!(p)
    if p.sy == "//"
        # rational constant: integer '//' integer
        num = tryparse(Int64, numStr)
        isnothing(num) && error("expected an integer numerator before '//' but found $(numStr)")
        next_symbol!(p)
        denom = isnothing(p.sy) ? nothing : tryparse(Int64, p.sy)
        isnothing(denom) && error("expected an integer denominator after '//' at column $(p.pos)")
        next_symbol!(p)
        return sign * (num // denom)
    end
    return sign * parse(Float64, numStr)
end
# ParamList = '(' Expr { ',' Expr } ')'
# Parses a parenthesized, comma-separated list with at least one expression and
# returns the parsed expressions in order.
function parse_paramlist!(p::Parser)::Vector
    expect_and_next!(p, "(")
    args = Any[parse_expr!(p)]
    while p.sy == ","
        next_symbol!(p)
        push!(args, parse_expr!(p))
    end
    expect_and_next!(p, ")")
    return args
end
# Check that the lookahead symbol equals expectedSy and advance to the next symbol;
# throws an error naming the expected symbol otherwise.
function expect_and_next!(p::Parser, expectedSy::AbstractString)
    p.sy == expectedSy || error("expected: $(expectedSy) at column $(p.pos)")
    return next_symbol!(p)
end
# Append val to the coefficient vector of the parser and return an expression that
# references the new entry (indexing the parameter symbol at the new last position).
function new_param!(p::Parser, val::Float64)::Expr
    coefficients = push!(p.coeff, val)
    return Expr(:ref, p.pSy, length(coefficients))
end
# True when s is an identifier: a letter or underscore followed by letters, digits or
# underscores. "nan" and "inf" are excluded (they also parse as Float64 values).
function isident(s::AbstractString)::Bool
    if s == "nan" || s == "inf"
        return false
    end
    return occursin(r"^[_a-zA-Z][_a-zA-Z0-9]*$", s)
end
# True when s can be parsed as a Float64 value.
function isnumber(s::AbstractString)::Bool
    parsed = tryparse(Float64, s)
    return parsed !== nothing
end
# Return the index of str in the variable names of the parser, or nothing when str
# is not a known variable.
function variable_index(p::Parser, str::AbstractString)
    # the field is named varnames (all lowercase); p.varNames does not exist
    return findfirst(==(str), p.varnames)
end
# Map a function name from the input to the symbol used in the generated expression:
# "pow" becomes the power operator :^, every other name is used as is.
function func_symbol(id::AbstractString)
    return id == "pow" ? :^ : Symbol(id)
end
# Scanner: read the next symbol from p.str starting at p.pos, store it in p.sy and
# move p.pos just past it. At the end of the input p.sy is set to nothing.
#
# Symbols are: numbers (optionally with fraction and exponent, also of the form ".5"),
# identifiers (a letter followed by letters, digits or underscores), the operators
# "*", "**", "/" and "//", and any other single character.
#
# Positions are string indices and are advanced with nextind, so input containing
# non-ASCII characters is scanned without indexing errors.
function next_symbol!(p::Parser)
    s = p.str
    stop = lastindex(s)
    pos = p.pos

    # skip whitespace
    while pos <= stop && isspace(s[pos])
        pos = nextind(s, pos)
    end
    if pos > stop
        p.sy = nothing
        p.pos = pos
        return
    end

    c = s[pos]
    if isdigit(c) || (c == '.' && pos < stop && isdigit(s[pos + 1])) # numbers
        # the match starts at pos because the text at pos begins a number
        m = match(r"(\d+([.]\d*)?([eE][+-]?\d+)?|[.]\d+([eE][+-]?\d+)?)", s, pos) # match floating point number
        p.sy = m[1]
        pos += ncodeunits(m[1]) # skip the whole match
    elseif isletter(c) # identifiers
        istart = pos
        iend = pos
        pos = nextind(s, pos)
        while pos <= stop && (isdigit(s[pos]) || isletter(s[pos]) || s[pos] == '_')
            iend = pos
            pos = nextind(s, pos)
        end
        p.sy = s[istart:iend]
    elseif c == '*' || c == '/'
        # "*" or "**", "/" or "//"; only look ahead when another character exists
        pos += 1
        if pos <= stop && s[pos] == c
            p.sy = string(c, c)
            pos += 1
        else
            p.sy = string(c)
        end
    else
        p.sy = string(c) # single character symbol
        pos = nextind(s, pos)
    end
    p.pos = pos
    return pos
end