## Parser for (ESR) expressions in infix format

"""
    Parser(str, varnames, paramnames; numbers_as_parameters=false, integers_as_constants=false)

State of the recursive-descent parser for infix expressions.

The input `str` is lowercased before scanning. The constructor reads the first
lookahead symbol, so a freshly constructed parser is ready for `parse_expr!`.

# Keywords
- `numbers_as_parameters`: numeric literals in the expression become entries of the
  coefficient vector. In this mode the suffix `f` marks a constant, e.g.
  `3 * x ^ 2f` yields the parameterized expression `p[1] * x[1]^2` with `2` a constant.
- `integers_as_constants`: with `numbers_as_parameters=true`, integer-valued literals
  are kept as constants instead of becoming parameters.

# Throws
`ErrorException` if `paramnames` is non-empty while `numbers_as_parameters=true`, or if
`integers_as_constants=true` while `numbers_as_parameters=false`.
"""
mutable struct Parser
    const str::AbstractString # string to be parsed (lowercased)
    pos::Int64 # index of the next unread character in str
    sy::Union{AbstractString,Nothing} # current lookahead symbol (nothing at end of input)
    const pSy::Symbol # name of the parameter vector in generated expressions
    const xSy::Symbol # name of the variable vector in generated expressions
    const varnames::Vector{<:AbstractString}
    const paramnames::Vector{<:AbstractString}
    const coeff::Vector{Float64} # coefficient values collected while parsing
    const numbers_as_parameters::Bool
    const integers_as_constants::Bool # TODO rename and implement as rationals_as_constants

    function Parser(str::AbstractString, varnames::Vector{<:AbstractString}, paramnames::Vector{<:AbstractString};
                    numbers_as_parameters=false, integers_as_constants=false)
        if numbers_as_parameters && length(paramnames) > 0
            error("the parser does not support paramnames when numbers_as_parameters=true")
        end
        if !numbers_as_parameters && integers_as_constants
            error("Set numbers_as_parameters=true to parse integers_as_constants")
        end
        p = new(lowercase(str), 1, nothing, :p, :x, varnames, paramnames, Vector{Float64}(),
                numbers_as_parameters, integers_as_constants)
        next_symbol!(p)
        return p
    end
end

# recursive descent parser
# scanner is also defined in this file (see next_symbol!)
# LL(1) grammar:
# G(Expr):
# Expr      = Term { ('+' | '-') Term }
# Term      = Fact { ('*' | '/') Fact }
# Fact      = { '+' | '-' }
#             (ident | number | parameter
#              | '(' Expr ')'
#              | ident ParamList // function call
#             ) [ ('**' | '^') Fact ]
# ParamList = '(' Expr { ',' Expr } ')'

"""
    parse_infix(exprStr, varnames, paramnames; numbers_as_parameters=false, integers_as_constants=false)

Parse the infix expression `exprStr` and return `(expr, coeff)`, where `expr` is the
anonymous-function expression `(x, p) -> body` and `coeff` is the coefficient vector
collected while parsing.

Variables are replaced by `x[i]` (index into `varnames`) and parameters by `p[i]`.

# Throws
`ErrorException` on any syntax error, including input left over after a complete
expression has been parsed.
"""
function parse_infix(exprStr::AbstractString, varnames::Vector{<:AbstractString}, paramnames::Vector{<:AbstractString};
                     numbers_as_parameters = false, integers_as_constants = false)::Tuple{Expr, Vector{Float64}}
    parser = Parser(exprStr, varnames, paramnames;
                    numbers_as_parameters = numbers_as_parameters,
                    integers_as_constants = integers_as_constants)
    body = parse_expr!(parser)
    # A complete expression must consume the whole input; otherwise trailing tokens
    # (e.g. an unbalanced ')') would be silently dropped.
    if !isnothing(parser.sy)
        error("unexpected symbol '$(parser.sy)' at column $(parser.pos)")
    end
    expr = Expr(:->, Expr(:tuple, parser.xSy, parser.pSy), body) # (x, p) -> body
    return (expr, parser.coeff)
end

"""
    parse_expr!(p::Parser)

Parse `Expr = Term { ('+' | '-') Term }` and return the left-associative expression tree.
"""
function parse_expr!(p::Parser)
    t1 = parse_term!(p)
    while p.sy == "+" || p.sy == "-"
        op = Symbol(p.sy)
        next_symbol!(p)
        t2 = parse_term!(p)
        t1 = Expr(:call, op, t1, t2)
    end
    return t1
end

"""
    parse_term!(p::Parser)

Parse `Term = Fact { ('*' | '/') Fact }` and return the left-associative expression tree.
"""
function parse_term!(p::Parser)
    f1 = parse_factor!(p)
    while p.sy == "*" || p.sy == "/"
        op = Symbol(p.sy)
        next_symbol!(p)
        f2 = parse_factor!(p)
        f1 = Expr(:call, op, f1, f2)
    end
    return f1
end

# true if sy is one of the two spellings of the power operator
_ispower(sy) = sy == "**" || sy == "^"

# Fact = { '+' | '-' }
#        (constant | parameter
#         | '(' Expr ')'
#         | ident [ ParamList ]   variable or function call
#        ) [ ('**' | '^') Fact ]
# ParamList = '(' Expr { ',' Expr } ')'
"""
    parse_factor!(p::Parser)

Parse a factor: optional signs, then an identifier, function call, number, or
parenthesized expression, optionally followed by a (right-associative) power.

A leading minus is folded into a numeric literal only when no power operator follows,
so `-2^2` means `-(2^2)`, consistent with `-x^2`.

# Throws
`ErrorException` if the lookahead symbol cannot start a factor (including end of input).
"""
function parse_factor!(p::Parser)
    negate = false
    while p.sy == "+" || p.sy == "-"
        if p.sy == "-"
            negate = !negate
        end
        next_symbol!(p)
    end

    if isident(p.sy)
        factor = _parse_ident!(p)
    elseif isnumber(p.sy)
        val, isparam = _parse_number!(p)
        # numbers are scanned without sign; a sign parsed above can be included in the
        # value, but only if the literal is not the base of a power
        if negate && !_ispower(p.sy)
            val = -1.0 * val
            negate = false
        end
        factor = isparam ? new_param!(p, val) : val
    elseif p.sy == "("
        next_symbol!(p)
        factor = parse_expr!(p)
        expect_and_next!(p, ")")
    else
        error("cannot parse expression")
    end

    if _ispower(p.sy)
        next_symbol!(p)
        exponent = parse_factor!(p)
        factor = Expr(:call, :^, factor, exponent)
    end

    return negate ? Expr(:call, :-, factor) : factor
end

# Index of `name` in `names`: exact match first, then a case-insensitive match
# (the parser lowercases its input, the name lists are stored as given).
function _find_name(names, name::AbstractString)
    idx = findfirst(==(name), names)
    if isnothing(idx)
        lname = lowercase(name)
        idx = findfirst(n -> lowercase(n) == lname, names)
    end
    return idx
end

# Parse an identifier at the lookahead: function call, variable, or named parameter.
function _parse_ident!(p::Parser)
    ident = p.sy
    next_symbol!(p)

    if p.sy == "("
        args = parse_paramlist!(p)
        if ident == "sqr"
            # convert sqr(x) call to x**2 (so that we don't have to update the interpreters)
            return Expr(:call, func_symbol("pow"), args..., 2.0)
        end
        return Expr(:call, func_symbol(ident), args...)
    end

    idx = variable_index(p, ident)
    if !isnothing(idx)
        return Expr(:ref, p.xSy, idx)
    end
    if p.numbers_as_parameters
        # no named parameters exist in this mode
        error("undefined variable $ident")
    end

    idx = _find_name(p.paramnames, ident)
    if isnothing(idx)
        error("undefined symbol $ident")
    end
    # replace parameter variables with access to the coefficient vector (initialized
    # to zero); grow it so that p[idx] is valid no matter in which order, or how
    # often, parameters are referenced
    while length(p.coeff) < idx
        push!(p.coeff, 0.0)
    end
    return Expr(:ref, p.pSy, idx)
end

# Parse a numeric literal at the lookahead. Returns `(value, isparam)` with the unsigned
# value and whether it has to become a parameter instead of a constant.
function _parse_number!(p::Parser)::Tuple{Float64,Bool}
    numStr = p.sy
    next_symbol!(p)

    if p.numbers_as_parameters
        val = parse(Float64, numStr)
        if p.sy == "f" # suffix marks a constant
            next_symbol!(p)
            return (val, false)
        end
        isconst = p.integers_as_constants && isinteger(val)
        return (val, !isconst)
    end

    # otherwise all numbers are parsed as constants
    if p.sy == "//" # rational literal num // denom
        next_symbol!(p)
        num = tryparse(Int64, numStr)
        denom = isnothing(p.sy) ? nothing : tryparse(Int64, p.sy)
        if isnothing(num) || isnothing(denom)
            error("expected: integer operands for // at column $(p.pos)")
        end
        next_symbol!(p)
        return (Float64(num // denom), false)
    end
    return (parse(Float64, numStr), false)
end

"""
    parse_paramlist!(p::Parser)

Parse `ParamList = '(' Expr { ',' Expr } ')'` and return the argument expressions.
"""
function parse_paramlist!(p::Parser)::Vector
    parameters = Any[] # arguments are a mix of Expr and numeric constants
    expect_and_next!(p, "(")
    push!(parameters, parse_expr!(p))
    while p.sy == ","
        next_symbol!(p)
        push!(parameters, parse_expr!(p))
    end
    expect_and_next!(p, ")")
    return parameters
end

"""
    expect_and_next!(p::Parser, expectedSy::AbstractString)

Advance past the lookahead symbol if it equals `expectedSy`; otherwise throw an
`ErrorException` naming the expected symbol and the current column.
"""
function expect_and_next!(p::Parser, expectedSy::AbstractString)
    if p.sy != expectedSy
        error("expected: $(expectedSy) at column $(p.pos)")
    else
        next_symbol!(p)
    end
end

"""
    new_param!(p::Parser, val::Float64)

Append `val` to the coefficient vector and return the expression `p[i]` referring to it.
"""
function new_param!(p::Parser, val::Float64)::Expr
    push!(p.coeff, val)
    return Expr(:ref, p.pSy, length(p.coeff))
end

"""
    isident(s)

Return `true` if `s` is an identifier (`[_a-zA-Z][_a-zA-Z0-9]*`) other than the
reserved number words `nan` and `inf`. Returns `false` for `nothing` (end of input).
"""
function isident(s::AbstractString)::Bool
    return s != "nan" && s != "inf" && !isnothing(match(r"^[_a-zA-Z][_a-zA-Z0-9]*$", s))
end
isident(::Nothing)::Bool = false

"""
    isnumber(s)

Return `true` if `s` can be parsed as a `Float64`. Returns `false` for `nothing`
(end of input).
"""
function isnumber(s::AbstractString)::Bool
    return !isnothing(tryparse(Float64, s))
end
isnumber(::Nothing)::Bool = false

"""
    variable_index(p::Parser, str::AbstractString)

Return the index of variable `str` in `p.varnames`, or `nothing` if it is not a variable.
An exact match is preferred; otherwise names are compared case-insensitively.
"""
function variable_index(p::Parser, str::AbstractString)
    return _find_name(p.varnames, str)
end

"""
    func_symbol(id::AbstractString)

Map a function name to the symbol used in generated expressions (`"pow"` becomes `:^`).
"""
function func_symbol(id::AbstractString)
    if id == "pow"
        return :^
    else
        return Symbol(id)
    end
end

# scanner
"""
    next_symbol!(p::Parser)

Scan the next symbol from `p.str` starting at `p.pos`, store it in `p.sy`
(`nothing` at end of input) and advance `p.pos` past it.

Symbols are numbers, identifiers, the two-character operators `**` and `//`, and any
other single character.
"""
function next_symbol!(p::Parser)
    s = p.str
    pos = p.pos
    # String indices are byte offsets: bound by lastindex and step with nextind so
    # that non-ASCII input cannot hit an invalid index.
    stop = lastindex(s)

    # skip whitespace
    while pos <= stop && isspace(s[pos])
        pos = nextind(s, pos)
    end

    if pos > stop
        p.sy = nothing
        p.pos = pos
        return nothing
    end

    c = s[pos]
    if isdigit(c) # numbers
        # floating point number; s[pos] is a digit, so the leftmost match starts at pos
        m = match(r"(\d+([.]\d*)?([eE][+-]?\d+)?|[.]\d+([eE][+-]?\d+)?)", s, pos)
        p.sy = m[1]
        pos += ncodeunits(m[1])
    elseif isletter(c) # identifiers
        last = pos
        nxt = nextind(s, last)
        while nxt <= stop && (isdigit(s[nxt]) || isletter(s[nxt]) || s[nxt] == '_')
            last = nxt
            nxt = nextind(s, nxt)
        end
        p.sy = s[pos:last]
        pos = nxt
    elseif c == '*' || c == '/'
        # '*' or '/', possibly doubled to "**" or "//"; the operator may be the last
        # character of the input
        pos += 1
        if pos <= stop && s[pos] == c
            p.sy = string(c, c)
            pos += 1
        else
            p.sy = string(c)
        end
    else
        p.sy = string(c) # single character symbol
        pos = nextind(s, pos)
    end

    p.pos = pos
    return nothing
end