Add files with expressions, parser, Nikuradse dataset and a new test case.
This commit is contained in:
294
package/test/parser.jl
Normal file
294
package/test/parser.jl
Normal file
@ -0,0 +1,294 @@
|
||||
## Parser for (ESR) expressions in infix format
|
||||
|
||||
"""
    Parser(str, varnames, paramnames; numbers_as_parameters=false, integers_as_constants=false)

Mutable state shared by the scanner and the recursive descent parser for infix expressions.

The input `str` is stored lower-cased. Construction immediately scans the first symbol, so
`sy` already holds the lookahead when the constructor returns.

# Keywords
- `numbers_as_parameters`: numeric literals in the expression are collected as parameters.
  In this mode the suffix `f` marks a literal as a constant, e.g. `3 * x ^ 2f` creates the
  parameterized expression `a0*x^2` with `2` a constant value. Cannot be combined with a
  non-empty `paramnames`.
- `integers_as_constants`: requires `numbers_as_parameters=true`.

# Throws
An `ErrorException` for either of the unsupported keyword combinations above.
"""
mutable struct Parser
    const str::AbstractString                   # string to be parsed (lower-cased)
    pos::Int64                                  # current position in string
    sy::Union{AbstractString,Nothing}           # current lookahead symbol
    const pSy::Symbol                           # symbol used when indexing parameters (:p)
    const xSy::Symbol                           # symbol used when indexing variables (:x)
    const varnames::Vector{<:AbstractString}
    const paramnames::Vector{<:AbstractString}
    const coeff::Vector{Float64}                # coefficient values collected while parsing
    const numbers_as_parameters::Bool
    const integers_as_constants::Bool           # TODO rename and implement as rationals_as_constants

    function Parser(str::AbstractString, varnames::Vector{<:AbstractString}, paramnames::Vector{<:AbstractString}; numbers_as_parameters=false, integers_as_constants=false)
        # Reject keyword combinations the parser cannot handle before building any state.
        if numbers_as_parameters && !isempty(paramnames)
            error("the parser does not support paramnames when numbers_as_parameters=true")
        elseif !numbers_as_parameters && integers_as_constants
            error("Set numbers_as_parameters=true to parse integers_as_constants")
        end

        parser = new(
            lowercase(str), 1, nothing,
            :p, :x,
            varnames, paramnames, Float64[],
            numbers_as_parameters, integers_as_constants,
        )
        next_symbol!(parser) # prime the lookahead symbol
        return parser
    end
end
|
||||
|
||||
# recursive descent parser
|
||||
# scanner is also defined in this file
|
||||
|
||||
# LL(1) grammar:
|
||||
# G(Expr):
|
||||
# Expr = Term { ('+' | '-') Term }
|
||||
# Term = Fact { ('*' | '/') Fact }
|
||||
# Fact = { '+' | '-' }
|
||||
# (ident | number | parameter
|
||||
# | '(' Expr ')'
|
||||
# | ident ParamList // function call
|
||||
# ) [ ('**' | '^') Fact ]
|
||||
# ParamList = '(' Expr { ',' Expr } ')'
|
||||
|
||||
|
||||
|
||||
# parser entry point (the scanner, next_symbol!, is defined at the end of this file)
|
||||
|
||||
|
||||
"""
    parse_infix(exprStr, varnames, paramnames; numbers_as_parameters=false, integers_as_constants=false)

Parse the infix expression `exprStr` and return a tuple `(expr, coeff)`.

`expr` is an anonymous-function expression of the form `(x, p) -> body`, where `body` is the
parsed expression. `coeff` is the `Float64` coefficient vector collected by the parser.

# Arguments
- `exprStr`: expression text in infix notation.
- `varnames`: names that are translated to indexed accesses of `x`.
- `paramnames`: names that are translated to indexed accesses of `p`.

# Keywords
Forwarded unchanged to the `Parser` constructor.

# Throws
An `ErrorException` if the expression cannot be parsed, or if input remains after a
complete expression has been read.
"""
function parse_infix(exprStr::AbstractString, varnames::Vector{<:AbstractString}, paramnames::Vector{<:AbstractString};
                     numbers_as_parameters = false, integers_as_constants = false)::Tuple{Expr, Vector{Float64}}
    parser = Parser(exprStr, varnames, paramnames;
                    numbers_as_parameters = numbers_as_parameters, integers_as_constants = integers_as_constants)
    body = parse_expr!(parser)

    # The scanner sets the lookahead to `nothing` once the input is exhausted. Any other
    # value here is trailing text that parse_expr! did not consume; report it instead of
    # silently returning an expression for only a prefix of the input.
    if !isnothing(parser.sy)
        error("unexpected symbol '$(parser.sy)' before column $(parser.pos)")
    end

    # Use the parser's own symbols so the argument names always match the generated indexing.
    expr = Expr(:->, Expr(:tuple, parser.xSy, parser.pSy), body) # :((x,p) -> $body)
    return (expr, parser.coeff)
end
|
||||
|
||||
"""
    parse_expr!(p::Parser)

Parse `Expr = Term { ('+' | '-') Term }` and return the resulting expression tree.
Operands are combined left to right into nested binary `+` / `-` calls.
"""
function parse_expr!(p::Parser)
    acc = parse_term!(p)
    while p.sy == "+" || p.sy == "-"
        op = p.sy == "+" ? :+ : :-
        next_symbol!(p) # consume the operator
        rhs = parse_term!(p)
        acc = Expr(:call, op, acc, rhs)
    end
    return acc
end
|
||||
|
||||
"""
    parse_term!(p::Parser)

Parse `Term = Fact { ('*' | '/') Fact }` and return the resulting expression tree.
Operands are combined left to right into nested binary `*` / `/` calls.
"""
function parse_term!(p::Parser)
    acc = parse_factor!(p)
    while p.sy == "*" || p.sy == "/"
        op = p.sy == "*" ? :* : :/
        next_symbol!(p) # consume the operator
        rhs = parse_factor!(p)
        acc = Expr(:call, op, acc, rhs)
    end
    return acc
end
|
||||
|
||||
# Fact = { '+' | '-' }
|
||||
# (constant | parameter
|
||||
# | '(' Expr ')'
|
||||
# | ident [ ParamList ] variable or function call
|
||||
# ) [ ('**' | '^') Fact ]
|
||||
# ParamList = '(' Expr { ',' Expr } ')'
|
||||
|
||||
"""
    parse_factor!(p::Parser)

Parse one factor and return it as a number or an expression tree:

    Fact = { '+' | '-' }
           (constant | parameter
            | '(' Expr ')'
            | ident [ ParamList ]      variable or function call
           ) [ ('**' | '^') Fact ]

Leading signs are collapsed into a single sign. For a numeric literal the sign is folded
into the literal's value; for every other operand a negative sign is applied to the whole
factor (including an exponent, if present) via `neg_simpl`.

# Throws
An `ErrorException` when the lookahead symbol cannot start a factor (including end of
input), when an identifier is neither a known variable nor a known parameter, or when a
closing parenthesis is missing.
"""
function parse_factor!(p::Parser)
    # Collapse any run of unary signs into +1.0 or -1.0.
    sign = 1.0
    while p.sy == "+" || p.sy == "-"
        if p.sy == "-"
            sign = -sign
        end
        next_symbol!(p)
    end

    sy = p.sy
    if sy === nothing
        # End of input: isident/isnumber only accept strings, so handle this explicitly.
        error("cannot parse expression: unexpected end of input")
    elseif isident(sy)
        factor = _parse_ident_factor!(p)
    elseif isnumber(sy)
        # numbers are parsed without sign, so the sign parsed above is included in the value
        # NOTE(review): because the sign is folded in before the exponent below is applied,
        # "-2^2" is built as (-2.0)^2 — confirm this is intended.
        factor = _parse_number_factor!(p, sign)
        sign = 1.0
    elseif sy == "("
        next_symbol!(p)
        factor = parse_expr!(p)
        expect_and_next!(p, ")")
    else
        error("cannot parse expression")
    end

    # optional exponent; parsed as a factor, so chained powers nest to the right
    if p.sy == "**" || p.sy == "^"
        next_symbol!(p)
        exponent = parse_factor!(p)
        factor = :($factor ^ $exponent) # pow_simpl(factor, exponent)
    end

    if sign == -1
        # NOTE(review): neg_simpl is not defined in the visible part of this file —
        # confirm it is in scope wherever this parser is loaded.
        return neg_simpl(factor)
    else
        return factor
    end
end

# Parse a factor that starts with an identifier: a function call, a variable or a named parameter.
function _parse_ident_factor!(p::Parser)
    ident = p.sy
    next_symbol!(p)

    if p.sy == "("
        args = parse_paramlist!(p)
        if ident == "sqr"
            # convert sqr(x) call to x**2 (so that we don't have to update the interpreters)
            return Expr(:call, func_symbol("pow"), args..., 2.0)
        else
            return Expr(:call, func_symbol(ident), args...)
        end
    end

    idx = findfirst(==(ident), p.varnames)
    if !isnothing(idx)
        return Expr(:ref, p.xSy, idx)
    elseif !p.numbers_as_parameters # only if paramnames are given
        idx = findfirst(==(ident), p.paramnames)
        if isnothing(idx)
            error("undefined symbol $ident")
        end
        # replace parameter variables with access to coefficient vector (initialized to zero)
        # NOTE(review): one coefficient is appended per occurrence, regardless of `idx`, so
        # length(p.coeff) can differ from the largest index referenced — confirm with callers.
        push!(p.coeff, 0.0)
        return Expr(:ref, p.pSy, idx)
    else
        error("undefined variable $ident")
    end
end

# Parse a numeric literal (the current lookahead) and return the signed constant or a parameter reference.
function _parse_number_factor!(p::Parser, sign::Float64)
    numStr = p.sy

    if p.numbers_as_parameters
        val = parse(Float64, numStr)
        next_symbol!(p)
        if p.sy == "f"
            # the suffix 'f' marks a constant
            next_symbol!(p)
            return sign * val
        elseif p.integers_as_constants && isinteger(val)
            # integers are parsed as constants
            return sign * val
        else
            # everything else becomes a parameter with the literal as its value
            return new_param!(p, sign * val)
        end
    end

    # otherwise all numbers are parsed as constants
    next_symbol!(p)
    if p.sy == "//"
        num = parse(Int64, numStr)
        next_symbol!(p)
        denomStr = p.sy
        if denomStr === nothing
            error("expected integer denominator after // at column $(p.pos)")
        end
        val = num // parse(Int64, denomStr)
        next_symbol!(p)
    else
        val = parse(Float64, numStr)
    end
    return sign * val
end
|
||||
|
||||
"""
    parse_paramlist!(p::Parser)

Parse `ParamList = '(' Expr { ',' Expr } ')'` and return the parsed argument expressions
in order. At least one argument is required.
"""
function parse_paramlist!(p::Parser)::Vector
    expect_and_next!(p, "(")
    args = Any[parse_expr!(p)]
    while p.sy == ","
        next_symbol!(p) # consume the separator
        push!(args, parse_expr!(p))
    end
    expect_and_next!(p, ")")
    return args
end
|
||||
|
||||
"""
    expect_and_next!(p::Parser, expectedSy::AbstractString)

Check that the lookahead symbol equals `expectedSy` and advance to the next symbol.
Throws an `ErrorException` naming the expected symbol and the scanner position otherwise.
"""
function expect_and_next!(p::Parser, expectedSy::AbstractString)
    p.sy == expectedSy || error("expected: $(expectedSy) at column $(p.pos)")
    return next_symbol!(p)
end
|
||||
|
||||
"""
    new_param!(p::Parser, val::Float64)

Append `val` to the parser's coefficient vector and return an expression that indexes the
parameter symbol at the position of the new entry.
"""
function new_param!(p::Parser, val::Float64)::Expr
    coeffs = p.coeff
    push!(coeffs, val)
    return Expr(:ref, p.pSy, lastindex(coeffs))
end
|
||||
|
||||
|
||||
"""
    isident(s::AbstractString)

Return `true` if `s` is an identifier: an ASCII letter or underscore followed by ASCII
letters, digits or underscores. The words `nan` and `inf` are not identifiers.
"""
function isident(s::AbstractString)::Bool
    if s in ("nan", "inf")
        return false
    end
    return occursin(r"^[_a-zA-Z][_a-zA-Z0-9]*$", s)
end
|
||||
|
||||
"""
    isnumber(s::AbstractString)

Return `true` if `s` can be parsed as a `Float64`.
"""
function isnumber(s::AbstractString)::Bool
    parsed = tryparse(Float64, s)
    return parsed !== nothing
end
|
||||
|
||||
"""
    variable_index(p::Parser, str::AbstractString)

Return the index of the first entry of `p.varnames` equal to `str`, or `nothing` if there
is no such entry.
"""
function variable_index(p::Parser, str::AbstractString)
    # The field is named `varnames` (all lower-case) in the Parser struct.
    return findfirst(==(str), p.varnames)
end
|
||||
|
||||
"""
    func_symbol(id::AbstractString)

Return the symbol used to call the function named `id`. The name `pow` maps to the
operator `^`; every other name maps to the symbol with the same spelling.
"""
function func_symbol(id::AbstractString)
    return id == "pow" ? :^ : Symbol(id)
end
|
||||
|
||||
"""
    next_symbol!(p::Parser)

Scan the next symbol of `p.str` starting at `p.pos`. Stores the symbol in `p.sy`, moves
`p.pos` to the first index after it, and returns `nothing`.

Symbols are:
- `nothing` when only whitespace (or nothing at all) is left,
- a numeric literal such as `12`, `1.5`, `.5` or `2e-3`,
- an identifier: a letter followed by letters, digits or underscores,
- the operators `*`, `**`, `/` and `//`,
- any other single character.

Positions are string indices and are advanced with `nextind`, so input containing
multi-byte characters is scanned without indexing errors.
"""
function next_symbol!(p::Parser)
    s = p.str
    stop = lastindex(s)
    pos = p.pos

    # skip whitespace
    while pos <= stop && isspace(s[pos])
        pos = nextind(s, pos)
    end

    if pos > stop
        p.sy = nothing
        p.pos = pos
        return nothing
    end

    c = s[pos]
    nxt = nextind(s, pos) # index of the character after `c`; may be past the end

    if isdigit(c) || (c == '.' && nxt <= stop && isdigit(s[nxt])) # numbers
        # The guard ensures a number starts exactly at `pos`, so the leftmost match is there.
        m = match(r"(\d+([.]\d*)?([eE][+-]?\d+)?|[.]\d+([eE][+-]?\d+)?)", s, pos) # match floating point number
        p.sy = m[1] # get the whole match
        pos += ncodeunits(m[1])
    elseif isletter(c) # identifiers
        tail = pos # index of the last character that belongs to the identifier
        while nxt <= stop && (isdigit(s[nxt]) || isletter(s[nxt]) || s[nxt] == '_')
            tail = nxt
            nxt = nextind(s, nxt)
        end
        p.sy = s[pos:tail]
        pos = nxt
    elseif c == '*' || c == '/'
        # "*" and "/" become "**" and "//" when the same character follows; the bounds
        # check covers input that ends with the operator.
        if nxt <= stop && s[nxt] == c
            p.sy = string(c, c)
            pos = nextind(s, nxt)
        else
            p.sy = string(c)
            pos = nxt
        end
    else
        p.sy = string(c) # single character symbol
        pos = nxt
    end

    p.pos = pos
    return nothing
end
|
Reference in New Issue
Block a user