module ExpressionExecutorCuda

include("Utils.jl")
include("ExpressionProcessing.jl")
include("Interpreter.jl")
include("Transpiler.jl")

module CpuInterpreter
    include("Code.jl")
    include("CpuInterpreter.jl")
end

export interpret_gpu, interpret_cpu
export evaluate_gpu
# NOTE(review): `test` is not defined in this file; presumably one of the included
# files provides it — confirm, otherwise this export refers to an undefined name.
export test

# Some assertions:
# - Variables and parameters start their naming with "1", meaning the first
#   variable/parameter has to be "x1/p1" and not "x0/p0".
# - Matrix X is column major.
# - Each index i in exprs has to have the matching values in the column i in Matrix X,
#   so that X[:,i] contains the values for expr[i]. The same goes for p.
#   This assertion is made because in Julia the first index doesn't have to be 1.

# Validate the arguments shared by all public entry points.
function _check_inputs(exprs, p, repetitions)
    @assert(
        axes(exprs) == axes(p),
        "exprs and p must have identical axes (one parameter vector per expression)"
    )
    # With fewer than one repetition nothing would ever be evaluated and the caller
    # would receive a result that was never computed.
    repetitions >= 1 ||
        throw(ArgumentError("repetitions must be at least 1, got $repetitions"))
    return nothing
end

"""
    interpret_gpu(exprs, X, p; repetitions=1) -> Matrix{Float32}

Evaluate the expressions on the GPU by calling `Interpreter.interpret(exprs, X, p)`.

The call is repeated `repetitions` times (to simulate parameter tuning) and the result
of the last call is returned.

# Arguments
- `exprs`: the expressions to evaluate.
- `X`: the variable values handed to the interpreter.
- `p`: one parameter vector per expression; `axes(p)` must equal `axes(exprs)`.

# Keywords
- `repetitions=1`: how many times the evaluation is executed; must be at least 1.

# Throws
- `AssertionError` if `axes(exprs) != axes(p)`.
- `ArgumentError` if `repetitions < 1`.
"""
function interpret_gpu(
    exprs::Vector{Expr},
    X::Matrix{Float32},
    p::Vector{Vector{Float32}};
    repetitions=1,
)::Matrix{Float32}
    _check_inputs(exprs, p, repetitions)

    # The first evaluation always runs; the remaining ones simulate parameter tuning.
    results = Interpreter.interpret(exprs, X, p)
    for _ in 2:repetitions
        results = Interpreter.interpret(exprs, X, p)
    end

    return results
end

"""
    evaluate_gpu(exprs, X, p; repetitions=1) -> Matrix{Float32}

Convert the expressions to PTX code and execute that instead of interpreting them, by
calling `Transpiler.evaluate(exprs, X, p)`.

The call is repeated `repetitions` times (to simulate parameter tuning) and the result
of the last call is returned.

# Arguments
- `exprs`: the expressions to evaluate.
- `X`: the variable values handed to the transpiler.
- `p`: one parameter vector per expression; `axes(p)` must equal `axes(exprs)`.

# Keywords
- `repetitions=1`: how many times the evaluation is executed; must be at least 1.

# Throws
- `AssertionError` if `axes(exprs) != axes(p)`.
- `ArgumentError` if `repetitions < 1`.
"""
function evaluate_gpu(
    exprs::Vector{Expr},
    X::Matrix{Float32},
    p::Vector{Vector{Float32}};
    repetitions=1,
)::Matrix{Float32}
    _check_inputs(exprs, p, repetitions)

    # The first evaluation always runs; the remaining ones simulate parameter tuning.
    results = Transpiler.evaluate(exprs, X, p)
    for _ in 2:repetitions
        results = Transpiler.evaluate(exprs, X, p)
    end

    return results
end

"""
    interpret_cpu(exprs, X, p; repetitions=1) -> Matrix{Float32}

Evaluate the expressions on the CPU.

Returns a `size(X, 1) × length(exprs)` matrix in which column `i` holds the result of
`exprs[i]`. One `CpuInterpreter.Interpreter{Float32}` is built per expression and reused
for all `repetitions` evaluations of that expression.

# Arguments
- `exprs`: the expressions to evaluate.
- `X`: the variable values handed to the interpreter.
- `p`: one parameter vector per expression; `axes(p)` must equal `axes(exprs)`.

# Keywords
- `repetitions=1`: how many times each expression is evaluated; must be at least 1.

# Throws
- `AssertionError` if `axes(exprs) != axes(p)`.
- `ArgumentError` if `repetitions < 1`.
"""
function interpret_cpu(
    exprs::Vector{Expr},
    X::Matrix{Float32},
    p::Vector{Vector{Float32}};
    repetitions=1,
)::Matrix{Float32}
    _check_inputs(exprs, p, repetitions)

    nrows = size(X, 1)

    # each column of the matrix has the result for an expr
    res = Matrix{Float32}(undef, nrows, length(exprs))

    for i in eachindex(exprs)
        # The interpreter holds the postfix code and buffers for evaluation.
        # It is costly to create.
        interpreter = CpuInterpreter.Interpreter{Float32}(exprs[i], length(p[i]))
        column = @view res[:, i]

        # If an expression has to be evaluated multiple times (e.g. for different
        # parameters), it is worthwhile to reuse the interpreter to reduce the number
        # of allocations.
        for _ in 1:repetitions
            CpuInterpreter.interpret!(column, interpreter, X, p[i])
        end
    end

    return res
end

# Flow
# input: Vector expr == expressions; contains e.g. 4 expressions
#        Matrix X == |expr| columns, n rows. n == number of variables x1..xn;
#            n is the same for all expressions --- WRONG (superseded by the next line)
#        Matrix X == k columns, n rows. k == number of variables in the expressions
#            (every expression must have the same number of variables);
#            n == number of different values for xk, where k is the column
#        VectorVector p == vector of size |expr| containing vectors of size m.
#            m == number of parameters per expression. p can be different for each
#            expression
#
# The following can be done on the CPU:
#   convert expression to postfix notation (mandatory)
#   optional: replace every parameter with the correct value (should only improve
#       performance if data transfer is the bottleneck)

end