diff --git a/package/src/ExpressionExecutorCuda.jl b/package/src/ExpressionExecutorCuda.jl
index cebebc4..e9635df 100644
--- a/package/src/ExpressionExecutorCuda.jl
+++ b/package/src/ExpressionExecutorCuda.jl
@@ -5,6 +5,14 @@ export interpret_gpu
 export evaluate_gpu
 export test
 
+const SymbolTable64 = Dict{Tuple{Expr, Symbol},Float64}
+#
+# Some assertions:
+# Variables and parameters start their naming with "1", meaning the first variable/parameter has to be "x1/p1" and not "x0/p0"
+# Each index i in exprs has to have the matching values in column i of Matrix X, so that X[:,i] contains the values for exprs[i]. The same goes for p
+# This assertion is made, because in Julia, the first index doesn't have to be 1
+#
+
 # Evaluate Expressions on the GPU
 function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})::Matrix{Float64}
     # Ensure that no two expressions are interpreted in the same "warp"
@@ -17,8 +25,8 @@ function evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{
 end
 
 
-# TODO: See if it is feasible to make 32 versions too (mostly because 32 is faster than 64)
-# If AMD GPU support gets added, it might even be a good idea to add 16 bit floats, since they are even faster than 32 bit. On Nvidia 16 is either slower or equal in performance than 32 bit
+# TODO: See if it is feasible to make 32 versions too (mostly because 32 is significantly faster than 64)
+# If AMD GPU support gets added, it might even be a good idea to add 16 bit floats, since they are even faster than 32 bit. On Nvidia 16 is either slower or equal in performance to 32 bit
 function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
 end
 function evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
@@ -28,4 +36,102 @@ function test()
     Interpreter.CudaTest()
 end
 
+"""
+    preprocess_expressions!(exprs, X, p)
+
+Perform the pre processing steps on the expressions.
+
+- Replaces every variable and parameter in `exprs[i]` with the according value stored in
+  `X[:, i]` and `p[i]`. The expressions in `exprs` are modified in place.
+- Transforms the expressions into postfix form and returns them.
+"""
+function preprocess_expressions!(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})::Vector{String}
+    symtable = construct_symtable(exprs, X, p)
+
+    # TODO: Test if multi threading provides a speedup and if it does, roughly determine the size at which it is beneficial.
+    for expr in exprs
+        replace_variables!(expr, symtable)
+    end
+
+    return expr_to_postfix()
 end
+
+# NOTE: Not implemented yet. The empty body returns `nothing`, which cannot be converted to `Array{String}`, so calling this currently throws.
+function expr_to_postfix()::Array{String}
+    # TODO
+end
+
+# Probably move the functions below into another module
+
+"""
+    replace_variables!(ex, symtable)
+
+Replace all the variables and parameters of `ex`, including those inside nested expressions, with their
+corresponding value stored in `symtable`. `ex` is modified in place.
+"""
+function replace_variables!(ex::Expr, symtable::SymbolTable64)
+    # Expressions are hashed by structure and the symtable keys describe the unmodified top-level expression.
+    # Replacing a symbol changes the hash of `ex`, so a snapshot is taken once and used for every lookup.
+    _replace_variables!(ex, symtable, deepcopy(ex))
+    return nothing
+end
+
+# Walks `ex` recursively and replaces every symbol that is registered for `root` in the symtable.
+function _replace_variables!(ex::Expr, symtable::SymbolTable64, root::Expr)
+    for i in eachindex(ex.args)
+        arg = ex.args[i]
+        if arg isa Expr
+            _replace_variables!(arg, symtable, root)
+        elseif arg isa Symbol && haskey(symtable, (root, arg)) # We found a variable/parameter and can replace it with the actual value
+            ex.args[i] = symtable[(root, arg)]
+        end
+    end
+    return nothing
+end
+
+"""
+    construct_symtable(expressions, mat, params)
+
+Build the table that maps `(expression, symbol)` to a value: for `expressions[i]` the symbol `xj` maps to
+`mat[j, i]` and the symbol `pj` maps to `params[i][j]`.
+
+Expressions are compared by structure, so structurally identical expressions share their entries.
+"""
+function construct_symtable(expressions::Vector{Expr}, mat::Matrix{Float64}, params::Vector{Vector{Float64}})::SymbolTable64
+    if size(mat, 2) < length(expressions) || length(params) < length(expressions)
+        throw(DimensionMismatch("every expression needs a column in the variable matrix and a parameter vector"))
+    end
+
+    symtable = SymbolTable64()
+    for (i, expr) in enumerate(expressions)
+        # The key is a snapshot, because replace_variables! mutates the expression later on, which would
+        # otherwise change the key while it is stored in the Dict.
+        key = deepcopy(expr)
+        fill_symtable!(key, symtable, view(mat, :, i), "x")
+        fill_symtable!(key, symtable, params[i], "p")
+    end
+
+    return symtable
+end
+
+# Registers `values[j]` under the symbol `<symbolPrefix>j` (e.g. "x1", "x2", ...) for the given expression.
+function fill_symtable!(expr::Expr, symtable::SymbolTable64, values::AbstractVector{Float64}, symbolPrefix::String)
+    # enumerate counts from 1, which matches the naming convention regardless of how `values` is indexed
+    for (varIndex, val) in enumerate(values)
+        symtable[(expr, Symbol(symbolPrefix, varIndex))] = val
+    end
+    return nothing
+end
+
+end
+
+
+
+# Flow
+# input: Vector expr == expressions contains eg. 4 expressions
+# Matrix X == |expr| columns, n rows. n == number of variables x1..xn; n is the same for all expressions
+# VectorVector p == vector size |expr| containing vector size m. m == number of parameters per expression. p can be different for each expression
+#
+# The following can be done on the CPU
+# convert expression to postfix notation (mandatory)
+# replace every variable with the according value from X and p (reduce extensive memory access on the GPU)
diff --git a/package/src/Interpreter.jl b/package/src/Interpreter.jl
index b364f2e..926db9c 100644
--- a/package/src/Interpreter.jl
+++ b/package/src/Interpreter.jl
@@ -3,7 +3,7 @@ using CUDA
 
 export CudaTest
 
-@enum Operators Add=1 Subtract=2
+@enum Operators Add=1 Subtract=2 Multiply=3 Division=4 Power=5 Abs=6 Log=7 Exp=8 Sqrt=9
 
 function CudaTest()
     N = 2^20