module ExpressionExecutorCuda

include("Interpreter.jl")
include("ExpressionProcessing.jl")

export interpret_gpu, evaluate_gpu, test

# const SymbolTable64 = Dict{Tuple{Expr, Symbol},Float64}

#
# Some assertions:
# - Variables and parameters start their numbering at 1, i.e. the first variable/parameter
#   has to be "x1/p1" and not "x0/p0".
# - Each index i in exprs has to have its matching values in column i of Matrix X, so that
#   X[:,i] contains the values for expr[i]. The same goes for p.
#   This assertion is made because in Julia the first index doesn't have to be 1.
# NOTE(review): the flow notes at the end of this file mark the "one column of X per
# expression" layout as wrong; confirm which description of X is the current one.
#

"""
    interpret_gpu(exprs, X, p)

Evaluate the expressions on the GPU (64 bit version).

Work in progress: at the moment the body only converts the first expression to postfix
form via `ExpressionProcessing.expr_to_postfix` and hands that value back, subject to the
declared `Matrix{Float64}` return type.
"""
function interpret_gpu(
    exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}}
)::Matrix{Float64}
    # Ensure that no two expressions are interpreted in the same "warp"
    firstPostfix = ExpressionProcessing.expr_to_postfix(exprs[1])
    return firstPostfix
end

"""
    evaluate_gpu(exprs, X, p)

Convert the expressions to PTX code and execute that instead of interpreting them
(64 bit version).

Not implemented yet: the body is empty, so no result is produced and the implicit
`nothing` cannot satisfy the declared `Matrix{Float64}` return type.
"""
function evaluate_gpu(
    exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}}
)::Matrix{Float64}
    # Look into this to maybe speed up PTX generation:
    # https://cuda.juliagpu.org/stable/tutorials/introduction/#Parallelization-on-the-CPU
end

# TODO: See if it is feasible to make 32 bit versions too (mostly because 32 bit is
#       significantly faster than 64 bit).
# If AMD GPU support gets added, it might even be a good idea to add 16 bit floats, since
# they are even faster than 32 bit. On Nvidia, 16 bit is either slower than or equal in
# performance to 32 bit.

# 32 bit counterpart of `interpret_gpu`; placeholder with an empty body.
function interpret_gpu(
    exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}}
)::Matrix{Float32}
end

# 32 bit counterpart of `evaluate_gpu`; placeholder with an empty body.
function evaluate_gpu(
    exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}}
)::Matrix{Float32}
end

# Forwards to `Interpreter.CudaTest()`.
function test()
    Interpreter.CudaTest()
end

"""
    preprocess_expressions!(exprs, X, p)

Perform the pre-processing steps on the expressions.
- Replace every variable with the according value stored in `X` and `p`.
- Transform the expressions into postfix form and return them.

A symbol table is built once from `exprs`, `X` and `p`. Every expression is then passed,
together with a deep copy of itself taken beforehand, to
`ExpressionProcessing.replace_variables!`, and afterwards converted with
`ExpressionProcessing.expr_to_postfix`. The elements of `exprs` are handed to the mutating
helper directly, so the caller's expressions are presumably modified in place.
"""
function preprocess_expressions!(
    exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}}
)::Array{String}
    symbols = ExpressionProcessing.construct_symtable(exprs, X, p)
    postfixed = String[]

    # Test if multi threading provides a speedup and, if it does, roughly determine the
    # size at which it is beneficial.
    for current in exprs
        # Snapshot of the untouched expression, taken before the in-place replacement.
        snapshot = deepcopy(current)
        ExpressionProcessing.replace_variables!(current, symbols, snapshot)
        push!(postfixed, ExpressionProcessing.expr_to_postfix(current))
    end

    return postfixed
end

end

# Flow
# input: Vector expr == expressions; contains e.g. 4 expressions
#        Matrix X == |expr| columns, n rows. n == number of variables x1..xn; n is the same for all expressions --- WRONG
#        Matrix X == k columns, n rows. k == number of variables in the expressions (every expression must have the same number of variables); n == number of different values for xk where k is the column
#        VectorVector p == vector of size |expr| containing vectors of size m. m == number of parameters per expression. p can be different for each expression
#
# The following can be done on the CPU
#     convert expression to postfix notation (mandatory)
#     replace every variable with the according value from X and p (reduce extensive memory access on the GPU)