Some checks failed
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Has been cancelled
71 lines
3.2 KiB
Julia
71 lines
3.2 KiB
Julia
module ExpressionExecutorCuda
|
|
include("Interpreter.jl")
|
|
include("ExpressionProcessing.jl")
|
|
|
|
export interpret_gpu
|
|
export evaluate_gpu
|
|
export test
|
|
|
|
# const SymbolTable64 = Dict{Tuple{Expr, Symbol},Float64}
|
|
#
|
|
# Some assertions:
|
|
# Variables and parameters start their naming with "1" meaning the first variable/parameter has to be "x1/p1" and not "x0/p0"
|
|
# Each index i in exprs has to have the matching values in column i of Matrix X, so that X[:,i] contains the values for exprs[i]. The same goes for p
|
|
# This assertion is made, because in julia, the first index doesn't have to be 1
|
|
#
|
|
|
|
"""
    interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})

Evaluate expressions on the GPU (work in progress).

At present the body only converts `exprs[1]` to postfix form through
`ExpressionProcessing.expr_to_postfix`; `X` and `p` are not read. The result of that call
is the value of the function and is therefore converted to the declared
`Matrix{Float64}` return type.

NOTE(review): the remaining interpretation steps are presumably still to be added —
confirm before relying on the returned value.
"""
function interpret_gpu(
    exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}}
)::Matrix{Float64}
    # Ensure that no two expressions are interpreted in the same "warp"
    return ExpressionProcessing.expr_to_postfix(exprs[1])
end
|
|
|
|
"""
    evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})

Convert expressions to PTX code and execute that instead (not implemented yet).

The body contains no statements, so the method yields `nothing`. Because `nothing` cannot
be converted to the declared `Matrix{Float64}` return type, calling this method currently
throws a `MethodError`.
"""
function evaluate_gpu(
    exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}}
)::Matrix{Float64}
    # Look into this to maybe speed up PTX generation: https://cuda.juliagpu.org/stable/tutorials/introduction/#Parallelization-on-the-CPU
    return nothing  # placeholder until the PTX path exists
end
|
|
|
|
|
|
# TODO: See if it is feasible to make 32 versions too (mostly because 32 is significantly faster than 64)
|
|
# If AMD GPU support gets added, it might even be a good idea to add 16 bit floats, since they are even faster than 32 bit. On Nvidia 16 is either slower or equal in performance to 32 bit
|
|
"""
    interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})

`Float32` method of `interpret_gpu` (not implemented yet).

The body contains no statements, so the method yields `nothing`. Because `nothing` cannot
be converted to the declared `Matrix{Float32}` return type, calling this method currently
throws a `MethodError`.
"""
function interpret_gpu(
    exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}}
)::Matrix{Float32}
    return nothing  # placeholder until the 32 bit interpreter exists
end
|
|
"""
    evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})

`Float32` method of `evaluate_gpu` (not implemented yet).

The body contains no statements, so the method yields `nothing`. Because `nothing` cannot
be converted to the declared `Matrix{Float32}` return type, calling this method currently
throws a `MethodError`.
"""
function evaluate_gpu(
    exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}}
)::Matrix{Float32}
    return nothing  # placeholder until the 32 bit PTX path exists
end
|
|
|
|
"""
    test()

Call `Interpreter.CudaTest()` and return whatever that call returns.
"""
test() = Interpreter.CudaTest()
|
|
|
|
"""
    preprocess_expressions!(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})

Perform the pre-processing steps on the expressions and return their postfix forms.

- A symbol table is built once from `exprs`, `X` and `p` with
  `ExpressionProcessing.construct_symtable`.
- Every expression is handed to `ExpressionProcessing.replace_variables!`, which is meant
  to replace each variable with the according value stored in `X` and `p`. The elements
  of `exprs` themselves are passed to that call, not copies.
- Every expression is then transformed with `ExpressionProcessing.expr_to_postfix` and
  the results are collected in input order.

# Returns
A vector with one postfix `String` per entry of `exprs`.
"""
function preprocess_expressions!(
    exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}}
)::Array{String}
    symtable = ExpressionProcessing.construct_symtable(exprs, X, p)
    postfixExpressions = String[]

    # Test if multi threading provides a speedup and if it does, roughly determine the size at which it is beneficial.
    for current in exprs
        # Snapshot taken before the replacement and passed along as third argument.
        # NOTE(review): presumably the untouched expression is needed for the symbol
        # table lookup — verify against ExpressionProcessing.replace_variables!.
        snapshot = deepcopy(current)
        ExpressionProcessing.replace_variables!(current, symtable, snapshot)
        push!(postfixExpressions, ExpressionProcessing.expr_to_postfix(current))
    end

    return postfixExpressions
end
|
|
|
|
end
|
|
|
|
|
|
|
|
# Flow
|
|
# input: Vector expr == expressions contains eg. 4 expressions
|
|
# Matrix X == |expr| columns, n rows. n == number of variables x1..xn; n is the same for all expressions --- WRONG
|
|
# Matrix X == k columns, n rows. k == number of variables in the expressions (every expression must have the same number of variables); n == number of different values for xk where k is the column
|
|
# VectorVector p == vector size |expr| containing vector size m. m == number of parameters per expression. p can be different for each expression
|
|
#
|
|
# The following can be done on the CPU
|
|
# convert expression to postfix notation (mandatory)
|
|
# replace every variable with the according value from X and p (reduce extensive memory access on the GPU)
|