# master-thesis/package/src/ExpressionExecutorCuda.jl

module ExpressionExecutorCuda
include("Interpreter.jl")
include("ExpressionProcessing.jl")
export interpret_gpu
export evaluate_gpu
export test
# const SymbolTable64 = Dict{Tuple{Expr, Symbol},Float64}
#
# Some assertions:
# Variable and parameter numbering starts at "1", meaning the first variable/parameter has to be "x1"/"p1" and not "x0"/"p0"
# each index i in exprs has to have its matching values in column i of Matrix X, so that X[:,i] contains the values for exprs[i]. The same goes for p
# This assertion is made because, in Julia, the first index does not have to be 1
#
# Evaluate Expressions on the GPU
function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})::Matrix{Float64}
    # Ensure that no two expressions are interpreted in the same "warp"
    # NOTE: incomplete - currently only converts the first expression to postfix and does not yet produce a result matrix
    exprsPostfix = ExpressionProcessing.expr_to_postfix(exprs[1])
end
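# Illustrative sketch of how interpret_gpu could be completed (not part of the
# original module; `Interpreter.interpret` is a hypothetical entry point and may
# not match the actual Interpreter.jl API):
#
# function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})::Matrix{Float64}
#     postfixExprs = [ExpressionProcessing.expr_to_postfix(e) for e in exprs]
#     return Interpreter.interpret(postfixExprs, X, p)
# end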
# Convert Expressions to PTX Code and execute that instead
function evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})::Matrix{Float64}
    # Look into this to maybe speed up PTX generation: https://cuda.juliagpu.org/stable/tutorials/introduction/#Parallelization-on-the-CPU
end
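# Sketch of the CPU-side parallelization idea from the linked tutorial, applied
# to per-expression code generation (illustrative only; `generate_ptx` is a
# hypothetical helper that does not exist in this package):
#
# kernels = Vector{String}(undef, length(exprs))
# Threads.@threads for i in eachindex(exprs)
#     kernels[i] = generate_ptx(ExpressionProcessing.expr_to_postfix(exprs[i]))
# end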
# TODO: See if it is feasible to add Float32 versions too (mostly because 32 bit is significantly faster than 64 bit)
# If AMD GPU support gets added, it might even be a good idea to add 16-bit floats, since they are even faster than 32-bit. On Nvidia, 16-bit is either slower than or equal in performance to 32-bit
function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
end
function evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
end
function test()
    Interpreter.CudaTest()
end
"Performs pre processing steps to the expressions.
- It replaces every variable with the according value stored in X and p.
- It transforms the expressions into postfix form and returns them.
"
function preprocess_expressions!(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})::Array{String}
    symtable = ExpressionProcessing.construct_symtable(exprs, X, p)
    postfixExpressions = Array{String,1}()

    # TODO: Test if multithreading provides a speedup and, if it does, roughly determine the size at which it becomes beneficial.
    for i in eachindex(exprs)
        expr = deepcopy(exprs[i])
        ExpressionProcessing.replace_variables!(exprs[i], symtable, expr)
        push!(postfixExpressions, ExpressionProcessing.expr_to_postfix(exprs[i]))
    end

    return postfixExpressions
end
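# Example usage of preprocess_expressions! (an illustrative sketch; the concrete
# values and the behaviour of the ExpressionProcessing helpers are assumptions):
#
# exprs = [:(x1 + p1), :(x1 * x2)]
# X = [1.0 2.0; 3.0 4.0]
# p = [[0.5], Float64[]]
# postfixExpressions = preprocess_expressions!(exprs, X, p)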
end
# Flow
# input: Vector expr == the expressions; contains e.g. 4 expressions
#        Matrix X == |expr| columns, n rows. n == number of variables x1..xn; n is the same for all expressions --- WRONG
#        Matrix X == k columns, n rows. k == number of variables in the expressions (every expression must have the same number of variables); n == number of different values per variable, i.e. per column (see the example at the end of this comment block)
#        Vector{Vector} p == vector of size |expr|, each element a vector of size m. m == number of parameters per expression; m can differ between expressions
#
# The following can be done on the CPU
# convert the expressions to postfix notation (mandatory)
# replace every variable with the corresponding value from X and p (to reduce extensive memory accesses on the GPU)
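#
# Example of the expected input layout (illustrative values only, assuming the
# corrected description of X above):
#   exprs = [:(x1 + p1), :(x1 * x2 - p1 + p2)]   # |expr| == 2 expressions
#   X     = [1.0 2.0; 3.0 4.0; 5.0 6.0]          # k == 2 variables (columns), n == 3 value rows
#   p     = [[0.5], [1.0, 2.0]]                  # one parameter vector per expression; sizes may differ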