added first CUDA steps
Some checks failed
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Has been cancelled
@@ -1,5 +1,31 @@
module ExpressionExecutorCuda
include("Interpreter.jl")

# Write your package code here.
export interpret_gpu
export evaluate_gpu
export test

# Evaluate Expressions on the GPU
function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})::Matrix{Float64}
    # Ensure that no two expressions are interpreted in the same "warp"
    expr1 = exprs[1]
end
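
# --- Hedged sketch, not part of this commit ----------------------------------
# One way to keep two expressions out of the same warp is to give every
# expression its own block, so all 32 threads of a warp evaluate one expression.
# `warp_isolated_kernel`, `d_results`, and `d_X` are illustrative assumptions;
# the kernel body is a placeholder (it only copies the first variable) where a
# real interpreter would walk exprs[blockIdx().x].
using CUDA

function warp_isolated_kernel(results, X, nexprs)
    expr_idx = blockIdx().x                                  # one block per expression
    col = (blockIdx().y - 1) * blockDim().x + threadIdx().x  # data point handled by this thread
    if expr_idx <= nexprs && col <= size(X, 2)
        @inbounds results[expr_idx, col] = X[1, col]         # placeholder work
    end
    return nothing
end

# Launch geometry: 32 threads (= one warp) per block, one block row per expression:
#   @cuda threads=32 blocks=(length(exprs), cld(size(X, 2), 32)) warp_isolated_kernel(d_results, d_X, length(exprs))
# ------------------------------------------------------------------------------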

# Convert Expressions to PTX Code and execute that instead
function evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})::Matrix{Float64}
    # Look into this to maybe speed up PTX generation: https://cuda.juliagpu.org/stable/tutorials/introduction/#Parallelization-on-the-CPU
end
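
# --- Hedged sketch, not part of this commit ----------------------------------
# The tutorial linked above parallelizes work across CPU threads; applied here,
# the PTX for each expression could be generated on its own CPU thread before
# anything touches the GPU. `generate_ptx_threaded` is a hypothetical name and
# the string is only a stand-in for real code generation.
function generate_ptx_threaded(exprs::Vector{Expr})::Vector{String}
    ptx = Vector{String}(undef, length(exprs))
    Threads.@threads for i in eachindex(exprs)
        ptx[i] = string("// placeholder PTX for expression ", i)  # real codegen would go here
    end
    return ptx
end
# ------------------------------------------------------------------------------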


# TODO: See if it is feasible to add Float32 versions too (mostly because Float32 is faster than Float64 on the GPU)
# If AMD GPU support gets added, it might even be a good idea to add 16-bit floats, since they are even faster than 32-bit. On Nvidia, 16-bit is either slower than or equal in performance to 32-bit.
function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
end
function evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
end
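
# --- Hedged sketch, not part of this commit ----------------------------------
# The duplicated Float64/Float32 methods above (and possible Float16 ones later)
# could instead be collapsed into one parametric method, so the element type is
# threaded through once. `interpret_gpu_generic` is illustrative only; the body
# is still a stub and the result shape is an assumption.
function interpret_gpu_generic(exprs::Vector{Expr}, X::Matrix{T}, p::Vector{Vector{T}})::Matrix{T} where {T<:AbstractFloat}
    return Matrix{T}(undef, length(exprs), size(X, 2))  # placeholder result, shape assumed
end
# ------------------------------------------------------------------------------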

function test()
    Interpreter.CudaTest()
end

end