added first CUDA steps
Some checks failed
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Has been cancelled
@@ -1,5 +1,31 @@
module ExpressionExecutorCuda
include("Interpreter.jl")

# Write your package code here.
export interpret_gpu
export evaluate_gpu
export test

# Evaluate Expressions on the GPU
function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})::Matrix{Float64}
    # Ensure that no two expressions are interpreted in the same "warp"
    expr1 = exprs[1]
end
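
# --- Hedged sketch, not part of this commit ----------------------------------
# One way to keep two expressions out of the same warp is to give every
# expression its own block, so all 32 threads of a warp evaluate one expression.
# `warp_isolated_kernel`, `d_results`, and `d_X` are illustrative assumptions;
# the kernel body is a placeholder (it only copies the first variable) where a
# real interpreter would walk exprs[blockIdx().x].
using CUDA

function warp_isolated_kernel(results, X, nexprs)
    expr_idx = blockIdx().x                                  # one block per expression
    col = (blockIdx().y - 1) * blockDim().x + threadIdx().x  # data point handled by this thread
    if expr_idx <= nexprs && col <= size(X, 2)
        @inbounds results[expr_idx, col] = X[1, col]         # placeholder work
    end
    return nothing
end

# Launch geometry: 32 threads (= one warp) per block, one block row per expression:
#   @cuda threads=32 blocks=(length(exprs), cld(size(X, 2), 32)) warp_isolated_kernel(d_results, d_X, length(exprs))
# ------------------------------------------------------------------------------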

# Convert Expressions to PTX Code and execute that instead
function evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float64}, p::Vector{Vector{Float64}})::Matrix{Float64}
    # Look into this to maybe speed up PTX generation: https://cuda.juliagpu.org/stable/tutorials/introduction/#Parallelization-on-the-CPU
end
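
# --- Hedged sketch, not part of this commit ----------------------------------
# The tutorial linked above parallelizes work across CPU threads; applied here,
# the PTX for each expression could be generated on its own CPU thread before
# anything touches the GPU. `generate_ptx_threaded` is a hypothetical name and
# the string is only a stand-in for real code generation.
function generate_ptx_threaded(exprs::Vector{Expr})::Vector{String}
    ptx = Vector{String}(undef, length(exprs))
    Threads.@threads for i in eachindex(exprs)
        ptx[i] = string("// placeholder PTX for expression ", i)  # real codegen would go here
    end
    return ptx
end
# ------------------------------------------------------------------------------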


# TODO: See if it is feasible to add Float32 versions too (mostly because Float32 is faster than Float64 on the GPU)
# If AMD GPU support gets added, it might even be a good idea to add 16-bit floats, since they are even faster than 32-bit. On Nvidia, 16-bit is either slower than or equal in performance to 32-bit.
function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
end
function evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
end
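
# --- Hedged sketch, not part of this commit ----------------------------------
# The duplicated Float64/Float32 methods above (and possible Float16 ones later)
# could instead be collapsed into one parametric method, so the element type is
# threaded through once. `interpret_gpu_generic` is illustrative only; the body
# is still a stub and the result shape is an assumption.
function interpret_gpu_generic(exprs::Vector{Expr}, X::Matrix{T}, p::Vector{Vector{T}})::Matrix{T} where {T<:AbstractFloat}
    return Matrix{T}(undef, length(exprs), size(X, 2))  # placeholder result, shape assumed
end
# ------------------------------------------------------------------------------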

function test()
    Interpreter.CudaTest()
end

end