made it possible to send expressions to the GPU alongside the needed data
@@ -1,36 +1,8 @@
module Interpreter

using CUDA

include("ExpressionProcessing.jl")
using .ExpressionProcessing: PostfixType, Add, Subtract, Operator
using ..ExpressionProcessing

export Interpret
export CudaTest

function CudaTest()
    N = 2^20
    x = CUDA.fill(1.0f0, N)
    y = CUDA.fill(2.0f0, N)

    kernelAdd = @cuda launch=false InterpretExplicit!(ExpressionProcessing.Add, x, y)
    # kernelAdd = @cuda launch=false InterpretExplicit!(Add, x, y, reference)
    config = launch_configuration(kernelAdd.fun)
    threads = min(length(y), config.threads)
    blocks = cld(length(y), threads)

    kernelAdd(Add, x, y; threads, blocks)
    # println(y[1])
    # @test all(Array(y) .== 3.0f0)

    kernelSubtract = @cuda launch=false InterpretExplicit!(ExpressionProcessing.Subtract, x, y)
    configSub = launch_configuration(kernelSubtract.fun)
    threadsSub = min(length(y), configSub.threads)
    blocksSub = cld(length(y), threadsSub)
    CUDA.fill!(y, 2.0f0)

    # kernelSubtract(Subtract, x, y; threadsSub, blocksSub)
    # @test all(Array(y) .== -1.0f0)
    # println(y[1])
end

export interpret

"Interprets the given expressions with the values provided.

# Arguments
@@ -38,19 +10,49 @@ end
- variables::Matrix{Float64} : The variables to use. Each column is mapped to the variables x1..xn
- parameters::Vector{Vector{Float64}} : The parameters to use. Each vector contains the values for the parameters p1..pn. The number of parameters can differ between expressions.
"
function Interpret(expressions::Vector{ExpressionProcessing.PostfixType}, variables::Matrix{Float64}, parameters::Vector{Vector{Float64}})
function interpret(expressions::Vector{ExpressionProcessing.PostfixType}, variables::Matrix{Float64}, parameters::Vector{Vector{Float64}})
    # TODO:
    # create CUDA arrays and fill them with the expressions, variables and parameters
    # calculate the needed number of threads, probably based on the number of expressions, so that each warp takes the same execution path
    # start the kernel
    cudaExprs = Vector{CuArray{ExpressionProcessing.PostfixType}}(undef, length(expressions))
    # create CUDA array and fill it with the variables and parameters
    # create CUDA array for the calculation results
    variableRows = size(variables, 1)
    cudaVars = CuArray(variables)

    paramRows = get_max_parameter_rows(parameters)
    paramCols = length(parameters)
    cudaParams = CuArray{Float64}(undef, paramCols, paramRows) # length(parameters) == number of expressions
    # TODO: Fill cudaParams

    # TODO: Move CuArray(expressions[i]) outside the loop for a more efficient transfer to the GPU, but leave the kernel signature as is
    for i in eachindex(expressions)
        push!(cudaExprs, CuArray(expressions[i]))
        cudaExpr = CuArray(expressions[i])
        kernel = @cuda launch=false interpret_expression(cudaExpr, cudaVars, cudaParams, i)
        config = launch_configuration(kernel.fun)
        threads = min(variableRows, config.threads)
        blocks = cld(variableRows, threads)

        kernel(cudaExpr, cudaVars, cudaParams, i; threads, blocks)
    end
    # cudaExprs = CuArray(copy(expressions))
end
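
# A possible way to handle the "TODO: Fill cudaParams" above: pack the ragged
# `parameters` vectors into one dense, zero-padded host matrix and copy it to the
# GPU in a single transfer. This is only a sketch under the layout assumed above
# (one row per expression, one column per parameter slot); the helper name
# `pack_parameters` is not part of the existing code.
function pack_parameters(parameters::Vector{Vector{Float64}})::CuArray{Float64}
    paramRows = get_max_parameter_rows(parameters)
    paramCols = length(parameters)
    hostParams = zeros(Float64, paramCols, paramRows) # expression i uses hostParams[i, 1:length(parameters[i])]
    for i in eachindex(parameters)
        hostParams[i, 1:length(parameters[i])] .= parameters[i]
    end
    return CuArray(hostParams) # one host-to-device copy instead of one per expression
end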

function interpret_expression(expression, variables, parameters, exprIndex::Int)
    # TODO: Implement the interpreter
end
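
# Illustrative only: the kernel above is still a stub, and the actual layout of
# ExpressionProcessing.PostfixType is not shown in this diff. As a rough CPU-side
# sketch of the stack-based evaluation a postfix interpreter typically performs,
# assuming a hypothetical token stream of literal values and operator symbols:
function evaluate_postfix_sketch(tokens::Vector{Union{Float64, Symbol}})::Float64
    stack = Float64[]
    for token in tokens
        if token isa Float64
            push!(stack, token)          # operands go onto the stack
        elseif token == :add
            b = pop!(stack); a = pop!(stack)
            push!(stack, a + b)          # binary operators pop two operands, push one result
        elseif token == :subtract
            b = pop!(stack); a = pop!(stack)
            push!(stack, a - b)
        end
    end
    return pop!(stack)                   # the remaining value is the result
end
# evaluate_postfix_sketch(Union{Float64, Symbol}[3.0, 4.0, :add, 2.0, :subtract]) == 5.0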

"Retrieves the number of entries of the largest inner vector"
function get_max_parameter_rows(params::Vector{Vector{T}})::Int where T
    maxLength = 0
    for i in eachindex(params)
        if length(params[i]) > maxLength
            maxLength = length(params[i])
        end
    end

    return maxLength
end
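
# For example, a ragged parameter set like [[1.0, 2.0, 3.0], [4.0]] yields 3,
# since the largest inner vector has three entries:
# get_max_parameter_rows([[1.0, 2.0, 3.0], [4.0]]) == 3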


@deprecate InterpretExplicit!(op::Operator, x, y) interpret_expression(expression, variables, parameters, exprIndex::Int)
# Kernel
function InterpretExplicit!(op::Operator, x, y)
    index = (blockIdx().x - 1) * blockDim().x + threadIdx().x