benchmarking: prepared tests for using actual data
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
This commit is contained in:
@ -23,8 +23,8 @@ function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parame
|
||||
variableCols = size(variables, 2) # number of variable sets to use for each expression
|
||||
cudaVars = CuArray(variables)
|
||||
cudaParams = Utils.create_cuda_array(parameters, NaN32) # column corresponds to data for one expression
|
||||
cudaExprs = Utils.create_cuda_array(exprs, ExpressionElement(EMPTY, 0)) # column corresponds to data for one expression; TODO: replace this 0 with 'undef' if possible
|
||||
# put into seperate cuArray, as this is static and would be inefficient to send seperatly to every kernel
|
||||
cudaExprs = Utils.create_cuda_array(exprs, ExpressionElement(EMPTY, 0)) # column corresponds to data for one expression;
|
||||
# put into seperate cuArray, as this is static and would be inefficient to send seperatly to each kernel
|
||||
cudaStepsize = CuArray([Utils.get_max_inner_length(exprs), Utils.get_max_inner_length(parameters), size(variables, 1)]) # max num of values per expression; max nam of parameters per expression; number of variables per expression
|
||||
|
||||
# each expression has nr. of variable sets (nr. of columns of the variables) results and there are n expressions
|
||||
@ -32,9 +32,7 @@ function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parame
|
||||
|
||||
# Start kernel for each expression to ensure that no warp is working on different expressions
|
||||
@inbounds for i in eachindex(exprs)
|
||||
# TODO: Currently only the first expression gets evaluated. Either use a view on "cudaExprs" to determine the correct expression or extend cudaStepsize to include this information (this information was removed in a previous commit)
|
||||
# If a "view" is used, then the ExpressionProcessing must be updated to always include the stop opcode at the end
|
||||
numThreads = min(variableCols, 128)
|
||||
numThreads = min(variableCols, 256)
|
||||
numBlocks = cld(variableCols, numThreads)
|
||||
|
||||
@cuda threads=numThreads blocks=numBlocks fastmath=true interpret_expression(cudaExprs, cudaVars, cudaParams, cudaResults, cudaStepsize, i)
|
||||
@ -43,7 +41,6 @@ function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parame
|
||||
return cudaResults
|
||||
end
|
||||
|
||||
#TODO: Add @inbounds to all indexing after it is verified that all works https://cuda.juliagpu.org/stable/development/kernel/#Bounds-checking
|
||||
const MAX_STACK_SIZE = 25 # The depth of the stack to store the values and intermediate results
|
||||
function interpret_expression(expressions::CuDeviceArray{ExpressionElement}, variables::CuDeviceArray{Float32}, parameters::CuDeviceArray{Float32}, results::CuDeviceArray{Float32}, stepsize::CuDeviceArray{Int}, exprIndex::Int)
|
||||
varSetIndex = (blockIdx().x - 1) * blockDim().x + threadIdx().x # ctaid.x * ntid.x + tid.x (1-based)
|
||||
|
Reference in New Issue
Block a user