benchmarking: added final results for interpreter in benchmark 1
This commit is contained in:
@ -27,10 +27,14 @@ function interpret(cudaExprs, numExprs::Integer, exprsInnerLength::Integer,
|
||||
numThreads = min(variableColumns, 121)
|
||||
numBlocks = cld(variableColumns, numThreads)
|
||||
|
||||
@inbounds Threads.@threads for i in 1:numExprs # multithreaded to speedup dispatching (seems to have improved performance)
|
||||
Threads.@threads for i in 1:numExprs # multithreaded to speedup dispatching (seems to have improved performance)
|
||||
@cuda threads=numThreads blocks=numBlocks fastmath=true interpret_expression(cudaExprs, cudaVars, cudaParams, cudaResults, cudaStepsize, i)
|
||||
end
|
||||
|
||||
# Reduce GC pressure https://cuda.juliagpu.org/stable/usage/memory/#Avoiding-GC-pressure
|
||||
CUDA.unsafe_free!(cudaParams)
|
||||
CUDA.unsafe_free!(cudaStepsize)
|
||||
|
||||
return cudaResults
|
||||
end
|
||||
|
||||
|
Reference in New Issue
Block a user