benchmarking: added final results for interpreter in benchmark 1

This commit is contained in:
Daniel
2025-05-22 09:35:30 +02:00
parent 2701f1aa38
commit ad04f4715b
3 changed files with 8 additions and 5 deletions

View File

@ -27,10 +27,14 @@ function interpret(cudaExprs, numExprs::Integer, exprsInnerLength::Integer,
numThreads = min(variableColumns, 121)
numBlocks = cld(variableColumns, numThreads)
@inbounds Threads.@threads for i in 1:numExprs # multithreaded to speedup dispatching (seems to have improved performance)
Threads.@threads for i in 1:numExprs # multithreaded to speedup dispatching (seems to have improved performance)
@cuda threads=numThreads blocks=numBlocks fastmath=true interpret_expression(cudaExprs, cudaVars, cudaParams, cudaResults, cudaStepsize, i)
end
# Free these device buffers eagerly to reduce GC pressure; see https://cuda.juliagpu.org/stable/usage/memory/#Avoiding-GC-pressure
CUDA.unsafe_free!(cudaParams)
CUDA.unsafe_free!(cudaStepsize)
return cudaResults
end