benchmarking: added final results for interpreter in benchmark 1

2025-05-22 09:35:30 +02:00
parent 2701f1aa38
commit ad04f4715b
3 changed files with 8 additions and 5 deletions
--- a/package/src/Interpreter.jl
+++ b/package/src/Interpreter.jl
@@ -27,10 +27,14 @@ function interpret(cudaExprs, numExprs::Integer, exprsInnerLength::Integer,
 	numThreads = min(variableColumns, 121)
 	numBlocks = cld(variableColumns, numThreads)

-	@inbounds Threads.@threads for i in 1:numExprs # multithreaded to speedup dispatching (seems to have improved performance)
+	Threads.@threads for i in 1:numExprs # multithreaded to speedup dispatching (seems to have improved performance)
 		@cuda threads=numThreads blocks=numBlocks fastmath=true interpret_expression(cudaExprs, cudaVars, cudaParams, cudaResults, cudaStepsize, i)
 	end

+	# Reduce GC pressure https://cuda.juliagpu.org/stable/usage/memory/#Avoiding-GC-pressure
+	CUDA.unsafe_free!(cudaParams)
+	CUDA.unsafe_free!(cudaStepsize)
+
 	return cudaResults
 end