benchmarking: redid the inbounds tests because the first test run did not use @inbounds everywhere

2025-04-06 14:29:07 +02:00
parent 20fcbab4ca
commit 258d33c338
3 changed files with 5 additions and 15 deletions
--- a/package/src/Transpiler.jl
+++ b/package/src/Transpiler.jl
@@ -71,12 +71,12 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
 	cudaResults = CuArray{Float32}(undef, variableCols, length(expressions))

 	# execute each kernel (also try doing this with Threads.@threads. Since we can have multiple grids, this might improve performance)
-	@inbounds for i in eachindex(kernels)
+	for kernel in kernels
 		# config = launch_configuration(kernels[i])
 		threads = min(variableCols, 256)
 		blocks = cld(variableCols, threads)

-		cudacall(kernels[i], (CuPtr{Float32},CuPtr{Float32},CuPtr{Float32}), cudaVars, cudaParams, cudaResults; threads=threads, blocks=blocks)
+		cudacall(kernel, (CuPtr{Float32},CuPtr{Float32},CuPtr{Float32}), cudaVars, cudaParams, cudaResults; threads=threads, blocks=blocks)
 	end

 	return cudaResults