benchmarking: redid the @inbounds tests because the first test run did not use @inbounds everywhere

This commit is contained in:
Daniel 2025-04-06 14:29:07 +02:00
parent 20fcbab4ca
commit 258d33c338
3 changed files with 5 additions and 15 deletions

View File

@ -71,12 +71,12 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
cudaResults = CuArray{Float32}(undef, variableCols, length(expressions)) cudaResults = CuArray{Float32}(undef, variableCols, length(expressions))
# execute each kernel (also try doing this with Threads.@threads. Since we can have multiple grids, this might improve performance) # execute each kernel (also try doing this with Threads.@threads. Since we can have multiple grids, this might improve performance)
@inbounds for i in eachindex(kernels) for kernel in kernels
# config = launch_configuration(kernels[i]) # config = launch_configuration(kernels[i])
threads = min(variableCols, 256) threads = min(variableCols, 256)
blocks = cld(variableCols, threads) blocks = cld(variableCols, threads)
cudacall(kernels[i], (CuPtr{Float32},CuPtr{Float32},CuPtr{Float32}), cudaVars, cudaParams, cudaResults; threads=threads, blocks=blocks) cudacall(kernel, (CuPtr{Float32},CuPtr{Float32},CuPtr{Float32}), cudaVars, cudaParams, cudaResults; threads=threads, blocks=blocks)
end end
return cudaResults return cudaResults

View File

@ -64,6 +64,8 @@ end
# https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management # https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management
end end
After these tests have been redone, use Nsight Compute/Systems as described here:
#https://cuda.juliagpu.org/stable/development/profiling/#NVIDIA-Nsight-Systems
compareWithCPU = true compareWithCPU = true
@ -172,15 +174,3 @@ else
println(oldVsGPUT_std) println(oldVsGPUT_std)
end end
REDO the @inbounds performance tests because I added more @inbounds and removed unneeded code from the interpreter
Also updated Expression processing and the transpiler
After these tests have been redone, use Nsight Compute/Systems as described here:
#https://cuda.juliagpu.org/stable/development/profiling/#NVIDIA-Nsight-Systems

File diff suppressed because one or more lines are too long