benchmarking: redid the @inbounds tests because the first test run did not use @inbounds everywhere
This commit is contained in:
parent
20fcbab4ca
commit
258d33c338
package
|
@ -71,12 +71,12 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
|
||||||
cudaResults = CuArray{Float32}(undef, variableCols, length(expressions))
|
cudaResults = CuArray{Float32}(undef, variableCols, length(expressions))
|
||||||
|
|
||||||
# execute each kernel (also try doing this with Threads.@threads. Since we can have multiple grids, this might improve performance)
|
# execute each kernel (also try doing this with Threads.@threads. Since we can have multiple grids, this might improve performance)
|
||||||
@inbounds for i in eachindex(kernels)
|
for kernel in kernels
|
||||||
# config = launch_configuration(kernels[i])
|
# config = launch_configuration(kernels[i])
|
||||||
threads = min(variableCols, 256)
|
threads = min(variableCols, 256)
|
||||||
blocks = cld(variableCols, threads)
|
blocks = cld(variableCols, threads)
|
||||||
|
|
||||||
cudacall(kernels[i], (CuPtr{Float32},CuPtr{Float32},CuPtr{Float32}), cudaVars, cudaParams, cudaResults; threads=threads, blocks=blocks)
|
cudacall(kernel, (CuPtr{Float32},CuPtr{Float32},CuPtr{Float32}), cudaVars, cudaParams, cudaResults; threads=threads, blocks=blocks)
|
||||||
end
|
end
|
||||||
|
|
||||||
return cudaResults
|
return cudaResults
|
||||||
|
|
|
@ -64,6 +64,8 @@ end
|
||||||
# https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management
|
# https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management
|
||||||
end
|
end
|
||||||
|
|
||||||
|
After these tests have been redone, use Nsight Compute/Systems as described here:
|
||||||
|
#https://cuda.juliagpu.org/stable/development/profiling/#NVIDIA-Nsight-Systems
|
||||||
compareWithCPU = true
|
compareWithCPU = true
|
||||||
|
|
||||||
|
|
||||||
|
@ -172,15 +174,3 @@ else
|
||||||
println(oldVsGPUT_std)
|
println(oldVsGPUT_std)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
REDO the @inbounds performance tests because I added more @inbounds and removed unneeded code from the interpreter
|
|
||||||
Also updated Expression processing and transpiler
|
|
||||||
|
|
||||||
After these tests have been redone, use Nsight Compute/Systems as described here:
|
|
||||||
#https://cuda.juliagpu.org/stable/development/profiling/#NVIDIA-Nsight-Systems
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user