benchmarking: added tests for performance tuning with Nsight compute and systems
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
This commit is contained in:
parent
5a9760d221
commit
9746db56c0
|
@ -5,7 +5,6 @@ using .Transpiler
|
|||
using .Interpreter
|
||||
|
||||
const BENCHMARKS_RESULTS_PATH = "./results"
|
||||
# University setup at 10.20.1.7 if needed
|
||||
exprsCPU = [
|
||||
# CPU interpreter requires an anonymous function and array refs
|
||||
:(p[1] * x[1] + p[2]), # 5 op
|
||||
|
@ -64,9 +63,13 @@ end
|
|||
# https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management
|
||||
end
|
||||
|
||||
After these tests have been redone, use Nsight Compute/Systems as described here:
|
||||
# After these tests have been redone, use Nsight Compute/Systems as described here:
|
||||
# https://cuda.juliagpu.org/stable/development/profiling/#NVIDIA-Nsight-Systems
|
||||
compareWithCPU = true
|
||||
# Nsight Systems and Nsight Compute can be installed via WSL. The Compute UI can even be used inside WSL.
|
||||
# Add /usr/local/cuda/bin to PATH in .bashrc to access ncu and nsys. Depending on how well this works with my 1080, either run the profiling on my machine or redo the tests on the FH PCs.
|
||||
# University setup at 10.20.1.7 if needed
|
||||
|
||||
compareWithCPU = false
|
||||
|
||||
|
||||
suite = BenchmarkGroup()
|
||||
|
|
30
package/test/PerformanceTuning.jl
Normal file
30
package/test/PerformanceTuning.jl
Normal file
|
@ -0,0 +1,30 @@
|
|||
using CUDA
|
||||
|
||||
using .Transpiler
|
||||
using .Interpreter
|
||||
|
||||
varsets_medium = 1000
|
||||
X = randn(Float32, 5, varsets_medium)
|
||||
|
||||
exprsGPU = [
|
||||
# CPU interpreter requires an anonymous function and array refs
|
||||
:(p1 * x1 + p2), # 5 op
|
||||
:((((x1 + x2) + x3) + x4) + x5), # 9 op
|
||||
:(log(abs(x1))), # 3 op
|
||||
:(powabs(p2 - powabs(p1 + x1, 1/x1),p3)) # 13 op
|
||||
] # 30 op
|
||||
|
||||
# p is the same for CPU and GPU
|
||||
p = [randn(Float32, 10) for _ in 1:length(exprsGPU)] # generate 10 random parameter values for each expr
|
||||
expr_reps = 1
|
||||
|
||||
|
||||
|
||||
@testset "Interpreter Tuning" begin
|
||||
CUDA.@profile interpret_gpu(exprsGPU, X, p; repetitions=expr_reps)
|
||||
end
|
||||
|
||||
|
||||
@testset "Transpiler Tuning" begin
|
||||
CUDA.@profile evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps)
|
||||
end
|
|
@ -19,5 +19,6 @@ end
|
|||
# end
|
||||
|
||||
@testset "Performance tests" begin
|
||||
include("PerformanceTests.jl")
|
||||
include("PerformanceTuning.jl")
|
||||
# include("PerformanceTests.jl")
|
||||
end
|
Loading…
Reference in New Issue
Block a user