benchmarking: added tests for performance tuning with Nsight Compute and Systems

Daniel 2025-04-12 10:24:45 +02:00
parent 5a9760d221
commit 9746db56c0
3 changed files with 38 additions and 4 deletions

View File

@@ -5,7 +5,6 @@ using .Transpiler
using .Interpreter
const BENCHMARKS_RESULTS_PATH = "./results"
-# University setup at 10.20.1.7 if needed
exprsCPU = [
    # CPU interpreter requires an anonymous function and array refs
    :(p[1] * x[1] + p[2]), # 5 op
@@ -64,9 +63,13 @@ end
# https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management
end

-After these tests have been redone, use Nsight Compute/Systems as described here:
+# After these tests have been redone, use Nsight Compute/Systems as described here:
#https://cuda.juliagpu.org/stable/development/profiling/#NVIDIA-Nsight-Systems
-compareWithCPU = true
+# Systems and Compute are installable via WSL; the Compute UI can even be used from inside WSL.
+# Add /usr/local/cuda/bin to PATH in .bashrc so ncu and nsys are available (depending on how well this works with my 1080, do it on my machine; otherwise redo the tests and run them on the FH PCs).
+# University setup at 10.20.1.7 if needed
+compareWithCPU = false

suite = BenchmarkGroup()
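For reference, the workflow behind these comments is to start Julia under the Nsight command-line tools and let CUDA.@profile mark the region that gets recorded. Below is a minimal sketch of such a profiled region, assuming the process was launched under nsys or ncu (the script path and flags in the comments are illustrative, not part of this commit) and reusing the expressions and evaluators defined in the new tuning file further down:

using CUDA

# Launched externally, for example:
#   nsys profile julia --project=. test/PerformanceTuning.jl
#   ncu --mode=launch julia --project=. test/PerformanceTuning.jl
# Warm-up run outside the profiled region so kernel compilation is not part of the trace.
interpret_gpu(exprsGPU, X, p; repetitions=1)

# Everything inside this region is captured by the external profiler;
# newer CUDA.jl versions select it explicitly via CUDA.@profile external=true.
CUDA.@profile begin
    interpret_gpu(exprsGPU, X, p; repetitions=expr_reps)
    evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps)
end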

View File

@@ -0,0 +1,30 @@
using CUDA
using .Transpiler
using .Interpreter

varsets_medium = 1000
X = randn(Float32, 5, varsets_medium)

exprsGPU = [
    # CPU interpreter requires an anonymous function and array refs
    :(p1 * x1 + p2), # 5 op
    :((((x1 + x2) + x3) + x4) + x5), # 9 op
    :(log(abs(x1))), # 3 op
    :(powabs(p2 - powabs(p1 + x1, 1/x1), p3)) # 13 op
] # 30 op

# p is the same for CPU and GPU
p = [randn(Float32, 10) for _ in 1:length(exprsGPU)] # generate 10 random parameter values for each expr

expr_reps = 1

@testset "Interpreter Tuning" begin
    CUDA.@profile interpret_gpu(exprsGPU, X, p; repetitions=expr_reps)
end

@testset "Transpiler Tuning" begin
    CUDA.@profile evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps)
end
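If the resulting Nsight Systems timeline becomes hard to navigate, the two phases could additionally be labelled with NVTX ranges, as the linked CUDA.jl profiling guide suggests. A small sketch, assuming NVTX.jl is added as an extra test dependency (not part of this commit); the evaluators and inputs are the ones defined above:

using CUDA
using NVTX  # assumed extra dependency, not added by this commit

# Named ranges appear as labelled spans in the Nsight Systems timeline,
# making it easy to tell the interpreter run apart from the transpiler run.
NVTX.@range "interpreter tuning" begin
    interpret_gpu(exprsGPU, X, p; repetitions=expr_reps)
end

NVTX.@range "transpiler tuning" begin
    evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps)
end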

View File

@@ -19,5 +19,6 @@ end
# end
@testset "Performance tests" begin
-    include("PerformanceTests.jl")
+    include("PerformanceTuning.jl")
+    # include("PerformanceTests.jl")
end