benchmarking: added tests for performance tuning with Nsight compute and systems

2025-04-12 10:24:45 +02:00
parent 5a9760d221
commit 9746db56c0
3 changed files with 38 additions and 4 deletions
--- a/package/test/PerformanceTests.jl
+++ b/package/test/PerformanceTests.jl
@ -5,7 +5,6 @@ using .Transpiler
 using .Interpreter

 const BENCHMARKS_RESULTS_PATH = "./results"
-# University setup at 10.20.1.7 if needed
 exprsCPU = [
 	# CPU interpreter requires an anonymous function and array refs
 	:(p[1] * x[1] + p[2]), # 5 op
@ -64,9 +63,13 @@ end
 	# https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management
 end

-After these tests have been redone, use Nsight Compute/Systems as described here: 
+# After these tests have been redone, use Nsight Compute/Systems as described here: 
 #https://cuda.juliagpu.org/stable/development/profiling/#NVIDIA-Nsight-Systems
-compareWithCPU = true
+# Nsight Systems and Nsight Compute can be installed via WSL. The Compute UI can even be used inside WSL.
+# Add /usr/local/cuda/bin to PATH in .bashrc to access ncu and nsys (depending on how well this works with my 1080, do it on my machine; otherwise redo the tests and perform them on the FH PCs)
+# University setup at 10.20.1.7 if needed
+
+compareWithCPU = false


 suite = BenchmarkGroup()
--- a/package/test/PerformanceTuning.jl
+++ b/package/test/PerformanceTuning.jl
@ -0,0 +1,30 @@
+using CUDA
+# NOTE(review): @testset is used below but Test is not loaded here; this file is included from runtests.jl, which presumably loads it - confirm
+using .Transpiler
+using .Interpreter
+# Input data: X is a 5 x varsets_medium Float32 matrix of normally distributed random values
+varsets_medium = 1000
+X = randn(Float32, 5, varsets_medium)
+# Expressions handed to both GPU evaluators; the trailing "n op" comments are the author's per-expression operation counts
+exprsGPU = [
+	# NOTE(review): unlike the CPU expression list (p[1], x[1]), these expressions use plain symbols (x1..x5, p1..p3) instead of array refs
+	:(p1 * x1 + p2), # 5 op
+	:((((x1 + x2) + x3) + x4) + x5), # 9 op
+	:(log(abs(x1))), # 3 op
+	:(powabs(p2 - powabs(p1 + x1, 1/x1),p3)) # 13 op
+] # 30 op
+
+# One parameter vector per expression (the original note states that p is the same for CPU and GPU)
+p = [randn(Float32, 10) for _ in 1:length(exprsGPU)] # generate 10 random parameter values for each expr
+expr_reps = 1 # forwarded as the `repetitions` keyword to both GPU calls below
+
+
+# Profile a single interpret_gpu call with CUDA.@profile; the testset contains no @test assertions
+@testset "Interpreter Tuning" begin
+    CUDA.@profile interpret_gpu(exprsGPU, X, p; repetitions=expr_reps)
+end
+
+# Same inputs, profiled through evaluate_gpu (the transpiler path, going by the testset name); again no @test assertions
+@testset "Transpiler Tuning" begin
+    CUDA.@profile evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps)
+end
--- a/package/test/runtests.jl
+++ b/package/test/runtests.jl
@ -19,5 +19,6 @@ end
 # end

@testset "Performance tests" begin
-	include("PerformanceTests.jl")
+	include("PerformanceTuning.jl")
+	# include("PerformanceTests.jl")
 end