# Patch summary (free text before the first "diff --git" header; patch tools skip it).
#
# Purpose: add a small GPU profiling script and make the test runner execute it
# in place of the full benchmark suite.
#
#   package/test/PerformanceTests.jl
#     - Turns the bare sentence "After these tests have been redone, ..." into a comment.
#     - Moves the "University setup at 10.20.1.7" note next to the Nsight notes and adds
#       notes about installing Nsight Systems/Compute via WSL and putting
#       /usr/local/cuda/bin on PATH.
#     - Changes compareWithCPU from true to false.
#   package/test/PerformanceTuning.jl (new file)
#     - Builds X as a 5 x 1000 Float32 random matrix, four expressions, and one vector of
#       10 random Float32 parameters per expression.
#     - Calls interpret_gpu and evaluate_gpu once each (repetitions = 1) under
#       CUDA.@profile, each inside its own @testset.
#   package/test/runtests.jl
#     - The "Performance tests" testset now includes PerformanceTuning.jl; the previous
#       include of PerformanceTests.jl is kept as a comment.
#
# NOTE(review): this patch was restored from a copy whose line breaks had been collapsed.
# Blank context lines were re-inserted so that every hunk matches the line counts in its
# "@@" header; their exact position and the indentation width (4 spaces assumed) could
# not be recovered from that copy. TODO: confirm against the repository, or regenerate
# the patch with `git diff`, before applying.
# NOTE(review): PerformanceTuning.jl uses @testset without loading Test itself;
# presumably it relies on runtests.jl having done so. Verify if it is run standalone.
# NOTE(review): the "CPU interpreter requires ..." comment above exprsGPU looks copied
# from PerformanceTests.jl; the GPU expressions use p1/x1 rather than p[1]/x[1].
diff --git a/package/test/PerformanceTests.jl b/package/test/PerformanceTests.jl
index d2acbcc..84f0249 100644
--- a/package/test/PerformanceTests.jl
+++ b/package/test/PerformanceTests.jl
@@ -5,7 +5,6 @@ using .Transpiler
 using .Interpreter
 
 const BENCHMARKS_RESULTS_PATH = "./results"
-# University setup at 10.20.1.7 if needed
 exprsCPU = [
     # CPU interpreter requires an anonymous function and array ref s
     :(p[1] * x[1] + p[2]), # 5 op
@@ -64,9 +63,13 @@ end
     # https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management
 end
 
-After these tests have been redone, use Nsight Compute/Systems as described here:
+# After these tests have been redone, use Nsight Compute/Systems as described here:
 #https://cuda.juliagpu.org/stable/development/profiling/#NVIDIA-Nsight-Systems
-compareWithCPU = true
+# Systems and Compute installable via WSL. Compute UI can even be used inside wsl
+# Add /usr/local/cuda/bin in .bashrc to PATH to access ncu and nsys (depending how well this works with my 1080 do it on my machine, otherwise re do the tests and perform them on FH PCs)
+# University setup at 10.20.1.7 if needed
+
+compareWithCPU = false
 
 suite = BenchmarkGroup()
 
diff --git a/package/test/PerformanceTuning.jl b/package/test/PerformanceTuning.jl
new file mode 100644
index 0000000..2a66041
--- /dev/null
+++ b/package/test/PerformanceTuning.jl
@@ -0,0 +1,30 @@
+using CUDA
+
+using .Transpiler
+using .Interpreter
+
+varsets_medium = 1000
+X = randn(Float32, 5, varsets_medium)
+
+exprsGPU = [
+    # CPU interpreter requires an anonymous function and array ref s
+    :(p1 * x1 + p2), # 5 op
+    :((((x1 + x2) + x3) + x4) + x5), # 9 op
+    :(log(abs(x1))), # 3 op
+    :(powabs(p2 - powabs(p1 + x1, 1/x1),p3)) # 13 op
+] # 30 op
+
+# p is the same for CPU and GPU
+p = [randn(Float32, 10) for _ in 1:length(exprsGPU)] # generate 10 random parameter values for each expr
+expr_reps = 1
+
+
+
+@testset "Interpreter Tuning" begin
+    CUDA.@profile interpret_gpu(exprsGPU, X, p; repetitions=expr_reps)
+end
+
+
+@testset "Transpiler Tuning" begin
+    CUDA.@profile evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps)
+end
\ No newline at end of file
diff --git a/package/test/runtests.jl b/package/test/runtests.jl
index e2b4ccb..96a8f0b 100644
--- a/package/test/runtests.jl
+++ b/package/test/runtests.jl
@@ -19,5 +19,6 @@ end
 # end
 
 @testset "Performance tests" begin
-    include("PerformanceTests.jl")
+    include("PerformanceTuning.jl")
+    # include("PerformanceTests.jl")
 end
\ No newline at end of file