benchmarking: added tests for performance tuning with Nsight Compute and Systems

Daniel 2025-04-12 10:24:45 +02:00
parent 5a9760d221
commit 9746db56c0
3 changed files with 38 additions and 4 deletions

View File

@@ -5,7 +5,6 @@ using .Transpiler
using .Interpreter
const BENCHMARKS_RESULTS_PATH = "./results"
-# University setup at 10.20.1.7 if needed
exprsCPU = [
    # CPU interpreter requires an anonymous function and array refs
    :(p[1] * x[1] + p[2]), # 5 op
@@ -64,9 +63,13 @@ end
# https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management
end

-After these tests have been redone, use Nsight Compute/Systems as described here:
+# After these tests have been redone, use Nsight Compute/Systems as described here:
#https://cuda.juliagpu.org/stable/development/profiling/#NVIDIA-Nsight-Systems
-compareWithCPU = true
+# Systems and Compute are installable via WSL; the Compute UI can even be used from inside WSL.
+# Add /usr/local/cuda/bin to PATH in .bashrc so ncu and nsys are available (depending on how well this works with my 1080, do it on my machine; otherwise redo the tests and run them on the FH PCs).
+# University setup at 10.20.1.7 if needed
+compareWithCPU = false

suite = BenchmarkGroup()
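For reference, the workflow behind these comments is to start Julia under the Nsight command-line tools and let CUDA.@profile mark the region that gets recorded. Below is a minimal sketch of such a profiled region, assuming the process was launched under nsys or ncu (the script path and flags in the comments are illustrative, not part of this commit) and reusing the expressions and evaluators defined in the new tuning file further down:

using CUDA

# Launched externally, for example:
#   nsys profile julia --project=. test/PerformanceTuning.jl
#   ncu --mode=launch julia --project=. test/PerformanceTuning.jl
# Warm-up run outside the profiled region so kernel compilation is not part of the trace.
interpret_gpu(exprsGPU, X, p; repetitions=1)

# Everything inside this region is captured by the external profiler;
# newer CUDA.jl versions select it explicitly via CUDA.@profile external=true.
CUDA.@profile begin
    interpret_gpu(exprsGPU, X, p; repetitions=expr_reps)
    evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps)
end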

View File

@@ -0,0 +1,30 @@
using CUDA
using .Transpiler
using .Interpreter

varsets_medium = 1000
X = randn(Float32, 5, varsets_medium)

exprsGPU = [
    # CPU interpreter requires an anonymous function and array refs
    :(p1 * x1 + p2), # 5 op
    :((((x1 + x2) + x3) + x4) + x5), # 9 op
    :(log(abs(x1))), # 3 op
    :(powabs(p2 - powabs(p1 + x1, 1/x1), p3)) # 13 op
] # 30 op

# p is the same for CPU and GPU
p = [randn(Float32, 10) for _ in 1:length(exprsGPU)] # generate 10 random parameter values for each expr

expr_reps = 1

@testset "Interpreter Tuning" begin
    CUDA.@profile interpret_gpu(exprsGPU, X, p; repetitions=expr_reps)
end

@testset "Transpiler Tuning" begin
    CUDA.@profile evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps)
end
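If the resulting Nsight Systems timeline becomes hard to navigate, the two phases could additionally be labelled with NVTX ranges, as the linked CUDA.jl profiling guide suggests. A small sketch, assuming NVTX.jl is added as an extra test dependency (not part of this commit); the evaluators and inputs are the ones defined above:

using CUDA
using NVTX  # assumed extra dependency, not added by this commit

# Named ranges appear as labelled spans in the Nsight Systems timeline,
# making it easy to tell the interpreter run apart from the transpiler run.
NVTX.@range "interpreter tuning" begin
    interpret_gpu(exprsGPU, X, p; repetitions=expr_reps)
end

NVTX.@range "transpiler tuning" begin
    evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps)
end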

View File

@@ -19,5 +19,6 @@ end
# end
@testset "Performance tests" begin
-    include("PerformanceTests.jl")
+    include("PerformanceTuning.jl")
+    # include("PerformanceTests.jl")
end