master-thesis/package/test/PerformanceTests.jl
Daniel 690ee33db1
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
benchmarks: started preparing benchmarks
2025-03-29 12:01:06 +01:00

103 lines
3.5 KiB
Julia

using LinearAlgebra
using BenchmarkTools
using BenchmarkPlots, StatsPlots
using .Transpiler
using .Interpreter
# University setup at 10.20.1.7 if needed
# Expressions for the CPU interpreter. Each one addresses variables and parameters through
# array references (x[i], p[i]) and is wrapped into an anonymous function `(x, p) -> body`.
# The trailing comments give the operation count per expression (30 ops in total).
exprsCPU = map([
    :(p[1] * x[1] + p[2]),                                  # 5 op
    :((((x[1] + x[2]) + x[3]) + x[4]) + x[5]),              # 9 op
    :(log(abs(x[1]))),                                      # 3 op
    :(powabs(p[2] - powabs(p[1] + x[1], 1 / x[1]), p[3])),  # 13 op
]) do body
    # Build the `->` expression by hand so the body is stored exactly as quoted above.
    Expr(:->, :(x, p), body)
end
# The same four expressions as in `exprsCPU`, but written with scalar names (x1, p1, ...)
# instead of array references and not wrapped into an anonymous function.
# NOTE(review): presumably this is the form the GPU interpreter/transpiler expects - confirm.
exprsGPU = Expr[
    :(p1 * x1 + p2),                              # 5 op
    :((((x1 + x2) + x3) + x4) + x5),              # 9 op
    :(log(abs(x1))),                              # 3 op
    :(powabs(p2 - powabs(p1 + x1, 1 / x1), p3)),  # 13 op
]                                                 # 30 op in total
# Shared benchmark inputs.
nrows = 1000     # number of rows of the CPU data matrix
expr_reps = 100  # repetitions per call; stands in for 100 parameter optimisation steps

# One parameter vector per expression, each holding 10 random Float32 values.
# The same parameters are used for the CPU and the GPU runs.
p = map(_ -> randn(Float32, 10), exprsCPU)

# CPU data matrix with `nrows` rows and 5 columns (the expressions use x[1] .. x[5]).
X = randn(Float32, (nrows, 5))
@testset "CPU performance" begin
    # Placeholder: this test set currently contains no assertions and runs no code.
    # The lines below are disabled timing experiments for the CPU interpreter, kept for
    # reference.
    # NOTE(review): `reps` and `test_cpu_interpreter` are not defined in the visible part of
    # this file - confirm where they come from before re-enabling any of these lines.
# warmup
# interpret_cpu(exprsCPU, X, p)
# @btime interpret_cpu(exprsCPU, X, p; repetitions=expr_reps) # repetitions simulates parameter optimisation
# @btime test_cpu_interpreter(1000)
# @btime fetch.([Threads.@spawn interpret_cpu(exprsCPU, X, p; repetitions=expr_reps) for i in 1:reps])
# test_cpu_interpreter(1000, parallel=true) # start julia -t 6 for six threads
# @btime test_cpu_interpreter(10000)
# @btime test_cpu_interpreter(10000, parallel=true)
end
# GPU data matrix: 5 rows and `ncols` columns, i.e. transposed relative to the
# (rows x 5) matrix used on the CPU.
# NOTE(review): presumably each column is one data point with five variables - confirm
# against the GPU evaluators.
ncols = 1000
X_GPU = randn(Float32, (5, ncols))
@testset "Interpreter Performance" begin
    # Placeholder: no benchmarks or assertions are implemented here yet.
    # Ideas to investigate for improving performance:
# Put data in shared memory:
# https://cuda.juliagpu.org/v2.6/api/kernel/#Shared-memory
# Make array const:
# https://cuda.juliagpu.org/v2.6/api/kernel/#Device-arrays
# Memory management like in C++ might help with performance improvements
# https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management
end
@testset "Transpiler Performance" begin
    # Placeholder: no benchmarks or assertions are implemented here yet.
    # Ideas to investigate for improving performance:
# Put data in shared memory:
# https://cuda.juliagpu.org/v2.6/api/kernel/#Shared-memory
# Make array const:
# https://cuda.juliagpu.org/v2.6/api/kernel/#Device-arrays
# Memory management like in C++ might help with performance improvements
# https://cuda.juliagpu.org/v2.6/lib/driver/#Memory-Management
end
# Benchmark suite. The "CPU" group (tagged "CPUInterpreter") holds one benchmark per data
# set size; the GPU groups are not set up yet.
suite = BenchmarkGroup()
suite["CPU"] = BenchmarkGroup(["CPUInterpreter"])
# suite["GPUI"] = BenchmarkGroup(["GPUInterpreter"])
# suite["GPUT"] = BenchmarkGroup(["GPUTranspiler"])

# Every input of the benchmarked call is a non-constant global. They are interpolated with
# `$` so that the benchmark measures `interpret_cpu` itself rather than the overhead of
# accessing untyped globals (see the BenchmarkTools manual on interpolation).

# Small data set: 100 rows.
X_small = randn(Float32, 100, 5)
suite["CPU"]["small varset"] = @benchmarkable interpret_cpu(
    $exprsCPU, $X_small, $p; repetitions=$expr_reps
)

# Normal data set: 1000 rows.
X_normal = randn(Float32, 1000, 5)
suite["CPU"]["normal varset"] = @benchmarkable interpret_cpu(
    $exprsCPU, $X_normal, $p; repetitions=$expr_reps
)

# Large data set: 10000 rows.
X_large = randn(Float32, 10000, 5)
suite["CPU"]["large varset"] = @benchmarkable interpret_cpu(
    $exprsCPU, $X_large, $p; repetitions=$expr_reps
)
# Benchmark parameters are cached in `params.json` next to this file, so that every run uses
# the same settings. The path is anchored at this file's directory instead of the current
# working directory, which makes the script independent of where Julia was started.
# If the cache does not exist yet, the suite is tuned once and the result is stored.
paramsfile = joinpath(@__DIR__, "params.json")
if isfile(paramsfile)
    loadparams!(
        suite,
        BenchmarkTools.load(paramsfile)[1],
        :samples, :evals, :gctrial, :time_tolerance, :evals_set, :gcsample, :seconds,
        :overhead, :memory_tolerance,
    )
else
    tune!(suite)
    BenchmarkTools.save(paramsfile, params(suite))
end

# Run all benchmarks; `seconds=180` sets the time budget passed to BenchmarkTools' `run`.
results = run(suite, verbose=true, seconds=180)
# results2 = run(suite, verbose=true, seconds=180)

# Median estimates of the CPU group.
medianCPU = median(results["CPU"])
# medianCPU2 = median(results2["CPU"])
# medianInterpreter = median(results["GPUI"])
# medianTranspiler = median(results["GPUT"])

# Disabled comparisons, to be enabled once the GPU groups exist:
# jud = judge(medianCPU, medianCPU2; time_tolerance=0.001)
# println(jud)
# judge(medianCPU, medianInterpreter; time_tolerance=0.001)
# judge(medianCPU, medianTranspiler; time_tolerance=0.001)
# judge(medianInterpreter, medianTranspiler; time_tolerance=0.001)