benchmarking: started tuning the benchmark suite; found some errors that need fixing

Daniel
2025-05-09 19:19:53 +02:00
parent 327e4ebf1b
commit 7121329a17
5 changed files with 39 additions and 33 deletions


@@ -1,9 +1,13 @@
using LinearAlgebra
using BenchmarkTools
using DelimitedFiles
using GZip
using .Transpiler
using .Interpreter
include("parser.jl") # to parse expressions from a file
const BENCHMARKS_RESULTS_PATH = "./results-fh-new"
# Number of expressions can get really big (into millions)
@@ -11,6 +15,7 @@ const BENCHMARKS_RESULTS_PATH = "./results-fh-new"
data,varnames = readdlm("data/nikuradse_1.csv", ',', header=true);
X = convert(Matrix{Float32}, data)
X_t = permutedims(X) # for gpu
exprs = Expr[]
parameters = Vector{Vector{Float32}}()
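
As a side note on the "# for gpu" transpose above: Julia stores matrices column-major, so after permutedims each data point's variables sit in one contiguous column, which is the access pattern the later TODO about column-major memory refers to. A minimal illustration on a toy matrix (the names below are placeholders, not part of the project):

X_toy = Float32[1 2; 3 4; 5 6]   # 3 data points x 2 variables
X_toy_t = permutedims(X_toy)     # 2 x 3: column j now holds all variables of data point j
@assert X_toy_t[:, 1] == X_toy[1, :]
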
@@ -19,24 +24,15 @@ paramnames = ["p$i" for i in 1:20]
# data/esr_nvar2_len10.txt.gz_9.txt.gz has ~250_000 exprs
# data/esr_nvar2_len10.txt.gz_10.txt.gz has ~800_000 exprs
GZip.open("data/esr_nvar2_len10.txt.gz_9.txt.gz") do io
    i = 0
    for line in eachline(io)
        expr, p = parse_infix(line, varnames, paramnames)
        if i > 10
            return
        end
        println(expr)
        push!(exprs, expr)
        push!(parameters, randn(Float32, length(p)))
        i += 1
    end
end
expr_reps = 100 # 100 parameter optimisation steps (local search; run sequentially; only p changes, X stays fixed)
# TODO: Tips for tuning:
# Put data in shared memory:
# https://cuda.juliagpu.org/v2.6/api/kernel/#Shared-memory
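
A minimal sketch of the shared-memory tip from the linked docs, assuming CUDA.jl 3.x or newer (where the @cuStaticSharedMem macro from the v2.6 docs became CuStaticSharedArray); the kernel and data below are placeholders, not part of this benchmark:

using CUDA

function staged_square!(out, x)
    tile = CuStaticSharedArray(Float32, 256)              # one slot per thread in the block
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
    if i <= length(x)
        tile[threadIdx().x] = x[i]                        # one global-memory read per element
    end
    sync_threads()
    if i <= length(x)
        out[i] = tile[threadIdx().x]^2                    # later reads hit fast on-chip memory
    end
    return nothing
end

x_d = CUDA.rand(Float32, 1024)
out_d = CUDA.zeros(Float32, 1024)
@cuda threads=256 blocks=cld(length(x_d), 256) staged_square!(out_d, x_d)
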
@@ -60,18 +56,20 @@ suite["GPUI"] = BenchmarkGroup(["GPUInterpreter"])
suite["GPUT"] = BenchmarkGroup(["GPUTranspiler"])
if compareWithCPU
suite["CPU"]["nikuradse_1"] = @benchmarkable interpret_cpu(exprsCPU, X, parameters; repetitions=expr_reps)
suite["CPU"]["nikuradse_1"] = @benchmarkable interpret_cpu(exprs, X, parameters; repetitions=expr_reps)
suite["CPU"]["nikuradse_1_parallel"] = @benchmarkable interpret_cpu(exprs, X, parameters; repetitions=expr_reps, parallel=true)
end
# TODO: Most likely need to transpose X matrix here, as we are expecting a column major matrix for more efficient memory access
suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprsGPU, X, parameters; repetitions=expr_reps)
suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprsGPU, X, parameters; repetitions=expr_reps)
suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps)
suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps)
for i in 1:10
for i in 1:2
    tune!(suite)
end
BenchmarkTools.save("params.json", params(suite))
throw("finished tuning")
loadparams!(suite, BenchmarkTools.load("params.json")[1], :samples, :evals, :gctrial, :time_tolerance, :evals_set, :gcsample, :seconds, :overhead, :memory_tolerance)
results = run(suite, verbose=true, seconds=3600) # 1 hour because of the CPU; let's see if more is needed
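
For reference, a stand-alone sketch of the tune-once / load-later pattern the block above works toward (the benchmark body is a placeholder; only the params.json file name is taken from the diff):

using BenchmarkTools

suite = BenchmarkGroup()
suite["demo"] = @benchmarkable sum(rand(1000))

if !isfile("params.json")
    tune!(suite)                                           # expensive: determines evals/samples per benchmark
    BenchmarkTools.save("params.json", params(suite))
else
    loadparams!(suite, BenchmarkTools.load("params.json")[1], :evals, :samples)
end

results = run(suite, verbose=true)
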