benchmarking: started tuning benchmarking results. Found some errors that need fixing.
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
This commit is contained in:
@ -1,9 +1,13 @@
|
||||
using LinearAlgebra
|
||||
using BenchmarkTools
|
||||
using DelimitedFiles
|
||||
using GZip
|
||||
|
||||
using .Transpiler
|
||||
using .Interpreter
|
||||
|
||||
include("parser.jl") # to parse expressions from a file
|
||||
|
||||
const BENCHMARKS_RESULTS_PATH = "./results-fh-new"
|
||||
|
||||
# Number of expressions can get really big (into millions)
|
||||
@ -11,6 +15,7 @@ const BENCHMARKS_RESULTS_PATH = "./results-fh-new"
|
||||
|
||||
data,varnames = readdlm("data/nikuradse_1.csv", ',', header=true);
|
||||
X = convert(Matrix{Float32}, data)
|
||||
X_t = permutedims(X) # for gpu
|
||||
|
||||
exprs = Expr[]
|
||||
parameters = Vector{Vector{Float32}}()
|
||||
@ -19,24 +24,15 @@ paramnames = ["p$i" for i in 1:20]
|
||||
# data/esr_nvar2_len10.txt.gz_9.txt.gz has ~250_000 exprs
|
||||
# data/esr_nvar2_len10.txt.gz_10.txt.gz has ~800_000 exprs
|
||||
GZip.open("data/esr_nvar2_len10.txt.gz_9.txt.gz") do io
|
||||
i = 0
|
||||
for line in eachline(io)
|
||||
expr, p = parse_infix(line, varnames, paramnames)
|
||||
|
||||
if i > 10
|
||||
return
|
||||
end
|
||||
println(expr)
|
||||
|
||||
push!(exprs, expr)
|
||||
push!(parameters, randn(Float32, length(p)))
|
||||
|
||||
i += 1
|
||||
end
|
||||
end
|
||||
expr_reps = 100 # 100 parameter optimisation steps (local search; sequentially; only p changes but not X)
|
||||
|
||||
|
||||
# TODO: Tips for tuning:
|
||||
# Put data in shared memory:
|
||||
# https://cuda.juliagpu.org/v2.6/api/kernel/#Shared-memory
|
||||
@ -60,18 +56,20 @@ suite["GPUI"] = BenchmarkGroup(["GPUInterpreter"])
|
||||
suite["GPUT"] = BenchmarkGroup(["GPUTranspiler"])
|
||||
|
||||
if compareWithCPU
|
||||
suite["CPU"]["nikuradse_1"] = @benchmarkable interpret_cpu(exprsCPU, X, parameters; repetitions=expr_reps)
|
||||
suite["CPU"]["nikuradse_1"] = @benchmarkable interpret_cpu(exprs, X, parameters; repetitions=expr_reps)
|
||||
suite["CPU"]["nikuradse_1_parallel"] = @benchmarkable interpret_cpu(exprs, X, parameters; repetitions=expr_reps, parallel=true)
|
||||
end
|
||||
|
||||
# TODO: Most likely need to transpose X matrix here, as we are expecting a column major matrix for more efficient memory access
|
||||
suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprsGPU, X, parameters; repetitions=expr_reps)
|
||||
suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprsGPU, X, parameters; repetitions=expr_reps)
|
||||
suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps)
|
||||
suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps)
|
||||
|
||||
for i in 1:10
|
||||
for i in 1:2
|
||||
tune!(suite)
|
||||
end
|
||||
BenchmarkTools.save("params.json", params(suite))
|
||||
|
||||
throw("finished tuning")
|
||||
|
||||
loadparams!(suite, BenchmarkTools.load("params.json")[1], :samples, :evals, :gctrial, :time_tolerance, :evals_set, :gcsample, :seconds, :overhead, :memory_tolerance)
|
||||
|
||||
results = run(suite, verbose=true, seconds=3600) # 1 hour because of CPU. lets see if more is needed
|
||||
|
Reference in New Issue
Block a user