benchmarking: further tests done. Seems like transpiler takes ages, need to investigate further
Some checks failed
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Has been cancelled

This commit is contained in:
Daniel 2025-05-11 16:54:19 +02:00
parent 5b31fbb270
commit 3d80ae95e4
6 changed files with 39 additions and 38 deletions

View File

@ -34,6 +34,7 @@ function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector
for i in 1:repetitions # Simulate parameter tuning -> local search (X remains the same, p gets changed in small steps and must be performed sequentially, which it is with this impl)
results = Interpreter.interpret(exprs, X, p)
println("got results")
end
return results

View File

@ -27,15 +27,17 @@ NOTE: This function is not thread save, especially cache access is not thread sa
function expr_to_postfix(expression::Expr)::PostfixType
expr = expression
if expression.head === :->
if typeof(expression.args[2]) == Float64
println()
println("Expression: $expression")
println("Expr: $expr")
println()
dump(expression; maxdepth=10)
end
# if typeof(expression.args[2]) == Float64
# println()
# println("Expression: $expression")
# println("Expr: $expr")
# println()
# dump(expression; maxdepth=10)
# end
# if the expression equals (x, p) -> (...) then the below statement extracts the expression to evaluate
if expression.args[2].head == :block # expressions that are not generated with the parser (./test/parser.jl) contain this extra "block" node, which needs to be skipped
if typeof(expression.args[2]) == Float64
return [convert_to_ExpressionElement(expression.args[2])]
elseif expression.args[2].head == :block # expressions that are not generated with the parser (./test/parser.jl) contain this extra "block" node, which needs to be skipped
expr = expression.args[2].args[2]
else # ... if the are generated with the parser, this node is not present and therefore doesn't need to be skipped
expr = expression.args[2]
@ -48,8 +50,6 @@ function expr_to_postfix(expression::Expr)::PostfixType
postfix = PostfixType()
# Special handling in the case where the expression is an array access
# This can happen if the token is a variable/parameter of the form x[n]/p[n]
if expr.head == :ref

View File

@ -30,7 +30,7 @@ function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parame
cudaResults = CuArray{Float32}(undef, variableCols, length(exprs))
# Start kernel for each expression to ensure that no warp is working on different expressions
@inbounds for i in eachindex(exprs)
@inbounds Threads.@threads for i in eachindex(exprs)
numThreads = min(variableCols, 256)
numBlocks = cld(variableCols, numThreads)

View File

@ -157,7 +157,7 @@ function get_kernel_signature(kernelName::String, parameters::Vector{DataType},
println(signatureBuffer, "(")
for i in eachindex(parameters)
print(signatureBuffer, " .param .u64", " ", "param_", i)
print(signatureBuffer, " .param .u64 param_", i)
parametersLocation = Utils.get_next_free_register(regManager, "rd")
println(paramLoadingBuffer, "ld.param.u64 $parametersLocation, [param_$i];")

View File

@ -63,11 +63,11 @@ if compareWithCPU
end
# cacheInterpreter = Dict{Expr, PostfixType}()
# suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps)
suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps)
# cacheTranspilerFront = Dict{Expr, PostfixType}()
# cacheTranspilerRes = Dict{Expr, CuFunction}()
suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps)
suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps) # Takes forever. Needs more investigation
tune!(suite)
BenchmarkTools.save("params.json", params(suite))

View File

@ -1 +1 @@
[{"Julia":"1.11.4","BenchmarkTools":{"major":1,"minor":6,"patch":0,"prerelease":[],"build":[]}},[["BenchmarkGroup",{"data":{"CPU":["BenchmarkGroup",{"data":{"medium varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"large varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"small varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":["CPUInterpreter"]}],"GPUT":["BenchmarkGroup",{"data":{"medium varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"large varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"small varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":["GPUTranspiler"]}],"GPUI":["BenchmarkGroup",{"data":{"medium varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"large varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"small varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":["GPUInterpreter"]}]},"tags":[]}]]]
[{"Julia":"1.11.5","BenchmarkTools":{"major":1,"minor":6,"patch":0,"prerelease":[],"build":[]}},[["BenchmarkGroup",{"data":{"CPU":["BenchmarkGroup",{"data":{},"tags":["CPUInterpreter"]}],"GPUT":["BenchmarkGroup",{"data":{},"tags":["GPUTranspiler"]}],"GPUI":["BenchmarkGroup",{"data":{"nikuradse_1":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":["GPUInterpreter"]}]},"tags":[]}]]]