benchmarking: further tests done. The transpiler seems to take ages; this needs further investigation
Some checks failed
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Has been cancelled
Some checks failed
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Has been cancelled
This commit is contained in:
parent
5b31fbb270
commit
3d80ae95e4
|
@ -34,6 +34,7 @@ function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector
|
|||
|
||||
for i in 1:repetitions # Simulate parameter tuning -> local search (X remains the same, p gets changed in small steps and must be performed sequentially, which it is with this impl)
|
||||
results = Interpreter.interpret(exprs, X, p)
|
||||
println("got results")
|
||||
end
|
||||
|
||||
return results
|
||||
|
|
|
@ -27,15 +27,17 @@ NOTE: This function is not thread safe, especially cache access is not thread sa
|
|||
function expr_to_postfix(expression::Expr)::PostfixType
|
||||
expr = expression
|
||||
if expression.head === :->
|
||||
if typeof(expression.args[2]) == Float64
|
||||
println()
|
||||
println("Expression: $expression")
|
||||
println("Expr: $expr")
|
||||
println()
|
||||
dump(expression; maxdepth=10)
|
||||
end
|
||||
# if typeof(expression.args[2]) == Float64
|
||||
# println()
|
||||
# println("Expression: $expression")
|
||||
# println("Expr: $expr")
|
||||
# println()
|
||||
# dump(expression; maxdepth=10)
|
||||
# end
|
||||
# if the expression equals (x, p) -> (...) then the below statement extracts the expression to evaluate
|
||||
if expression.args[2].head == :block # expressions that are not generated with the parser (./test/parser.jl) contain this extra "block" node, which needs to be skipped
|
||||
if typeof(expression.args[2]) == Float64
|
||||
return [convert_to_ExpressionElement(expression.args[2])]
|
||||
elseif expression.args[2].head == :block # expressions that are not generated with the parser (./test/parser.jl) contain this extra "block" node, which needs to be skipped
|
||||
expr = expression.args[2].args[2]
|
||||
else # ... if they are generated with the parser, this node is not present and therefore doesn't need to be skipped
|
||||
expr = expression.args[2]
|
||||
|
@ -48,8 +50,6 @@ function expr_to_postfix(expression::Expr)::PostfixType
|
|||
|
||||
postfix = PostfixType()
|
||||
|
||||
|
||||
|
||||
# Special handling in the case where the expression is an array access
|
||||
# This can happen if the token is a variable/parameter of the form x[n]/p[n]
|
||||
if expr.head == :ref
|
||||
|
|
|
@ -30,7 +30,7 @@ function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parame
|
|||
cudaResults = CuArray{Float32}(undef, variableCols, length(exprs))
|
||||
|
||||
# Start kernel for each expression to ensure that no warp is working on different expressions
|
||||
@inbounds for i in eachindex(exprs)
|
||||
@inbounds Threads.@threads for i in eachindex(exprs)
|
||||
numThreads = min(variableCols, 256)
|
||||
numBlocks = cld(variableCols, numThreads)
|
||||
|
||||
|
|
|
@ -157,7 +157,7 @@ function get_kernel_signature(kernelName::String, parameters::Vector{DataType},
|
|||
println(signatureBuffer, "(")
|
||||
|
||||
for i in eachindex(parameters)
|
||||
print(signatureBuffer, " .param .u64", " ", "param_", i)
|
||||
print(signatureBuffer, " .param .u64 param_", i)
|
||||
|
||||
parametersLocation = Utils.get_next_free_register(regManager, "rd")
|
||||
println(paramLoadingBuffer, "ld.param.u64 $parametersLocation, [param_$i];")
|
||||
|
|
|
@ -63,11 +63,11 @@ if compareWithCPU
|
|||
end
|
||||
|
||||
# cacheInterpreter = Dict{Expr, PostfixType}()
|
||||
# suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps)
|
||||
suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps)
|
||||
|
||||
# cacheTranspilerFront = Dict{Expr, PostfixType}()
|
||||
# cacheTranspilerRes = Dict{Expr, CuFunction}()
|
||||
suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps)
|
||||
suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps) # Takes forever. Needs more investigation
|
||||
|
||||
tune!(suite)
|
||||
BenchmarkTools.save("params.json", params(suite))
|
||||
|
|
|
@ -1 +1 @@
|
|||
[{"Julia":"1.11.4","BenchmarkTools":{"major":1,"minor":6,"patch":0,"prerelease":[],"build":[]}},[["BenchmarkGroup",{"data":{"CPU":["BenchmarkGroup",{"data":{"medium varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"large varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"small varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":["CPUInterpreter"]}],"GPUT":["BenchmarkGroup",{"data":{"medium varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"large varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"small varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":["GPUTranspiler"]}],"GPUI":["BenchmarkGroup",{"data":{"medium varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"large varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"small varset":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":1000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":["GPUInterpreter"]}]},"tags":[]}]]]
|
||||
[{"Julia":"1.11.5","BenchmarkTools":{"major":1,"minor":6,"patch":0,"prerelease":[],"build":[]}},[["BenchmarkGroup",{"data":{"CPU":["BenchmarkGroup",{"data":{},"tags":["CPUInterpreter"]}],"GPUT":["BenchmarkGroup",{"data":{},"tags":["GPUTranspiler"]}],"GPUI":["BenchmarkGroup",{"data":{"nikuradse_1":["Parameters",{"gctrial":true,"time_tolerance":0.05,"evals_set":false,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":["GPUInterpreter"]}]},"tags":[]}]]]
|
Loading…
Reference in New Issue
Block a user