benchmarking: removed caches to get initial performance measurement. still some problems
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
This commit is contained in:
@ -8,9 +8,10 @@ using ..Utils
|
||||
const BYTES = sizeof(Float32)
|
||||
const Operand = Union{Float32, String} # Operand is either fixed value or register
|
||||
|
||||
const cacheFrontend = Dict{Expr, PostfixType}()
|
||||
const transpilerCache = Dict{Expr, CuFunction}() # needed if multiple runs with the same expr but different parameters are performed
|
||||
|
||||
"
|
||||
- kwparam ```frontendCache```: The cache that stores the (partial) results of the frontend, to speed up the pre-processing
|
||||
- kwparam ```transpilerCache```: The cache that stores the result of the transpilation. Useful for parameter optimisation, as the same expression gets executed multiple times
|
||||
"
|
||||
function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32}
|
||||
varRows = size(variables, 1)
|
||||
variableCols = size(variables, 2)
|
||||
@ -48,22 +49,33 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
|
||||
# end
|
||||
|
||||
@inbounds for i in eachindex(expressions)
|
||||
if haskey(transpilerCache, expressions[i])
|
||||
kernels[i] = transpilerCache[expressions[i]]
|
||||
continue
|
||||
end
|
||||
# if haskey(resultCache, expressions[i])
|
||||
# kernels[i] = resultCache[expressions[i]]
|
||||
# continue
|
||||
# end
|
||||
|
||||
formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend)
|
||||
formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i])
|
||||
kernel = transpile(formattedExpr, varRows, Utils.get_max_inner_length(parameters), variableCols, i-1) # i-1 because julia is 1-based but PTX needs 0-based indexing
|
||||
|
||||
linker = CuLink()
|
||||
add_data!(linker, "ExpressionProcessing", kernel)
|
||||
|
||||
image = complete(linker)
|
||||
|
||||
mod = CuModule(image)
|
||||
kernels[i] = CuFunction(mod, "ExpressionProcessing")
|
||||
transpilerCache[expressions[i]] = kernels[i]
|
||||
# try
|
||||
linker = CuLink()
|
||||
add_data!(linker, "ExpressionProcessing", kernel)
|
||||
|
||||
image = complete(linker)
|
||||
|
||||
mod = CuModule(image)
|
||||
kernels[i] = CuFunction(mod, "ExpressionProcessing")
|
||||
# resultCache[expressions[i]] = kernels[i]
|
||||
# catch
|
||||
# dump(expressions[i]; maxdepth=10)
|
||||
# println()
|
||||
# println()
|
||||
# println(kernel)
|
||||
# println()
|
||||
# println()
|
||||
# error(current_exceptions())
|
||||
# end
|
||||
|
||||
end
|
||||
|
||||
cudaVars = CuArray(variables) # maybe put in shared memory (see PerformanceTests.jl for more info)
|
||||
@ -203,7 +215,12 @@ function generate_calculation_code(expression::ExpressionProcessing.PostfixType,
|
||||
for token in expression
|
||||
|
||||
if token.Type == FLOAT32
|
||||
push!(operands, reinterpret(Float32, token.Value))
|
||||
value = reinterpret(Float32, token.Value)
|
||||
if isfinite(value)
|
||||
push!(operands, value)
|
||||
else
|
||||
push!(operands, "0f" * string(token.Value, base = 16)) # otherwise, values like "Inf" would be written as "Inf" and therefore not understandable to the PTX compiler
|
||||
end
|
||||
elseif token.Type == OPERATOR
|
||||
operator = reinterpret(Operator, token.Value)
|
||||
|
||||
|
Reference in New Issue
Block a user