benchmarking: removed caches to get initial performance measurement. still some problems
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run

This commit is contained in:
Daniel 2025-05-10 13:11:27 +02:00
parent 6d3c3164cf
commit 2ba1fef5ba
5 changed files with 68 additions and 54 deletions

View File

@ -9,9 +9,10 @@ include("Code.jl")
include("CpuInterpreter.jl")
end
using ..ExpressionProcessing
export interpret_gpu,interpret_cpu
export evaluate_gpu
export test
# Some assertions:
# Variables and parameters start their naming with "1" meaning the first variable/parameter has to be "x1/p1" and not "x0/p0"

View File

@ -2,7 +2,7 @@ module ExpressionProcessing
export expr_to_postfix, is_binary_operator
export PostfixType
export Operator, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, ABS, LOG, EXP, SQRT
export Operator, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, ABS, LOG, EXP, SQRT, INV
export ElementType, EMPTY, FLOAT32, OPERATOR, VARIABLE, PARAMETER
export ExpressionElement
@ -14,7 +14,7 @@ const unary_operators = [ABS, LOG, EXP, SQRT]
"""
    ExpressionElement

One token of an expression in postfix notation.

# Fields
- `Type::ElementType`: tag that tells how the bits in `Value` have to be read.
- `Value::UInt32`: raw 32-bit payload. Depending on `Type` it holds the bit pattern of a
  `Float32` constant, an `Operator`, or the index of a variable/parameter.
"""
struct ExpressionElement
    Type::ElementType
    Value::UInt32 # raw 32 bits; reinterpret according to `Type` when using it
end
const PostfixType = Vector{ExpressionElement}
@ -24,25 +24,29 @@ Converts a julia expression to its postfix notation.
NOTE: All 64-Bit values will be converted to 32-Bit. Be aware of the lost precision.
NOTE: This function is not thread safe, especially cache access is not thread safe
"
function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::PostfixType
function expr_to_postfix(expression::Expr)::PostfixType
expr = expression
if expression.head === :->
# if the expression equals (x, p) -> (...) then the below statement extracts the expression to evaluate
expr = expression.args[2].args[2]
if expression.args[2].head == :block # expressions that are not generated with the parser (./test/parser.jl) contain this extra "block" node, which needs to be skipped
expr = expression.args[2].args[2]
else # ... if they are generated with the parser, this node is not present and therefore doesn't need to be skipped
expr = expression.args[2]
end
end
if haskey(cache, expr)
return cache[expr]
end
# if haskey(cache, expr)
# return cache[expr]
# end
postfix = PostfixType()
# Special handling in the case where the expression is an array access
# This can happen if the token is a variable/parameter of the form x[n]/p[n]
if expr.head == :ref
exprElement = convert_to_ExpressionElement(expr.args[1], expr.args[2]) # we assume that an array access never contains an expression, as this would make not much sense in this case
exprElement = convert_to_ExpressionElement(expr.args[1], expr.args[2]) # we assume that an array access never contains an expression, as this would not make much sense in this case
push!(postfix, exprElement)
cache[expr] = postfix
# cache[expr] = postfix
return postfix
end
@ -52,7 +56,7 @@ function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::Post
arg = expr.args[j]
if typeof(arg) === Expr
append!(postfix, expr_to_postfix(arg, cache))
append!(postfix, expr_to_postfix(arg))
elseif typeof(arg) === Symbol # variables/parameters of the form xn/pn
exprElement = convert_to_ExpressionElement(arg)
push!(postfix, exprElement)
@ -74,7 +78,7 @@ function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::Post
push!(postfix, convert_to_ExpressionElement(operator))
end
cache[expr] = postfix
# cache[expr] = postfix
return postfix
end
@ -106,24 +110,10 @@ function get_operator(op::Symbol)::Operator
end
end
"""
    convert_var_to_int(var::Symbol)::Int32

Return the number encoded in a variable/parameter symbol such as `x3` or `p2`.

Everything after the first character is parsed as an `Int32`. For a parameter (`pn`) the
result is negated, so `:x3` yields `3` and `:p2` yields `-2`.
Numbering starts at 1; `x0` and `p0` are not allowed (this is not checked here).
"""
function convert_var_to_int(var::Symbol)::Int32
    name = String(var)
    # parse first, so that a malformed name fails before the prefix is inspected
    magnitude = parse(Int32, SubString(name, 2))
    return startswith(name, 'p') ? -magnitude : magnitude
end
"parses a symbol to be either a variable or a parameter and returns the corresponding ExpressionElement"
function convert_to_ExpressionElement(element::Symbol)::ExpressionElement
varStr = String(element)
index = parse(Int32, SubString(varStr, 2))
index = parse(UInt32, SubString(varStr, 2))
if varStr[1] == 'x'
return ExpressionElement(VARIABLE, index)
@ -136,24 +126,24 @@ end
"""
    convert_to_ExpressionElement(element::Symbol, index::Integer)::ExpressionElement

Build the `ExpressionElement` for an array-style access of the form `x[index]` / `p[index]`.

# Arguments
- `element`: `:x` for a variable, `:p` for a parameter.
- `index`: the index used in the access; it is converted to `UInt32`.

# Throws
- `ArgumentError` if `element` is neither `:x` nor `:p`.
- `InexactError` if `index` cannot be represented as `UInt32` (e.g. a negative index).
"""
function convert_to_ExpressionElement(element::Symbol, index::Integer)::ExpressionElement
    if element == :x
        return ExpressionElement(VARIABLE, convert(UInt32, index))
    elseif element == :p
        return ExpressionElement(PARAMETER, convert(UInt32, index))
    else
        # interpolate `element`; this method has no local named `varStr`
        throw(ArgumentError("Cannot parse symbol to be either a variable or a parameter. Symbol was '$element'"))
    end
end
"""
    convert_to_ExpressionElement(element::Float32)::ExpressionElement

Wrap a `Float32` constant in an `ExpressionElement` of type `FLOAT32`.
The bit pattern of `element` is stored unchanged as a `UInt32`.
"""
function convert_to_ExpressionElement(element::Float32)::ExpressionElement
    return ExpressionElement(FLOAT32, reinterpret(UInt32, element))
end
"""
    convert_to_ExpressionElement(element::Float64)::ExpressionElement

Wrap a `Float64` constant in an `ExpressionElement` of type `FLOAT32`.
The value is first converted to `Float32` (precision may be lost); the bit pattern of that
`Float32` is then stored as a `UInt32`.
"""
function convert_to_ExpressionElement(element::Float64)::ExpressionElement
    narrowed = convert(Float32, element)
    return ExpressionElement(FLOAT32, reinterpret(UInt32, narrowed))
end
"""
    convert_to_ExpressionElement(element::Operator)::ExpressionElement

Wrap an `Operator` in an `ExpressionElement` of type `OPERATOR`.
The operator's bits are stored as a `UInt32`.
"""
function convert_to_ExpressionElement(element::Operator)::ExpressionElement
    return ExpressionElement(OPERATOR, reinterpret(UInt32, element))
end

View File

@ -6,18 +6,17 @@ using ..Utils
export interpret
const cacheFrontend = Dict{Expr, PostfixType}()
"Interprets the given expressions with the values provided.
# Arguments
- expressions::Vector{Expr} : The expressions to execute. They are converted to postfix form before execution
- variables::Matrix{Float32} : The variables to use. Each column is mapped to the variables x1..xn
- parameters::Vector{Vector{Float32}} : The parameters to use. Each Vector contains the values for the parameters p1..pn. The number of parameters can be different for every expression
- kwparam ```frontendCache```: The cache that stores the (partial) results of the frontend
"
function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32}
exprs = Vector{ExpressionProcessing.PostfixType}(undef, length(expressions))
@inbounds for i in eachindex(expressions)
exprs[i] = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend)
exprs[i] = ExpressionProcessing.expr_to_postfix(expressions[i])
end
variableCols = size(variables, 2) # number of variable sets to use for each expression
@ -99,6 +98,7 @@ function interpret_expression(expressions::CuDeviceArray{ExpressionElement}, var
elseif opcode == SQRT
operationStack[operationStackTop] = sqrt(operationStack[operationStackTop])
elseif opcode == INV
# operationStack[operationStackTop] = 1f0 / operationStack[operationStackTop]
operationStack[operationStackTop] = inv(operationStack[operationStackTop])
end
else

View File

@ -8,9 +8,10 @@ using ..Utils
const BYTES = sizeof(Float32)
const Operand = Union{Float32, String} # Operand is either fixed value or register
const cacheFrontend = Dict{Expr, PostfixType}()
const transpilerCache = Dict{Expr, CuFunction}() # needed if multiple runs with the same expr but different parameters are performed
"
- kwparam ```frontendCache```: The cache that stores the (partial) results of the frontend, to speedup the pre-processing
- kwparam ```frontendCache```: The cache that stores the result of the transpilation. Useful for parameter optimisation, as the same expression gets executed multiple times
"
function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32}
varRows = size(variables, 1)
variableCols = size(variables, 2)
@ -48,22 +49,33 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
# end
@inbounds for i in eachindex(expressions)
if haskey(transpilerCache, expressions[i])
kernels[i] = transpilerCache[expressions[i]]
continue
end
# if haskey(resultCache, expressions[i])
# kernels[i] = resultCache[expressions[i]]
# continue
# end
formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend)
formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i])
kernel = transpile(formattedExpr, varRows, Utils.get_max_inner_length(parameters), variableCols, i-1) # i-1 because julia is 1-based but PTX needs 0-based indexing
linker = CuLink()
add_data!(linker, "ExpressionProcessing", kernel)
image = complete(linker)
mod = CuModule(image)
kernels[i] = CuFunction(mod, "ExpressionProcessing")
transpilerCache[expressions[i]] = kernels[i]
# try
linker = CuLink()
add_data!(linker, "ExpressionProcessing", kernel)
image = complete(linker)
mod = CuModule(image)
kernels[i] = CuFunction(mod, "ExpressionProcessing")
# resultCache[expressions[i]] = kernels[i]
# catch
# dump(expressions[i]; maxdepth=10)
# println()
# println()
# println(kernel)
# println()
# println()
# error(current_exceptions())
# end
end
cudaVars = CuArray(variables) # maybe put in shared memory (see PerformanceTests.jl for more info)
@ -203,7 +215,12 @@ function generate_calculation_code(expression::ExpressionProcessing.PostfixType,
for token in expression
if token.Type == FLOAT32
push!(operands, reinterpret(Float32, token.Value))
value = reinterpret(Float32, token.Value)
if isfinite(value)
push!(operands, value)
else
push!(operands, "0f" * string(token.Value, base = 16)) # otherwise, values like "Inf" would be written as "Inf" and therefore not understandable to the PTX compiler
end
elseif token.Type == OPERATOR
operator = reinterpret(Operator, token.Value)

View File

@ -2,9 +2,11 @@ using LinearAlgebra
using BenchmarkTools
using DelimitedFiles
using GZip
using CUDA
using .Transpiler
using .Interpreter
using .ExpressionProcessing
include("parser.jl") # to parse expressions from a file
@ -48,7 +50,7 @@ expr_reps = 100 # 100 parameter optimisation steps (local search; sequentially;
# Add /usr/local/cuda/bin in .bashrc to PATH to access ncu and nsys (do the tests on FH PCs)
# University setup at 10.20.1.7 and 10.20.1.13
compareWithCPU = true
compareWithCPU = false
suite = BenchmarkGroup()
suite["CPU"] = BenchmarkGroup(["CPUInterpreter"])
@ -60,10 +62,14 @@ if compareWithCPU
suite["CPU"]["nikuradse_1_parallel"] = @benchmarkable interpret_cpu(exprs, X, parameters; repetitions=expr_reps, parallel=true)
end
# cacheInterpreter = Dict{Expr, PostfixType}()
suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps)
# cacheTranspilerFront = Dict{Expr, PostfixType}()
# cacheTranspilerRes = Dict{Expr, CuFunction}()
suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps)
for i in 1:2
for i in 1:1
tune!(suite)
end
BenchmarkTools.save("params.json", params(suite))