benchmarking: removed caches to get initial performance measurement. still some problems
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run

This commit is contained in:
Daniel 2025-05-10 13:11:27 +02:00
parent 6d3c3164cf
commit 2ba1fef5ba
5 changed files with 68 additions and 54 deletions

View File

@ -9,9 +9,10 @@ include("Code.jl")
include("CpuInterpreter.jl") include("CpuInterpreter.jl")
end end
using ..ExpressionProcessing
export interpret_gpu,interpret_cpu export interpret_gpu,interpret_cpu
export evaluate_gpu export evaluate_gpu
export test
# Some assertions: # Some assertions:
# Variables and parameters start their naming with "1" meaning the first variable/parameter has to be "x1/p1" and not "x0/p0" # Variables and parameters start their naming with "1" meaning the first variable/parameter has to be "x1/p1" and not "x0/p0"

View File

@ -2,7 +2,7 @@ module ExpressionProcessing
export expr_to_postfix, is_binary_operator export expr_to_postfix, is_binary_operator
export PostfixType export PostfixType
export Operator, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, ABS, LOG, EXP, SQRT export Operator, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, ABS, LOG, EXP, SQRT, INV
export ElementType, EMPTY, FLOAT32, OPERATOR, VARIABLE, PARAMETER export ElementType, EMPTY, FLOAT32, OPERATOR, VARIABLE, PARAMETER
export ExpressionElement export ExpressionElement
@ -14,7 +14,7 @@ const unary_operators = [ABS, LOG, EXP, SQRT]
struct ExpressionElement struct ExpressionElement
Type::ElementType Type::ElementType
Value::Int32 # Reinterpret the stored value to type "ElementType" when using it Value::UInt32 # Reinterpret the stored value to type "ElementType" when using it
end end
const PostfixType = Vector{ExpressionElement} const PostfixType = Vector{ExpressionElement}
@ -24,25 +24,29 @@ Converts a julia expression to its postfix notation.
NOTE: All 64-Bit values will be converted to 32-Bit. Be aware of the lost precision. NOTE: All 64-Bit values will be converted to 32-Bit. Be aware of the lost precision.
NOTE: This function is not thread safe, especially cache access is not thread safe NOTE: This function is not thread safe, especially cache access is not thread safe
" "
function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::PostfixType function expr_to_postfix(expression::Expr)::PostfixType
expr = expression expr = expression
if expression.head === :-> if expression.head === :->
# if the expression equals (x, p) -> (...) then the below statement extracts the expression to evaluate # if the expression equals (x, p) -> (...) then the below statement extracts the expression to evaluate
expr = expression.args[2].args[2] if expression.args[2].head == :block # expressions that are not generated with the parser (./test/parser.jl) contain this extra "block" node, which needs to be skipped
expr = expression.args[2].args[2]
else # ... if they are generated with the parser, this node is not present and therefore doesn't need to be skipped
expr = expression.args[2]
end
end end
if haskey(cache, expr) # if haskey(cache, expr)
return cache[expr] # return cache[expr]
end # end
postfix = PostfixType() postfix = PostfixType()
# Special handling in the case where the expression is an array access # Special handling in the case where the expression is an array access
# This can happen if the token is a variable/parameter of the form x[n]/p[n] # This can happen if the token is a variable/parameter of the form x[n]/p[n]
if expr.head == :ref if expr.head == :ref
exprElement = convert_to_ExpressionElement(expr.args[1], expr.args[2]) # we assume that an array access never contains an expression, as this would make not much sense in this case exprElement = convert_to_ExpressionElement(expr.args[1], expr.args[2]) # we assume that an array access never contains an expression, as this would not make much sense in this case
push!(postfix, exprElement) push!(postfix, exprElement)
cache[expr] = postfix # cache[expr] = postfix
return postfix return postfix
end end
@ -52,7 +56,7 @@ function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::Post
arg = expr.args[j] arg = expr.args[j]
if typeof(arg) === Expr if typeof(arg) === Expr
append!(postfix, expr_to_postfix(arg, cache)) append!(postfix, expr_to_postfix(arg))
elseif typeof(arg) === Symbol # variables/parameters of the form xn/pn elseif typeof(arg) === Symbol # variables/parameters of the form xn/pn
exprElement = convert_to_ExpressionElement(arg) exprElement = convert_to_ExpressionElement(arg)
push!(postfix, exprElement) push!(postfix, exprElement)
@ -74,7 +78,7 @@ function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::Post
push!(postfix, convert_to_ExpressionElement(operator)) push!(postfix, convert_to_ExpressionElement(operator))
end end
cache[expr] = postfix # cache[expr] = postfix
return postfix return postfix
end end
@ -106,24 +110,10 @@ function get_operator(op::Symbol)::Operator
end end
end end
"Extracts the number from a variable/parameter and returns it. If the symbol is a parameter ```pn```, the resulting value will be negativ.
```x0 and p0``` are not allowed."
function convert_var_to_int(var::Symbol)::Int32
varStr = String(var)
number = parse(Int32, SubString(varStr, 2))
if varStr[1] == 'p'
number = -number
end
return number
end
"parses a symbol to be either a variable or a parameter and returns the corresponding Expressionelement" "parses a symbol to be either a variable or a parameter and returns the corresponding Expressionelement"
function convert_to_ExpressionElement(element::Symbol)::ExpressionElement function convert_to_ExpressionElement(element::Symbol)::ExpressionElement
varStr = String(element) varStr = String(element)
index = parse(Int32, SubString(varStr, 2)) index = parse(UInt32, SubString(varStr, 2))
if varStr[1] == 'x' if varStr[1] == 'x'
return ExpressionElement(VARIABLE, index) return ExpressionElement(VARIABLE, index)
@ -136,24 +126,24 @@ end
"parses a symbol to be either a variable or a parameter and returns the corresponding Expressionelement" "parses a symbol to be either a variable or a parameter and returns the corresponding Expressionelement"
function convert_to_ExpressionElement(element::Symbol, index::Integer)::ExpressionElement function convert_to_ExpressionElement(element::Symbol, index::Integer)::ExpressionElement
if element == :x if element == :x
return ExpressionElement(VARIABLE, convert(Int32, index)) return ExpressionElement(VARIABLE, convert(UInt32, index))
elseif element == :p elseif element == :p
return ExpressionElement(PARAMETER, convert(Int32, index)) return ExpressionElement(PARAMETER, convert(UInt32, index))
else else
throw("Cannot parse symbol to be either a variable or a parameter. Symbol was '$varStr'") throw("Cannot parse symbol to be either a variable or a parameter. Symbol was '$varStr'")
end end
end end
function convert_to_ExpressionElement(element::Float32)::ExpressionElement function convert_to_ExpressionElement(element::Float32)::ExpressionElement
value = reinterpret(Int32, element) value = reinterpret(UInt32, element)
return ExpressionElement(FLOAT32, value) return ExpressionElement(FLOAT32, value)
end end
function convert_to_ExpressionElement(element::Float64)::ExpressionElement function convert_to_ExpressionElement(element::Float64)::ExpressionElement
value = reinterpret(Int32, convert(Float32, element)) value = reinterpret(UInt32, convert(Float32, element))
return ExpressionElement(FLOAT32, value) return ExpressionElement(FLOAT32, value)
end end
function convert_to_ExpressionElement(element::Operator)::ExpressionElement function convert_to_ExpressionElement(element::Operator)::ExpressionElement
value = reinterpret(Int32, element) value = reinterpret(UInt32, element)
return ExpressionElement(OPERATOR, value) return ExpressionElement(OPERATOR, value)
end end

View File

@ -6,18 +6,17 @@ using ..Utils
export interpret export interpret
const cacheFrontend = Dict{Expr, PostfixType}()
"Interprets the given expressions with the values provided. "Interprets the given expressions with the values provided.
# Arguments # Arguments
- expressions::Vector{ExpressionProcessing.PostfixType} : The expressions to execute in postfix form - expressions::Vector{ExpressionProcessing.PostfixType} : The expressions to execute in postfix form
- variables::Matrix{Float32} : The variables to use. Each column is mapped to the variables x1..xn - variables::Matrix{Float32} : The variables to use. Each column is mapped to the variables x1..xn
- parameters::Vector{Vector{Float32}} : The parameters to use. Each Vector contains the values for the parameters p1..pn. The number of parameters can be different for every expression - parameters::Vector{Vector{Float32}} : The parameters to use. Each Vector contains the values for the parameters p1..pn. The number of parameters can be different for every expression
- kwparam ```frontendCache```: The cache that stores the (partial) results of the frontend
" "
function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32} function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32}
exprs = Vector{ExpressionProcessing.PostfixType}(undef, length(expressions)) exprs = Vector{ExpressionProcessing.PostfixType}(undef, length(expressions))
@inbounds for i in eachindex(expressions) @inbounds for i in eachindex(expressions)
exprs[i] = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend) exprs[i] = ExpressionProcessing.expr_to_postfix(expressions[i])
end end
variableCols = size(variables, 2) # number of variable sets to use for each expression variableCols = size(variables, 2) # number of variable sets to use for each expression
@ -99,6 +98,7 @@ function interpret_expression(expressions::CuDeviceArray{ExpressionElement}, var
elseif opcode == SQRT elseif opcode == SQRT
operationStack[operationStackTop] = sqrt(operationStack[operationStackTop]) operationStack[operationStackTop] = sqrt(operationStack[operationStackTop])
elseif opcode == INV elseif opcode == INV
# operationStack[operationStackTop] = 1f0 / operationStack[operationStackTop]
operationStack[operationStackTop] = inv(operationStack[operationStackTop]) operationStack[operationStackTop] = inv(operationStack[operationStackTop])
end end
else else

View File

@ -8,9 +8,10 @@ using ..Utils
const BYTES = sizeof(Float32) const BYTES = sizeof(Float32)
const Operand = Union{Float32, String} # Operand is either fixed value or register const Operand = Union{Float32, String} # Operand is either fixed value or register
const cacheFrontend = Dict{Expr, PostfixType}() "
const transpilerCache = Dict{Expr, CuFunction}() # needed if multiple runs with the same expr but different parameters are performed - kwparam ```frontendCache```: The cache that stores the (partial) results of the frontend, to speedup the pre-processing
- kwparam ```resultCache```: The cache that stores the result of the transpilation. Useful for parameter optimisation, as the same expression gets executed multiple times
"
function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32} function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32}
varRows = size(variables, 1) varRows = size(variables, 1)
variableCols = size(variables, 2) variableCols = size(variables, 2)
@ -48,22 +49,33 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
# end # end
@inbounds for i in eachindex(expressions) @inbounds for i in eachindex(expressions)
if haskey(transpilerCache, expressions[i]) # if haskey(resultCache, expressions[i])
kernels[i] = transpilerCache[expressions[i]] # kernels[i] = resultCache[expressions[i]]
continue # continue
end # end
formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend) formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i])
kernel = transpile(formattedExpr, varRows, Utils.get_max_inner_length(parameters), variableCols, i-1) # i-1 because julia is 1-based but PTX needs 0-based indexing kernel = transpile(formattedExpr, varRows, Utils.get_max_inner_length(parameters), variableCols, i-1) # i-1 because julia is 1-based but PTX needs 0-based indexing
linker = CuLink() # try
add_data!(linker, "ExpressionProcessing", kernel) linker = CuLink()
add_data!(linker, "ExpressionProcessing", kernel)
image = complete(linker)
image = complete(linker)
mod = CuModule(image)
kernels[i] = CuFunction(mod, "ExpressionProcessing") mod = CuModule(image)
transpilerCache[expressions[i]] = kernels[i] kernels[i] = CuFunction(mod, "ExpressionProcessing")
# resultCache[expressions[i]] = kernels[i]
# catch
# dump(expressions[i]; maxdepth=10)
# println()
# println()
# println(kernel)
# println()
# println()
# error(current_exceptions())
# end
end end
cudaVars = CuArray(variables) # maybe put in shared memory (see PerformanceTests.jl for more info) cudaVars = CuArray(variables) # maybe put in shared memory (see PerformanceTests.jl for more info)
@ -203,7 +215,12 @@ function generate_calculation_code(expression::ExpressionProcessing.PostfixType,
for token in expression for token in expression
if token.Type == FLOAT32 if token.Type == FLOAT32
push!(operands, reinterpret(Float32, token.Value)) value = reinterpret(Float32, token.Value)
if isfinite(value)
push!(operands, value)
else
push!(operands, "0f" * string(token.Value, base = 16)) # otherwise, values like "Inf" would be written as "Inf" and therefore not understandable to the PTX compiler
end
elseif token.Type == OPERATOR elseif token.Type == OPERATOR
operator = reinterpret(Operator, token.Value) operator = reinterpret(Operator, token.Value)

View File

@ -2,9 +2,11 @@ using LinearAlgebra
using BenchmarkTools using BenchmarkTools
using DelimitedFiles using DelimitedFiles
using GZip using GZip
using CUDA
using .Transpiler using .Transpiler
using .Interpreter using .Interpreter
using .ExpressionProcessing
include("parser.jl") # to parse expressions from a file include("parser.jl") # to parse expressions from a file
@ -48,7 +50,7 @@ expr_reps = 100 # 100 parameter optimisation steps (local search; sequentially;
# Add /usr/local/cuda/bin in .bashrc to PATH to access ncu and nsys (do the tests on FH PCs) # Add /usr/local/cuda/bin in .bashrc to PATH to access ncu and nsys (do the tests on FH PCs)
# University setup at 10.20.1.7 and 10.20.1.13 # University setup at 10.20.1.7 and 10.20.1.13
compareWithCPU = true compareWithCPU = false
suite = BenchmarkGroup() suite = BenchmarkGroup()
suite["CPU"] = BenchmarkGroup(["CPUInterpreter"]) suite["CPU"] = BenchmarkGroup(["CPUInterpreter"])
@ -60,10 +62,14 @@ if compareWithCPU
suite["CPU"]["nikuradse_1_parallel"] = @benchmarkable interpret_cpu(exprs, X, parameters; repetitions=expr_reps, parallel=true) suite["CPU"]["nikuradse_1_parallel"] = @benchmarkable interpret_cpu(exprs, X, parameters; repetitions=expr_reps, parallel=true)
end end
# cacheInterpreter = Dict{Expr, PostfixType}()
suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps) suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps)
# cacheTranspilerFront = Dict{Expr, PostfixType}()
# cacheTranspilerRes = Dict{Expr, CuFunction}()
suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps) suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps)
for i in 1:2 for i in 1:1
tune!(suite) tune!(suite)
end end
BenchmarkTools.save("params.json", params(suite)) BenchmarkTools.save("params.json", params(suite))