diff --git a/package/src/ExpressionExecutorCuda.jl b/package/src/ExpressionExecutorCuda.jl index ad8eef1..04ca11a 100644 --- a/package/src/ExpressionExecutorCuda.jl +++ b/package/src/ExpressionExecutorCuda.jl @@ -9,9 +9,10 @@ include("Code.jl") include("CpuInterpreter.jl") end +using ..ExpressionProcessing + export interpret_gpu,interpret_cpu export evaluate_gpu -export test # Some assertions: # Variables and parameters start their naming with "1" meaning the first variable/parameter has to be "x1/p1" and not "x0/p0" diff --git a/package/src/ExpressionProcessing.jl b/package/src/ExpressionProcessing.jl index 0acebc0..d5f4f7e 100644 --- a/package/src/ExpressionProcessing.jl +++ b/package/src/ExpressionProcessing.jl @@ -2,7 +2,7 @@ module ExpressionProcessing export expr_to_postfix, is_binary_operator export PostfixType -export Operator, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, ABS, LOG, EXP, SQRT +export Operator, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, ABS, LOG, EXP, SQRT, INV export ElementType, EMPTY, FLOAT32, OPERATOR, VARIABLE, PARAMETER export ExpressionElement @@ -14,7 +14,7 @@ const unary_operators = [ABS, LOG, EXP, SQRT] struct ExpressionElement Type::ElementType - Value::Int32 # Reinterpret the stored value to type "ElementType" when using it + Value::UInt32 # Reinterpret the stored value to type "ElementType" when using it end const PostfixType = Vector{ExpressionElement} @@ -24,25 +24,29 @@ Converts a julia expression to its postfix notation. NOTE: All 64-Bit values will be converted to 32-Bit. Be aware of the lost precision. NOTE: This function is not thread save, especially cache access is not thread save " -function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::PostfixType +function expr_to_postfix(expression::Expr)::PostfixType expr = expression if expression.head === :-> # if the expression equals (x, p) -> (...) 
then the below statement extracts the expression to evaluate - expr = expression.args[2].args[2] + if expression.args[2].head == :block # expressions that are not generated with the parser (./test/parser.jl) contain this extra "block" node, which needs to be skipped + expr = expression.args[2].args[2] + else # ... if they are generated with the parser, this node is not present and therefore doesn't need to be skipped + expr = expression.args[2] + end end - if haskey(cache, expr) - return cache[expr] - end + # if haskey(cache, expr) + # return cache[expr] + # end postfix = PostfixType() # Special handling in the case where the expression is an array access # This can happen if the token is a variable/parameter of the form x[n]/p[n] if expr.head == :ref - exprElement = convert_to_ExpressionElement(expr.args[1], expr.args[2]) # we assume that an array access never contains an expression, as this would make not much sense in this case + exprElement = convert_to_ExpressionElement(expr.args[1], expr.args[2]) # we assume that an array access never contains an expression, as this would not make much sense in this case push!(postfix, exprElement) - cache[expr] = postfix + # cache[expr] = postfix return postfix end @@ -52,7 +56,7 @@ function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::Post arg = expr.args[j] if typeof(arg) === Expr - append!(postfix, expr_to_postfix(arg, cache)) + append!(postfix, expr_to_postfix(arg)) elseif typeof(arg) === Symbol # variables/parameters of the form xn/pn exprElement = convert_to_ExpressionElement(arg) push!(postfix, exprElement) @@ -74,7 +78,7 @@ function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::Post push!(postfix, convert_to_ExpressionElement(operator)) end - cache[expr] = postfix + # cache[expr] = postfix return postfix end @@ -106,24 +110,10 @@ function get_operator(op::Symbol)::Operator end end -"Extracts the number from a variable/parameter and returns it. 
If the symbol is a parameter ```pn```, the resulting value will be negativ. - -```x0 and p0``` are not allowed." -function convert_var_to_int(var::Symbol)::Int32 - varStr = String(var) - number = parse(Int32, SubString(varStr, 2)) - - if varStr[1] == 'p' - number = -number - end - - return number -end - "parses a symbol to be either a variable or a parameter and returns the corresponding Expressionelement" function convert_to_ExpressionElement(element::Symbol)::ExpressionElement varStr = String(element) - index = parse(Int32, SubString(varStr, 2)) + index = parse(UInt32, SubString(varStr, 2)) if varStr[1] == 'x' return ExpressionElement(VARIABLE, index) @@ -136,24 +126,24 @@ end "parses a symbol to be either a variable or a parameter and returns the corresponding Expressionelement" function convert_to_ExpressionElement(element::Symbol, index::Integer)::ExpressionElement if element == :x - return ExpressionElement(VARIABLE, convert(Int32, index)) + return ExpressionElement(VARIABLE, convert(UInt32, index)) elseif element == :p - return ExpressionElement(PARAMETER, convert(Int32, index)) + return ExpressionElement(PARAMETER, convert(UInt32, index)) else throw("Cannot parse symbol to be either a variable or a parameter. 
Symbol was '$varStr'") end end function convert_to_ExpressionElement(element::Float32)::ExpressionElement - value = reinterpret(Int32, element) + value = reinterpret(UInt32, element) return ExpressionElement(FLOAT32, value) end function convert_to_ExpressionElement(element::Float64)::ExpressionElement - value = reinterpret(Int32, convert(Float32, element)) + value = reinterpret(UInt32, convert(Float32, element)) return ExpressionElement(FLOAT32, value) end function convert_to_ExpressionElement(element::Operator)::ExpressionElement - value = reinterpret(Int32, element) + value = reinterpret(UInt32, element) return ExpressionElement(OPERATOR, value) end diff --git a/package/src/Interpreter.jl b/package/src/Interpreter.jl index 450773d..cf7b9bd 100644 --- a/package/src/Interpreter.jl +++ b/package/src/Interpreter.jl @@ -6,18 +6,17 @@ using ..Utils export interpret -const cacheFrontend = Dict{Expr, PostfixType}() - "Interprets the given expressions with the values provided. # Arguments - expressions::Vector{ExpressionProcessing.PostfixType} : The expressions to execute in postfix form - variables::Matrix{Float32} : The variables to use. Each column is mapped to the variables x1..xn - parameters::Vector{Vector{Float32}} : The parameters to use. Each Vector contains the values for the parameters p1..pn. 
The number of parameters can be different for every expression + - kwparam ```frontendCache```: The cache that stores the (partial) results of the frontend " function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32} exprs = Vector{ExpressionProcessing.PostfixType}(undef, length(expressions)) @inbounds for i in eachindex(expressions) - exprs[i] = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend) + exprs[i] = ExpressionProcessing.expr_to_postfix(expressions[i]) end variableCols = size(variables, 2) # number of variable sets to use for each expression @@ -99,6 +98,7 @@ function interpret_expression(expressions::CuDeviceArray{ExpressionElement}, var elseif opcode == SQRT operationStack[operationStackTop] = sqrt(operationStack[operationStackTop]) elseif opcode == INV + # operationStack[operationStackTop] = 1f0 / operationStack[operationStackTop] operationStack[operationStackTop] = inv(operationStack[operationStackTop]) end else diff --git a/package/src/Transpiler.jl b/package/src/Transpiler.jl index 738956f..a5b2cda 100644 --- a/package/src/Transpiler.jl +++ b/package/src/Transpiler.jl @@ -8,9 +8,10 @@ using ..Utils const BYTES = sizeof(Float32) const Operand = Union{Float32, String} # Operand is either fixed value or register -const cacheFrontend = Dict{Expr, PostfixType}() -const transpilerCache = Dict{Expr, CuFunction}() # needed if multiple runs with the same expr but different parameters are performed - +" + - kwparam ```frontendCache```: The cache that stores the (partial) results of the frontend, to speed up the pre-processing + - kwparam ```resultCache```: The cache that stores the result of the transpilation. 
Useful for parameter optimisation, as the same expression gets executed multiple times +" function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32} varRows = size(variables, 1) variableCols = size(variables, 2) @@ -48,22 +49,33 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet # end @inbounds for i in eachindex(expressions) - if haskey(transpilerCache, expressions[i]) - kernels[i] = transpilerCache[expressions[i]] - continue - end + # if haskey(resultCache, expressions[i]) + # kernels[i] = resultCache[expressions[i]] + # continue + # end - formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend) + formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i]) kernel = transpile(formattedExpr, varRows, Utils.get_max_inner_length(parameters), variableCols, i-1) # i-1 because julia is 1-based but PTX needs 0-based indexing - linker = CuLink() - add_data!(linker, "ExpressionProcessing", kernel) - - image = complete(linker) - - mod = CuModule(image) - kernels[i] = CuFunction(mod, "ExpressionProcessing") - transpilerCache[expressions[i]] = kernels[i] + # try + linker = CuLink() + add_data!(linker, "ExpressionProcessing", kernel) + + image = complete(linker) + + mod = CuModule(image) + kernels[i] = CuFunction(mod, "ExpressionProcessing") + # resultCache[expressions[i]] = kernels[i] + # catch + # dump(expressions[i]; maxdepth=10) + # println() + # println() + # println(kernel) + # println() + # println() + # error(current_exceptions()) + # end + end cudaVars = CuArray(variables) # maybe put in shared memory (see PerformanceTests.jl for more info) @@ -203,7 +215,12 @@ function generate_calculation_code(expression::ExpressionProcessing.PostfixType, for token in expression if token.Type == FLOAT32 - push!(operands, reinterpret(Float32, token.Value)) + value = reinterpret(Float32, token.Value) + if isfinite(value) + push!(operands, value) + 
else + push!(operands, "0f" * string(token.Value, base = 16)) # otherwise, values like "Inf" would be written as "Inf" and therefore not understandable to the PTX compiler + end elseif token.Type == OPERATOR operator = reinterpret(Operator, token.Value) diff --git a/package/test/PerformanceTests.jl b/package/test/PerformanceTests.jl index 1cc332b..28dab05 100644 --- a/package/test/PerformanceTests.jl +++ b/package/test/PerformanceTests.jl @@ -2,9 +2,11 @@ using LinearAlgebra using BenchmarkTools using DelimitedFiles using GZip +using CUDA using .Transpiler using .Interpreter +using .ExpressionProcessing include("parser.jl") # to parse expressions from a file @@ -48,7 +50,7 @@ expr_reps = 100 # 100 parameter optimisation steps (local search; sequentially; # Add /usr/local/cuda/bin in .bashrc to PATH to access ncu and nsys (do the tests on FH PCs) # University setup at 10.20.1.7 and 10.20.1.13 -compareWithCPU = true +compareWithCPU = false suite = BenchmarkGroup() suite["CPU"] = BenchmarkGroup(["CPUInterpreter"]) @@ -60,10 +62,14 @@ if compareWithCPU suite["CPU"]["nikuradse_1_parallel"] = @benchmarkable interpret_cpu(exprs, X, parameters; repetitions=expr_reps, parallel=true) end +# cacheInterpreter = Dict{Expr, PostfixType}() suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps) + +# cacheTranspilerFront = Dict{Expr, PostfixType}() +# cacheTranspilerRes = Dict{Expr, CuFunction}() suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps) -for i in 1:2 +for i in 1:1 tune!(suite) end BenchmarkTools.save("params.json", params(suite))