benchmarking: removed caches to get initial performance measurement. still some problems

2025-05-10 13:11:27 +02:00
parent 6d3c3164cf
commit 2ba1fef5ba
5 changed files with 68 additions and 54 deletions
--- a/package/src/ExpressionExecutorCuda.jl
+++ b/package/src/ExpressionExecutorCuda.jl
@ -9,9 +9,10 @@ include("Code.jl")
 include("CpuInterpreter.jl")
 end

+using ..ExpressionProcessing
+
 export interpret_gpu,interpret_cpu
 export evaluate_gpu
-export test

 # Some assertions:
 # Variables and parameters start their naming with "1" meaning the first variable/parameter has to be "x1/p1" and not "x0/p0"
--- a/package/src/ExpressionProcessing.jl
+++ b/package/src/ExpressionProcessing.jl
@ -2,7 +2,7 @@ module ExpressionProcessing

 export expr_to_postfix, is_binary_operator
 export PostfixType
-export Operator, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, ABS, LOG, EXP, SQRT
+export Operator, ADD, SUBTRACT, MULTIPLY, DIVIDE, POWER, ABS, LOG, EXP, SQRT, INV
 export ElementType, EMPTY, FLOAT32, OPERATOR, VARIABLE, PARAMETER
 export ExpressionElement

@ -14,7 +14,7 @@ const unary_operators = [ABS, LOG, EXP, SQRT]

 struct ExpressionElement
 	Type::ElementType
-	Value::Int32 # Reinterpret the stored value to type "ElementType" when using it
+	Value::UInt32 # Reinterpret the stored value to type "ElementType" when using it
 end

 const PostfixType = Vector{ExpressionElement}
@ -24,25 +24,29 @@ Converts a julia expression to its postfix notation.
 NOTE: All 64-Bit values will be converted to 32-Bit. Be aware of the lost precision.
 NOTE: This function is not thread save, especially cache access is not thread save
 "
-function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::PostfixType
+function expr_to_postfix(expression::Expr)::PostfixType
 	expr = expression
 	if expression.head === :->
 		# if the expression equals (x, p) -> (...) then the below statement extracts the expression to evaluate
-		expr = expression.args[2].args[2] 
+		if expression.args[2].head == :block # expressions that are not generated with the parser (./test/parser.jl) contain this extra "block" node, which needs to be skipped
+			expr = expression.args[2].args[2] 
+		else # ... if they are generated with the parser, this node is not present and therefore doesn't need to be skipped
+			expr = expression.args[2]
+		end
 	end

-	if haskey(cache, expr)
-		return cache[expr]
-	end
+	# if haskey(cache, expr)
+	# 	return cache[expr]
+	# end

 	postfix = PostfixType()

 	# Special handling in the case where the expression is an array access
 	# This can happen if the token is a variable/parameter of the form x[n]/p[n]
 	if expr.head == :ref
-		exprElement = convert_to_ExpressionElement(expr.args[1], expr.args[2]) # we assume that an array access never contains an expression, as this would make not much sense in this case
+		exprElement = convert_to_ExpressionElement(expr.args[1], expr.args[2]) # we assume that an array access never contains an expression, as this would not make much sense in this case
 		push!(postfix, exprElement)
-		cache[expr] = postfix
+		# cache[expr] = postfix
 		return postfix
 	end

@ -52,7 +56,7 @@ function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::Post
 		arg = expr.args[j]

 		if typeof(arg) === Expr
-			append!(postfix, expr_to_postfix(arg, cache))
+			append!(postfix, expr_to_postfix(arg))
 		elseif typeof(arg) === Symbol # variables/parameters of the form xn/pn
 			exprElement = convert_to_ExpressionElement(arg)
 			push!(postfix, exprElement)
@ -74,7 +78,7 @@ function expr_to_postfix(expression::Expr, cache::Dict{Expr, PostfixType})::Post
 		push!(postfix, convert_to_ExpressionElement(operator))
 	end

-	cache[expr] = postfix
+	# cache[expr] = postfix
 	return postfix
 end

@ -106,24 +110,10 @@ function get_operator(op::Symbol)::Operator
 	end
 end

-"Extracts the number from a variable/parameter and returns it. If the symbol is a parameter ```pn```, the resulting value will be negativ.
-
-```x0 and p0``` are not allowed."
-function convert_var_to_int(var::Symbol)::Int32
-	varStr = String(var)
-	number = parse(Int32, SubString(varStr, 2))
-
-	if varStr[1] == 'p'
-		number = -number
-	end
-
-	return number
-end
-
 "parses a symbol to be either a variable or a parameter and returns the corresponding Expressionelement"
 function convert_to_ExpressionElement(element::Symbol)::ExpressionElement
 	varStr = String(element)
-	index = parse(Int32, SubString(varStr, 2))
+	index = parse(UInt32, SubString(varStr, 2))

 	if varStr[1] == 'x'
 		return ExpressionElement(VARIABLE, index)
@ -136,24 +126,24 @@ end
 "parses a symbol to be either a variable or a parameter and returns the corresponding Expressionelement"
 function convert_to_ExpressionElement(element::Symbol, index::Integer)::ExpressionElement
 	if element == :x
-		return ExpressionElement(VARIABLE, convert(Int32, index))
+		return ExpressionElement(VARIABLE, convert(UInt32, index))
 	elseif element == :p
-		return ExpressionElement(PARAMETER, convert(Int32, index))
+		return ExpressionElement(PARAMETER, convert(UInt32, index))
 	else
 		throw("Cannot parse symbol to be either a variable or a parameter. Symbol was '$varStr'")
 	end
 end

 function convert_to_ExpressionElement(element::Float32)::ExpressionElement
-	value = reinterpret(Int32, element)
+	value = reinterpret(UInt32, element)
 	return ExpressionElement(FLOAT32, value)
 end
 function convert_to_ExpressionElement(element::Float64)::ExpressionElement
-	value = reinterpret(Int32, convert(Float32, element))
+	value = reinterpret(UInt32, convert(Float32, element))
 	return ExpressionElement(FLOAT32, value)
 end
 function convert_to_ExpressionElement(element::Operator)::ExpressionElement
-	value = reinterpret(Int32, element)
+	value = reinterpret(UInt32, element)
 	return ExpressionElement(OPERATOR, value)
 end

--- a/package/src/Interpreter.jl
+++ b/package/src/Interpreter.jl
@ -6,18 +6,17 @@ using ..Utils

 export interpret

-const cacheFrontend = Dict{Expr, PostfixType}()
-
 "Interprets the given expressions with the values provided.
 # Arguments
 - expressions::Vector{ExpressionProcessing.PostfixType} : The expressions to execute in postfix form
 - variables::Matrix{Float32} : The variables to use. Each column is mapped to the variables x1..xn
 - parameters::Vector{Vector{Float32}} : The parameters to use. Each Vector contains the values for the parameters p1..pn. The number of parameters can be different for every expression
+ - kwparam ```frontendCache```: The cache that stores the (partial) results of the frontend
 "
 function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32}
 	exprs = Vector{ExpressionProcessing.PostfixType}(undef, length(expressions))
 	@inbounds for i in eachindex(expressions)
-		exprs[i] = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend)
+		exprs[i] = ExpressionProcessing.expr_to_postfix(expressions[i])
 	end
 	
 	variableCols = size(variables, 2) # number of variable sets to use for each expression
@ -99,6 +98,7 @@ function interpret_expression(expressions::CuDeviceArray{ExpressionElement}, var
 			elseif opcode == SQRT
 				operationStack[operationStackTop] = sqrt(operationStack[operationStackTop])
 			elseif opcode == INV
+				# operationStack[operationStackTop] = 1f0 / operationStack[operationStackTop]
 				operationStack[operationStackTop] = inv(operationStack[operationStackTop])
 			end
 		else
--- a/package/src/Transpiler.jl
+++ b/package/src/Transpiler.jl
@ -8,9 +8,10 @@ using ..Utils
 const BYTES = sizeof(Float32)
 const Operand = Union{Float32, String} # Operand is either fixed value or register

-const cacheFrontend = Dict{Expr, PostfixType}()
-const transpilerCache = Dict{Expr, CuFunction}() # needed if multiple runs with the same expr but different parameters are performed
-
+"
+ - kwparam ```frontendCache```: The cache that stores the (partial) results of the frontend, to speedup the pre-processing
+ - kwparam ```resultCache```: The cache that stores the result of the transpilation. Useful for parameter optimisation, as the same expression gets executed multiple times
+"
 function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32}
 	varRows = size(variables, 1)
 	variableCols = size(variables, 2)
@ -48,22 +49,33 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
 	# end

 	@inbounds for i in eachindex(expressions)
-		if haskey(transpilerCache, expressions[i])
-			kernels[i] = transpilerCache[expressions[i]]
-			continue
-		end
+		# if haskey(resultCache, expressions[i])
+		# 	kernels[i] = resultCache[expressions[i]]
+		# 	continue
+		# end

-		formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend)
+		formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i])
 		kernel = transpile(formattedExpr, varRows, Utils.get_max_inner_length(parameters), variableCols, i-1) # i-1 because julia is 1-based but PTX needs 0-based indexing
 		
-		linker = CuLink()
-		add_data!(linker, "ExpressionProcessing", kernel)
-		
-		image = complete(linker)
-		
-		mod = CuModule(image)
-		kernels[i] = CuFunction(mod, "ExpressionProcessing")
-		transpilerCache[expressions[i]] = kernels[i]
+		# try
+			linker = CuLink()
+			add_data!(linker, "ExpressionProcessing", kernel)
+			
+			image = complete(linker)
+			
+			mod = CuModule(image)
+			kernels[i] = CuFunction(mod, "ExpressionProcessing")
+		# 	resultCache[expressions[i]] = kernels[i]
+		# catch
+		# 	dump(expressions[i]; maxdepth=10)
+		# 	println()
+		# 	println()
+		# 	println(kernel)
+		# 	println()
+		# 	println()
+		# 	error(current_exceptions())
+		# end
+
 	end

 	cudaVars = CuArray(variables) # maybe put in shared memory (see PerformanceTests.jl for more info)
@ -203,7 +215,12 @@ function generate_calculation_code(expression::ExpressionProcessing.PostfixType,
 	for token in expression

 		if token.Type == FLOAT32
-			push!(operands, reinterpret(Float32, token.Value))
+			value = reinterpret(Float32, token.Value)
+			if isfinite(value)
+				push!(operands, value)
+			else
+				push!(operands, "0f" * string(token.Value, base = 16)) # otherwise, values like "Inf" would be written as "Inf" and therefore not understandable to the PTX compiler
+			end
 		elseif token.Type == OPERATOR
 			operator = reinterpret(Operator, token.Value)

--- a/package/test/PerformanceTests.jl
+++ b/package/test/PerformanceTests.jl
@ -2,9 +2,11 @@ using LinearAlgebra
 using BenchmarkTools
 using DelimitedFiles
 using GZip
+using CUDA

 using .Transpiler
 using .Interpreter
+using .ExpressionProcessing

 include("parser.jl") # to parse expressions from a file

@ -48,7 +50,7 @@ expr_reps = 100 # 100 parameter optimisation steps (local search; sequentially;
 # Add /usr/local/cuda/bin in .bashrc to PATH to access ncu and nsys (do the tests on FH PCs)
 # University setup at 10.20.1.7 and 10.20.1.13

-compareWithCPU = true
+compareWithCPU = false

 suite = BenchmarkGroup()
 suite["CPU"] = BenchmarkGroup(["CPUInterpreter"])
@ -60,10 +62,14 @@ if compareWithCPU
 	suite["CPU"]["nikuradse_1_parallel"] = @benchmarkable interpret_cpu(exprs, X, parameters; repetitions=expr_reps, parallel=true)
 end

+# cacheInterpreter = Dict{Expr, PostfixType}()
 suite["GPUI"]["nikuradse_1"] = @benchmarkable interpret_gpu(exprs, X_t, parameters; repetitions=expr_reps)
+
+# cacheTranspilerFront = Dict{Expr, PostfixType}()
+# cacheTranspilerRes = Dict{Expr, CuFunction}()
 suite["GPUT"]["nikuradse_1"] = @benchmarkable evaluate_gpu(exprs, X_t, parameters; repetitions=expr_reps)

-for i in 1:2
+for i in 1:1
 	tune!(suite)
 end
 BenchmarkTools.save("params.json", params(suite))