benchmarking: removed caches to get an initial performance measurement; still some problems remain

Author: Daniel
Date: 2025-05-10 13:11:27 +02:00
parent 6d3c3164cf
commit 2ba1fef5ba
5 changed files with 68 additions and 54 deletions

@@ -8,9 +8,10 @@ using ..Utils
const BYTES = sizeof(Float32)
const Operand = Union{Float32, String} # Operand is either fixed value or register
const cacheFrontend = Dict{Expr, PostfixType}()
const transpilerCache = Dict{Expr, CuFunction}() # needed if multiple runs with the same expr but different parameters are performed
"
- kwparam ```frontendCache```: The cache that stores the (partial) results of the frontend, to speed up the pre-processing
- kwparam ```transpilerCache```: The cache that stores the result of the transpilation. Useful for parameter optimisation, as the same expression gets executed multiple times
"
function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, parameters::Vector{Vector{Float32}})::Matrix{Float32}
varRows = size(variables, 1)
variableCols = size(variables, 2)
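
For context, the two Dict constants touched above implement a plain memoisation pattern keyed on the expression. A minimal, self-contained sketch of that pattern follows; the names exampleCache, fake_transpile and cached_transpile are illustrative only and not part of this repository:

# Sketch of the memoisation pattern behind cacheFrontend/transpilerCache.
const exampleCache = Dict{Expr, String}()

fake_transpile(ex::Expr) = string(ex)   # placeholder for the real, expensive work

function cached_transpile(ex::Expr)::String
    # get! runs the do-block only on a cache miss and stores the result under `ex`
    return get!(exampleCache, ex) do
        fake_transpile(ex)
    end
end

cached_transpile(:(x + 1.0f0))   # first call computes and caches
cached_transpile(:(x + 1.0f0))   # second call is served from the cache
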
@@ -48,22 +49,33 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
# end
@inbounds for i in eachindex(expressions)
if haskey(transpilerCache, expressions[i])
kernels[i] = transpilerCache[expressions[i]]
continue
end
# if haskey(resultCache, expressions[i])
# kernels[i] = resultCache[expressions[i]]
# continue
# end
formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend)
formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i])
kernel = transpile(formattedExpr, varRows, Utils.get_max_inner_length(parameters), variableCols, i-1) # i-1 because Julia is 1-based but PTX needs 0-based indexing
linker = CuLink()
add_data!(linker, "ExpressionProcessing", kernel)
image = complete(linker)
mod = CuModule(image)
kernels[i] = CuFunction(mod, "ExpressionProcessing")
transpilerCache[expressions[i]] = kernels[i]
# try
linker = CuLink()
add_data!(linker, "ExpressionProcessing", kernel)
image = complete(linker)
mod = CuModule(image)
kernels[i] = CuFunction(mod, "ExpressionProcessing")
# resultCache[expressions[i]] = kernels[i]
# catch
# dump(expressions[i]; maxdepth=10)
# println()
# println()
# println(kernel)
# println()
# println()
# error(current_exceptions())
# end
end
cudaVars = CuArray(variables) # maybe put in shared memory (see PerformanceTests.jl for more info)
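
For reference, the linking steps used in this loop (CuLink, add_data!, complete, CuModule, CuFunction) are CUDA.jl's driver-level API for JIT-compiling PTX text. Below is a minimal sketch of that flow in isolation; the PTX string is hand-written for illustration and is not output of this repository's transpiler, and a CUDA-capable device is assumed:

using CUDA

# Hand-written minimal PTX module with one empty kernel (illustrative only).
ptx = """
.version 6.0
.target sm_52
.address_size 64

.visible .entry Empty()
{
    ret;
}
"""

linker = CuLink()
add_data!(linker, "Empty", ptx)    # hand the PTX source to the JIT linker
image = complete(linker)           # finish linking into a loadable image
cumod = CuModule(image)            # load the image as a CUDA module
func = CuFunction(cumod, "Empty")  # look up the kernel by its .entry name
cudacall(func, Tuple{}; threads = 1, blocks = 1)   # launch once to confirm it loads
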
@@ -203,7 +215,12 @@ function generate_calculation_code(expression::ExpressionProcessing.PostfixType,
for token in expression
if token.Type == FLOAT32
push!(operands, reinterpret(Float32, token.Value))
value = reinterpret(Float32, token.Value)
if isfinite(value)
push!(operands, value)
else
push!(operands, "0f" * string(token.Value, base = 16)) # otherwise, values like "Inf" would be written as "Inf" and therefore not understandable to the PTX compiler
end
elseif token.Type == OPERATOR
operator = reinterpret(Operator, token.Value)
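
The hex-literal branch above relies on PTX's "0f" form for single-precision constants (the prefix 0f followed by the eight hex digits of the Float32 bit pattern). A short sketch of what it produces for a non-finite value:

# Sketch: encoding a non-finite Float32 as a PTX hex float literal.
bits = reinterpret(UInt32, Inf32)        # 0x7f800000
lit  = "0f" * string(bits, base = 16)    # "0f7f800000" -- accepted by PTX, unlike "Inf"
# Non-finite Float32 values always have all exponent bits set, so string(bits, base = 16)
# always yields the full eight hex digits that the 0f literal form expects.
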