made it possible to send expressions to the GPU alongside the needed data
@@ -1,36 +1,8 @@
module Interpreter

using CUDA

include("ExpressionProcessing.jl")
using .ExpressionProcessing: PostfixType, Add, Subtract, Operator
using ..ExpressionProcessing

export Interpret
export CudaTest

function CudaTest()
    N = 2^20
    x = CUDA.fill(1.0f0, N)
    y = CUDA.fill(2.0f0, N)

    kernelAdd = @cuda launch=false InterpretExplicit!(ExpressionProcessing.Add, x, y)
    # kernelAdd = @cuda launch=false InterpretExplicit!(Add, x, y, reference)
    config = launch_configuration(kernelAdd.fun)
    threads = min(length(y), config.threads)
    blocks = cld(length(y), threads)

    kernelAdd(Add, x, y; threads, blocks)
    # println(y[1])
    # @test all(Array(y) .== 3.0f0)

    kernelSubtract = @cuda launch=false InterpretExplicit!(ExpressionProcessing.Subtract, x, y)
    configSub = launch_configuration(kernelSubtract.fun)
    threadsSub = min(length(y), configSub.threads)
    blocksSub = cld(length(y), threadsSub)
    CUDA.fill!(y, 2.0f0)

    # kernelSubtract(Subtract, x, y; threadsSub, blocksSub)
    # @test all(Array(y) .== -1.0f0)
    # println(y[1])
end

export interpret

"Interprets the given expressions with the values provided.

# Arguments
@@ -38,19 +10,49 @@ end
- variables::Matrix{Float64} : The variables to use. Each column is mapped to the variables x1..xn
- parameters::Vector{Vector{Float64}} : The parameters to use. Each vector contains the values for the parameters p1..pn. The number of parameters can differ between expressions.
"
function Interpret(expressions::Vector{ExpressionProcessing.PostfixType}, variables::Matrix{Float64}, parameters::Vector{Vector{Float64}})
function interpret(expressions::Vector{ExpressionProcessing.PostfixType}, variables::Matrix{Float64}, parameters::Vector{Vector{Float64}})
    # TODO:
    # create CUDA arrays and fill them with the expressions, variables and parameters
    # calculate the needed number of threads, probably based on the number of expressions, so that each warp takes the same execution path
    # start the kernel
    cudaExprs = Vector{CuArray{ExpressionProcessing.PostfixType}}(undef, length(expressions))
    # create CUDA array and fill it with the variables and parameters
    # create CUDA array for the calculation results
    variableRows = size(variables, 1)
    cudaVars = CuArray(variables)

    paramRows = get_max_parameter_rows(parameters)
    paramCols = length(parameters)
    cudaParams = CuArray{Float64}(undef, paramCols, paramRows) # length(parameters) == number of expressions
    # TODO: Fill cudaParams

    # TODO: Move CuArray(expressions[i]) outside the loop for a more efficient transfer to the GPU, but leave the kernel signature as is
    for i in eachindex(expressions)
        push!(cudaExprs, CuArray(expressions[i]))
        cudaExpr = CuArray(expressions[i])
        kernel = @cuda launch=false interpret_expression(cudaExpr, cudaVars, cudaParams, i)
        config = launch_configuration(kernel.fun)
        threads = min(variableRows, config.threads)
        blocks = cld(variableRows, threads)

        kernel(cudaExpr, cudaVars, cudaParams, i; threads, blocks)
    end
    # cudaExprs = CuArray(copy(expressions))
end
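
# A possible way to handle the "TODO: Fill cudaParams" above: pack the ragged
# `parameters` vectors into one dense, zero-padded host matrix and copy it to the
# GPU in a single transfer. This is only a sketch under the layout assumed above
# (one row per expression, one column per parameter slot); the helper name
# `pack_parameters` is not part of the existing code.
function pack_parameters(parameters::Vector{Vector{Float64}})::CuArray{Float64}
    paramRows = get_max_parameter_rows(parameters)
    paramCols = length(parameters)
    hostParams = zeros(Float64, paramCols, paramRows) # expression i uses hostParams[i, 1:length(parameters[i])]
    for i in eachindex(parameters)
        hostParams[i, 1:length(parameters[i])] .= parameters[i]
    end
    return CuArray(hostParams) # one host-to-device copy instead of one per expression
end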

function interpret_expression(expression, variables, parameters, exprIndex::Int)
    # TODO: Implement the interpreter
end
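
# Illustrative only: the kernel above is still a stub, and the actual layout of
# ExpressionProcessing.PostfixType is not shown in this diff. As a rough CPU-side
# sketch of the stack-based evaluation a postfix interpreter typically performs,
# assuming a hypothetical token stream of literal values and operator symbols:
function evaluate_postfix_sketch(tokens::Vector{Union{Float64, Symbol}})::Float64
    stack = Float64[]
    for token in tokens
        if token isa Float64
            push!(stack, token)          # operands go onto the stack
        elseif token == :add
            b = pop!(stack); a = pop!(stack)
            push!(stack, a + b)          # binary operators pop two operands, push one result
        elseif token == :subtract
            b = pop!(stack); a = pop!(stack)
            push!(stack, a - b)
        end
    end
    return pop!(stack)                   # the remaining value is the result
end
# evaluate_postfix_sketch(Union{Float64, Symbol}[3.0, 4.0, :add, 2.0, :subtract]) == 5.0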

"Retrieves the number of entries of the largest inner vector"
function get_max_parameter_rows(params::Vector{Vector{T}})::Int where T
    maxLength = 0
    for i in eachindex(params)
        if length(params[i]) > maxLength
            maxLength = length(params[i])
        end
    end

    return maxLength
end
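
# For example, a ragged parameter set like [[1.0, 2.0, 3.0], [4.0]] yields 3,
# since the largest inner vector has three entries:
# get_max_parameter_rows([[1.0, 2.0, 3.0], [4.0]]) == 3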


@deprecate InterpretExplicit!(op::Operator, x, y) interpret_expression(expression, variables, parameters, exprIndex::Int)
# Kernel
function InterpretExplicit!(op::Operator, x, y)
    index = (blockIdx().x - 1) * blockDim().x + threadIdx().x