laid groundwork for implementing interpretation

2024-07-21 13:45:57 +02:00
parent e1097202ab
commit c871487a55
4 changed files with 108 additions and 53 deletions
--- a/package/src/Interpreter.jl
+++ b/package/src/Interpreter.jl
@ -12,59 +12,93 @@ export interpret
 "
 function interpret(expressions::Vector{ExpressionProcessing.PostfixType}, variables::Matrix{Float64}, parameters::Vector{Vector{Float64}})
 	# TODO: 
-	#      create CUDA array and fill it with the variables and parameters
 	#      create CUDA array for calculation results
 	variableRows = size(variables, 1)
 	cudaVars = CuArray(variables)
-	
-	paramRows = get_max_parameter_rows(parameters)
-	p1aramCols = length(parameters)
-	cudaParams = CuArray{Float64}(undef, p1aramCols, paramRows) # length(parameters) == number of expressions
-	# TODO: Fill cudaParams
+	# Pad the ragged parameter and expression vectors into dense GPU arrays; unused slots hold NaN64 / ExpressionElement(EMPTY, 0)
+	cudaParams = create_cuda_array(parameters, NaN64)
+	cudaExprs = create_cuda_array(expressions, ExpressionElement(EMPTY, 0))
+	cudaStepsize = CuArray([get_max_inner_length(expressions), get_max_inner_length(parameters)]) # [max expression length, max parameter length]; kept in a separate CuArray, as this is static and would be inefficient to send separately to every kernel

-	# TODO: Move CuArray(expression[i]) outside the loop for a more efficient transfer to GPU but leave kernel signature as is
+	# Start one kernel per expression, so that no warp ever works on more than one expression
 	for i in eachindex(expressions)
-		cudaExpr = CuArray(expressions[i])
-		kernel = @cuda launch=false interpret_expression(cudaExpr, cudaVars, cudaParams, i)
+		kernel = @cuda launch=false interpret_expression(cudaExprs, cudaVars, cudaParams, cudaStepsize, i)
 		config = launch_configuration(kernel.fun)
+		# Use at most config.threads threads per block and enough blocks to provide variableRows threads in total
+		# NOTE(review): threads is 0 when variables has no rows, which would make cld throw a DivideError - confirm callers never pass an empty matrix
 		threads = min(variableRows, config.threads)
 		blocks = cld(variableRows, threads)

-		kernel(cudaExpr, cudaVars, cudaParams, i; threads, blocks)
+		kernel(cudaExprs, cudaVars, cudaParams, cudaStepsize, i; threads, blocks)
 	end
 end

-function interpret_expression(expression, variables, parameters, exprIndex::Int)
-	#TODO Implement interpreter
+"Kernel stub that locates the slots of ```expressions``` and ```parameters``` that belong to the expression ```exprIndex```.
+
+```stepsize[1]``` is the number of slots reserved per expression in ```expressions```, ```stepsize[2]``` the number of slots reserved per expression in ```parameters```. The interpretation itself is not implemented yet.
+"
+function interpret_expression(expressions::CuDeviceArray{ExpressionElement}, variables::CuDeviceArray{Float64}, parameters::CuDeviceArray{Float64}, stepsize::CuDeviceArray{Int}, exprIndex::Int)
+	# Slots of expression i start at (i - 1) * stepsize + 1. The inner parentheses are required, because `*` binds tighter than `-`
+	firstExprIndex = ((exprIndex - 1) * stepsize[1]) + 1 # Inclusive
+	lastExprIndex = firstExprIndex + stepsize[1] - 1 # Inclusive, so the range below never reaches into the next expression
+	firstParamIndex = ((exprIndex - 1) * stepsize[2]) + 1 # Inclusive
+	# lastParamIndex = firstParamIndex + stepsize[2] - 1 # Inclusive (probably not needed)
+	
+	for i in firstExprIndex:lastExprIndex
+		# TODO Implement interpreter
+		#      - start at firstExprIndex and interpret until the first ExpressionElement is "Empty" or we reached lastExprIndex
+	end

+	return
 end

+
 "Retrieves the number of entries for the largest inner vector"
-function get_max_parameter_rows(params::Vector{Vector{T}})::Int where T
+function get_max_inner_length(vec::Vector{Vector{T}})::Int where T
+	# Running maximum; stays 0 when vec is empty or only contains empty vectors
 	maxLength = 0
-	for i in eachindex(params)
-		if length(params) > maxLength
-			maxLength = length(params)
+	# Bounds checks can be skipped, because i only takes values produced by eachindex(vec)
+	@inbounds for i in eachindex(vec)
+		if length(vec[i]) > maxLength
+			maxLength = length(vec[i])
 		end
 	end

 	return maxLength
 end

+"Returns a CuArray filled with the data provided. The inner vectors do not have to have the same length. All missing elements will be the value ```invalidElement```"
+function create_cuda_array(data::Vector{Vector{T}}, invalidElement::T)::CuArray{T} where T
+	# Pad the ragged input into a dense host matrix first: one column per inner vector
+	paddedHost = convert_to_matrix(data, invalidElement)
+
+	# Allocate a device array of the same shape and upload the padded data into it
+	deviceArr = CuArray{T}(undef, size(paddedHost)...)
+	copyto!(deviceArr, paddedHost)
+
+	return deviceArr
+end
+
+"Converts a vector of vectors into a matrix. The inner vectors do not need to have the same length.
+
+All entries that cannot be filled have ```invalidElement``` as their value
+"
+function convert_to_matrix(vec::Vector{Vector{T}}, invalidElement::T)::Matrix{T} where T
+	# Inner vector i becomes column i, so the matrix needs as many rows as the longest inner vector has entries
+	maxInnerLength = get_max_inner_length(vec)
+	vecMat = fill(invalidElement, maxInnerLength, length(vec))
+	
+	for i in eachindex(vec)
+		# Write directly into the leading rows of column i; the remaining rows keep invalidElement.
+		# A view is used, because indexing with vecMat[:,i] would allocate a temporary copy of the column
+		copyto!(view(vecMat, :, i), vec[i])
+	end
+
+	return vecMat
+end


-@deprecate InterpretExplicit!(op::Operator, x, y) interpret_expression(expression, variables, parameters, exprIndex::Int)
+
+# @deprecate InterpretExplicit!(op::Operator, x, y) interpret_expression(expression, variables, parameters, exprIndex::Int)
 # Kernel
 function InterpretExplicit!(op::Operator, x, y)
 	index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
 	stride = gridDim().x * blockDim().x

-	if op == Add
+	if op == ADD
 		# @cuprintln("Performing Addition") # Will only be displayed when the GPU is synchronized
 		for i = index:stride:length(y)
 			@inbounds y[i] += x[i]
 		end
 		return
-	elseif op == Subtract
+	elseif op == SUBTRACT
 		# @cuprintln("Performing Subtraction") # Will only be displayed when the GPU is synchronized
 		for i = index:stride:length(y)
 			@inbounds y[i] -= x[i]