tried streamlining register management

2024-12-07 10:12:53 +01:00
parent 68cedd75fc
commit 1e7f6e9010
2 changed files with 35 additions and 30 deletions
--- a/package/src/ExpressionExecutorCuda.jl
+++ b/package/src/ExpressionExecutorCuda.jl
@ -14,7 +14,6 @@ export test

 # Evaluate Expressions on the GPU
 function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
-	# Ensure that no two expressions are interpreted in the same "warp"
 	exprsPostfix = ExpressionProcessing.expr_to_postfix(exprs[1])
 end

--- a/package/src/Transpiler.jl
+++ b/package/src/Transpiler.jl
@ -100,6 +100,32 @@ end
 # Note: Maybe make an additional function that transpiles and executed the code. This would then be the function the user calls
 #

+# TODO: I guess I need to move this to a different module because it seems like I can't access inner functions
+# Register bookkeeping: counts how many registers were handed out per register name.
+let registers = Dict{String, Int}() # register name => number of registers already used
+	global get_next_free_register
+	global get_used_registers
+
+	# By convention these names correspond to the following types:
+	# - p -> pred
+	# - f32 -> float32
+	# - b32 -> 32 bit
+	# - var -> float32
+	# - param -> float32 !! although, they might get inserted as fixed number and not be sent to gpu?
+	# Reserves the next register for `name` and returns it as "%<name><count>"; counting starts at 1.
+	function get_next_free_register(name::String)::String
+		registers[name] = get(registers, name, 0) + 1
+
+		# `string` concatenates its arguments; `String([...])` has no method for a mixed Vector
+		return string("%", name, registers[name])
+	end
+
+	# Returns the (name => count) pairs of all registers handed out so far.
+	function get_used_registers()
+		return pairs(registers)
+	end
+end
+
 # To increase performance, it would probably be best for all helper functions to return their IO Buffer and not a string
 const exitJumpLocationMarker = "\$L__BB0_2"
 function transpile(expression::ExpressionProcessing.PostfixType)::String
@ -133,7 +159,7 @@ function transpile(expression::ExpressionProcessing.PostfixType)::String
 	return generatedCode
 end

-# TODO: Make version, target and address_size configurable
+# TODO: Make version, target and address_size configurable; also see what address_size means exactly
 function get_cuda_header()::String
 	return "
 .version 7.1
@ -178,9 +204,9 @@ function get_guard_clause()::String
 	println(guardBuffer, "mov.u32    %r2, %tid.x;") # id of the current thread

 	println(guardBuffer, "mad.lo.s32    %r3, %r0, %r1, %r2;") # the current index (basically index of variable set)
-	println(guardBuffer, "setp.ge.s32    %p0, %r3, %r4;") # guard clause (p1 = r4 > r5 -> index > nrOfVariableSets)
+	println(guardBuffer, "setp.ge.s32    %p0, %r3, %r4;") # guard clause (p0 = r3 >= r4 -> index >= nrOfVariableSets)

-	# branch to end if p1 is true
+	# branch to end if p0 is true
 	print(guardBuffer, "@%p0 bra    $exitJumpLocationMarker;")

 	return String(take!(guardBuffer))
@ -203,10 +229,6 @@ function get_register_definitions(nrPred::Int, nr32Bit::Int, nrFloat32::Int)::St
 	return String(take!(registersBuffer))
 end

-# TODO: Dont convert expression to postfix! It seems like this is not the best way since postfix evaluation assumes to be calculated in a stack
-# where results get pushed back to the stack. This however is not the best behaviour for this kind of calculation.
-# Probably do this: Get Expr -> traverse tree -> if child node is Expr: basically replace that node with the register containing the result of that Expr
-
 # Current assumption: Expression only made out of constant values
 function generate_calculation_code(expression::ExpressionProcessing.PostfixType)::Tuple{String, Int}
 	codeBuffer = IOBuffer()
@ -220,37 +242,21 @@ function generate_calculation_code(expression::ExpressionProcessing.PostfixType)
 		if token.Type == FLOAT32
 			push!(operands, reinterpret(Float32, token.Value))
 		elseif token.Type == OPERATOR
+			# function call to see if operator is unary -> adapt below calculation; probably able to reuse register
 			operator = get_ptx_operator(reinterpret(Operator, token.Value))
 			register = "%f$registerCounter"
 			print(codeBuffer, "    $operator $register, ")

-			# Ugly temporary proof of concept which is ignoring unary operators
-			# if length(operands) == 0
-			# 	print(codeBuffer, "%f")
-			# 	print(codeBuffer, registerCounter - 2) # add result before previous result
-			# end
-			# print(codeBuffer, " ")
-			# if length(operands) <= 1
-			# 	print(codeBuffer, "%f")
-			# 	print(codeBuffer, registerCounter - 1) # add previous result
-			# end
-			# print(codeBuffer, " ")
-
 			ops = last(operands, 2)
 			pop!(operands);pop!(operands)
-			print(codeBuffer, join(ops, ", ")) # if operands has too few values it means the previous calculation is needed. So we need to use registerCounter - 1 or registerCounter - 2 previous registers
+			print(codeBuffer, join(ops, ", "))
 			println(codeBuffer, ";")

-			# empty!(operands)
 			push!(operands, register)
 			registerCounter += 1
+		elseif token.Type == INDEX
+			# TODO
 		end
-		
-		# read to operator
-		# add code for calculation
-
-		# on first iteration this would be either 2 or 3 steps (two if unary and three if binary operator)
-		# on all other operations either 1 or 2 (one if unary and two if binary operator)
 	end

 	return (String(take!(codeBuffer)), registerCounter)
@ -266,8 +272,7 @@ function type_to_ptx_type(type::DataType)::String
 	end
 end

-# TODO: Probably change this, to return the entire calculation not just the operator. Because for POWER and EXP we need multiple instructions to calculate them.
-# Left out for now since I don't have register management yet
+# TODO: Probably change this to return the entire calculation, not just the operator, because POWER and EXP need multiple instructions to calculate them (separation of concerns).
 function get_ptx_operator(operator::Operator)::String
 	if operator == ADD
 		return "add.f32"
@ -293,5 +298,6 @@ function get_ptx_operator(operator::Operator)::String
 end


+
 end