diff --git a/package/src/Transpiler.jl b/package/src/Transpiler.jl
index b4f0d67..029a3d5 100644
--- a/package/src/Transpiler.jl
+++ b/package/src/Transpiler.jl
@@ -87,14 +87,14 @@ function transpile(expression::ExpressionProcessing.PostfixType)
 
     println(ptxBuffer, "{")
     # Register definition
+    println(ptxBuffer, get_register_definitions(1, 5)) # TODO: Actually calculate the number of needed registers and extend to more register kinds
     # Parameter loading
     println(ptxBuffer, get_guard_clause())
 
     # Code goes here
 
     # exit jump location
-    print(ptxBuffer, exitJumpLocationMarker)
-    println(ptxBuffer, ": ret;")
+    print(ptxBuffer, exitJumpLocationMarker); println(ptxBuffer, ": ret;")
     println(ptxBuffer, "}")
     println(String(take!(ptxBuffer)))
 end
@@ -132,7 +132,7 @@ end
 Constructs the PTX code used for handling the case where too many threads are started.
 
 Assumes the following:
-    - There are the unused ```32 bit``` registers ```r1, r2, r3, r4```
+    - There are the unused ```32 bit``` registers ```r1, r2, r3, r4 (index of the variable set)```
     - There is an unused ```predicate``` register ```p1```
     - The ```32 bit``` register ```r5``` contains the number of variable sets
 "
@@ -154,6 +154,26 @@ function get_guard_clause()::String
     return String(take!(guardBuffer))
 end
 
+"
+Constructs the PTX register declarations for the kernel.
+
+PTX declares ```%r<N>``` as the registers ```%r0``` to ```%r(N-1)```, so one more register than requested
+is declared per kind to make the registers ```1``` to ```nrPred``` / ```nr32Bit``` usable.
+A register kind with a count ```<= 0``` is omitted.
+"
+function get_register_definitions(nrPred::Int, nr32Bit::Int)::String
+    registersBuffer = IOBuffer()
+
+    if nrPred > 0
+        println(registersBuffer, ".reg .pred %p<", nrPred + 1, ">;")
+    end
+    if nr32Bit > 0
+        println(registersBuffer, ".reg .b32 %r<", nr32Bit + 1, ">;")
+    end
+
+    return String(take!(registersBuffer))
+end
+
 function type_to_cuda_type(type::DataType)::String
     if type == Int64
         return ".s64"