tried streamlining register management
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
This commit is contained in:
parent
68cedd75fc
commit
1e7f6e9010
|
@ -14,7 +14,6 @@ export test
|
||||||
|
|
||||||
# Evaluate Expressions on the GPU
|
# Evaluate Expressions on the GPU
|
||||||
function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
|
function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{Float32}})::Matrix{Float32}
|
||||||
# Ensure that no two expressions are interpreted in the same "warp"
|
|
||||||
exprsPostfix = ExpressionProcessing.expr_to_postfix(exprs[1])
|
exprsPostfix = ExpressionProcessing.expr_to_postfix(exprs[1])
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -100,6 +100,32 @@ end
|
||||||
# Note: Maybe make an additional function that transpiles and executes the code. This would then be the function the user calls
|
# Note: Maybe make an additional function that transpiles and executes the code. This would then be the function the user calls
|
||||||
#
|
#
|
||||||
|
|
||||||
|
# TODO: I guess I need to move this to different module because it seems like I can't access inner functions
|
||||||
|
let registers = Dict{String, Int}() # per-name count of the registers already handed out
    global get_next_free_register
    global get_used_registers

    # By convention these names correspond to the following types:
    # - p -> pred
    # - f32 -> float32
    # - b32 -> 32 bit
    # - var -> float32
    # - param -> float32 !! although they might get inserted as fixed numbers and not be sent to the GPU?

    # Reserve the next register for the given `name` and return its identifier.
    #
    # The counter for `name` starts at 1 on first use and is incremented on every
    # further call. The returned identifier is "%" followed by `name` and the new
    # counter value, e.g. the first call with "p" returns "%p1".
    function get_next_free_register(name::String)::String
        # Missing names count as 0, so the first register of a name gets number 1.
        count = get(registers, name, 0) + 1
        registers[name] = count

        # `string` concatenates the textual representation of its arguments.
        # (The `String` constructor cannot be called with a vector of mixed values.)
        return string("%", name, count)
    end

    # Return the name => count pairs of all registers handed out so far.
    function get_used_registers()
        return pairs(registers)
    end
end
|
||||||
|
|
||||||
# To increase performance, it would probably be best for all helper functions to return their IO Buffer and not a string
|
# To increase performance, it would probably be best for all helper functions to return their IO Buffer and not a string
|
||||||
const exitJumpLocationMarker = "\$L__BB0_2"
|
const exitJumpLocationMarker = "\$L__BB0_2"
|
||||||
function transpile(expression::ExpressionProcessing.PostfixType)::String
|
function transpile(expression::ExpressionProcessing.PostfixType)::String
|
||||||
|
@ -133,7 +159,7 @@ function transpile(expression::ExpressionProcessing.PostfixType)::String
|
||||||
return generatedCode
|
return generatedCode
|
||||||
end
|
end
|
||||||
|
|
||||||
# TODO: Make version, target and address_size configurable
|
# TODO: Make version, target and address_size configurable; also see what address_size means exactly
|
||||||
function get_cuda_header()::String
|
function get_cuda_header()::String
|
||||||
return "
|
return "
|
||||||
.version 7.1
|
.version 7.1
|
||||||
|
@ -178,9 +204,9 @@ function get_guard_clause()::String
|
||||||
println(guardBuffer, "mov.u32 %r2, %tid.x;") # id of the current thread
|
println(guardBuffer, "mov.u32 %r2, %tid.x;") # id of the current thread
|
||||||
|
|
||||||
println(guardBuffer, "mad.lo.s32 %r3, %r0, %r1, %r2;") # the current index (basically index of variable set)
|
println(guardBuffer, "mad.lo.s32 %r3, %r0, %r1, %r2;") # the current index (basically index of variable set)
|
||||||
println(guardBuffer, "setp.ge.s32 %p0, %r3, %r4;") # guard clause (p1 = r4 > r5 -> index > nrOfVariableSets)
|
println(guardBuffer, "setp.ge.s32 %p0, %r3, %r4;") # guard clause (p0 = r3 > r4 -> index > nrOfVariableSets)
|
||||||
|
|
||||||
# branch to end if p1 is true
|
# branch to end if p0 is true
|
||||||
print(guardBuffer, "@%p0 bra $exitJumpLocationMarker;")
|
print(guardBuffer, "@%p0 bra $exitJumpLocationMarker;")
|
||||||
|
|
||||||
return String(take!(guardBuffer))
|
return String(take!(guardBuffer))
|
||||||
|
@ -203,10 +229,6 @@ function get_register_definitions(nrPred::Int, nr32Bit::Int, nrFloat32::Int)::St
|
||||||
return String(take!(registersBuffer))
|
return String(take!(registersBuffer))
|
||||||
end
|
end
|
||||||
|
|
||||||
# TODO: Dont convert expression to postfix! It seems like this is not the best way since postfix evaluation assumes to be calculated in a stack
|
|
||||||
# where results get pushed back to the stack. This however is not the best behaviour for this kind of calculation.
|
|
||||||
# Probably do this: Get Expr -> traverse tree -> if child node is Expr: basically replace that node with the register containing the result of that Expr
|
|
||||||
|
|
||||||
# Current assumption: Expression only made out of constant values
|
# Current assumption: Expression only made out of constant values
|
||||||
function generate_calculation_code(expression::ExpressionProcessing.PostfixType)::Tuple{String, Int}
|
function generate_calculation_code(expression::ExpressionProcessing.PostfixType)::Tuple{String, Int}
|
||||||
codeBuffer = IOBuffer()
|
codeBuffer = IOBuffer()
|
||||||
|
@ -220,37 +242,21 @@ function generate_calculation_code(expression::ExpressionProcessing.PostfixType)
|
||||||
if token.Type == FLOAT32
|
if token.Type == FLOAT32
|
||||||
push!(operands, reinterpret(Float32, token.Value))
|
push!(operands, reinterpret(Float32, token.Value))
|
||||||
elseif token.Type == OPERATOR
|
elseif token.Type == OPERATOR
|
||||||
|
# function call to see if operator is unary -> adapt below calculation; probably able to reuse register
|
||||||
operator = get_ptx_operator(reinterpret(Operator, token.Value))
|
operator = get_ptx_operator(reinterpret(Operator, token.Value))
|
||||||
register = "%f$registerCounter"
|
register = "%f$registerCounter"
|
||||||
print(codeBuffer, " $operator $register, ")
|
print(codeBuffer, " $operator $register, ")
|
||||||
|
|
||||||
# Ugly temporary proof of concept which is ignoring unary operators
|
|
||||||
# if length(operands) == 0
|
|
||||||
# print(codeBuffer, "%f")
|
|
||||||
# print(codeBuffer, registerCounter - 2) # add result before previous result
|
|
||||||
# end
|
|
||||||
# print(codeBuffer, " ")
|
|
||||||
# if length(operands) <= 1
|
|
||||||
# print(codeBuffer, "%f")
|
|
||||||
# print(codeBuffer, registerCounter - 1) # add previous result
|
|
||||||
# end
|
|
||||||
# print(codeBuffer, " ")
|
|
||||||
|
|
||||||
ops = last(operands, 2)
|
ops = last(operands, 2)
|
||||||
pop!(operands);pop!(operands)
|
pop!(operands);pop!(operands)
|
||||||
print(codeBuffer, join(ops, ", ")) # if operands has too few values it means the previous calculation is needed. So we need to use registerCounter - 1 or registerCounter - 2 previous registers
|
print(codeBuffer, join(ops, ", "))
|
||||||
println(codeBuffer, ";")
|
println(codeBuffer, ";")
|
||||||
|
|
||||||
# empty!(operands)
|
|
||||||
push!(operands, register)
|
push!(operands, register)
|
||||||
registerCounter += 1
|
registerCounter += 1
|
||||||
|
elseif token.Type == INDEX
|
||||||
|
# TODO
|
||||||
end
|
end
|
||||||
|
|
||||||
# read to operator
|
|
||||||
# add code for calculation
|
|
||||||
|
|
||||||
# on first iteration this would be either 2 or 3 steps (two if unary and three if binary operator)
|
|
||||||
# on all other operations either 1 or 2 (one if unary and two if binary operator)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return (String(take!(codeBuffer)), registerCounter)
|
return (String(take!(codeBuffer)), registerCounter)
|
||||||
|
@ -266,8 +272,7 @@ function type_to_ptx_type(type::DataType)::String
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# TODO: Probably change this, to return the entire calculation not just the operator. Because for POWER and EXP we need multiple instructions to calculate them.
|
# TODO: Probably change this, to return the entire calculation not just the operator. Because for POWER and EXP we need multiple instructions to calculate them (separation of concerns).
|
||||||
# Left out for now since I don't have register management yet
|
|
||||||
function get_ptx_operator(operator::Operator)::String
|
function get_ptx_operator(operator::Operator)::String
|
||||||
if operator == ADD
|
if operator == ADD
|
||||||
return "add.f32"
|
return "add.f32"
|
||||||
|
@ -293,5 +298,6 @@ function get_ptx_operator(operator::Operator)::String
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user