added guard clause generation
Some checks failed
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Has been cancelled
CompatHelper / CompatHelper (push) Has been cancelled
Some checks failed
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Has been cancelled
CompatHelper / CompatHelper (push) Has been cancelled
This commit is contained in:
parent
d875fc7325
commit
7283082699
|
@ -44,8 +44,8 @@ end
|
||||||
#TODO: Add @inbounds to all indexing after it is verified that all works https://cuda.juliagpu.org/stable/development/kernel/#Bounds-checking
|
#TODO: Add @inbounds to all indexing after it is verified that all works https://cuda.juliagpu.org/stable/development/kernel/#Bounds-checking
|
||||||
const MAX_STACK_SIZE = 25 # The max number of values the expression can have. so Constant values, Variables and parameters
|
const MAX_STACK_SIZE = 25 # The max number of values the expression can have. so Constant values, Variables and parameters
|
||||||
function interpret_expression(expressions::CuDeviceArray{ExpressionElement}, variables::CuDeviceArray{Float64}, parameters::CuDeviceArray{Float64}, results::CuDeviceArray{Float64}, stepsize::CuDeviceArray{Int}, exprIndex::Int)
|
function interpret_expression(expressions::CuDeviceArray{ExpressionElement}, variables::CuDeviceArray{Float64}, parameters::CuDeviceArray{Float64}, results::CuDeviceArray{Float64}, stepsize::CuDeviceArray{Int}, exprIndex::Int)
|
||||||
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x
|
index = (blockIdx().x - 1) * blockDim().x + threadIdx().x # ctaid.x * ntid.x + tid.x
|
||||||
stride = gridDim().x * blockDim().x
|
stride = gridDim().x * blockDim().x # nctaid.x * ntid.x
|
||||||
|
|
||||||
firstExprIndex = ((exprIndex - 1) * stepsize[1]) + 1 # Inclusive
|
firstExprIndex = ((exprIndex - 1) * stepsize[1]) + 1 # Inclusive
|
||||||
lastExprIndex = firstExprIndex + stepsize[1] - 1 # Inclusive
|
lastExprIndex = firstExprIndex + stepsize[1] - 1 # Inclusive
|
||||||
|
|
|
@ -78,36 +78,83 @@ function culoadtest(N::Int32, op = "add.f32")
|
||||||
@time CUDA.@sync cudacall(func, Tuple{CuPtr{Cfloat},CuPtr{Cfloat},CuPtr{Cfloat},Cint}, d_a, d_b, d_c, N; threads=threadsPerBlock, blocks=blocksPerGrid)
|
@time CUDA.@sync cudacall(func, Tuple{CuPtr{Cfloat},CuPtr{Cfloat},CuPtr{Cfloat},Cint}, d_a, d_b, d_c, N; threads=threadsPerBlock, blocks=blocksPerGrid)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# PTX label of the kernel's exit point. `transpile` prints it directly in front of
# `: ret;`, and `get_guard_clause` uses it as the target of its `bra` instruction.
# The backslash keeps Julia from treating `$` as the start of a string interpolation.
const exitJumpLocationMarker = "\$L__BB0_2"
|
||||||
"""
    transpile(expression::ExpressionProcessing.PostfixType)

Assemble the PTX skeleton of the `ExpressionProcessing` kernel and print it to standard
output.

The generated text consists of the module header, the kernel signature, the guard clause
and the exit label followed by `ret;`. `expression` is not read by the body yet, and
register definitions, parameter loading and the expression code itself are still missing.
Returns `nothing`.
"""
function transpile(expression::ExpressionProcessing.PostfixType)
    kernel = IOBuffer()

    # Prologue: module header, entry-point signature and the opening brace of the body.
    header = get_cuda_header()
    signature = get_kernel_signature("ExpressionProcessing", [Int64, Float64])
    print(kernel, header, "\n", signature, "\n", "{", "\n")

    # Register definition
    # Parameter loading
    print(kernel, get_guard_clause(), "\n")

    # Code goes here

    # Epilogue: the exit label targeted by the guard clause, then the closing brace.
    print(kernel, exitJumpLocationMarker, ": ret;", "\n", "}", "\n")

    ptx = String(take!(kernel))
    println(ptx)
    return nothing
end
|
||||||
|
|
||||||
# TODO: Make version, target and address_size configurable
|
# TODO: Make version, target and address_size configurable
|
||||||
"""
    get_cuda_header()::String

Return the directives placed at the top of every generated PTX module: `.version 7.1`,
`.target sm_52` and `.address_size 64`, one per line, with a leading and a trailing
newline.
"""
function get_cuda_header()::String
    directives = (".version 7.1", ".target sm_52", ".address_size 64")
    return "\n" * join(directives, "\n") * "\n"
end
|
||||||
|
|
||||||
"""
    get_kernel_signature(kernelName::String, parameters::AbstractVector{<:Type})::String

Build the PTX entry-point declaration of a kernel called `kernelName`.

Every element of `parameters` is translated with `type_to_cuda_type` and becomes one
`.param` declaration named `<kernelName>_param_<i>`, where `i` is the element's index in
`parameters`. Declarations are separated by a comma and a newline; the closing parenthesis
follows the last declaration directly, and an empty `parameters` yields an empty list.

`parameters` may be any vector of types, for example the `Vector{DataType}` produced by
`[Int64, Float64]` or a `Vector{Type}`. Julia's type parameters are invariant, so neither
of the two is a subtype of the other.

# Example
`get_kernel_signature("K", [Int64])` returns
```
.visible .entry K(
 .param .s64 K_param_1)
```
"""
function get_kernel_signature(kernelName::String, parameters::AbstractVector{<:Type})::String
    declarations = (
        string(" .param ", type_to_cuda_type(parameters[i]), " ", kernelName, "_param_", i)
        for i in eachindex(parameters)
    )

    # `join` places ",\n" between declarations only, so no index bookkeeping is needed.
    return string(".visible .entry ", kernelName, "(\n", join(declarations, ",\n"), ")")
end
|
||||||
|
|
||||||
function type_to_cuda_type(type::Type)::String
|
"
|
||||||
|
Constructs the PTX code used for handling the case where too many threads are started.
|
||||||
|
|
||||||
|
Assumes the following:
|
||||||
|
- The ```32 bit``` registers ```r1, r2, r3, r4``` are unused and may be overwritten
|
||||||
|
- There is an unused ```predicate``` register ```p1```
|
||||||
|
- The ```32 bit``` register ```r5``` contains the number of variable sets
|
||||||
|
"
|
||||||
|
function get_guard_clause()::String
    instructions = (
        # r1 = number of threads per block (blockDim().x in CUDA.jl terms)
        "mov.u32 %r1, %ntid.x;",
        # r2 = index of the current block within the grid (blockIdx().x)
        "mov.u32 %r2, %ctaid.x;",
        # r3 = index of the current thread within its block (threadIdx().x)
        "mov.u32 %r3, %tid.x;",
        # r4 = r1 * r2 + r3, the global index of this thread (index of the variable set)
        "mad.lo.s32 %r4, %r1, %r2, %r3;",
        # p1 = (r4 >= r5): true for surplus threads that have no variable set to process
        "setp.ge.s32 %p1, %r4, %r5;",
        # jump to the kernel's exit label if p1 is true
        string("@%p1 bra ", exitJumpLocationMarker, ";"),
    )

    # One instruction per line, each terminated by a newline.
    return join(instructions, "\n") * "\n"
end
|
||||||
|
|
||||||
|
function type_to_cuda_type(type::DataType)::String
|
||||||
if type == Int64
|
if type == Int64
|
||||||
return ".s64"
|
return ".s64"
|
||||||
elseif type == Float64
|
elseif type == Float64
|
||||||
|
|
|
@ -25,17 +25,7 @@ function testHelper(expression::Expr, variables::Matrix{Float64}, parameters::Ve
|
||||||
@test isequal(result[1,1], expectedResult)
|
@test isequal(result[1,1], expectedResult)
|
||||||
end
|
end
|
||||||
|
|
||||||
@testset "Test TMP interpretation" begin
|
|
||||||
postfixExpr = expr_to_postfix(expressions[1])
|
|
||||||
postfixExprs = Vector([postfixExpr])
|
|
||||||
push!(postfixExprs, expr_to_postfix(expressions[2]))
|
|
||||||
|
|
||||||
# CUDA.@sync interpret(postfixExprs, variables, parameters)
|
|
||||||
end
|
|
||||||
|
|
||||||
@testset "Test conversion to matrix" begin
|
@testset "Test conversion to matrix" begin
|
||||||
return
|
|
||||||
|
|
||||||
reference = Matrix{Float64}(undef, 2, 2)
|
reference = Matrix{Float64}(undef, 2, 2)
|
||||||
reference[1,1] = 5.0
|
reference[1,1] = 5.0
|
||||||
reference[2,1] = NaN64
|
reference[2,1] = NaN64
|
||||||
|
@ -43,14 +33,12 @@ end
|
||||||
reference[2,2] = 0.0
|
reference[2,2] = 0.0
|
||||||
# reference = Matrix([5.0, NaN],
|
# reference = Matrix([5.0, NaN],
|
||||||
# [5.0, 0.0])
|
# [5.0, 0.0])
|
||||||
CUDA.@sync result = Interpreter.convert_to_matrix(parameters, NaN64)
|
result = Interpreter.convert_to_matrix(parameters, NaN64)
|
||||||
|
|
||||||
@test isequal(result, reference)
|
@test isequal(result, reference)
|
||||||
end
|
end
|
||||||
|
|
||||||
@testset "Test commutative interpretation" begin
|
@testset "Test commutative interpretation" begin
|
||||||
return
|
|
||||||
|
|
||||||
var = Matrix{Float64}(undef, 2, 1)
|
var = Matrix{Float64}(undef, 2, 1)
|
||||||
param = Vector{Vector{Float64}}(undef, 1)
|
param = Vector{Vector{Float64}}(undef, 1)
|
||||||
expectedResult = 8.0 # Not using "eval" because the variables are not stored in global scope
|
expectedResult = 8.0 # Not using "eval" because the variables are not stored in global scope
|
||||||
|
@ -71,8 +59,6 @@ end
|
||||||
end
|
end
|
||||||
|
|
||||||
@testset "Test non commutative interpretation" begin
|
@testset "Test non commutative interpretation" begin
|
||||||
return
|
|
||||||
|
|
||||||
var = Matrix{Float64}(undef, 2, 1)
|
var = Matrix{Float64}(undef, 2, 1)
|
||||||
param = Vector{Vector{Float64}}(undef, 1)
|
param = Vector{Vector{Float64}}(undef, 1)
|
||||||
expectedResult = -2.0 # Not using "eval" because the variables are not stored in global scope
|
expectedResult = -2.0 # Not using "eval" because the variables are not stored in global scope
|
||||||
|
@ -103,8 +89,6 @@ end
|
||||||
end
|
end
|
||||||
|
|
||||||
@testset "Test single value operator interpretation" begin
|
@testset "Test single value operator interpretation" begin
|
||||||
return
|
|
||||||
|
|
||||||
var = Matrix{Float64}(undef, 1, 1)
|
var = Matrix{Float64}(undef, 1, 1)
|
||||||
param = Vector{Vector{Float64}}(undef, 1)
|
param = Vector{Vector{Float64}}(undef, 1)
|
||||||
expectedResult = 3.0 # Not using "eval" because the variables are not stored in global scope
|
expectedResult = 3.0 # Not using "eval" because the variables are not stored in global scope
|
||||||
|
|
32
package/test/TranspilerTests.jl
Normal file
32
package/test/TranspilerTests.jl
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
using CUDA
|
||||||
|
using .ExpressionProcessing
|
||||||
|
using .Transpiler
|
||||||
|
|
||||||
|
# Fixture data for the transpiler tests: two expressions over the variables x1, x2 and
# the parameters p1, p2.
# Resulting value should be 10 for the first expression
# (2 + 1 * 3 + 5, presumably taking x1 and x2 from the first column of `variables`).
expressions = Expr[:(x1 + 1 * x2 + p1), :(5 + x1 + 1 * x2 + p1 + p2)]

variables = [2.0 0.0; 3.0 5.0]

# One parameter vector per expression.
parameters = [[5.0], [5.0, 0.0]]


@testset "Test TMP transpiler" begin
    firstPostfix = expr_to_postfix(expressions[1])

    # Both expressions in postfix form; the collection is not used after this point.
    allPostfix = [firstPostfix]
    push!(allPostfix, expr_to_postfix(expressions[2]))

    # Only checks that PTX generation runs without throwing; the output is printed.
    Transpiler.transpile(firstPostfix)
end
|
||||||
|
|
||||||
|
#TODO: test performance of transpiler PTX generation when doing "return String(take!(buffer))" vs "return take!(buffer)"
|
|
@ -4,8 +4,10 @@ using Test
|
||||||
const baseFolder = dirname(dirname(pathof(ExpressionExecutorCuda)))
|
const baseFolder = dirname(dirname(pathof(ExpressionExecutorCuda)))
|
||||||
include(joinpath(baseFolder, "src", "ExpressionProcessing.jl"))
|
include(joinpath(baseFolder, "src", "ExpressionProcessing.jl"))
|
||||||
include(joinpath(baseFolder, "src", "Interpreter.jl"))
|
include(joinpath(baseFolder, "src", "Interpreter.jl"))
|
||||||
|
include(joinpath(baseFolder, "src", "Transpiler.jl"))
|
||||||
|
|
||||||
@testset "ExpressionExecutorCuda.jl" begin
    # The test files are included in the listed order.
    testFiles = (
        "ExpressionProcessingTests.jl",
        "InterpreterTests.jl",
        "TranspilerTests.jl",
    )
    for testFile in testFiles
        include(testFile)
    end
end
|
||||||
|
|
Loading…
Reference in New Issue
Block a user