Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
139 lines
3.5 KiB
Julia
139 lines
3.5 KiB
Julia
using CUDA
|
|
using .ExpressionProcessing
|
|
using .Transpiler
|
|
|
|
# Shared fixtures used by the test sets in this file.

# Two sample expressions:
#   1. constants only; the resulting value should be 1.14... (1 + 15/7 - 2)
#   2. references the variables x1, x2 and the parameters p1, p2
expressions = Expr[
    :(1 + 3 * 5 / 7 - sqrt(4)),
    :(5 + x1 + 1 * x2 + p1 + p2),
]

# 2x2 Float32 matrix written as a literal: first column is (2, 3), second is (0, 5).
variables = Float32[
    2.0 0.0
    3.0 5.0
]

# Two parameter vectors of length 1 and 2
# (presumably one per expression -- confirm against the Transpiler API).
parameters = Vector{Float32}[
    Float32[5.0],
    Float32[5.0, 0.0],
]
|
|
|
|
|
|
@testset "Test TMP transpiler" begin
    # Run `expr_to_postfix` over the two shared sample expressions plus one ad-hoc
    # expression. The vector is seeded with the first result, so its element type is
    # taken from that result; the remaining results are appended in order.
    postfixExprs = [expr_to_postfix(expressions[1])]
    for expression in (expressions[2], :(5^3 + x1 - p1))
        push!(postfixExprs, expr_to_postfix(expression))
    end

    # Nothing is asserted yet. The snippets below are kept, commented out, as a
    # reference for manual experiments:
    #
    # generatedCode = Transpiler.transpile(postfixExpr)
    # generatedCode = Transpiler.transpile(postfixExprs[3], 2, 3, 2, 3) # TEMP
    # println(generatedCode)
    # CUDA.@sync interpret(postfixExprs, variables, parameters)

    # This is just here for testing. It will be called inside the execute method in
    # the Transpiler module:
    #
    # linker = CuLink()
    # add_data!(linker, "ExpressionProcessing", generatedCode)
    #
    # image = complete(linker)
    #
    # mod = CuModule(image)
    # func = CuFunction(mod, "ExpressionProcessing")
end
|
|
|
|
@testset "Test transpiler evaluation" begin
    # Only the first sample expression is passed to `Transpiler.evaluate` for now;
    # the call is timed with `@time` and its result is not asserted on.
    #
    # Earlier variant that passed both expressions, kept for reference:
    # postfixExprs = Vector{Expr}()
    # push!(postfixExprs, expressions[1])
    # push!(postfixExprs, expressions[2])
    expr = Expr[expressions[1]]

    @time Transpiler.evaluate(expr, variables, parameters)
end
|
|
|
|
#TODO: test performance of transpiler PTX generation when doing "return String(take!(buffer))" vs "return take!(buffer)"
|
|
|
|
"""
    test_kernel(results)

Store `10f0` in the first element of `results` and return `nothing`.

The write is wrapped in `@inbounds`, so no bounds check is performed; `results` must
have at least one element.
"""
function test_kernel(results)
    @inbounds setindex!(results, 10.0f0, 1)
    return
end
|
|
|
|
@testset "TEMP" begin
    # Scratch experiment that is currently switched off: this early `return` leaves the
    # block before any of the CUDA code below is reached.
    return

    results = CuArray{Float32}(undef, 2)
    # @device_code_ptx @cuda test_kernel(results)

    # println(CUDA.code_ptx(kernel.fun, ))
    # return

    # First hand-written PTX variant: stores 10.0 through the pointer passed as
    # `param_1`. It is overwritten by the second assignment to `ptx` right below.
    ptx = "
.version 8.5
.target sm_61
.address_size 64

.visible .entry ExpressionProcessing(
.param .u64 param_1)
{
.reg .b64 %parameter<1>;
.reg .b64 %i<1>;
//.reg .b64 %rd<6>;

ld.param.u64 %i0, [param_1];
cvta.to.global.u64 %parameter0, %i0;

st.global.f32 [%parameter0], 10.0;
ret;
}"

    # Second variant: computes %r3 = %ntid.x * %ctaid.x + %tid.x and branches past the
    # store when %r3 > 2.
    ptx = ".version 8.5
.target sm_61
.address_size 64

.visible .entry ExpressionProcessing(
.param .u64 param_1)
{
.reg .b64 %parameter<1>;
.reg .b32 %r<4>;
.reg .pred %p<1>;
.reg .b64 %i<1>;

ld.param.u64 %i0, [param_1];
cvta.to.global.u64 %parameter0, %i0;

mov.u32 %r0, %ntid.x;
mov.u32 %r1, %ctaid.x;
mov.u32 %r2, %tid.x;
mad.lo.s32 %r3, %r0, %r1, %r2;
setp.gt.s32 %p0, %r3, 2;
@%p0 bra \$L__BB0_2;
st.global.f32 [%parameter0], 10.0;
\$L__BB0_2: ret;
}"

    # Link the PTX text, load the linked image as a module and look up the entry point.
    ptxLinker = CuLink()
    add_data!(ptxLinker, "ExpressionProcessing", ptx)
    linkedImage = complete(ptxLinker)
    cuModule = CuModule(linkedImage)
    kernelFunc = CuFunction(cuModule, "ExpressionProcessing")

    columnCount = 2
    deviceResults = CuArray{Float32}(undef, 1)
    # Alternative ways of allocating the result buffer that were tried:
    # cd = CUDA.alloc(CUDA.DeviceMemory, (variableCols * length(expressions)) * sizeof(Float32))
    # cudaResults = CUDA.fill(0f0, variableCols * length(expressions))
    # cudaResults = cu(zeros(Float32, variableCols * length(expressions)))

    # A launch configuration is derived here, but the call below uses a fixed
    # 4 threads / 1 block instead of these values.
    launchConfig = launch_configuration(kernelFunc)
    threadCount = min(columnCount, launchConfig.threads)
    blockCount = cld(columnCount, threadCount)

    cudacall(kernelFunc, Tuple{CuPtr{Float32}}, deviceResults; threads=4, blocks=1)
    # launch(func, cudaResults; threads=threads, blocks=blocks)

    println(Array(deviceResults))
end
|
|
|
|
# TODO: University setup at 10.20.1.7