transpiler: invalid memory access error finally fixed
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run

This commit is contained in:
2025-03-27 22:32:24 +01:00
parent 561b37160b
commit 9df78ca72e
7 changed files with 133 additions and 30 deletions

View File

@ -48,30 +48,70 @@ end
expr = Vector{Expr}()
push!(expr, expressions[1])
# @time Transpiler.evaluate(expr, variables, parameters)
@time Transpiler.evaluate(expr, variables, parameters)
end
# TODO: benchmark the transpiler's PTX generation with "return String(take!(buffer))" vs. "return take!(buffer)"
# Minimal kernel used by the temporary test: stores the Float32 value 10 in the
# first element of `results` and returns `nothing`.
# The store is wrapped in `@inbounds`, so `results` must have at least one element.
function test_kernel(results)
    @inbounds setindex!(results, 10.0f0, 1)
    return nothing
end
@testset "TEMP" begin
return
results = CuArray{Float32}(undef, 2)
# @device_code_ptx @cuda test_kernel(results)
# println(CUDA.code_ptx(kernel.fun, ))
# return
ptx = "
.version 7.1
.version 8.5
.target sm_61
.address_size 64
.visible .entry ExpressionProcessing(
.param .u32 param_1)
.param .u64 param_1)
{
.reg .u32 %parameter<1>;
.reg .u32 %i<1>;
.reg .b64 %parameter<1>;
.reg .b64 %i<1>;
//.reg .b64 %rd<6>;
ld.param.u32 %i0, [param_1];
cvta.to.global.u32 %parameter0, %i0;
ld.param.u64 %i0, [param_1];
cvta.to.global.u64 %parameter0, %i0;
st.global.f32 [%parameter0], 10.0;
ret;
}"
ptx = ".version 8.5
.target sm_61
.address_size 64
.visible .entry ExpressionProcessing(
.param .u64 param_1)
{
.reg .b64 %parameter<1>;
.reg .b32 %r<4>;
.reg .pred %p<1>;
.reg .b64 %i<1>;
ld.param.u64 %i0, [param_1];
cvta.to.global.u64 %parameter0, %i0;
mov.u32 %r0, %ntid.x;
mov.u32 %r1, %ctaid.x;
mov.u32 %r2, %tid.x;
mad.lo.s32 %r3, %r0, %r1, %r2;
setp.gt.s32 %p0, %r3, 2;
@%p0 bra \$L__BB0_2;
st.global.f32 [%parameter0], 10.0;
\$L__BB0_2: ret;
}"
linker = CuLink()
add_data!(linker, "ExpressionProcessing", ptx)
@ -90,11 +130,10 @@ end
threads = min(variableCols, config.threads)
blocks = cld(variableCols, threads)
cudacall(func, Tuple{CuPtr{Float32}}, cudaResults; threads=1, blocks=1)
cudacall(func, Tuple{CuPtr{Float32}}, cudaResults; threads=4, blocks=1)
# launch(func, cudaResults; threads=threads, blocks=blocks)
println(Array(cudaResults))
end
# TODO: University setup at 10.20.1.7