transpiler: invalid memory access error finally fixed
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
This commit is contained in:
@ -53,7 +53,7 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
|
||||
cudaParams = Utils.create_cuda_array(parameters, NaN32) # maybe make constant (see runtests.jl for more info)
|
||||
|
||||
# each expression yields one result per variable set (i.e. per column of the variables matrix), and there are n expressions
|
||||
cudaResults = CuArray{Float32}(undef, variableCols, length(expressions))
|
||||
cudaResults = CuArray{Float32}(undef, variableCols * length(expressions))
|
||||
# cudaResults = CUDA.zeros(variableCols * length(expressions))
|
||||
# ptr = CuPtr{Float32}(C_NULL)
|
||||
# CUDA.cuMemAlloc(ptr, sizeof(Float32) * 10)
|
||||
@ -68,8 +68,9 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
|
||||
blocks = cld(variableCols, threads)
|
||||
|
||||
# cudacall(kernels[i], (CuPtr{Float32},CuPtr{Float32},CuPtr{Float32}), cudaVars, cudaParams, cudaResults; threads=threads, blocks=blocks)
|
||||
cudacall(kernels[i], (CuPtr{Float32},), cudaResults; threads=threads, blocks=blocks)
|
||||
# launch(kernels[i], cudaVars, cudaParams, cudaResults; threads=threads, blocks=blocks)
|
||||
launch(kernels[i], cudaResults; threads=threads, blocks=blocks)
|
||||
# launch(kernels[i], cudaResults; threads=threads, blocks=blocks)
|
||||
end
|
||||
|
||||
println(Array(cudaResults))
|
||||
@ -120,9 +121,9 @@ end
|
||||
# TODO: Make version, target and address_size configurable; also see what address_size means exactly
|
||||
function get_cuda_header()::String
|
||||
return "
|
||||
.version 7.1
|
||||
.version 8.5
|
||||
.target sm_61
|
||||
.address_size 32
|
||||
.address_size 64
|
||||
"
|
||||
end
|
||||
|
||||
@ -137,11 +138,11 @@ function get_kernel_signature(kernelName::String, parameters::Vector{DataType}):
|
||||
println(signatureBuffer, "(")
|
||||
|
||||
for i in eachindex(parameters)
|
||||
print(signatureBuffer, " .param .u32", " ", "param_", i)
|
||||
print(signatureBuffer, " .param .u64", " ", "param_", i)
|
||||
|
||||
parametersLocation = get_next_free_register("i")
|
||||
println(paramLoadingBuffer, "ld.param.u32 $parametersLocation, [param_$i];")
|
||||
println(paramLoadingBuffer, "cvta.to.global.u32 $(get_next_free_register("parameter")), $parametersLocation;")
|
||||
println(paramLoadingBuffer, "ld.param.u64 $parametersLocation, [param_$i];")
|
||||
println(paramLoadingBuffer, "cvta.to.global.u64 $(get_next_free_register("parameter")), $parametersLocation;")
|
||||
if i != lastindex(parameters)
|
||||
println(signatureBuffer, ",")
|
||||
end
|
||||
@ -169,12 +170,12 @@ function get_guard_clause(exitJumpLocation::String, nrOfVarSets::Integer)::Tuple
|
||||
println(guardBuffer, "mov.u32 $currentThreadId, %tid.x;")
|
||||
|
||||
globalThreadId = get_next_free_register("r") # basically the index of the thread in the variable set
|
||||
# breakCondition = get_next_free_register("p")
|
||||
breakCondition = get_next_free_register("p")
|
||||
println(guardBuffer, "mad.lo.s32 $globalThreadId, $threadIds, $threadsPerCTA, $currentThreadId;")
|
||||
# println(guardBuffer, "setp.ge.s32 $breakCondition, $globalThreadId, $nrOfVarSets;") # guard clause = index >= nrOfVariableSets
|
||||
println(guardBuffer, "setp.gt.s32 $breakCondition, $globalThreadId, $nrOfVarSets;") # guard clause = index > nrOfVariableSets
|
||||
|
||||
# branch to end if breakCondition is true
|
||||
# print(guardBuffer, "@$breakCondition bra $exitJumpLocation;")
|
||||
print(guardBuffer, "@$breakCondition bra $exitJumpLocation;")
|
||||
|
||||
return (String(take!(guardBuffer)), globalThreadId)
|
||||
end
|
||||
@ -186,7 +187,7 @@ function generate_calculation_code(expression::ExpressionProcessing.PostfixType,
|
||||
parametersLocation::String, parametersSetSize::Integer, resultsLocation::String,
|
||||
threadIdReg::String, expressionIndex::Integer, nrOfVarSets::Integer)::String
|
||||
|
||||
return "st.global.f32 [$resultsLocation], 10.0;"
|
||||
# return "st.global.f32 [$resultsLocation], 10.0;"
|
||||
|
||||
codeBuffer = IOBuffer()
|
||||
operands = Vector{Operand}()
|
||||
@ -360,9 +361,9 @@ let registers = Dict() # stores the count of the register already used.
|
||||
elseif definition.first == "r"
|
||||
regType = ".b32"
|
||||
elseif definition.first == "parameter"
|
||||
regType = ".u32"
|
||||
regType = ".b64"
|
||||
elseif definition.first == "i"
|
||||
regType = ".u32"
|
||||
regType = ".b64"
|
||||
else
|
||||
throw(ArgumentError("Unknown register name used. Name '$(definition.first)' cannot be mapped to a PTX type."))
|
||||
end
|
||||
|
Reference in New Issue
Block a user