benchmarking: prepared tests for using actual data
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
This commit is contained in:
@ -56,8 +56,6 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
|
||||
formattedExpr = ExpressionProcessing.expr_to_postfix(expressions[i], cacheFrontend)
|
||||
kernel = transpile(formattedExpr, varRows, Utils.get_max_inner_length(parameters), variableCols, i-1) # i-1 because julia is 1-based but PTX needs 0-based indexing
|
||||
|
||||
# println(kernel)
|
||||
|
||||
linker = CuLink()
|
||||
add_data!(linker, "ExpressionProcessing", kernel)
|
||||
|
||||
@ -77,7 +75,7 @@ function evaluate(expressions::Vector{Expr}, variables::Matrix{Float32}, paramet
|
||||
# execute each kernel (also try doing this with Threads.@threads; since we can have multiple grids, this might improve performance)
|
||||
for kernel in kernels
|
||||
# config = launch_configuration(kernels[i])
|
||||
threads = min(variableCols, 96)
|
||||
threads = min(variableCols, 256)
|
||||
blocks = cld(variableCols, threads)
|
||||
|
||||
cudacall(kernel, (CuPtr{Float32},CuPtr{Float32},CuPtr{Float32}), cudaVars, cudaParams, cudaResults; threads=threads, blocks=blocks)
|
||||
@ -99,7 +97,7 @@ function transpile(expression::ExpressionProcessing.PostfixType, varSetSize::Int
|
||||
ptxBuffer = IOBuffer()
|
||||
regManager = Utils.RegisterManager(Dict(), Dict())
|
||||
|
||||
# TODO: Suboptimal solution
|
||||
# TODO: Suboptimal solution. get_kernel_signature should also return the names of the registers used for the parameters, so that we do not have to hard-code them further below
|
||||
signature, paramLoading = get_kernel_signature("ExpressionProcessing", [Float32, Float32, Float32], regManager) # Vars, Params, Results
|
||||
guardClause, threadId64Reg = get_guard_clause(exitJumpLocationMarker, nrOfVariableSets, regManager)
|
||||
|
||||
@ -123,7 +121,7 @@ function transpile(expression::ExpressionProcessing.PostfixType, varSetSize::Int
|
||||
return generatedCode
|
||||
end
|
||||
|
||||
# TODO: Make version, target and address_size configurable; also see what address_size means exactly
|
||||
# TODO: Make version, target and address_size configurable
|
||||
function get_cuda_header()::String
|
||||
return "
|
||||
.version 8.5
|
||||
|
Reference in New Issue
Block a user