evaluation: found that benchmark 2 can't be executed by any implementation due to RAM constraints
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
This commit is contained in:
@ -16,7 +16,7 @@ export interpret_gpu,interpret_cpu
|
||||
export evaluate_gpu
|
||||
|
||||
# Some assertions:
|
||||
# Variables and parameters start their naming with "1" meaning the first variable/parameter has to be "x1/p1" and not "x0/p0"
|
||||
# Variables and parameters start their indexing with "1" meaning the first variable/parameter has to be "x1/p1" and not "x0/p0"
|
||||
# Matrix X is column major
|
||||
# each index i in exprs has to have the matching values in the column i in Matrix X so that X[:,i] contains the values for expr[i]. The same goes for p
|
||||
# This assertion is made, because in julia, the first index doesn't have to be 1
|
||||
@ -109,14 +109,4 @@ function interpret_cpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector
|
||||
res
|
||||
end
|
||||
|
||||
# Flow
|
||||
# input: Vector expr == expressions, containing e.g. 4 expressions
|
||||
# Matrix X == |expr| columns, n rows. n == number of variables x1..xn; n is the same for all expressions --- WRONG
|
||||
# Matrix X == k columns, n rows. k == number of variables in the expressions (every expression must have the same number of variables); n == number of different values for xk where k is the column
|
||||
# VectorVector p == vector size |expr| containing vector size m. m == number of parameters per expression. p can be different for each expression
|
||||
#
|
||||
# The following can be done on the CPU
|
||||
# convert expression to postfix notation (mandatory)
|
||||
# optional: replace every parameter with the correct value (should only improve performance if data transfer is the bottleneck)
|
||||
|
||||
end
|
||||
|
@ -24,7 +24,7 @@ function interpret(cudaExprs, numExprs::Integer, exprsInnerLength::Integer,
|
||||
cudaResults = CuArray{Float32}(undef, variableColumns, numExprs)
|
||||
|
||||
# Start kernel for each expression to ensure that no warp is working on different expressions
|
||||
numThreads = min(variableColumns, 121)
|
||||
numThreads = min(variableColumns, 128)
|
||||
numBlocks = cld(variableColumns, numThreads)
|
||||
|
||||
Threads.@threads for i in 1:numExprs # multithreaded to speedup dispatching (seems to have improved performance)
|
||||
|
@ -19,7 +19,7 @@ function evaluate(expressions::Vector{ExpressionProcessing.PostfixType}, cudaVar
|
||||
# each expression has nr. of variable sets (nr. of columns of the variables) results and there are n expressions
|
||||
cudaResults = CuArray{Float32}(undef, variableColumns, length(expressions))
|
||||
|
||||
threads = min(variableColumns, 256)
|
||||
threads = min(variableColumns, 128)
|
||||
blocks = cld(variableColumns, threads)
|
||||
|
||||
kernelName = "evaluate_gpu"
|
||||
|
Reference in New Issue
Block a user