evaluation: found that benchmark 2 can't be executed by any implementation due to RAM constraints
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run

This commit is contained in:
2025-05-24 16:58:35 +02:00
parent 5f44e4d122
commit 14b2e23d9a
5 changed files with 13 additions and 13 deletions

View File

@@ -24,7 +24,7 @@ function interpret(cudaExprs, numExprs::Integer, exprsInnerLength::Integer,
cudaResults = CuArray{Float32}(undef, variableColumns, numExprs)
# Start kernel for each expression to ensure that no warp is working on different expressions
numThreads = min(variableColumns, 121)
numThreads = min(variableColumns, 128)
numBlocks = cld(variableColumns, numThreads)
Threads.@threads for i in 1:numExprs # multithreaded to speedup dispatching (seems to have improved performance)