master-thesis/package/test/CpuInterpreterTests.jl
Daniel 9df78ca72e
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
transpiler: invalid memory access error finally fixed
2025-03-27 22:32:24 +01:00

48 lines
2.0 KiB
Julia

using LinearAlgebra
using BenchmarkTools
"""
    test_cpu_interpreter(nrows; parallel = false)

Benchmark `interpret_cpu` on four fixed expressions over `nrows` rows of random
`Float32` data and print the estimated throughput next to the single-core
`LinearAlgebra.peakflops` figure.

# Arguments
- `nrows`: number of rows of the random input matrix (the matrix has 10 columns).

# Keywords
- `parallel`: when `true`, the workload is timed through the module-level `parallel`
  helper; otherwise through the module-level `single` helper.

# Returns
`true`, so the call can be placed directly inside `@test`.
"""
function test_cpu_interpreter(nrows; parallel = false)
    exprs = [
        # CPU interpreter requires an anonymous function and array references
        :(p[1] * x[1] + p[2]),                               # 5 op
        :((((x[1] + x[2]) + x[3]) + x[4]) + x[5]),           # 9 op
        :(log(abs(x[1]))),                                   # 3 op
        :(powabs(p[2] - powabs(p[1] + x[1], 1/x[1]),p[3]))   # 13 op
    ]                                                        # 30 op
    ops_per_row = 30 # sum of the per-expression op counts annotated above

    exprs = map(e -> Expr(:->, :(x,p), e), exprs)
    X = randn(Float32, nrows, 10)
    p = [randn(Float32, 10) for _ in eachindex(exprs)] # generate 10 random parameter values for each expr

    # warmup
    interpret_cpu(exprs, X, p)

    expr_reps = 100 # for each expr
    reps = 100

    # `@btime` only prints its measurement, which left `t_sec` undefined below;
    # `@belapsed` returns the minimum elapsed time in seconds instead.
    # Locals are `$`-interpolated because BenchmarkTools evaluates the benchmarked
    # expression at module scope. For the same reason the un-interpolated helper
    # names resolve to the module-level functions `parallel` / `single`, not to the
    # `parallel` keyword argument of this function.
    t_sec = if parallel
        @belapsed parallel($exprs, $X, $p, $expr_reps, $reps)
    else
        @belapsed single($exprs, $X, $p, $expr_reps, $reps)
    end

    gflops = round(ops_per_row * reps * expr_reps * nrows / 1e9 / t_sec, digits=2)
    peak = round(LinearAlgebra.peakflops(1000, eltype=Float32, ntrials=1) / 1e9, digits=2)
    mode = parallel ? "$(Threads.nthreads()) threads" : "single-core"
    println("~ $(gflops) GFLOPS ($(mode)) ($(peak) GFLOPS (peak, single-core))")

    return true
end
# Run `interpret_cpu` `reps` times, each call in its own spawned task, and
# return the fetched results of all tasks.
function parallel(exprs, X, p, expr_reps, reps)
    tasks = map(1:reps) do _
        Threads.@spawn interpret_cpu(exprs, X, p; repetitions=expr_reps)
    end
    return map(fetch, tasks)
end
# Run `interpret_cpu` `reps` times one after another on the calling task.
# The results of the individual calls are discarded.
function single(exprs, X, p, expr_reps, reps)
    foreach(1:reps) do _
        interpret_cpu(exprs, X, p; repetitions=expr_reps)
    end
    return nothing
end
# LinearAlgebra.BLAS.set_num_threads(1) # only use a single thread for peakflops

# Each problem size is run first sequentially, then in parallel.
# For the parallel runs start julia with multiple threads, e.g. `julia -t 6` for six threads.
for nrows in (1000, 10000), use_threads in (false, true)
    @test test_cpu_interpreter(nrows, parallel=use_threads)
end