75 lines
2.9 KiB
Julia
75 lines
2.9 KiB
Julia
using LinearAlgebra
|
|
using BenchmarkTools
|
|
using DelimitedFiles
|
|
using GZip
|
|
|
|
include("parser.jl") # to parse expressions from a file
|
|
|
|
"""
    test_cpu_interpreter(nrows; parallel = false)

Benchmark `interpret_cpu` on four small expressions evaluated over an
`nrows × 10` matrix of random `Float32` values, and print an approximate
GFLOPS figure next to the value reported by `LinearAlgebra.peakflops`.

# Arguments
- `nrows`: number of rows of the random input matrix.

# Keywords
- `parallel`: when `true`, the batches are run through the module-level
  `parallel` helper (one `Threads.@spawn` task per batch); otherwise they are
  run one after another through `single`.

# Returns
`true`, so that the call can be used directly as the argument of `@test`.
"""
function test_cpu_interpreter(nrows; parallel = false)
    exprs = [
        # CPU interpreter requires an anonymous function and array refs
        :(p[1] * x[1] + p[2]), # 5 op
        :((((x[1] + x[2]) + x[3]) + x[4]) + x[5]), # 9 op
        :(log(abs(x[1]))), # 3 op
        :(powabs(p[2] - powabs(p[1] + x[1], 1/x[1]),p[3])) # 13 op
    ] # 30 op
    exprs = map(e -> Expr(:->, :(x,p), e), exprs)
    X = randn(Float32, nrows, 10)
    p = [randn(Float32, 10) for _ in 1:length(exprs)] # generate 10 random parameter values for each expr

    # warmup
    interpret_cpu(exprs, X, p)
    expr_reps = 100 # for each expr
    reps = 100

    # Inside this function the keyword `parallel` (a Bool) shadows the
    # module-level function of the same name, so that function is looked up
    # through the module explicitly.
    runner = parallel ? getfield(@__MODULE__, :parallel) : single

    # BenchmarkTools evaluates the benchmarked expression at module scope, so
    # every local has to be interpolated with `\$`. `@belapsed` yields the
    # minimum sample time in seconds, which is what the GFLOPS estimate needs
    # (`@btime` only prints the time and returns the expression's value).
    t_sec = @belapsed $runner($exprs, $X, $p, $expr_reps, $reps)

    # 30 = total operation count of the four expressions above
    gflops = round(30 * reps * expr_reps * nrows / 1e9 / t_sec, digits=2)
    peak = round(LinearAlgebra.peakflops(1000, eltype=Float32, ntrials=1) / 1e9, digits=2)
    if parallel
        println("~ $(gflops) GFLOPS ($(Threads.nthreads()) threads) ($(peak) GFLOPS (peak, single-core))")
    else
        println("~ $(gflops) GFLOPS (single-core) ($(peak) GFLOPS (peak, single-core))")
    end
    return true
end
|
|
|
|
"""
    parallel(exprs, X, p, expr_reps, reps)

Start `reps` tasks with `Threads.@spawn`, each calling
`interpret_cpu(exprs, X, p; repetitions=expr_reps)`, then fetch every task
and return the collected results.
"""
function parallel(exprs, X, p, expr_reps, reps)
    # Spawn every task first so they can run concurrently, then collect.
    tasks = map(1:reps) do _
        Threads.@spawn interpret_cpu(exprs, X, p; repetitions=expr_reps)
    end
    return map(fetch, tasks)
end
|
|
|
|
"""
    single(exprs, X, p, expr_reps, reps)

Call `interpret_cpu(exprs, X, p; repetitions=expr_reps)` sequentially `reps`
times, discarding the results. Returns `nothing`.
"""
function single(exprs, X, p, expr_reps, reps)
    foreach(1:reps) do _
        interpret_cpu(exprs, X, p; repetitions=expr_reps)
    end
    return nothing
end
|
|
|
|
|
|
# LinearAlgebra.BLAS.set_num_threads(1) # only use a single thread for peakflops
|
|
|
|
# Run the benchmark for both problem sizes, first sequentially and then with tasks.
# For the threaded runs, start julia with e.g. `-t 6` for six threads.
for nrows in (1000, 10000), use_threads in (false, true)
    @test test_cpu_interpreter(nrows; parallel=use_threads)
end
|
|
|
|
|
|
"""
    test_cpu_interpreter_nikuradse()

Evaluate every expression listed in `data/esr_nvar2_len10.txt.gz_9.txt.gz`
with `interpret_cpu`, using the numeric contents of `data/nikuradse_1.csv`
(converted to `Float32`) as input and freshly drawn `randn(Float32, …)` values
as parameters for each expression.

Returns the value returned by `interpret_cpu`.
"""
function test_cpu_interpreter_nikuradse()
    # Only the numeric body of the CSV is needed; the header row is discarded
    # because the variable names handed to the parser are generated below.
    data, _ = readdlm("data/nikuradse_1.csv", ',', header=true)
    X = convert(Matrix{Float32}, data)

    exprs = Expr[]
    parameters = Vector{Vector{Float32}}()
    varnames = ["x$i" for i in 1:10]
    paramnames = ["p$i" for i in 1:20]
    # data/esr_nvar2_len10.txt.gz_9.txt.gz has ~250_000 exprs
    # data/esr_nvar2_len10.txt.gz_10.txt.gz has ~800_000 exprs
    GZip.open("data/esr_nvar2_len10.txt.gz_9.txt.gz") do io
        for line in eachline(io)
            # One expression per line; `p` is only used for its length, i.e.
            # how many parameter values to draw for this expression.
            expr, p = parse_infix(line, varnames, paramnames)

            push!(exprs, expr)
            push!(parameters, randn(Float32, length(p)))
        end
    end

    # TODO: sufficient to do up to 10 repetitions per expression
    return interpret_cpu(exprs, X, parameters)
end