benchmarking: tuned interpreter blocksize
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
This commit is contained in:
@ -1,30 +1,38 @@
|
||||
using CUDA
|
||||
using DelimitedFiles
|
||||
using GZip
|
||||
|
||||
using .Transpiler
|
||||
using .Interpreter
|
||||
|
||||
varsets_medium = 10000
|
||||
X = randn(Float32, 5, varsets_medium)
|
||||
include("parser.jl") # to parse expressions from a file
|
||||
|
||||
exprsGPU = [
|
||||
# CPU interpreter requires an anonymous function and array ref s
|
||||
:(p1 * x1 + p2), # 5 op
|
||||
:((((x1 + x2) + x3) + x4) + x5), # 9 op
|
||||
:(log(abs(x1))), # 3 op
|
||||
:(powabs(p2 - powabs(p1 + x1, 1/x1),p3)) # 13 op
|
||||
] # 30 op
|
||||
|
||||
# p is the same for CPU and GPU
|
||||
p = [randn(Float32, 10) for _ in 1:length(exprsGPU)] # generate 10 random parameter values for each expr
|
||||
data,varnames = readdlm("data/nikuradse_1.csv", ',', header=true);
|
||||
X = permutedims(convert(Matrix{Float32}, data))
|
||||
|
||||
exprs = Expr[]
|
||||
parameters = Vector{Vector{Float32}}()
|
||||
varnames = ["x$i" for i in 1:10]
|
||||
paramnames = ["p$i" for i in 1:20]
|
||||
# data/esr_nvar2_len10.txt.gz_9.txt.gz has ~250_000 exprs
|
||||
# data/esr_nvar2_len10.txt.gz_10.txt.gz has ~800_000 exrps
|
||||
GZip.open("data/esr_nvar2_len10.txt.gz_3.txt.gz") do io
|
||||
for line in eachline(io)
|
||||
expr, p = parse_infix(line, varnames, paramnames)
|
||||
|
||||
push!(exprs, expr)
|
||||
push!(parameters, randn(Float32, length(p)))
|
||||
end
|
||||
end
|
||||
expr_reps = 1
|
||||
|
||||
|
||||
|
||||
@testset "Interpreter Tuning" begin
|
||||
CUDA.@profile interpret_gpu(exprsGPU, X, p; repetitions=expr_reps)
|
||||
# CUDA.@profile interpret_gpu(exprs, X, parameters; repetitions=expr_reps)
|
||||
end
|
||||
|
||||
|
||||
@testset "Transpiler Tuning" begin
|
||||
CUDA.@profile evaluate_gpu(exprsGPU, X, p; repetitions=expr_reps)
|
||||
CUDA.@profile evaluate_gpu(exprs, X, parameters; repetitions=expr_reps)
|
||||
end
|
Reference in New Issue
Block a user