using CUDA
using .Transpiler
using .Interpreter

# Profiling ("tuning") script: runs the GPU interpreter and the GPU transpiler
# once each under `CUDA.@profile` on a small, fixed set of expressions.
# The test sets below contain no `@test` assertions; they only execute the
# evaluators so that profiler output can be inspected.

# Number of variable sets (columns of `X`) used for this medium-sized run.
varsets_medium = 1000

# Random Float32 input matrix with 5 rows and `varsets_medium` columns.
# NOTE(review): presumably one row per variable x1..x5 -- confirm against the evaluators.
X = randn(Float32, 5, varsets_medium)

# Expressions to evaluate on the GPU. The trailing "op" figures are the
# per-expression operation counts noted by the author (30 in total).
# The CPU interpreter requires an anonymous function and array refs instead,
# which is why these are kept as a separate, GPU-specific list.
exprsGPU = Expr[
    :(p1 * x1 + p2),                                # 5 op
    :((((x1 + x2) + x3) + x4) + x5),                # 9 op
    :(log(abs(x1))),                                # 3 op
    :(powabs(p2 - powabs(p1 + x1, 1 / x1), p3)),    # 13 op
]

# One vector of 10 random Float32 parameter values per expression.
# `p` is the same for the CPU and the GPU runs.
p = [randn(Float32, 10) for _ in eachindex(exprsGPU)]

# How many times each evaluator repeats the evaluation (passed as `repetitions`).
expr_reps = 1

@testset "Interpreter Tuning" begin
    CUDA.@profile interpret_gpu(exprsGPU, X, p; repetitions = expr_reps)
end

@testset "Transpiler Tuning" begin
    CUDA.@profile evaluate_gpu(exprsGPU, X, p; repetitions = expr_reps)
end