small updates and notes for further writing
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
This commit is contained in:
parent
ef721b13e0
commit
c62aff806a
2
LICENSE
2
LICENSE
|
@ -1,6 +1,6 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2024 Daniel Wiplinger
|
||||
Copyright (c) 2024 Daniel Roth
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
|
|
@ -27,7 +27,7 @@ function interpret_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector
|
|||
|
||||
results = Matrix{Float32}(undef, ncols, length(exprs))
|
||||
|
||||
for i in 1:repetitions # Simulate parameter tuning
|
||||
for i in 1:repetitions # Simulate parameter tuning -> local search (X remains the same, p gets changed in small steps and must be performed sequentially)
|
||||
results = Interpreter.interpret(exprs, X, p)
|
||||
end
|
||||
|
||||
|
@ -41,7 +41,7 @@ function evaluate_gpu(exprs::Vector{Expr}, X::Matrix{Float32}, p::Vector{Vector{
|
|||
|
||||
results = Matrix{Float32}(undef, ncols, length(exprs))
|
||||
|
||||
for i in 1:repetitions # Simulate parameter tuning
|
||||
for i in 1:repetitions # Simulate parameter tuning -> local search (X remains the same, p gets changed in small steps and must be performed sequentially)
|
||||
results = Transpiler.evaluate(exprs, X, p)
|
||||
end
|
||||
|
||||
|
|
|
@ -5,6 +5,10 @@ using .Transpiler
|
|||
using .Interpreter
|
||||
|
||||
const BENCHMARKS_RESULTS_PATH = "./results"
|
||||
|
||||
# TODO: Expressions can get much much bigger (into millions) (will be provided by Mr. Kronberger)
|
||||
# TODO: Variable-Sets: 1000 can be considered the minimum; 100.000 can be considered the maximum (will be provided by Mr. Kronberger)
|
||||
|
||||
exprsCPU = [
|
||||
# CPU interpreter requires an anonymous function and array refs
|
||||
:(p[1] * x[1] + p[2]), # 5 op
|
||||
|
@ -24,7 +28,7 @@ exprsGPU = [
|
|||
|
||||
# p is the same for CPU and GPU
|
||||
p = [randn(Float32, 10) for _ in 1:length(exprsCPU)] # generate 10 random parameter values for each expr
|
||||
expr_reps = 100 # 100 parameter optimisation steps basically
|
||||
expr_reps = 100 # 100 parameter optimisation steps (local search; sequentially; only p changes but not X)
|
||||
|
||||
|
||||
@testset "CPU performance" begin
|
||||
|
@ -89,15 +93,15 @@ if compareWithCPU
|
|||
suite["CPU"]["large varset"] = @benchmarkable interpret_cpu(exprsCPU, X_large, p; repetitions=expr_reps)
|
||||
end
|
||||
|
||||
X_small_GPU = randn(Float32, 5, varsets_small)
|
||||
X_small_GPU = randn(Float32, 5, varsets_small) # column-major
|
||||
suite["GPUI"]["small varset"] = @benchmarkable interpret_gpu(exprsGPU, X_small_GPU, p; repetitions=expr_reps)
|
||||
suite["GPUT"]["small varset"] = @benchmarkable evaluate_gpu(exprsGPU, X_small_GPU, p; repetitions=expr_reps)
|
||||
|
||||
X_medium_GPU = randn(Float32, 5, varsets_medium)
|
||||
X_medium_GPU = randn(Float32, 5, varsets_medium) # column-major
|
||||
suite["GPUI"]["medium varset"] = @benchmarkable interpret_gpu(exprsGPU, X_medium_GPU, p; repetitions=expr_reps)
|
||||
suite["GPUT"]["medium varset"] = @benchmarkable evaluate_gpu(exprsGPU, X_medium_GPU, p; repetitions=expr_reps)
|
||||
|
||||
X_large_GPU = randn(Float32, 5, varsets_large)
|
||||
X_large_GPU = randn(Float32, 5, varsets_large) # column-major
|
||||
suite["GPUI"]["large varset"] = @benchmarkable interpret_gpu(exprsGPU, X_large_GPU, p; repetitions=expr_reps)
|
||||
suite["GPUT"]["large varset"] = @benchmarkable evaluate_gpu(exprsGPU, X_large_GPU, p; repetitions=expr_reps)
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
using ExpressionExecutorCuda
|
||||
using Test
|
||||
|
||||
using BenchmarkTools
|
||||
|
||||
const baseFolder = dirname(dirname(pathof(ExpressionExecutorCuda)))
|
||||
include(joinpath(baseFolder, "src", "Utils.jl"))
|
||||
include(joinpath(baseFolder, "src", "ExpressionProcessing.jl"))
|
||||
|
@ -20,5 +22,5 @@ end
|
|||
|
||||
@testset "Performance tests" begin
|
||||
# include("PerformanceTuning.jl")
|
||||
include("PerformanceTests.jl")
|
||||
# include("PerformanceTests.jl")
|
||||
end
|
|
@ -1,3 +1,5 @@
|
|||
RE-READ to ensure that the concepts explain why this is done to improve performance and why this should be the "locally best" implementation (most of this should be in the implementation though)
|
||||
|
||||
\chapter{Concept and Design}
|
||||
\label{cha:conceptdesign}
|
||||
% introduction to what needs to be done. also clarify terms "Host" and "Device" here
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
somewhere in here explain why one kernel per expression and not one kernel for all expressions
|
||||
|
||||
Go into detail on why this implementation is tuned towards performance and why it should be the optimum in that regard
|
||||
|
||||
\section{Technologies}
|
||||
Short section; CUDA, PTX, Julia, CUDA.jl
|
||||
|
||||
|
|
BIN
thesis/main.pdf
BIN
thesis/main.pdf
Binary file not shown.
Loading…
Reference in New Issue
Block a user