benchmarking: added benchmark4 interpreter results; extended evaluation section

2025-05-24 13:17:15 +02:00
parent 2bbdef6837
commit 5f44e4d122
7 changed files with 407 additions and 4 deletions
--- a/package/test/benchmarks/3/gpu_transpiler_yet_to_be_done.json
+++ b/package/test/benchmarks/3/gpu_transpiler_yet_to_be_done.json
@ -42,8 +42,8 @@
                                            }
                                        ],
                                        "times": [
-                                            --3.7202049569362e13,
+                                            3.7202049569362e13,
-                                            --3.7400159760069e13
+                                            3.7400159760069e13
                                        ]
                                    }
                                ]
--- a/package/test/benchmarks/4/gpu_transpiler_yet_to_be_done.json
+++ b/package/test/benchmarks/4/gpu_transpiler_yet_to_be_done.json
@ -0,0 +1,196 @@
 [
 	{
 		"Julia": "1.11.5",
 		"BenchmarkTools": {
 			"major": 1,
 			"minor": 6,
 			"patch": 0,
 			"prerelease": [],
 			"build": []
 		}
 	},
 	[
 		[
 			"BenchmarkGroup",
 			{
 				"data": {
 					"GPUT": [
 						"BenchmarkGroup",
 						{
 							"data": {
                                "nikuradse_1": [
                                    "Trial",
                                    {
                                        "allocs": 1534112879,
                                        "gctimes": [
                                            3.398826747854e12,
                                            2.618070795579e12
                                        ],
                                        "memory": 51380857328968,
                                        "params": [
                                            "Parameters",
                                            {
                                                "gctrial": true,
                                                "time_tolerance": 0.05,
                                                "evals_set": false,
                                                "samples": 50,
                                                "evals": 1,
                                                "gcsample": false,
                                                "seconds": 43200.0,
                                                "overhead": 0.0,
                                                "memory_tolerance": 0.01
                                            }
                                        ],
                                        "times": [
                                            3.7202049569362e13,
                                            3.7400159760069e13
                                        ]
                                    }
                                ]
                            },
 							"tags": [
 								"GPUTranspiler"
 							]
 						}
 					],
 					"GPUI": [
                        "BenchmarkGroup",
                        {
                            "data": {
                                "nikuradse_1": [
                                    "Trial",
                                    {
                                        "allocs": 32241320,
                                        "gctimes": [
                                            3.76843873e8,
                                            3.87520681e8,
                                            3.53674001e8,
                                            3.67061252e8,
                                            3.741527e8,
                                            3.69293996e8,
                                            3.63305802e8,
                                            3.61913634e8,
                                            3.51818682e8,
                                            3.48188601e8,
                                            3.62864887e8,
                                            3.47736729e8,
                                            3.50237523e8,
                                            3.53595403e8,
                                            3.51245475e8,
                                            3.57725399e8,
                                            3.48667085e8,
                                            3.5174771e8,
                                            3.50159541e8,
                                            3.57487652e8,
                                            3.61893033e8,
                                            3.67797485e8,
                                            3.44948035e8,
                                            3.50222654e8,
                                            3.36037781e8,
                                            3.50770955e8,
                                            3.48655148e8,
                                            3.46508038e8,
                                            3.48958873e8,
                                            4.49202169e8,
                                            3.53247995e8,
                                            3.71504213e8,
                                            3.5431637e8,
                                            3.59468716e8,
                                            3.46016454e8,
                                            3.69149583e8,
                                            3.65486404e8,
                                            4.45340687e8,
                                            4.37909167e8,
                                            3.3690913e8,
                                            3.50482929e8,
                                            3.49559472e8,
                                            3.38465639e8,
                                            3.44654417e8,
                                            3.49173998e8,
                                            3.50582847e8,
                                            3.55724581e8,
                                            3.4921611e8,
                                            3.55360179e8,
                                            3.48805235e8
                                        ],
                                        "memory": 45874227656,
                                        "params": [
                                            "Parameters",
                                            {
                                                "gctrial": true,
                                                "time_tolerance": 0.05,
                                                "evals_set": false,
                                                "samples": 50,
                                                "evals": 1,
                                                "gcsample": false,
                                                "seconds": 43200.0,
                                                "overhead": 0.0,
                                                "memory_tolerance": 0.01
                                            }
                                        ],
                                        "times": [
                                            3.07178374e10,
                                            3.0668015775e10,
                                            3.0731090373e10,
                                            3.0442775184e10,
                                            3.0456642482e10,
                                            3.0082122734e10,
                                            3.0126331654e10,
                                            3.0751723908e10,
                                            3.1179628532e10,
                                            3.0065663574e10,
                                            3.0464515622e10,
                                            3.0393855038e10,
                                            3.1635622751e10,
                                            3.0447222014e10,
                                            2.973601985e10,
                                            3.0033623194e10,
                                            3.0580015719e10,
                                            3.1400733412e10,
                                            3.0272328646e10,
                                            3.0223853837e10,
                                            2.9915814997e10,
                                            3.0818324531e10,
                                            3.0179331592e10,
                                            3.0293039282e10,
                                            3.0017377964e10,
                                            3.0087189496e10,
                                            3.0582174914e10,
                                            2.996325235e10,
                                            3.0134649182e10,
                                            3.1042223141e10,
                                            3.0007740363e10,
                                            3.0437426607e10,
                                            3.0810836436e10,
                                            3.1234163757e10,
                                            3.0221879009e10,
                                            3.0338940936e10,
                                            3.1233683944e10,
                                            3.1019897889e10,
                                            3.1380379599e10,
                                            2.9821214171e10,
                                            3.0882968215e10,
                                            3.0159994975e10,
                                            3.0309932542e10,
                                            2.9969275606e10,
                                            3.0447151474e10,
                                            3.0342592912e10,
                                            3.024330255e10,
                                            3.0258060029e10,
                                            3.0095601739e10,
                                            3.0209601692e10
                                        ]
                                    }
                                ]
                            },
                            "tags": [
                                "GPUInterpreter"
                            ]
                        }
                    ]
 				},
 				"tags": []
 			}
 		]
 	]
 ]
--- a/package/test/benchmarks/4/gpui_blocksize_192.json
+++ b/package/test/benchmarks/4/gpui_blocksize_192.json
@ -0,0 +1,196 @@
 [
 	{
 		"Julia": "1.11.5",
 		"BenchmarkTools": {
 			"major": 1,
 			"minor": 6,
 			"patch": 0,
 			"prerelease": [],
 			"build": []
 		}
 	},
 	[
 		[
 			"BenchmarkGroup",
 			{
 				"data": {
 					"GPUT": [
 						"BenchmarkGroup",
 						{
 							"data": {
                                "nikuradse_1": [
                                    "Trial",
                                    {
                                        "allocs": 1534112879,
                                        "gctimes": [
                                            3.398826747854e12,
                                            2.618070795579e12
                                        ],
                                        "memory": 51380857328968,
                                        "params": [
                                            "Parameters",
                                            {
                                                "gctrial": true,
                                                "time_tolerance": 0.05,
                                                "evals_set": false,
                                                "samples": 50,
                                                "evals": 1,
                                                "gcsample": false,
                                                "seconds": 43200.0,
                                                "overhead": 0.0,
                                                "memory_tolerance": 0.01
                                            }
                                        ],
                                        "times": [
                                            3.7202049569362e13,
                                            3.7400159760069e13
                                        ]
                                    }
                                ]
                            },
 							"tags": [
 								"GPUTranspiler"
 							]
 						}
 					],
 					"GPUI": [
                        "BenchmarkGroup",
                        {
                            "data": {
                                "nikuradse_1": [
                                    "Trial",
                                    {
                                        "allocs": 32241307,
                                        "gctimes": [
                                            2.99988451e8,
                                            3.18541335e8,
                                            3.40658917e8,
                                            3.20735576e8,
                                            3.17668135e8,
                                            3.11634185e8,
                                            3.55400831e8,
                                            3.25257947e8,
                                            3.25941878e8,
                                            3.31627658e8,
                                            3.2513644e8,
                                            5.34886621e8,
                                            4.30305899e8,
                                            4.75073379e8,
                                            5.41262095e8,
                                            5.14748243e8,
                                            4.91966069e8,
                                            4.55043676e8,
                                            4.70840046e8,
                                            5.50526217e8,
                                            4.31207494e8,
                                            4.76072811e8,
                                            5.04324319e8,
                                            5.72218216e8,
                                            4.11391335e8,
                                            4.73366047e8,
                                            5.12748251e8,
                                            4.58269866e8,
                                            3.87267173e8,
                                            5.38187011e8,
                                            4.56822334e8,
                                            4.24688896e8,
                                            5.94190171e8,
                                            5.28701852e8,
                                            5.15021748e8,
                                            6.10057318e8,
                                            4.74982584e8,
                                            4.33478296e8,
                                            4.33664662e8,
                                            4.22168618e8,
                                            4.16528265e8,
                                            4.15685104e8,
                                            4.23277232e8,
                                            3.74337751e8,
                                            4.25875703e8,
                                            5.42365157e8,
                                            4.94701466e8,
                                            4.83233782e8,
                                            4.24986417e8,
                                            4.8780606e8
                                        ],
                                        "memory": 45874227384,
                                        "params": [
                                            "Parameters",
                                            {
                                                "gctrial": true,
                                                "time_tolerance": 0.05,
                                                "evals_set": false,
                                                "samples": 50,
                                                "evals": 1,
                                                "gcsample": false,
                                                "seconds": 43200.0,
                                                "overhead": 0.0,
                                                "memory_tolerance": 0.01
                                            }
                                        ],
                                        "times": [
                                            3.055626804e10,
                                            3.0413771477e10,
                                            3.0058609633e10,
                                            3.007921294e10,
                                            3.0178903964e10,
                                            3.0243374529e10,
                                            3.0043488197e10,
                                            2.9849309299e10,
                                            3.0134058306e10,
                                            3.0627343705e10,
                                            3.0130179115e10,
                                            4.8987140933e10,
                                            1.0029494223e11,
                                            9.991837876e10,
                                            1.01083284461e11,
                                            1.00013926981e11,
                                            1.00050439359e11,
                                            1.00453826906e11,
                                            1.00398291414e11,
                                            1.0026599822e11,
                                            1.00645806674e11,
                                            9.9875971997e10,
                                            9.9612950384e10,
                                            1.00253673473e11,
                                            9.9643175894e10,
                                            1.0027620915e11,
                                            9.9714066248e10,
                                            1.00141668213e11,
                                            1.00269405678e11,
                                            1.00149909912e11,
                                            1.00645303739e11,
                                            9.9693734213e10,
                                            1.01986856167e11,
                                            1.00367529986e11,
                                            9.986664487e10,
                                            1.01112512248e11,
                                            9.9866828996e10,
                                            9.887153973e10,
                                            9.9119068947e10,
                                            9.9161506987e10,
                                            9.8659948079e10,
                                            9.9016722639e10,
                                            9.9226347837e10,
                                            9.9361219392e10,
                                            9.9532328849e10,
                                            9.9181660704e10,
                                            9.9525871099e10,
                                            9.877397928e10,
                                            9.8880425186e10,
                                            9.9195828801e10
                                        ]
                                    }
                                ]
                            },
                            "tags": [
                                "GPUInterpreter"
                            ]
                        }
                    ]
 				},
 				"tags": []
 			}
 		]
 	]
 ]
--- a/thesis/chapters/evaluation.tex
+++ b/thesis/chapters/evaluation.tex
@ -71,8 +71,10 @@ This section presents the results of the benchmarks described above. First the r
 \subsection{Interpreter}
 % Results only for Interpreter (also contains final kernel configuration and probably quick overview/recap of the implementation used and described in Implementation section)
-In this section, the results for the interpreter are presented in detail. ...
+In this section, the results for the GPU-based interpreter are presented in detail. Following the benchmark results, the process of tuning the interpreter is described, as well as how the tuning can be adapted to the different benchmarks. This part covers not only the tuning of the GPU, but also the performance improvements made on the CPU side.
 \subsubsection{Benchmark 1}
 The first benchmark consisted of $250\,000$ expressions and $362$ variable sets with $100$ parameter optimisation steps. Because each expression needs to be evaluated with each variable set for each parameter optimisation step, a total of $9.05\,\textit{billion}$ evaluations have been performed per sample. In Figure \ref{fig:gpu_i_benchmark_1} the result over all $50$ samples is presented. The median value across all executions is $466.3$ seconds with a standard deviation of $14.2$ seconds.
 \begin{figure}
 	\centering
 	\includegraphics[width=.9\textwidth]{results/gpu-interpreter-final-performance-benchmark1.png}
@ -80,14 +82,23 @@ In this section, the results for the interpreter are presented in detail. ...
 	\label{fig:gpu_i_benchmark_1}
 \end{figure}
 % talk about kernel configuration (along the lines of: results achieved with block size of X) etc. Also include that CPU and GPU utilisation was 100% the entire time. If this is too short, just add it to the above paragraph and make the 4 benchmark sections relatively short, as the most interesting information is in the performance tuning and comparison sections anyway
 \subsubsection{Benchmark 2}
 \subsubsection{Benchmark 3}
 std of 750.1 ms
 \begin{figure}
 	\centering
 	\includegraphics[width=.9\textwidth]{results/gpu-interpreter-final-performance-benchmark3.png}
 	\caption{The results of the GPU-based interpreter for benchmark 3}
 	\label{fig:gpu_i_benchmark_3}
 \end{figure}
 \subsubsection{Benchmark 4}
 \subsubsection{Performance Tuning} % either subsubSection or change the title to "Performance Tuning Interpreter"
-Document the process of performance tuning
+Document the process of performance tuning (mostly GPU, but also talk about CPU, especially the re-arranging of data transfer and the non-usage of a cache)
 Initial: no cache; 256 blocksize; exprs pre-processed and sent to GPU on every call; vars sent on every call; frontend + dispatch are multithreaded
--- a/thesis/images/results/gpu-interpreter-final-performance-benchmark1.png
+++ b/thesis/images/results/gpu-interpreter-final-performance-benchmark1.png
--- a/thesis/images/results/gpu-interpreter-final-performance-benchmark3.png
+++ b/thesis/images/results/gpu-interpreter-final-performance-benchmark3.png
--- a/thesis/main.pdf
+++ b/thesis/main.pdf