Related Work: finished equation learning section; started GPGPU section
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
Some checks are pending
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Waiting to run
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Waiting to run
This commit is contained in:
@ -392,3 +392,86 @@ Publisher: Multidisciplinary Digital Publishing Institute},
|
||||
keywords = {Statistics - Methodology},
|
||||
file = {Preprint PDF:C\:\\Users\\danwi\\Zotero\\storage\\3MP48UI3\\Jin et al. - 2020 - Bayesian Symbolic Regression.pdf:application/pdf;Snapshot:C\:\\Users\\danwi\\Zotero\\storage\\UNNZKPRJ\\1910.html:text/html},
|
||||
}
|
||||
|
||||
@inproceedings{winter_are_2021,
  location   = {New York, {NY}, {USA}},
  title      = {Are dynamic memory managers on {GPUs} slow? a survey and benchmarks},
  shorttitle = {Are dynamic memory managers on {GPUs} slow?},
  isbn       = {978-1-4503-8294-6},
  doi        = {10.1145/3437801.3441612},
  series     = {{PPoPP} '21},
  abstract   = {Dynamic memory management on {GPUs} is generally understood to be a challenging topic. On current {GPUs}, hundreds of thousands of threads might concurrently allocate new memory or free previously allocated memory. This leads to problems with thread contention, synchronization overhead and fragmentation. Various approaches have been proposed in the last ten years and we set out to evaluate them on a level playing field on modern hardware to answer the question, if dynamic memory managers are as slow as commonly thought of. In this survey paper, we provide a consistent framework to evaluate all publicly available memory managers in a large set of scenarios. We summarize each approach and thoroughly evaluate allocation performance (thread-based as well as warp-based), and look at performance scaling, fragmentation and real-world performance considering a synthetic workload as well as updating dynamic graphs. We discuss the strengths and weaknesses of each approach and provide guidelines for the respective best usage scenario. We provide a unified interface to integrate any of the tested memory managers into an application and switch between them for benchmarking purposes. Given our results, we can dispel some of the dread associated with dynamic memory managers on the {GPU}.},
  pages      = {219--233},
  booktitle  = {Proceedings of the 26th {ACM} {SIGPLAN} Symposium on Principles and Practice of Parallel Programming},
  publisher  = {Association for Computing Machinery},
  author     = {Winter, Martin and Parger, Mathias and Mlakar, Daniel and Steinberger, Markus},
  date       = {2021-02-17},
}
|
||||
|
||||
@article{bartlett_exhaustive_2024,
  title        = {Exhaustive Symbolic Regression},
  volume       = {28},
  issn         = {1941-0026},
  url          = {https://ieeexplore.ieee.org/abstract/document/10136815},
  doi          = {10.1109/TEVC.2023.3280250},
  abstract     = {Symbolic regression ({SR}) algorithms attempt to learn analytic expressions which fit data accurately and in a highly interpretable manner. Conventional {SR} suffers from two fundamental issues which we address here. First, these methods search the space stochastically (typically using genetic programming) and hence do not necessarily find the best function. Second, the criteria used to select the equation optimally balancing accuracy with simplicity have been variable and subjective. To address these issues we introduce exhaustive {SR} ({ESR}), which systematically and efficiently considers all possible equations—made with a given basis set of operators and up to a specified maximum complexity—and is therefore guaranteed to find the true optimum (if parameters are perfectly optimized) and a complete function ranking subject to these constraints. We implement the minimum description length principle as a rigorous method for combining these preferences into a single objective. To illustrate the power of {ESR} we apply it to a catalog of cosmic chronometers and the Pantheon+ sample of supernovae to learn the Hubble rate as a function of redshift, finding 40 functions (out of 5.2 million trial functions) that fit the data more economically than the Friedmann equation. These low-redshift data therefore do not uniquely prefer the expansion history of the standard model of cosmology. We make our code and full equation sets publicly available.},
  pages        = {950--964},
  number       = {4},
  journaltitle = {{IEEE} Transactions on Evolutionary Computation},
  author       = {Bartlett, Deaglan J. and Desmond, Harry and Ferreira, Pedro G.},
  urldate      = {2025-02-28},
  date         = {2024-08},
  keywords     = {Optimization, Complexity theory, Mathematical models, Biological system modeling, Cosmology data analysis, minimum description length, model selection, Numerical models, Search problems, Standards, symbolic regression ({SR})},
  file         = {Eingereichte Version:C\:\\Users\\danwi\\Zotero\\storage\\Y6LFWDH2\\Bartlett et al. - 2024 - Exhaustive Symbolic Regression.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\2HU5A8RL\\10136815.html:text/html},
}
|
||||
|
||||
@inproceedings{dokken_gpu_2005,
  location  = {New York, {NY}, {USA}},
  title     = {The {GPU} as a high performance computational resource},
  isbn      = {978-1-59593-204-4},
  doi       = {10.1145/1090122.1090126},
  series    = {{SCCG} '05},
  abstract  = {With the introduction in 2003 of standard {GPUs} with 32 bit floating point numbers and programmable Vertex and Fragment processors, the processing power of the {GPU} was made available to non-graphics applications. As the {GPU} is aimed at computer graphics, the concepts in {GPU}-programming are based on computer graphics terminology, and the strategies for programming have to be based on the architecture of the graphics pipeline. At {SINTEF} in Norway a 4-year strategic institute project (2004-2007) "Graphics hardware as a high-end computational resource", http://www.math.sintef.no/gpu/ aims at making {GPUs} available as a computational resource both to academia and industry. This paper addresses the challenges of {GPU}-programming and results of the project's first year.},
  pages     = {21--26},
  booktitle = {Proceedings of the 21st Spring Conference on Computer Graphics},
  publisher = {Association for Computing Machinery},
  author    = {Dokken, Tor and Hagen, Trond R. and Hjelmervik, Jon M.},
  date      = {2005-05-12},
}
|
||||
|
||||
@inproceedings{huang_gpu_2008,
  title      = {{GPU} as a General Purpose Computing Resource},
  url        = {https://ieeexplore.ieee.org/abstract/document/4710975},
  doi        = {10.1109/PDCAT.2008.38},
  abstract   = {In the last few years, {GPUs}(Graphics Processing Units) have made rapid development. Their ever-increasing computing power and decreasing cost have attracted attention from both industry and academia. In addition to graphics applications, researchers are interested in using them for general purpose computing. Recently, {NVIDIA} released a new computing architecture, {CUDA} (compute united device architecture), for its {GeForce} 8 series, Quadro {FX}, and Tesla {GPU} products. This new architecture can change fundamentally the way in which {GPUs} are used. In this paper, we study the programmability of {CUDA} and its {GeForce} 8 {GPU} and compare its performance with general purpose processors, in order to investigate its suitability for general purpose computation.},
  eventtitle = {2008 Ninth International Conference on Parallel and Distributed Computing, Applications and Technologies},
  pages      = {151--158},
  booktitle  = {2008 Ninth International Conference on Parallel and Distributed Computing, Applications and Technologies},
  author     = {Huang, Qihang and Huang, Zhiyi and Werstein, Paul and Purvis, Martin},
  urldate    = {2025-03-01},
  date       = {2008-12},
  note       = {{ISSN}: 2379-5352},
  keywords   = {Application software, Central Processing Unit, Computer architecture, Computer graphics, Distributed computing, Grid computing, Multicore processing, Pipelines, Programming profession, Rendering (computer graphics)},
  file       = {IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\2FJP9K25\\references.html:text/html},
}
|
||||
|
||||
@article{han_hicuda_2011,
  title        = {{hiCUDA}: High-Level {GPGPU} Programming},
  shorttitle   = {{hiCUDA}},
  volume       = {22},
  url          = {https://ieeexplore.ieee.org/abstract/document/5445082},
  abstract     = {Graphics Processing Units ({GPUs}) have become a competitive accelerator for applications outside the graphics domain, mainly driven by the improvements in {GPU} programmability. Although the Compute Unified Device Architecture ({CUDA}) is a simple C-like interface for programming {NVIDIA} {GPUs}, porting applications to {CUDA} remains a challenge to average programmers. In particular, {CUDA} places on the programmer the burden of packaging {GPU} code in separate functions, of explicitly managing data transfer between the host and {GPU} memories, and of manually optimizing the utilization of the {GPU} memory. Practical experience shows that the programmer needs to make significant code changes, often tedious and error-prone, before getting an optimized program. We have designed {hiCUDA}},
  pages        = {78--90},
  number       = {1},
  journaltitle = {{IEEE} Transactions on Parallel and Distributed Systems},
  author       = {Han, Tianyi David and Abdelrahman, Tarek S.},
  urldate      = {2025-03-01},
  date         = {2011},
  file         = {IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\5K63T7RB\\5445082.html:text/html},
}
|
||||
|
Reference in New Issue
Block a user