% master-thesis/thesis/references.bib
% NOTE(review): the exported file was flagged for invisible/confusable Unicode
% characters. Several fields lost apostrophes in export (e.g. "platforms" for
% "platform's", "ONeill" for "O'Neill"); check remaining entries for similar damage.

@article{besard_rapid_2019,
title = {Rapid software prototyping for heterogeneous and distributed platforms},
volume = {132},
issn = {0965-9978},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0965997818310123},
doi = {10.1016/j.advengsoft.2019.02.002},
pages = {29--46},
journaltitle = {Advances in Engineering Software},
shortjournal = {Advances in Engineering Software},
author = {Besard, Tim and Churavy, Valentin and Edelman, Alan and De Sutter, Bjorn},
urldate = {2024-11-22},
date = {2019-06},
langid = {english},
file = {Volltext:C\:\\Users\\danwi\\Zotero\\storage\\VNWQAR9Q\\Besard et al. - 2019 - Rapid software prototyping for heterogeneous and distributed platforms.pdf:application/pdf},
}
@article{besard_effective_2019,
title = {Effective Extensible Programming: Unleashing {Julia} on {GPUs}},
volume = {30},
rights = {https://ieeexplore.ieee.org/Xplorehelp/downloads/license-information/IEEE.html},
issn = {1045-9219, 1558-2183, 2161-9883},
url = {https://ieeexplore.ieee.org/document/8471188/},
doi = {10.1109/TPDS.2018.2872064},
shorttitle = {Effective Extensible Programming},
pages = {827--841},
number = {4},
journaltitle = {{IEEE} Transactions on Parallel and Distributed Systems},
shortjournal = {{IEEE} Trans. Parallel Distrib. Syst.},
author = {Besard, Tim and Foket, Christophe and De Sutter, Bjorn},
urldate = {2024-11-22},
date = {2019-04-01},
file = {Eingereichte Version:C\:\\Users\\danwi\\Zotero\\storage\\T34I73BI\\Besard et al. - 2019 - Effective Extensible Programming Unleashing Julia on GPUs.pdf:application/pdf},
}
@inproceedings{lin_comparing_2021,
location = {St. Louis, {MO}, {USA}},
title = {Comparing {Julia} to Performance Portable Parallel Programming Models for {HPC}},
rights = {https://ieeexplore.ieee.org/Xplorehelp/downloads/license-information/IEEE.html},
isbn = {978-1-6654-1118-9},
url = {https://ieeexplore.ieee.org/document/9652798/},
doi = {10.1109/PMBS54543.2021.00016},
eventtitle = {2021 International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems ({PMBS})},
pages = {94--105},
booktitle = {2021 International Workshop on Performance Modeling, Benchmarking and Simulation of High Performance Computer Systems ({PMBS})},
publisher = {{IEEE}},
author = {Lin, Wei-Chen and McIntosh-Smith, Simon},
urldate = {2024-11-22},
date = {2021-11},
file = {Eingereichte Version:C\:\\Users\\danwi\\Zotero\\storage\\U6EQPD62\\Lin und McIntosh-Smith - 2021 - Comparing Julia to Performance Portable Parallel Programming Models for HPC.pdf:application/pdf},
}
@online{nvidia_cuda_2024,
author  = {{Nvidia}},
title   = {{CUDA} C++ Programming Guide},
date    = {2024-11},
url     = {https://docs.nvidia.com/cuda/cuda-c-programming-guide/},
urldate = {2024-11-22},
}
@article{koster_massively_2020,
title = {Massively Parallel Rule-Based Interpreter Execution on {GPUs} Using Thread Compaction},
volume = {48},
issn = {1573-7640},
url = {https://doi.org/10.1007/s10766-020-00670-2},
doi = {10.1007/s10766-020-00670-2},
abstract = {Interpreters are well researched in the field of compiler construction and program generation. They are typically used to realize program execution of different programming languages without a compilation step. However, they can also be used to model complex rule-based simulations: The interpreter applies all rules one after another. These can be iteratively applied on a globally updated state in order to get the final simulation result. Many simulations for domain-specific problems already leverage the parallel processing capabilities of Graphics Processing Units ({GPUs}). They use hardware-specific tuned rule implementations to achieve maximum performance. However, every interpreter-based system requires a high-level algorithm that detects active rules and determines when they are evaluated. A common approach in this context is the use of different interpreter routines for every problem domain. Executing such functions in an efficient way mainly involves dealing with hardware peculiarities like thread divergences, {ALU} computations and memory operations. Furthermore, the interpreter is often executed on multiple states in parallel these days. This is particularly important for heuristic search or what-if analyses, for instance. In this paper, we present a novel and easy-to-implement method based on thread compaction to realize generic rule-based interpreters in an efficient way on {GPUs}. It is optimized for many states using a specially designed memory layout. Benchmarks on our evaluation scenarios show that the performance can be significantly increased in comparison to existing commonly-used implementations.},
pages = {675--691},
number = {4},
journaltitle = {International Journal of Parallel Programming},
shortjournal = {Int J Parallel Prog},
author = {Köster, Marcel and Groß, Julian and Krüger, Antonio},
urldate = {2024-11-29},
date = {2020-08-01},
langid = {english},
keywords = {{GPU}, Interpreter execution, Memory layout, Thread compaction},
file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\8ETAIXGL\\Köster et al. - 2020 - Massively Parallel Rule-Based Interpreter Execution on GPUs Using Thread Compaction.pdf:application/pdf},
}
@comment{krolik_r3d3_2021: CGO 2021 conference paper on hybrid AoT/JIT query compilation for GPUs. Entry verified: DOI, page range, and eventtitle are consistent.}
@inproceedings{krolik_r3d3_2021,
title = {r3d3: Optimized Query Compilation on {GPUs}},
url = {https://ieeexplore.ieee.org/document/9370323},
doi = {10.1109/CGO51591.2021.9370323},
shorttitle = {r3d3},
abstract = {Query compilation is an effective approach to improve the performance of repeated database queries. {GPU}-based approaches have significant promise, but face difficulties in managing compilation time, data transfer costs, and in addressing a reasonably comprehensive range of {SQL} operations. In this work we describe a hybrid {AoT}/{JIT} approach to {GPU}-based query compilation. We use multiple optimizations to reduce execution, compile, and data transfer times, improving performance over both other {GPU}-based approaches and {CPU}-based query compilers as well. Our design addresses a wide range of {SQL} queries, sufficient to demonstrate the practicality of using {GPUs} for query optimization.},
eventtitle = {2021 {IEEE}/{ACM} International Symposium on Code Generation and Optimization ({CGO})},
pages = {277--288},
booktitle = {2021 {IEEE}/{ACM} International Symposium on Code Generation and Optimization ({CGO})},
author = {Krolik, Alexander and Verbrugge, Clark and Hendren, Laurie},
urldate = {2024-11-29},
date = {2021-02},
keywords = {Compilers, Data transfer, Databases, {GPUs}, Graphics processing units, Memory management, Optimization, Query processing, Runtime, {SQL} database queries},
file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\NJM2FK56\\Krolik et al. - 2021 - r3d3 Optimized Query Compilation on GPUs.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\F6KDT83Y\\9370323.html:text/html},
}
@comment{koster_high-performance_2020: Springer ICA3PP 2020 proceedings chapter; tagged as related work. NOTE(review): keyword tag is spelled "Related-Work" here but "Related Work" in koster_macsq_2022 - confirm which tag spelling the Zotero library uses.}
@inproceedings{koster_high-performance_2020,
location = {Cham},
title = {High-Performance Simulations on {GPUs} Using Adaptive Time Steps},
isbn = {978-3-030-60245-1},
doi = {10.1007/978-3-030-60245-1_26},
abstract = {Graphics Processing Units ({GPUs}) are widely spread nowadays due to their parallel processing capabilities. Leveraging these hardware features is particularly important for computationally expensive tasks and workloads. Prominent use cases are optimization problems and simulations that can be parallelized and tuned for these architectures. In the general domain of simulations (numerical and discrete), the overall logic is split into several components that are executed one after another. These components need step-size information which determines the number of steps (e.g. the elapsed time) they have to perform. Small step sizes are often required to ensure a valid simulation result with respect to precision and constraint correctness. Unfortunately, they are often the main bottleneck of the simulation. In this paper, we introduce a new and generic way of realizing high-performance simulations with multiple components using adaptive time steps on {GPUs}. Our method relies on a code-analysis phase that resolves data dependencies between different components. This knowledge is used to generate specially-tuned execution kernels that encapsulate the underlying component logic. An evaluation on our simulation benchmarks shows that we are able to considerably improve runtime performance compared to prior work.},
pages = {369--385},
booktitle = {Algorithms and Architectures for Parallel Processing},
publisher = {Springer International Publishing},
author = {Köster, Marcel and Groß, Julian and Krüger, Antonio},
editor = {Qiu, Meikang},
date = {2020},
langid = {english},
keywords = {Related-Work},
}
@comment{koster_macsq_2022: Springer PDCAT 2021 proceedings chapter (published 2022) on GPU-accelerated DeepQ learning; tagged as related work.}
@inproceedings{koster_macsq_2022,
location = {Cham},
title = {{MACSQ}: Massively Accelerated {DeepQ} Learning on {GPUs} Using On-the-fly State Construction},
isbn = {978-3-030-96772-7},
doi = {10.1007/978-3-030-96772-7_35},
shorttitle = {{MACSQ}},
abstract = {The current trend of using artificial neural networks to solve computationally intensive problems is omnipresent. In this scope, {DeepQ} learning is a common choice for agent-based problems. {DeepQ} combines the concept of Q-Learning with (deep) neural networks to learn different Q-values/matrices based on environmental conditions. Unfortunately, {DeepQ} learning requires hundreds of thousands of iterations/Q-samples that must be generated and learned for large-scale problems. Gathering data sets for such challenging tasks is extremely time consuming and requires large data-storage containers. Consequently, a common solution is the automatic generation of input samples for agent-based {DeepQ} networks. However, a usual workflow is to create the samples separately from the training process in either a (set of) pre-processing step(s) or interleaved with the training process. This requires the input Q-samples to be materialized in order to be fed into the training step of the attached neural network. In this paper, we propose a new {GPU}-focussed method for on-the-fly generation of training samples tightly coupled with the training process itself. This allows us to skip the materialization process of all samples (e.g. avoid dumping them disk), as they are (re)constructed when needed. Our method significantly outperforms usual workflows that generate the input samples on the {CPU} in terms of runtime performance and memory/storage consumption.},
pages = {383--395},
booktitle = {Parallel and Distributed Computing, Applications and Technologies},
publisher = {Springer International Publishing},
author = {Köster, Marcel and Groß, Julian and Krüger, Antonio},
editor = {Shen, Hong and Sang, Yingpeng and Zhang, Yong and Xiao, Nong and Arabnia, Hamid R. and Fox, Geoffrey and Gupta, Ajay and Malek, Manu},
date = {2022},
langid = {english},
keywords = {Related Work},
}
@comment{dietz_mimd_2010: LCPC 2010 Springer proceedings chapter on interpreting MIMD programs on SIMD GPU hardware; tagged "Hilfreich" (German: helpful).}
@inproceedings{dietz_mimd_2010,
location = {Berlin, Heidelberg},
title = {{MIMD} Interpretation on a {GPU}},
isbn = {978-3-642-13374-9},
doi = {10.1007/978-3-642-13374-9_5},
abstract = {Programming heterogeneous parallel computer systems is notoriously difficult, but {MIMD} models have proven to be portable across multi-core processors, clusters, and massively parallel systems. It would be highly desirable for {GPUs} (Graphics Processing Units) also to be able to leverage algorithms and programming tools designed for {MIMD} targets. Unfortunately, most {GPU} hardware implements a very restrictive multi-threaded {SIMD}-based execution model.},
pages = {65--79},
booktitle = {Languages and Compilers for Parallel Computing},
publisher = {Springer},
author = {Dietz, Henry G. and Young, B. Dalton},
editor = {Gao, Guang R. and Pollock, Lori L. and Cavazos, John and Li, Xiaoming},
date = {2010},
langid = {english},
keywords = {Hilfreich},
}
@inproceedings{langdon_simd_2008,
location = {Berlin, Heidelberg},
title = {A {SIMD} Interpreter for Genetic Programming on {GPU} Graphics Cards},
isbn = {978-3-540-78671-9},
doi = {10.1007/978-3-540-78671-9_7},
abstract = {Mackey-Glass chaotic time series prediction and nuclear protein classification show the feasibility of evaluating genetic programming populations directly on parallel consumer gaming graphics processing units. Using a Linux {KDE} computer equipped with an {nVidia} {GeForce} 8800 {GTX} graphics processing unit card the C++ {SPMD} interpretter evolves programs at Giga {GP} operations per second (895 million {GPops}). We use the {RapidMind} general processing on {GPU} ({GPGPU}) framework to evaluate an entire population of a quarter of a million individual programs on a non-trivial problem in 4 seconds. An efficient reverse polish notation ({RPN}) tree based {GP} is given.},
pages = {73--85},
booktitle = {Genetic Programming},
publisher = {Springer},
author = {Langdon, W. B. and Banzhaf, Wolfgang},
editor = {O'Neill, Michael and Vanneschi, Leonardo and Gustafson, Steven and Esparcia Alcázar, Anna Isabel and De Falco, Ivanoe and Della Cioppa, Antonio and Tarantino, Ernesto},
date = {2008},
langid = {english},
keywords = {Hilfreich},
}
@comment{cano_gpu-parallel_2014: GECCO 2014 ACM proceedings paper on subtree-level parallel GP interpretation; tagged "Hilfreich" (German: helpful).}
@inproceedings{cano_gpu-parallel_2014,
location = {New York, {NY}, {USA}},
title = {{GPU}-parallel subtree interpreter for genetic programming},
isbn = {978-1-4503-2662-9},
url = {https://dl.acm.org/doi/10.1145/2576768.2598272},
doi = {10.1145/2576768.2598272},
series = {{GECCO} '14},
abstract = {Genetic Programming ({GP}) is a computationally intensive technique but its nature is embarrassingly parallel. Graphic Processing Units ({GPUs}) are many-core architectures which have been widely employed to speed up the evaluation of {GP}. In recent years, many works have shown the high performance and efficiency of {GPUs} on evaluating both the individuals and the fitness cases in parallel. These approaches are known as population parallel and data parallel. This paper presents a parallel {GP} interpreter which extends these approaches and adds a new parallelization level based on the concurrent evaluation of the individual's subtrees. A {GP} individual defined by a tree structure with nodes and branches comprises different depth levels in which there are independent subtrees which can be evaluated concurrently. Threads can cooperate to evaluate different subtrees and share the results via {GPU}'s shared memory. The experimental results show the better performance of the proposal in terms of the {GP} operations per second ({GPops}/s) that the {GP} interpreter is capable of processing, achieving up to 21 billion {GPops}/s using a {NVIDIA} 480 {GPU}. However, some issues raised due to limitations of currently available hardware are to be overcomed by the dynamic parallelization capabilities of the next generation of {GPUs}.},
pages = {887--894},
booktitle = {Proceedings of the 2014 Annual Conference on Genetic and Evolutionary Computation},
publisher = {Association for Computing Machinery},
author = {Cano, Alberto and Ventura, Sebastian},
urldate = {2024-11-28},
date = {2014-07-12},
keywords = {Hilfreich},
file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\NYV739K8\\Cano und Ventura - 2014 - GPU-parallel subtree interpreter for genetic programming.pdf:application/pdf},
}
@comment{pfahler_semantic_2020: KDD 2020 ACM proceedings paper on embedding-based retrieval of mathematical expressions.}
@inproceedings{pfahler_semantic_2020,
location = {New York, {NY}, {USA}},
title = {Semantic Search in Millions of Equations},
isbn = {978-1-4503-7998-4},
url = {https://dl.acm.org/doi/10.1145/3394486.3403056},
doi = {10.1145/3394486.3403056},
series = {{KDD} '20},
abstract = {Given the increase of publications, search for relevant papers becomes tedious. In particular, search across disciplines or schools of thinking is not supported. This is mainly due to the retrieval with keyword queries: technical terms differ in different sciences or at different times. Relevant articles might better be identified by their mathematical problem descriptions. Just looking at the equations in a paper already gives a hint to whether the paper is relevant. Hence, we propose a new approach for retrieval of mathematical expressions based on machine learning. We design an unsupervised representation learning task that combines embedding learning with self-supervised learning. Using graph convolutional neural networks we embed mathematical expression into low-dimensional vector spaces that allow efficient nearest neighbor queries. To train our models, we collect a huge dataset with over 29 million mathematical expressions from over 900,000 publications published on {arXiv}.org. The math is converted into an {XML} format, which we view as graph data. Our empirical evaluations involving a new dataset of manually annotated search queries show the benefits of using embedding models for mathematical retrieval.},
pages = {135--143},
booktitle = {Proceedings of the 26th {ACM} {SIGKDD} International Conference on Knowledge Discovery \& Data Mining},
publisher = {Association for Computing Machinery},
author = {Pfahler, Lukas and Morik, Katharina},
urldate = {2024-11-30},
date = {2020-08-20},
file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\TQBLKG25\\Pfahler und Morik - 2020 - Semantic Search in Millions of Equations.pdf:application/pdf},
}
@online{werner_informed_2021,
title = {Informed Equation Learning},
url = {http://arxiv.org/abs/2105.06331},
doi = {10.48550/arXiv.2105.06331},
abstract = {Distilling data into compact and interpretable analytic equations is one of the goals of science. Instead, contemporary supervised machine learning methods mostly produce unstructured and dense maps from input to output. Particularly in deep learning, this property is owed to the generic nature of simple standard link functions. To learn equations rather than maps, standard non-linearities can be replaced with structured building blocks of atomic functions. However, without strong priors on sparsity and structure, representational complexity and numerical conditioning limit this direct approach. To scale to realistic settings in science and engineering, we propose an informed equation learning system. It provides a way to incorporate expert knowledge about what are permitted or prohibited equation components, as well as a domain-dependent structured sparsity prior. Our system then utilizes a robust method to learn equations with atomic functions exhibiting singularities, as e.g. logarithm and division. We demonstrate several artificial and real-world experiments from the engineering domain, in which our system learns interpretable models of high predictive power.},
publisher = {{arXiv}},
author = {Werner, Matthias and Junginger, Andrej and Hennig, Philipp and Martius, Georg},
urldate = {2024-11-30},
date = {2021-05-13},
eprinttype = {arxiv},
eprint = {2105.06331},
keywords = {Computer Science - Machine Learning},
file = {Preprint PDF:C\:\\Users\\danwi\\Zotero\\storage\\HEYBR254\\Werner et al. - 2021 - Informed Equation Learning.pdf:application/pdf},
}
@article{memarzia_-depth_2015,
title = {An In-depth Study on the Performance Impact of {CUDA}, {OpenCL}, and {PTX} Code},
volume = {10},
abstract = {In recent years, the rise of {GPGPU} as a viable solution for high performance computing has been accompanied by fresh challenges for developers. Chief among these challenges is efficiently harnessing the formidable power of the {GPU} and finding performance bottlenecks. Many factors play a role in a {GPU} application's performance. This creates the need for studies, performance comparisons, and ways to analyze programs from a fundamental level. With that in mind, our goal is to present an in-depth performance comparison of the {CUDA} and {OpenCL} platforms, and study how {PTX} code can affect performance. In order to achieve this goal, we explore the subject from three different angles: kernel execution times, data transfers that occur between the host and device, and the {PTX} code that is generated by each platform's compiler. We carry out our experiments using ten real-world {GPU} kernels from the digital image processing domain, a selection of variable input data sizes, and a pair of {GPUs} based on the Nvidia Fermi and Kepler architectures. We show how {PTX} statistics and analysis can be used to provide further insight on performance discrepancies and bottlenecks. Our results indicate that, in an unbiased comparison such as this one, the {OpenCL} and {CUDA} platforms are essentially similar in terms of performance.},
author = {Memarzia, Puya and Khunjush, Farshad},
date = {2015},
langid = {english},
file = {PDF:C\:\\Users\\danwi\\Zotero\\storage\\GKAYMMNN\\Memarzia und Khunjush - 2015 - An In-depth Study on the Performance Impact of CUDA, OpenCL, and PTX Code.pdf:application/pdf},
}
@article{bastidas_fuertes_transpiler-based_2023,
title = {Transpiler-Based Architecture Design Model for Back-End Layers in Software Development},
volume = {13},
rights = {http://creativecommons.org/licenses/by/3.0/},
issn = {2076-3417},
url = {https://www.mdpi.com/2076-3417/13/20/11371},
doi = {10.3390/app132011371},
abstract = {The utilization of software architectures and designs is widespread in software development, offering conceptual frameworks to address recurring challenges. A transpiler is a tool that automatically converts source code from one high-level programming language to another, ensuring algorithmic equivalence. This study introduces an innovative software architecture design model that integrates transpilers into the back-end layer, enabling the automatic transformation of business logic and back-end components from a single source code (the coding artifact) into diverse equivalent versions using distinct programming languages (the automatically produced code). This work encompasses both abstract and detailed design aspects, covering the proposal, automated processes, layered design, development environment, nest implementations, and cross-cutting components. In addition, it defines the main target audiences, discusses pros and cons, examines their relationships with prevalent design paradigms, addresses considerations about compatibility and debugging, and emphasizes the pivotal role of the transpiler. An empirical experiment involving the practical application of this model was conducted by implementing a collaborative to-do list application. This paper comprehensively outlines the relevant methodological approach, strategic planning, precise execution, observed outcomes, and insightful reflections while underscoring the model's pragmatic viability and highlighting its relevance across various software development contexts. Our contribution aims to enrich the field of software architecture design by introducing a new way of designing multi-programming-language software.},
pages = {11371},
number = {20},
journaltitle = {Applied Sciences},
publisher = {Multidisciplinary Digital Publishing Institute},
author = {Bastidas Fuertes, Andrés and Pérez, María and Meza, Jaime},
urldate = {2025-01-03},
date = {2023-01},
langid = {english},
keywords = {back-end layers, design model, software architecture, software development, source-to-source transformations, transpiler},
file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\AD55DPJ4\\Bastidas Fuertes et al. - 2023 - Transpiler-Based Architecture Design Model for Back-End Layers in Software Development.pdf:application/pdf},
}
@incollection{adam_no_2019,
location = {Cham},
title = {{No Free Lunch} Theorem: A Review},
isbn = {978-3-030-12767-1},
url = {https://doi.org/10.1007/978-3-030-12767-1_5},
shorttitle = {{No Free Lunch} Theorem},
abstract = {The “No Free Lunch” theorem states that, averaged over all optimization problems, without re-sampling, all optimization algorithms perform equally well. Optimization, search, and supervised learning are the areas that have benefited more from this important theoretical concept. Formulation of the initial No Free Lunch theorem, very soon, gave rise to a number of research works which resulted in a suite of theorems that define an entire research field with significant results in other scientific areas where successfully exploring a search space is an essential and critical task. The objective of this paper is to go through the main research efforts that contributed to this research field, reveal the main issues, and disclose those points that are helpful in understanding the hypotheses, the restrictions, or even the inability of applying No Free Lunch theorems.},
pages = {57--82},
booktitle = {Approximation and Optimization : Algorithms, Complexity and Applications},
publisher = {Springer International Publishing},
author = {Adam, Stavros P. and Alexandropoulos, Stamatios-Aggelos N. and Pardalos, Panos M. and Vrahatis, Michael N.},
editor = {Demetriou, Ioannis C. and Pardalos, Panos M.},
urldate = {2025-02-14},
date = {2019},
langid = {english},
doi = {10.1007/978-3-030-12767-1_5},
}
@comment{michalakes_gpu_2008: IPDPS 2008 IEEE proceedings paper; early example of GPU acceleration applied to numerical weather prediction (WRF).}
@inproceedings{michalakes_gpu_2008,
title = {{GPU} acceleration of numerical weather prediction},
url = {https://ieeexplore.ieee.org/abstract/document/4536351},
doi = {10.1109/IPDPS.2008.4536351},
abstract = {Weather and climate prediction software has enjoyed the benefits of exponentially increasing processor power for almost 50 years. Even with the advent of large-scale parallelism in weather models, much of the performance increase has come from increasing processor speed rather than increased parallelism. This free ride is nearly over. Recent results also indicate that simply increasing the use of large- scale parallelism will prove ineffective for many scenarios. We present an alternative method of scaling model performance by exploiting emerging architectures using the fine-grain parallelism once used in vector machines. The paper shows the promise of this approach by demonstrating a 20 times speedup for a computationally intensive portion of the Weather Research and Forecast ({WRF}) model on an {NVIDIA} 8800 {GTX} graphics processing unit ({GPU}). We expect an overall 1.3 times speedup from this change alone.},
eventtitle = {2008 {IEEE} International Symposium on Parallel and Distributed Processing},
pages = {1--7},
booktitle = {2008 {IEEE} International Symposium on Parallel and Distributed Processing},
author = {Michalakes, John and Vachharajani, Manish},
urldate = {2025-02-14},
date = {2008-04},
keywords = {Acceleration, Bandwidth, Computer architecture, Concurrent computing, Graphics, Large-scale systems, Parallel processing, Predictive models, Weather forecasting, Yarn},
file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\ZFEVRLEZ\\Michalakes und Vachharajani - 2008 - GPU acceleration of numerical weather prediction.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\PYY4F7JB\\4536351.html:text/html},
}
@article{han_packetshader_2010,
title = {{PacketShader}: a {GPU}-accelerated software router},
volume = {40},
issn = {0146-4833},
url = {https://doi.org/10.1145/1851275.1851207},
doi = {10.1145/1851275.1851207},
shorttitle = {{PacketShader}},
abstract = {We present {PacketShader}, a high-performance software router framework for general packet processing with Graphics Processing Unit ({GPU}) acceleration. {PacketShader} exploits the massively-parallel processing power of {GPU} to address the {CPU} bottleneck in current software routers. Combined with our high-performance packet I/O engine, {PacketShader} outperforms existing software routers by more than a factor of four, forwarding 64B {IPv4} packets at 39 Gbps on a single commodity {PC}. We have implemented {IPv4} and {IPv6} forwarding, {OpenFlow} switching, and {IPsec} tunneling to demonstrate the flexibility and performance advantage of {PacketShader}. The evaluation results show that {GPU} brings significantly higher throughput over the {CPU}-only implementation, confirming the effectiveness of {GPU} for computation and memory-intensive operations in packet processing.},
pages = {195--206},
number = {4},
journaltitle = {{SIGCOMM} Comput. Commun. Rev.},
author = {Han, Sangjin and Jang, Keon and Park, {KyoungSoo} and Moon, Sue},
urldate = {2025-02-14},
date = {2010-08-30},
}
@comment{georgescu_gpu_2013: survey article (Archives of Computational Methods in Engineering) on GPU acceleration for finite element structural analysis.}
@article{georgescu_gpu_2013,
title = {{GPU} Acceleration for {FEM}-Based Structural Analysis},
volume = {20},
issn = {1886-1784},
url = {https://doi.org/10.1007/s11831-013-9082-8},
doi = {10.1007/s11831-013-9082-8},
abstract = {Graphic Processing Units ({GPUs}) have greatly exceeded their initial role of graphics accelerators and have taken a new role of co-processors for computation—heavy tasks. Both hardware and software ecosystems have now matured, with fully {IEEE} compliant double precision and memory correction being supported and a rich set of software tools and libraries being available. This in turn has lead to their increased adoption in a growing number of fields, both in academia and, more recently, in industry. In this review we investigate the adoption of {GPUs} as accelerators in the field of Finite Element Structural Analysis, a design tool that is now essential in many branches of engineering. We survey the work that has been done in accelerating the most time consuming steps of the analysis, indicate the speedup that has been achieved and, where available, highlight software libraries and packages that will enable the reader to take advantage of such acceleration. Overall, we try to draw a high level picture of where the state of the art is currently at.},
pages = {111--121},
number = {2},
journaltitle = {Archives of Computational Methods in Engineering},
shortjournal = {Arch Computat Methods Eng},
author = {Georgescu, Serban and Chow, Peter and Okuda, Hiroshi},
urldate = {2025-02-14},
date = {2013-06-01},
langid = {english},
keywords = {Compute Unify Device Architecture, Element Stiffness Matrice, Global Stiffness Matrix, Iterative Solver, Matrix Solver},
file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\352VGH3Y\\Georgescu et al. - 2013 - GPU Acceleration for FEM-Based Structural Analysis.pdf:application/pdf},
}
@article{brunton_discovering_2016,
  author       = {Brunton, Steven L. and Proctor, Joshua L. and Kutz, J. Nathan},
  title        = {Discovering governing equations from data by sparse identification of nonlinear dynamical systems},
  journaltitle = {Proceedings of the National Academy of Sciences},
  volume       = {113},
  number       = {15},
  pages        = {3932--3937},
  date         = {2016-04-12},
  doi          = {10.1073/pnas.1517384113},
  url          = {https://www.pnas.org/doi/abs/10.1073/pnas.1517384113},
  urldate      = {2025-02-26},
  abstract     = {Extracting governing equations from data is a central challenge in many diverse areas of science and engineering. Data are abundant whereas models often remain elusive, as in climate science, neuroscience, ecology, finance, and epidemiology, to name only a few examples. In this work, we combine sparsity-promoting techniques and machine learning with nonlinear dynamical systems to discover governing equations from noisy measurement data. The only assumption about the structure of the model is that there are only a few important terms that govern the dynamics, so that the equations are sparse in the space of possible functions; this assumption holds for many physical systems in an appropriate basis. In particular, we use sparse regression to determine the fewest terms in the dynamic governing equations required to accurately represent the data. This results in parsimonious models that balance accuracy with model complexity to avoid overfitting. We demonstrate the algorithm on a wide range of problems, from simple canonical systems, including linear and nonlinear oscillators and the chaotic Lorenz system, to the fluid vortex shedding behind an obstacle. The fluid example illustrates the ability of this method to discover the underlying dynamics of a system that took experts in the community nearly 30 years to resolve. We also show that this method generalizes to parameterized systems and systems that are time-varying or have external forcing.},
  file         = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\6R643NFZ\\Brunton et al. - 2016 - Discovering governing equations from data by sparse identification of nonlinear dynamical systems.pdf:application/pdf},
}
@article{dong_evolving_2024,
  title = {Evolving Equation Learner For Symbolic Regression},
  issn = {1941-0026},
  url = {https://ieeexplore.ieee.org/document/10538006},
  doi = {10.1109/TEVC.2024.3404650},
  abstract = {Symbolic regression, a multifaceted optimization challenge involving the refinement of both structural components and coefficients, has gained significant research interest in recent years. The Equation Learner ({EQL}), a neural network designed to optimize both equation structure and coefficients through gradient-based optimization algorithms, has emerged as an important topic of concern within this field. Thus far, several variations of {EQL} have been introduced. Nevertheless, these existing {EQL} methodologies suffer from a fundamental constraint that they necessitate a predefined network structure. This limitation imposes constraints on the complexity of equations and makes them ill-suited for high-dimensional or high-order problem domains. To tackle the aforementioned shortcomings, we present a novel approach known as the evolving Equation Learner ({eEQL}). {eEQL} introduces a unique network structure characterized by automatically defined functions ({ADFs}). This new architectural design allows for dynamic adaptations of the network structure. Moreover, by engaging in self-learning and self-evolution during the search process, {eEQL} facilitates the generation of intricate, high-order, and constructive sub-functions. This enhancement can improve the accuracy and efficiency of the algorithm. To evaluate its performance, the proposed {eEQL} method has been tested across various datasets, including benchmark datasets, physics datasets, and real-world datasets. The results have demonstrated that our approach outperforms several well-known methods.},
  pages = {1--1},
  journaltitle = {{IEEE} Transactions on Evolutionary Computation},
  author = {Dong, Junlan and Zhong, Jinghui and Liu, Wei-Li and Zhang, Jun},
  urldate = {2025-02-26},
  date = {2024},
  note = {Conference Name: {IEEE} Transactions on Evolutionary Computation},
  keywords = {Optimization, Adaptation models, Complexity theory, Equation Learner, Evolutionary computation, Evolving equation learner, Mathematical models, Neural networks, Progressive Evolutionary Structure Search, Training},
  file = {IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\8PQADTZP\\metrics.html:text/html},
}
@incollection{korns_accuracy_2011,
  location = {New York, {NY}},
  title = {Accuracy in Symbolic Regression},
  isbn = {978-1-4614-1770-5},
  doi = {10.1007/978-1-4614-1770-5_8},
  abstract = {This chapter asserts that, in current state-of-the-art symbolic regression engines, accuracy is poor. That is to say that state-of-the-art symbolic regression engines return a champion with good fitness; however, obtaining a champion with the correct formula is not forthcoming even in cases of only one basis function with minimally complex grammar depth. Ideally, users expect that for test problems created with no noise, using only functions in the specified grammar, with only one basis function and some minimal grammar depth, that state-of-the-art symbolic regression systems should return the exact formula (or at least an isomorph) used to create the test data. Unfortunately, this expectation cannot currently be achieved using published state-of-the-art symbolic regression techniques. Several classes of test formulas, which prove intractable, are examined and an understanding of why they are intractable is developed. Techniques in Abstract Expression Grammars are employed to render these problems tractable, including manipulation of the epigenome during the evolutionary process, together with breeding of multiple targeted epigenomes in separate population islands. Aselected set of currently intractable problems are shown to be solvable, using these techniques, and a proposal is put forward for a discipline-wide program of improving accuracy in state-of-the-art symbolic regression systems.},
  pages = {129--151},
  booktitle = {Genetic Programming Theory and Practice {IX}},
  publisher = {Springer},
  author = {Korns, Michael F.},
  editor = {Riolo, Rick and Vladislavleva, Ekaterina and Moore, Jason H.},
  date = {2011},
  langid = {english},
}
@article{keijzer_scaled_2004,
  title = {Scaled Symbolic Regression},
  volume = {5},
  issn = {1573-7632},
  doi = {10.1023/B:GENP.0000030195.77571.f9},
  abstract = {Performing a linear regression on the outputs of arbitrary symbolic expressions has empirically been found to provide great benefits. Here some basic theoretical results of linear regression are reviewed on their applicability for use in symbolic regression. It will be proven that the use of a scaled error measure, in which the error is calculated after scaling, is expected to perform better than its unscaled counterpart on all possible symbolic regression problems. As the method (i) does not introduce additional parameters to a symbolic regression run, (ii) is guaranteed to improve results on most symbolic regression problems (and is not worse on any other problem), and (iii) has a well-defined upper bound on the error, scaled squared error is an ideal candidate to become the standard error measure for practical applications of symbolic regression.},
  pages = {259--269},
  number = {3},
  journaltitle = {Genetic Programming and Evolvable Machines},
  shortjournal = {Genet Program Evolvable Mach},
  author = {Keijzer, Maarten},
  date = {2004-09-01},
  langid = {english},
  keywords = {Artificial Intelligence, genetic programming, linear regression, symbolic regression},
  file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\ZH9LAN74\\Keijzer - 2004 - Scaled Symbolic Regression.pdf:application/pdf},
}
@misc{jin_bayesian_2020,
  title = {Bayesian Symbolic Regression},
  doi = {10.48550/arXiv.1910.08892},
  abstract = {Interpretability is crucial for machine learning in many scenarios such as quantitative finance, banking, healthcare, etc. Symbolic regression ({SR}) is a classic interpretable machine learning method by bridging X and Y using mathematical expressions composed of some basic functions. However, the search space of all possible expressions grows exponentially with the length of the expression, making it infeasible for enumeration. Genetic programming ({GP}) has been traditionally and commonly used in {SR} to search for the optimal solution, but it suffers from several limitations, e.g. the difficulty in incorporating prior knowledge; overly-complicated output expression and reduced interpretability etc. To address these issues, we propose a new method to fit {SR} under a Bayesian framework. Firstly, Bayesian model can naturally incorporate prior knowledge (e.g., preference of basis functions, operators and raw features) to improve the efficiency of fitting {SR}. Secondly, to improve interpretability of expressions in {SR}, we aim to capture concise but informative signals. To this end, we assume the expected signal has an additive structure, i.e., a linear combination of several concise expressions, whose complexity is controlled by a well-designed prior distribution. In our setup, each expression is characterized by a symbolic tree, and the proposed {SR} model could be solved by sampling symbolic trees from the posterior distribution using an efficient Markov chain Monte Carlo ({MCMC}) algorithm. Finally, compared with {GP}, the proposed {BSR}(Bayesian Symbolic Regression) method saves computer memory with no need to keep an updated 'genome pool'. Numerical experiments show that, compared with {GP}, the solutions of {BSR} are closer to the ground truth and the expressions are more concise. Meanwhile we find the solution of {BSR} is robust to hyper-parameter specifications such as the number of trees.},
  number = {{arXiv}:1910.08892},
  publisher = {{arXiv}},
  author = {Jin, Ying and Fu, Weilin and Kang, Jian and Guo, Jiadong and Guo, Jian},
  date = {2020-01-16},
  eprinttype = {arXiv},
  eprint = {1910.08892},
  eprintclass = {stat},
  keywords = {Statistics - Methodology},
  file = {Preprint PDF:C\:\\Users\\danwi\\Zotero\\storage\\3MP48UI3\\Jin et al. - 2020 - Bayesian Symbolic Regression.pdf:application/pdf;Snapshot:C\:\\Users\\danwi\\Zotero\\storage\\UNNZKPRJ\\1910.html:text/html},
}
@inproceedings{winter_are_2021,
  location = {New York, {NY}, {USA}},
  title = {Are dynamic memory managers on {GPUs} slow? a survey and benchmarks},
  isbn = {978-1-4503-8294-6},
  doi = {10.1145/3437801.3441612},
  series = {{PPoPP} '21},
  shorttitle = {Are dynamic memory managers on {GPUs} slow?},
  abstract = {Dynamic memory management on {GPUs} is generally understood to be a challenging topic. On current {GPUs}, hundreds of thousands of threads might concurrently allocate new memory or free previously allocated memory. This leads to problems with thread contention, synchronization overhead and fragmentation. Various approaches have been proposed in the last ten years and we set out to evaluate them on a level playing field on modern hardware to answer the question, if dynamic memory managers are as slow as commonly thought of. In this survey paper, we provide a consistent framework to evaluate all publicly available memory managers in a large set of scenarios. We summarize each approach and thoroughly evaluate allocation performance (thread-based as well as warp-based), and look at performance scaling, fragmentation and real-world performance considering a synthetic workload as well as updating dynamic graphs. We discuss the strengths and weaknesses of each approach and provide guidelines for the respective best usage scenario. We provide a unified interface to integrate any of the tested memory managers into an application and switch between them for benchmarking purposes. Given our results, we can dispel some of the dread associated with dynamic memory managers on the {GPU}.},
  pages = {219--233},
  booktitle = {Proceedings of the 26th {ACM} {SIGPLAN} Symposium on Principles and Practice of Parallel Programming},
  publisher = {Association for Computing Machinery},
  author = {Winter, Martin and Parger, Mathias and Mlakar, Daniel and Steinberger, Markus},
  date = {2021-02-17},
}
@article{bartlett_exhaustive_2024,
  author       = {Bartlett, Deaglan J. and Desmond, Harry and Ferreira, Pedro G.},
  title        = {Exhaustive Symbolic Regression},
  journaltitle = {{IEEE} Transactions on Evolutionary Computation},
  volume       = {28},
  number       = {4},
  pages        = {950--964},
  date         = {2024-08},
  issn         = {1941-0026},
  doi          = {10.1109/TEVC.2023.3280250},
  url          = {https://ieeexplore.ieee.org/abstract/document/10136815},
  urldate      = {2025-02-28},
  abstract     = {Symbolic regression ({SR}) algorithms attempt to learn analytic expressions which fit data accurately and in a highly interpretable manner. Conventional {SR} suffers from two fundamental issues which we address here. First, these methods search the space stochastically (typically using genetic programming) and hence do not necessarily find the best function. Second, the criteria used to select the equation optimally balancing accuracy with simplicity have been variable and subjective. To address these issues we introduce exhaustive {SR} ({ESR}), which systematically and efficiently considers all possible equations—made with a given basis set of operators and up to a specified maximum complexity—and is therefore guaranteed to find the true optimum (if parameters are perfectly optimized) and a complete function ranking subject to these constraints. We implement the minimum description length principle as a rigorous method for combining these preferences into a single objective. To illustrate the power of {ESR} we apply it to a catalog of cosmic chronometers and the Pantheon+ sample of supernovae to learn the Hubble rate as a function of redshift, finding 40 functions (out of 5.2 million trial functions) that fit the data more economically than the Friedmann equation. These low-redshift data therefore do not uniquely prefer the expansion history of the standard model of cosmology. We make our code and full equation sets publicly available.},
  keywords     = {Optimization, Complexity theory, Mathematical models, Biological system modeling, Cosmology data analysis, minimum description length, model selection, Numerical models, Search problems, Standards, symbolic regression ({SR})},
  file         = {Eingereichte Version:C\:\\Users\\danwi\\Zotero\\storage\\Y6LFWDH2\\Bartlett et al. - 2024 - Exhaustive Symbolic Regression.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\2HU5A8RL\\10136815.html:text/html},
}
@inproceedings{dokken_gpu_2005,
  location = {New York, {NY}, {USA}},
  title = {The {GPU} as a high performance computational resource},
  isbn = {978-1-59593-204-4},
  doi = {10.1145/1090122.1090126},
  series = {{SCCG} '05},
  abstract = {With the introduction in 2003 of standard {GPUs} with 32 bit floating point numbers and programmable Vertex and Fragment processors, the processing power of the {GPU} was made available to non-graphics applications. As the {GPU} is aimed at computer graphics, the concepts in {GPU}-programming are based on computer graphics terminology, and the strategies for programming have to be based on the architecture of the graphics pipeline. At {SINTEF} in Norway a 4-year strategic institute project (2004-2007) "Graphics hardware as a high-end computational resource", http://www.math.sintef.no/gpu/ aims at making {GPUs} available as a computational resource both to academia and industry. This paper addresses the challenges of {GPU}-programming and results of the project's first year.},
  pages = {21--26},
  booktitle = {Proceedings of the 21st Spring Conference on Computer Graphics},
  publisher = {Association for Computing Machinery},
  author = {Dokken, Tor and Hagen, Trond R. and Hjelmervik, Jon M.},
  date = {2005-05-12},
}
@inproceedings{huang_gpu_2008,
  title = {{GPU} as a General Purpose Computing Resource},
  url = {https://ieeexplore.ieee.org/document/4710975},
  doi = {10.1109/PDCAT.2008.38},
  abstract = {In the last few years, {GPUs}(Graphics Processing Units) have made rapid development. Their ever-increasing computing power and decreasing cost have attracted attention from both industry and academia. In addition to graphics applications, researchers are interested in using them for general purpose computing. Recently, {NVIDIA} released a new computing architecture, {CUDA} (compute united device architecture), for its {GeForce} 8 series, Quadro {FX}, and Tesla {GPU} products. This new architecture can change fundamentally the way in which {GPUs} are used. In this paper, we study the programmability of {CUDA} and its {GeForce} 8 {GPU} and compare its performance with general purpose processors, in order to investigate its suitability for general purpose computation.},
  eventtitle = {2008 Ninth International Conference on Parallel and Distributed Computing, Applications and Technologies},
  pages = {151--158},
  booktitle = {2008 Ninth International Conference on Parallel and Distributed Computing, Applications and Technologies},
  author = {Huang, Qihang and Huang, Zhiyi and Werstein, Paul and Purvis, Martin},
  urldate = {2025-03-01},
  date = {2008-12},
  keywords = {Computer architecture, Application software, Central Processing Unit, Computer graphics, Distributed computing, Grid computing, Multicore processing, Pipelines, Programming profession, Rendering (computer graphics)},
  file = {IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\2FJP9K25\\references.html:text/html},
}
@article{verbraeck_interactive_2021,
  author       = {Verbraeck, Annemieke and Eisemann, Elmar},
  title        = {Interactive Black-Hole Visualization},
  journaltitle = {{IEEE} Transactions on Visualization and Computer Graphics},
  volume       = {27},
  number       = {2},
  pages        = {796--805},
  date         = {2021-02},
  issn         = {1941-0506},
  doi          = {10.1109/TVCG.2020.3030452},
  url          = {https://ieeexplore.ieee.org/abstract/document/9226126},
  urldate      = {2025-03-02},
  abstract     = {We present an efficient algorithm for visualizing the effect of black holes on its distant surroundings as seen from an observer nearby in orbit. Our solution is {GPU}-based and builds upon a two-step approach, where we first derive an adaptive grid to map the 360-view around the observer to the distorted celestial sky, which can be directly reused for different camera orientations. Using a grid, we can rapidly trace rays back to the observer through the distorted spacetime, avoiding the heavy workload of standard tracing solutions at real-time rates. By using a novel interpolation technique we can also simulate an observer path by smoothly transitioning between multiple grids. Our approach accepts real star catalogues and environment maps of the celestial sky and generates the resulting black-hole deformations in real time.},
  keywords     = {Rendering (computer graphics), Algorithms, Cameras, Computer Graphics Techniques, Distortion, Engineering, Mathematics, Observers, Physical \& Environmental Sciences, Ray tracing, Real-time systems, Visualization},
  file         = {PDF:C\:\\Users\\danwi\\Zotero\\storage\\HDASRGYN\\Verbraeck und Eisemann - 2021 - Interactive Black-Hole Visualization.pdf:application/pdf},
}
@inproceedings{schuurman_step-by-step_2013,
  location = {New York, {NY}, {USA}},
  title = {Step-by-step design and simulation of a simple {CPU} architecture},
  isbn = {978-1-4503-1868-6},
  url = {https://dl.acm.org/doi/10.1145/2445196.2445296},
  doi = {10.1145/2445196.2445296},
  series = {{SIGCSE} '13},
  abstract = {This paper describes a sequence of assignments, each building upon the next, leading students to a working simulation of a simple 8-bit {CPU} (Central Processing Unit). The design features a classic Von Neumann architecture comprising a simple data path with a few registers, a simple {ALU} (Arithmetic Logic Unit), and a microprogram to direct all the control signals. The first step involves the design of the {ALU} which is capable of eight basic operations. The second step guides students to construct a datapath complete with several 8-bit registers. The third step involves the design and implementation of a control unit which uses a microprogram to implement machine code instructions. The microprogram implements nine basic machine language instructions which are sufficient for writing many simple programs. The final step involves adding program memory and an input and output device to form a simple working simulation of a computer. At this point, students may hand-assemble code for their {CPU} and simulate its execution. All simulations are performed using a free and open source simulator called Logisim which performs digital logic simulations with the ability to build larger circuits from smaller subcircuits. Students can set an adjustable clock rate and observe the internal {CPU} state and registers as it retrieves instructions and steps through the microcode. The basic {CPU} architecture provides many opportunities for more advanced exercises, such as adding an instruction fetch unit, adding pipelining, or adding more machine language instructions. The assignments were introduced in a second year course on computer organization, providing an effective hands-on approach to understanding how a {CPU} actually operates.},
  pages = {335--340},
  booktitle = {Proceedings of the 44th {ACM} technical symposium on Computer science education},
  publisher = {Association for Computing Machinery},
  author = {Schuurman, Derek C.},
  urldate = {2025-03-08},
  date = {2013-03-06},
  file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\KM664H87\\Schuurman - 2013 - Step-by-step design and simulation of a simple CPU architecture.pdf:application/pdf},
}
@article{franchetti_efficient_2005,
  title = {Efficient Utilization of {SIMD} Extensions},
  volume = {93},
  issn = {1558-2256},
  url = {https://ieeexplore.ieee.org/abstract/document/1386659},
  doi = {10.1109/JPROC.2004.840491},
  abstract = {This paper targets automatic performance tuning of numerical kernels in the presence of multilayered memory hierarchies and single-instruction, multiple-data ({SIMD}) parallelism. The studied {SIMD} instruction set extensions include Intel's {SSE} family, {AMD}'s 3DNow!, Motorola's {AltiVec}, and {IBM}'s {BlueGene}/L {SIMD} instructions. {FFTW}, {ATLAS}, and {SPIRAL} demonstrate that near-optimal performance of numerical kernels across a variety of modern computers featuring deep memory hierarchies can be achieved only by means of automatic performance tuning. These software packages generate and optimize {ANSI} C code and feed it into the target machine's general-purpose C compiler to maintain portability. The scalar C code produced by performance tuning systems poses a severe challenge for vectorizing compilers. The particular code structure hampers automatic vectorization and, thus, inhibits satisfactory performance on processors featuring short vector extensions. This paper describes special-purpose compiler technology that supports automatic performance tuning on machines with vector instructions. The work described includes: 1) symbolic vectorization of digital signal processing transforms; 2) straight-line code vectorization for numerical kernels; and 3) compiler back ends for straight-line code with vector instructions. Methods from all three areas were combined with {FFTW}, {SPIRAL}, and {ATLAS} to optimize both for memory hierarchy and vector instructions. Experiments show that the presented methods lead to substantial speedups (up to 1.8 for two-way and 3.3 for four-way vector extensions) over the best scalar C codes generated by the original systems as well as roughly matching the performance of hand-tuned vendor libraries.},
  pages = {409--425},
  number = {2},
  journaltitle = {Proceedings of the {IEEE}},
  author = {Franchetti, F. and Kral, S. and Lorenz, J. and Ueberhuber, C. W.},
  urldate = {2025-03-08},
  date = {2005-02},
  keywords = {Concurrent computing, Parallel processing, Automatic vectorization, Boosting, Computer aided instruction, Computer applications, Digital signal processing, digital signal processing ({DSP}), fast Fourier transform ({FFT}), Kernel, Registers, short vector single instruction, multiple data ({SIMD}), Signal processing algorithms, Spirals, symbolic vectorization},
  file = {Eingereichte Version:C\:\\Users\\danwi\\Zotero\\storage\\J48HM9VD\\Franchetti et al. - 2005 - Efficient Utilization of SIMD Extensions.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\W6PT75CV\\1386659.html:text/html},
}
@inproceedings{tian_compiling_2012,
  author     = {Tian, Xinmin and Saito, Hideki and Girkar, Milind and Preis, Serguei V. and Kozhukhov, Sergey S. and Cherkasov, Aleksei G. and Nelson, Clark and Panchenko, Nikolay and Geva, Robert},
  title      = {Compiling C/C++ {SIMD} Extensions for Function and Loop Vectorizaion on Multicore-{SIMD} Processors},
  booktitle  = {2012 {IEEE} 26th International Parallel and Distributed Processing Symposium Workshops \& {PhD} Forum},
  eventtitle = {2012 {IEEE} 26th International Parallel and Distributed Processing Symposium Workshops \& {PhD} Forum},
  pages      = {2349--2358},
  date       = {2012-05},
  doi        = {10.1109/IPDPSW.2012.292},
  url        = {https://ieeexplore.ieee.org/abstract/document/6270606},
  urldate    = {2025-03-08},
  abstract   = {{SIMD} vectorization has received significant attention in the past decade as an important method to accelerate scientific applications, media and embedded applications on {SIMD} architectures such as Intel® {SSE}, {AVX}, and {IBM}* {AltiVec}. However, most of the focus has been directed at loops, effectively executing their iterations on multiple {SIMD} lanes concurrently relying upon program hints and compiler analysis. This paper presents a set of new C/C++ high-level vector extensions for {SIMD} programming, and the Intel® C++ product compiler that is extended to translate these vector extensions and produce optimized {SIMD} instruction sequences of vectorized functions and loops. For a function, our main idea is to vectorize the entire function for callers instead of just vectorizing loops (if any) inside the function. It poses the challenge of dealing with complicated control-flow in the function body, and matching caller and callee for {SIMD} vector calls while vectorizing caller functions (or loops) and callee functions. Our compilation methods for automatically compiling vector extensions are described. We present performance results of several non-trivial visual computing, computational, and simulation workloads, utilizing {SIMD} units through the vector extensions on Intel® Multicore 128-bit {SIMD} processors, and we show that significant {SIMD} speedups (3.07x to 4.69x) are achieved over the serial execution.},
  keywords   = {{GPU}, Parallel processing, Cloning, Compiler, Graphics processing unit, Hardware, Multicore, Programming, {SIMD}, Vectorization, Vectors},
  file       = {IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\HBSGBKT2\\6270606.html:text/html},
}
@inproceedings{lee_debunking_2010,
  author     = {Lee, Victor W. and Kim, Changkyu and Chhugani, Jatin and Deisher, Michael and Kim, Daehyun and Nguyen, Anthony D. and Satish, Nadathur and Smelyanskiy, Mikhail and Chennupaty, Srinivas and Hammarlund, Per and Singhal, Ronak and Dubey, Pradeep},
  title      = {Debunking the 100X {GPU} vs. {CPU} myth: an evaluation of throughput computing on {CPU} and {GPU}},
  shorttitle = {Debunking the 100X {GPU} vs. {CPU} myth},
  booktitle  = {Proceedings of the 37th annual international symposium on Computer architecture},
  series     = {{ISCA} '10},
  publisher  = {Association for Computing Machinery},
  location   = {New York, {NY}, {USA}},
  pages      = {451--460},
  date       = {2010-06-19},
  isbn       = {978-1-4503-0053-7},
  doi        = {10.1145/1815961.1816021},
  url        = {https://dl.acm.org/doi/10.1145/1815961.1816021},
  urldate    = {2025-03-08},
  abstract   = {Recent advances in computing have led to an explosion in the amount of data being generated. Processing the ever-growing data in a timely manner has made throughput computing an important aspect for emerging applications. Our analysis of a set of important throughput computing kernels shows that there is an ample amount of parallelism in these kernels which makes them suitable for today's multi-core {CPUs} and {GPUs}. In the past few years there have been many studies claiming {GPUs} deliver substantial speedups (between 10X and 1000X) over multi-core {CPUs} on these kernels. To understand where such large performance difference comes from, we perform a rigorous performance analysis and find that after applying optimizations appropriate for both {CPUs} and {GPUs} the performance gap between an Nvidia {GTX}280 processor and the Intel Core i7-960 processor narrows to only 2.5x on average. In this paper, we discuss optimization techniques for both {CPU} and {GPU}, analyze what architecture features contributed to performance differences between the two architectures, and recommend a set of architectural features which provide significant improvement in architectural efficiency for throughput kernels.},
  file       = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\D64U9R8Q\\Lee et al. - 2010 - Debunking the 100X GPU vs. CPU myth an evaluation of throughput computing on CPU and GPU.pdf:application/pdf},
}
@inproceedings{kyung_implementation_2014,
  author     = {Kyung, Gyutaek and Jung, Changmin and Lee, Kwangyeob},
  title      = {An implementation of a {SIMT} architecture-based stream processor},
  booktitle  = {{TENCON} 2014 - 2014 {IEEE} Region 10 Conference},
  eventtitle = {{TENCON} 2014 - 2014 {IEEE} Region 10 Conference},
  pages      = {1--5},
  date       = {2014-10},
  doi        = {10.1109/TENCON.2014.7022313},
  url        = {https://ieeexplore.ieee.org/abstract/document/7022313},
  urldate    = {2025-03-08},
  abstract   = {In this paper, we designed a {SIMT} architecture-based stream processor for parallel processing in the mobile environment. The designed processor is a superscalar architecture and can issue up to four instructions. Considering the limited resources of the mobile environment, this processor was consisted of 16 stream processors ({SPs}). To verify the operation of the designed processor, a functional level simulation was conducted with the Modelsim {SE} 10.0b simulator. We synthesized on Virtex-7 {FPGA} as the target with the Xilinx {ISE} 14.7 tool and the results analyzed. The performance of the designed processor was 150M Triangles/Sec, 4.8 {GFLOPS} at 100 {MHz}. When the performance was compared with that of conventional processors, the proposed architecture of the processor attested to be effective in processing 3D graphics and parallel general-purpose computing in the mobile environment.},
  note       = {{ISSN}: 2159-3450},
  keywords   = {Graphics processing units, Computer architecture, Graphics, Registers, Educational institutions, {GPGPU}, Instruction sets, Mobile communication, {SIMT} Architecture, Stream Processor},
  file       = {IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\9B85REHH\\7022313.html:text/html},
}
@report{collange_stack-less_2011,
  author      = {Collange, Caroline},
  title       = {Stack-less {SIMT} reconvergence at low cost},
  type        = {Research Report},
  institution = {{ENS} Lyon},
  date        = {2011-09},
  url         = {https://hal.science/hal-00622654},
  abstract    = {Parallel architectures following the {SIMT} model such as {GPUs} benefit from application regularity by issuing concurrent threads running in lockstep on {SIMD} units. As threads take different paths across the control-flow graph, lockstep execution is partially lost, and must be regained whenever possible in order to maximize the occupancy of {SIMD} units. In this paper, we propose a technique to handle {SIMT} control divergence that operates in constant space and handles indirect jumps and recursion. We describe a possible implementation which leverage the existing memory divergence management unit, ensuring a low hardware cost. In terms of performance, this solution is at least as efficient as existing techniques.},
  keywords    = {{GPU}, {SIMD}, Control-flow reconvergence, {SIMT}},
  file        = {HAL PDF Full Text:C\:\\Users\\danwi\\Zotero\\storage\\M2WPWNXF\\Collange - 2011 - Stack-less SIMT reconvergence at low cost.pdf:application/pdf},
}
@inproceedings{fung_thread_2011,
title = {Thread block compaction for efficient {SIMT} control flow},
url = {https://ieeexplore.ieee.org/abstract/document/5749714},
doi = {10.1109/HPCA.2011.5749714},
abstract = {Manycore accelerators such as graphics processor units ({GPUs}) organize processing units into single-instruction, multiple data “cores” to improve throughput per unit hardware cost. Programming models for these accelerators encourage applications to run kernels with large groups of parallel scalar threads. The hardware groups these threads into warps/wavefronts and executes them in lockstep-dubbed single-instruction, multiple-thread ({SIMT}) by {NVIDIA}. While current {GPUs} employ a per-warp (or per-wavefront) stack to manage divergent control flow, it incurs decreased efficiency for applications with nested, data-dependent control flow. In this paper, we propose and evaluate the benefits of extending the sharing of resources in a block of warps, already used for scratchpad memory, to exploit control flow locality among threads (where such sharing may at first seem detrimental). In our proposal, warps within a thread block share a common block-wide stack for divergence handling. At a divergent branch, threads are compacted into new warps in hardware. Our simulation results show that this compaction mechanism provides an average speedup of 22\% over a baseline per-warp, stack-based reconvergence mechanism, and 17\% versus dynamic warp formation on a set of {CUDA} applications that suffer significantly from control flow divergence.},
eventtitle = {2011 {IEEE} 17th International Symposium on High Performance Computer Architecture},
pages = {25--36},
booktitle = {2011 {IEEE} 17th International Symposium on High Performance Computer Architecture},
author = {Fung, Wilson W. L. and Aamodt, Tor M.},
urldate = {2025-03-08},
date = {2011-02},
keywords = {Pipelines, Kernel, Graphics processing unit, Hardware, Instruction sets, Compaction, Random access memory},
file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\TRPWUTI6\\Fung und Aamodt - 2011 - Thread block compaction for efficient SIMT control flow.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\LYPYEA8U\\5749714.html:text/html},
}
@online{amd_hip_2025,
title = {{HIP} programming model — {HIP} 6.3.42134 Documentation},
url = {https://rocm.docs.amd.com/projects/HIP/en/latest/understand/programming_model.html#programming-model-simt},
author = {{AMD}},
urldate = {2025-03-09},
date = {2025-03},
file = {HIP programming model — HIP 6.3.42134 Documentation:C\:\\Users\\danwi\\Zotero\\storage\\6KRNU6PG\\programming_model.html:text/html},
}
@online{sutter_free_2004,
title = {The Free Lunch Is Over: A Fundamental Turn Toward Concurrency in Software},
url = {http://www.gotw.ca/publications/concurrency-ddj.htm},
author = {Sutter, Herb},
urldate = {2025-03-13},
date = {2004-12},
file = {Free_Lunch.pdf:C\:\\Users\\danwi\\Zotero\\storage\\ICE8KXP8\\Free_Lunch.pdf:application/pdf;The Free Lunch Is Over\: A Fundamental Turn Toward Concurrency in Software:C\:\\Users\\danwi\\Zotero\\storage\\UU2CZWUR\\concurrency-ddj.html:text/html},
}
@article{koza_genetic_1994,
title = {Genetic programming as a means for programming computers by natural selection},
volume = {4},
rights = {http://www.springer.com/tdm},
issn = {0960-3174, 1573-1375},
url = {http://link.springer.com/10.1007/BF00175355},
doi = {10.1007/BF00175355},
	pages = {87--112},
	number = {2},
journaltitle = {Statistics and Computing},
shortjournal = {Stat Comput},
	author = {Koza, John R.},
urldate = {2025-03-13},
date = {1994-06},
langid = {english},
file = {PDF:C\:\\Users\\danwi\\Zotero\\storage\\SAHSU45G\\Koza - 1994 - Genetic programming as a means for programming computers by natural selection.pdf:application/pdf},
}
@article{koza_human-competitive_2010,
title = {Human-competitive results produced by genetic programming},
volume = {11},
issn = {1389-2576, 1573-7632},
url = {http://link.springer.com/10.1007/s10710-010-9112-3},
doi = {10.1007/s10710-010-9112-3},
pages = {251--284},
number = {3},
journaltitle = {Genetic Programming and Evolvable Machines},
shortjournal = {Genet Program Evolvable Mach},
author = {Koza, John R.},
urldate = {2025-03-13},
date = {2010-09},
langid = {english},
file = {Full Text:C\:\\Users\\danwi\\Zotero\\storage\\Y32QERP5\\Koza - 2010 - Human-competitive results produced by genetic programming.pdf:application/pdf},
}
@misc{martius_extrapolation_2016,
title = {Extrapolation and learning equations},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/1610.02995},
doi = {10.48550/ARXIV.1610.02995},
abstract = {In classical machine learning, regression is treated as a black box process of identifying a suitable function from a hypothesis set without attempting to gain insight into the mechanism connecting inputs and outputs. In the natural sciences, however, finding an interpretable function for a phenomenon is the prime goal as it allows to understand and generalize results. This paper proposes a novel type of function learning network, called equation learner ({EQL}), that can learn analytical expressions and is able to extrapolate to unseen domains. It is implemented as an end-to-end differentiable feed-forward network and allows for efficient gradient based training. Due to sparsity regularization concise interpretable expressions can be obtained. Often the true underlying source expression is identified.},
publisher = {{arXiv}},
author = {Martius, Georg and Lampert, Christoph H.},
urldate = {2025-03-13},
date = {2016},
note = {Version Number: 1},
keywords = {68T05, 68T30, 68T40, 62J02, 65D15, Artificial Intelligence (cs.{AI}), {FOS}: Computer and information sciences, I.2.6; I.2.8, Machine Learning (cs.{LG})},
}
@misc{sahoo_learning_2018,
title = {Learning Equations for Extrapolation and Control},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/1806.07259},
doi = {10.48550/ARXIV.1806.07259},
abstract = {We present an approach to identify concise equations from data using a shallow neural network approach. In contrast to ordinary black-box regression, this approach allows understanding functional relations and generalizing them from observed data to unseen parts of the parameter space. We show how to extend the class of learnable equations for a recently proposed equation learning network to include divisions, and we improve the learning and model selection strategy to be useful for challenging real-world data. For systems governed by analytical expressions, our method can in many cases identify the true underlying equation and extrapolate to unseen domains. We demonstrate its effectiveness by experiments on a cart-pendulum system, where only 2 random rollouts are required to learn the forward dynamics and successfully achieve the swing-up task.},
publisher = {{arXiv}},
author = {Sahoo, Subham S. and Lampert, Christoph H. and Martius, Georg},
urldate = {2025-03-13},
date = {2018},
keywords = {{FOS}: Computer and information sciences, I.2.6; I.2.8, Machine Learning (cs.{LG}), 68T05, 68T30, 68T40, 62M20, 62J02, 65D15, 70E60, 93C40, Machine Learning (stat.{ML})},
}
@article{han_hicuda_2011,
title = {{hiCUDA}: High-Level {GPGPU} Programming},
volume = {22},
rights = {https://ieeexplore.ieee.org/Xplorehelp/downloads/license-information/{IEEE}.html},
issn = {1045-9219},
url = {http://ieeexplore.ieee.org/document/5445082/},
doi = {10.1109/TPDS.2010.62},
shorttitle = {{hiCUDA}},
pages = {78--90},
number = {1},
journaltitle = {{IEEE} Transactions on Parallel and Distributed Systems},
shortjournal = {{IEEE} Trans. Parallel Distrib. Syst.},
author = {Han, Tianyi David and Abdelrahman, Tarek S.},
urldate = {2025-03-13},
date = {2011-01},
file = {PDF:C\:\\Users\\danwi\\Zotero\\storage\\PTANK4EC\\Han and Abdelrahman - 2011 - hiCUDA High-Level GPGPU Programming.pdf:application/pdf},
}
@article{brodtkorb_graphics_2013,
title = {Graphics processing unit ({GPU}) programming strategies and trends in {GPU} computing},
volume = {73},
rights = {https://www.elsevier.com/tdm/userlicense/1.0/},
issn = {07437315},
url = {https://linkinghub.elsevier.com/retrieve/pii/S0743731512000998},
doi = {10.1016/j.jpdc.2012.04.003},
pages = {4--13},
number = {1},
journaltitle = {Journal of Parallel and Distributed Computing},
shortjournal = {Journal of Parallel and Distributed Computing},
author = {Brodtkorb, André R. and Hagen, Trond R. and Sætra, Martin L.},
date = {2013-01},
langid = {english},
file = {Full Text:C\:\\Users\\danwi\\Zotero\\storage\\GZVCZUFG\\Brodtkorb et al. - 2013 - Graphics processing unit (GPU) programming strategies and trends in GPU computing.pdf:application/pdf},
}
@inproceedings{hissbach_overview_2022,
title = {An Overview of Techniques for Egocentric Black Hole Visualization and Their Suitability for Planetarium Applications},
isbn = {978-3-03868-189-2},
doi = {10.2312/vmv.20221207},
booktitle = {Vision, Modeling, and Visualization},
publisher = {The Eurographics Association},
author = {Hissbach, Anny-Marleen and Dick, Christian and Lawonn, Kai},
editor = {Bender, Jan and Botsch, Mario and Keim, Daniel A.},
date = {2022},
file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\TBBLEZ5N\\Hissbach et al. - 2022 - An Overview of Techniques for Egocentric Black Hole Visualization and Their Suitability for Planetar.pdf:application/pdf},
}
@inbook{guillemot_climate_2022,
edition = {1},
title = {Climate Models},
isbn = {978-1-009-08209-9 978-1-316-51427-6},
url = {https://www.cambridge.org/core/product/identifier/9781009082099%23CN-bp-14/type/book_part},
pages = {126--136},
booktitle = {A Critical Assessment of the Intergovernmental Panel on Climate Change},
publisher = {Cambridge University Press},
author = {Guillemot, Hélène},
	editor = {De Pryck, Kari and Hulme, Mike},
urldate = {2025-03-14},
date = {2022-12-31},
file = {Full Text:C\:\\Users\\danwi\\Zotero\\storage\\MUKXXCV9\\Guillemot - 2022 - Climate Models.pdf:application/pdf},
}
@inproceedings{bomarito_bayesian_2022,
location = {Boston Massachusetts},
title = {Bayesian model selection for reducing bloat and overfitting in genetic programming for symbolic regression},
isbn = {978-1-4503-9268-6},
url = {https://dl.acm.org/doi/10.1145/3520304.3528899},
doi = {10.1145/3520304.3528899},
eventtitle = {{GECCO} '22: Genetic and Evolutionary Computation Conference},
pages = {526--529},
booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference Companion},
publisher = {{ACM}},
author = {Bomarito, G. F. and Leser, P. E. and Strauss, N. C. M. and Garbrecht, K. M. and Hochhalter, J. D.},
urldate = {2025-03-14},
date = {2022-07-09},
langid = {english},
file = {PDF:C\:\\Users\\danwi\\Zotero\\storage\\ZPS5ZYYQ\\Bomarito et al. - 2022 - Bayesian model selection for reducing bloat and overfitting in genetic programming for symbolic regr.pdf:application/pdf},
}
@misc{dabhi_survey_2012,
title = {A Survey on Techniques of Improving Generalization Ability of Genetic Programming Solutions},
rights = {{arXiv}.org perpetual, non-exclusive license},
url = {https://arxiv.org/abs/1211.1119},
doi = {10.48550/ARXIV.1211.1119},
abstract = {In the field of empirical modeling using Genetic Programming ({GP}), it is important to evolve solution with good generalization ability. Generalization ability of {GP} solutions get affected by two important issues: bloat and over-fitting. We surveyed and classified existing literature related to different techniques used by {GP} research community to deal with these issues. We also point out limitation of these techniques, if any. Moreover, the classification of different bloat control approaches and measures for bloat and over-fitting are also discussed. We believe that this work will be useful to {GP} practitioners in following ways: (i) to better understand concepts of generalization in {GP} (ii) comparing existing bloat and over-fitting control techniques and (iii) selecting appropriate approach to improve generalization ability of {GP} evolved solutions.},
author = {Dabhi, Vipul K. and Chaudhary, Sanjay},
urldate = {2025-03-14},
date = {2012},
keywords = {{FOS}: Computer and information sciences, Neural and Evolutionary Computing (cs.{NE})},
file = {PDF:C\:\\Users\\danwi\\Zotero\\storage\\JCULR888\\Dabhi and Chaudhary - 2012 - A Survey on Techniques of Improving Generalization Ability of Genetic Programming Solutions.pdf:application/pdf},
}
@book{kronberger_symbolic_2024,
title = {Symbolic Regression},
isbn = {978-1-315-16640-7},
url = {http://dx.doi.org/10.1201/9781315166407},
pagetotal = {308},
publisher = {Chapman and Hall/{CRC}},
author = {Kronberger, Gabriel and Burlacu, Bogdan and Kommenda, Michael and Winkler, Stephan M. and Affenzeller, Michael},
date = {2024-07},
file = {PDF:C\:\\Users\\danwi\\Zotero\\storage\\43RPG26H\\Kronberger et al. - 2024 - Symbolic Regression.pdf:application/pdf},
}
@misc{sun_symbolic_2023,
title = {Symbolic Physics Learner: Discovering governing equations via Monte Carlo tree search},
url = {http://arxiv.org/abs/2205.13134},
doi = {10.48550/arXiv.2205.13134},
shorttitle = {Symbolic Physics Learner},
abstract = {Nonlinear dynamics is ubiquitous in nature and commonly seen in various science and engineering disciplines. Distilling analytical expressions that govern nonlinear dynamics from limited data remains vital but challenging. To tackle this fundamental issue, we propose a novel Symbolic Physics Learner ({SPL}) machine to discover the mathematical structure of nonlinear dynamics. The key concept is to interpret mathematical operations and system state variables by computational rules and symbols, establish symbolic reasoning of mathematical formulas via expression trees, and employ a Monte Carlo tree search ({MCTS}) agent to explore optimal expression trees based on measurement data. The {MCTS} agent obtains an optimistic selection policy through the traversal of expression trees, featuring the one that maps to the arithmetic expression of underlying physics. Salient features of the proposed framework include search flexibility and enforcement of parsimony for discovered equations. The efficacy and superiority of the {SPL} machine are demonstrated by numerical examples, compared with state-of-the-art baselines.},
number = {{arXiv}:2205.13134},
publisher = {{arXiv}},
author = {Sun, Fangzheng and Liu, Yang and Wang, Jian-Xun and Sun, Hao},
urldate = {2025-03-14},
date = {2023-02-02},
keywords = {Computer Science - Artificial Intelligence, Computer Science - Machine Learning, Computer Science - Symbolic Computation, Nonlinear Sciences - Chaotic Dynamics, Physics - Computational Physics},
file = {Preprint PDF:C\:\\Users\\danwi\\Zotero\\storage\\YBXYH5D6\\Sun et al. - 2023 - Symbolic Physics Learner Discovering governing equations via Monte Carlo tree search.pdf:application/pdf;Snapshot:C\:\\Users\\danwi\\Zotero\\storage\\D9SDYVT3\\2205.html:text/html},
}
@article{makke_interpretable_2024,
title = {Interpretable scientific discovery with symbolic regression: a review},
volume = {57},
issn = {1573-7462},
url = {https://doi.org/10.1007/s10462-023-10622-0},
doi = {10.1007/s10462-023-10622-0},
shorttitle = {Interpretable scientific discovery with symbolic regression},
abstract = {Symbolic regression is emerging as a promising machine learning method for learning succinct underlying interpretable mathematical expressions directly from data. Whereas it has been traditionally tackled with genetic programming, it has recently gained a growing interest in deep learning as a data-driven model discovery tool, achieving significant advances in various application domains ranging from fundamental to applied sciences. In this survey, we present a structured and comprehensive overview of symbolic regression methods, review the adoption of these methods for model discovery in various areas, and assess their effectiveness. We have also grouped state-of-the-art symbolic regression applications in a categorized manner in a living review.},
pages = {2},
number = {1},
journaltitle = {Artificial Intelligence Review},
shortjournal = {Artif Intell Rev},
author = {Makke, Nour and Chawla, Sanjay},
urldate = {2025-03-14},
date = {2024-01-02},
langid = {english},
keywords = {Artificial Intelligence, Automated Scientific Discovery, Interpretable {AI}, Symbolic Regression},
file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\7PFYYUJZ\\Makke and Chawla - 2024 - Interpretable scientific discovery with symbolic regression a review.pdf:application/pdf},
}
@misc{lemos_rediscovering_2022,
title = {Rediscovering orbital mechanics with machine learning},
url = {http://arxiv.org/abs/2202.02306},
doi = {10.48550/arXiv.2202.02306},
abstract = {We present an approach for using machine learning to automatically discover the governing equations and hidden properties of real physical systems from observations. We train a "graph neural network" to simulate the dynamics of our solar system's Sun, planets, and large moons from 30 years of trajectory data. We then use symbolic regression to discover an analytical expression for the force law implicitly learned by the neural network, which our results showed is equivalent to Newton's law of gravitation. The key assumptions that were required were translational and rotational equivariance, and Newton's second and third laws of motion. Our approach correctly discovered the form of the symbolic force law. Furthermore, our approach did not require any assumptions about the masses of planets and moons or physical constants. They, too, were accurately inferred through our methods. Though, of course, the classical law of gravitation has been known since Isaac Newton, our result serves as a validation that our method can discover unknown laws and hidden properties from observed data. More broadly this work represents a key step toward realizing the potential of machine learning for accelerating scientific discovery.},
number = {{arXiv}:2202.02306},
publisher = {{arXiv}},
author = {Lemos, Pablo and Jeffrey, Niall and Cranmer, Miles and Ho, Shirley and Battaglia, Peter},
urldate = {2025-03-14},
date = {2022-02-04},
keywords = {Astrophysics - Earth and Planetary Astrophysics, Astrophysics - Instrumentation and Methods for Astrophysics, Computer Science - Machine Learning},
file = {Preprint PDF:C\:\\Users\\danwi\\Zotero\\storage\\9YPFHHRY\\Lemos et al. - 2022 - Rediscovering orbital mechanics with machine learning.pdf:application/pdf;Snapshot:C\:\\Users\\danwi\\Zotero\\storage\\YIFHYWCY\\2202.html:text/html},
}