relwork: finished relwork

2025-03-21 14:35:55 +01:00
parent db3ea32b66
commit a718a3572e
6 changed files with 323 additions and 31 deletions
--- a/thesis/references.bib
+++ b/thesis/references.bib
@ -231,11 +231,8 @@
 	number = {20},
 	journaltitle = {Applied Sciences},
 	author = {Bastidas Fuertes, Andrés and Pérez, María and Meza, Jaime},
-	urldate = {2025-01-03},
-	date = {2023-01},
+	date = {2023-10},
 	langid = {english},
-	note = {Number: 20
-Publisher: Multidisciplinary Digital Publishing Institute},
 	keywords = {back-end layers, design model, software architecture, software development, source-to-source transformations, transpiler},
 	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\AD55DPJ4\\Bastidas Fuertes et al. - 2023 - Transpiler-Based Architecture Design Model for Back-End Layers in Software Development.pdf:application/pdf},
 }
@ -252,10 +249,8 @@ Publisher: Multidisciplinary Digital Publishing Institute},
 	publisher = {Springer International Publishing},
 	author = {Adam, Stavros P. and Alexandropoulos, Stamatios-Aggelos N. and Pardalos, Panos M. and Vrahatis, Michael N.},
 	editor = {Demetriou, Ioannis C. and Pardalos, Panos M.},
-	urldate = {2025-02-14},
 	date = {2019},
 	langid = {english},
-	doi = {10.1007/978-3-030-12767-1_5},
 }

@inproceedings{michalakes_gpu_2008,
@ -550,7 +545,6 @@ Publisher: Multidisciplinary Digital Publishing Institute},
 	author = {Kyung, Gyutaek and Jung, Changmin and Lee, Kwangyeob},
 	urldate = {2025-03-08},
 	date = {2014-10},
-	note = {{ISSN}: 2159-3450},
 	keywords = {Graphics processing units, Computer architecture, Graphics, Registers, Educational institutions, {GPGPU}, Instruction sets, Mobile communication, {SIMT} Architecture, Stream Processor},
 	file = {IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\9B85REHH\\7022313.html:text/html},
 }
@ -576,7 +570,6 @@ Publisher: Multidisciplinary Digital Publishing Institute},
 	pages = {25--36},
 	booktitle = {2011 {IEEE} 17th International Symposium on High Performance Computer Architecture},
 	author = {Fung, Wilson W. L. and Aamodt, Tor M.},
-	urldate = {2025-03-08},
 	date = {2011-02},
 	keywords = {Pipelines, Kernel, Graphics processing unit, Hardware, Instruction sets, Compaction, Random access memory},
 	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\TRPWUTI6\\Fung und Aamodt - 2011 - Thread block compaction for efficient SIMT control flow.pdf:application/pdf;IEEE Xplore Abstract Record:C\:\\Users\\danwi\\Zotero\\storage\\LYPYEA8U\\5749714.html:text/html},
@ -644,7 +637,6 @@ Publisher: Multidisciplinary Digital Publishing Institute},
 	author = {Martius, Georg and Lampert, Christoph H.},
 	urldate = {2025-03-13},
 	date = {2016},
-	note = {Version Number: 1},
 	keywords = {68T05, 68T30, 68T40, 62J02, 65D15, Artificial Intelligence (cs.{AI}), {FOS}: Computer and information sciences, I.2.6; I.2.8, Machine Learning (cs.{LG})},
 }

@ -734,7 +726,6 @@ Publisher: Multidisciplinary Digital Publishing Institute},
 	booktitle = {Proceedings of the Genetic and Evolutionary Computation Conference Companion},
 	publisher = {{ACM}},
 	author = {Bomarito, G. F. and Leser, P. E. and Strauss, N. C. M. and Garbrecht, K. M. and Hochhalter, J. D.},
-	urldate = {2025-03-14},
 	date = {2022-07-09},
 	langid = {english},
 	file = {PDF:C\:\\Users\\danwi\\Zotero\\storage\\ZPS5ZYYQ\\Bomarito et al. - 2022 - Bayesian model selection for reducing bloat and overfitting in genetic programming for symbolic regr.pdf:application/pdf},
@ -747,7 +738,6 @@ Publisher: Multidisciplinary Digital Publishing Institute},
 	doi = {10.48550/ARXIV.1211.1119},
 	abstract = {In the field of empirical modeling using Genetic Programming ({GP}), it is important to evolve solution with good generalization ability. Generalization ability of {GP} solutions get affected by two important issues: bloat and over-fitting. We surveyed and classified existing literature related to different techniques used by {GP} research community to deal with these issues. We also point out limitation of these techniques, if any. Moreover, the classification of different bloat control approaches and measures for bloat and over-fitting are also discussed. We believe that this work will be useful to {GP} practitioners in following ways: (i) to better understand concepts of generalization in {GP} (ii) comparing existing bloat and over-fitting control techniques and (iii) selecting appropriate approach to improve generalization ability of {GP} evolved solutions.},
 	author = {Dabhi, Vipul K. and Chaudhary, Sanjay},
-	urldate = {2025-03-14},
 	date = {2012},
 	keywords = {{FOS}: Computer and information sciences, Neural and Evolutionary Computing (cs.{NE})},
 	file = {PDF:C\:\\Users\\danwi\\Zotero\\storage\\JCULR888\\Dabhi and Chaudhary - 2012 - A Survey on Techniques of Improving Generalization Ability of Genetic Programming Solutions.pdf:application/pdf},
@ -878,7 +868,6 @@ Publisher: Multidisciplinary Digital Publishing Institute},
 	url = {http://dx.doi.org/10.1016/C2014-0-01395-0},
 	publisher = {Elsevier},
 	author = {Cooper, Keith D. and Torczon, Linda},
-	urldate = {2025-03-18},
 	date = {2022},
 	langid = {english},
 }
@ -910,9 +899,8 @@ Publisher: Multidisciplinary Digital Publishing Institute},
 	journaltitle = {Journal of Parallel and Distributed Computing},
 	shortjournal = {Journal of Parallel and Distributed Computing},
 	author = {Khairy, Mahmoud and Wassal, Amr G. and Zahran, Mohamed},
-	urldate = {2025-03-20},
 	date = {2019-05-01},
-	keywords = {Control divergence, {GPGPU}, Heterogeneous architecture, Memory systems},
+	keywords = {{GPGPU}, Control divergence, Heterogeneous architecture, Memory systems},
 	file = {PDF:C\:\\Users\\danwi\\Zotero\\storage\\FQJC5EUT\\Khairy et al. - 2019 - A survey of architectural approaches for improving GPGPU performance, programmability and heterogene.pdf:application/pdf},
 }

@ -947,3 +935,225 @@ Publisher: Multidisciplinary Digital Publishing Institute},
 	author = {Johnson, Stephen C},
 	date = {1975},
 }
+
+@article{bastidas_fuertes_transpilers_2023,
+	title = {Transpilers: A Systematic Mapping Review of Their Usage in Research and Industry},
+	volume = {13},
+	rights = {http://creativecommons.org/licenses/by/3.0/},
+	issn = {2076-3417},
+	url = {https://www.mdpi.com/2076-3417/13/6/3667},
+	doi = {10.3390/app13063667},
+	shorttitle = {Transpilers},
+	abstract = {Transpilers refer to a special type of compilation that takes source code and translates it into target source code. This type of technique has been used for different types of implementations in scientific studies. A review of the research areas related to the use of transpilers allows the understanding of the direction in this branch of knowledge. The objective was to carry out an exhaustive and extended mapping of the usage and implementation of transpilers in research studies in the last 10 years. A systematic mapping review was carried out for answering the 5 research questions proposed. The {PSALSAR} method is used as a guide to the steps needed for the review. In total, from 1181 articles collected, 683 primary studies were selected, reviewed, and analyzed. Proposals from the industry were also analyzed. A new method for automatic data tabulation has been proposed for the mapping objective, using a relational database and {SQL} language. It was identified that the most common uses of transpilers are related to performance optimizations, parallel programming, embedded systems, compilers, testing, {AI}, graphics, and software development. In conclusion, it was possible to determine the extent and identification of research sub-areas and their impact on the usage of the transpilers. Future research could be considered about the usage of transpilers in transactional software, migration strategies for legacy systems, {AI}, math, multiplatform games and apps, automatic source code generation, and networking.},
+	pages = {3667},
+	number = {6},
+	journaltitle = {Applied Sciences},
+	author = {Bastidas Fuertes, Andrés and Pérez, María and Meza Hormaza, Jaime},
+	date = {2023-03},
+	langid = {english},
+	keywords = {cross compiler, software architecture, source-to-source compiler, systematic literature review, transcompiler, transpiler},
+	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\7IRRHZJG\\Bastidas Fuertes et al. - 2023 - Transpilers A Systematic Mapping Review of Their Usage in Research and Industry.pdf:application/pdf},
+}
+
+@online{microsoft_typescript_2025,
+	title = {{TypeScript}: The starting point for learning {TypeScript}},
+	url = {https://www.typescriptlang.org/docs/handbook/intro.html},
+	author = {{Microsoft}},
+	urldate = {2025-03-21},
+	date = {2025-03},
+}
+
+@inproceedings{ling_rust_2022,
+	location = {New York, {NY}, {USA}},
+	title = {In {Rust} we trust: a transpiler from unsafe {C} to safer {Rust}},
+	isbn = {978-1-4503-9223-5},
+	url = {https://dl.acm.org/doi/10.1145/3510454.3528640},
+	doi = {10.1145/3510454.3528640},
+	series = {{ICSE} '22},
+	shorttitle = {In {Rust} we trust},
+	abstract = {Rust is a type-safe system programming language with a compiler checking memory and concurrency safety. For a smooth transition from existing C projects, a source-to-source transpiler can autotransform C programs into Rust using program transformation. However, existing C-to-Rust transformation tools (e.g. the open-source C2Rust transpiler1 project) have the drawback of preserving the unsafe semantics of C, while rewriting them in Rust's syntax. The work by Emre et el. [2] acknowledged these drawbacks, and used rustc compiler feedback to refactor one certain type of raw pointers to Rust references to improve overall safety and idiomaticness of C2Rust output. Focusing on improving {API}-safeness (i.e. lowering unsafe keyword usage in function signatures), we apply source-to-source transformation technique to auto-refactor C2Rust output using code structure pattern matching and transformation, which does not rely on rustc compiler feedback. And by relaxing the semantics-preserving constraints of transformations, we present {CRustS}2 a fully-automated source-to-source transformation approach that increases the ratio of the transformed code passing the safety checks of the rustc compiler. Our method uses 220 new {TXL} [1] source-to-source transformation rules, of which 198 are strictly semantics-preserving and 22 are semantics-approximating, thus reducing the scope of unsafe expressions and exposing more opportunities for safe Rust refactoring. Our method has been evaluated on both open-source and commercial C projects, and demonstrates significantly higher safe code ratios after the transformations, with function-level safe code ratios comparable to the average level of idiomatic Rust projects.},
+	pages = {354--355},
+	booktitle = {Proceedings of the {ACM}/{IEEE} 44th International Conference on Software Engineering: Companion Proceedings},
+	publisher = {Association for Computing Machinery},
+	author = {Ling, Michael and Yu, Yijun and Wu, Haitao and Wang, Yuan and Cordy, James R. and Hassan, Ahmed E.},
+	urldate = {2025-03-21},
+	date = {2022-10-19},
+	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\RC6EAG9X\\Ling et al. - 2022 - In rust we trust a transpiler from unsafe C to safer rust.pdf:application/pdf},
+}
+
+@online{marcelino_transpiling_2022,
+	title = {Transpiling {Python} to {Julia} using {PyJL}},
+	rights = {Creative Commons Attribution 4.0 International},
+	url = {https://zenodo.org/record/6332890},
+	doi = {10.5281/ZENODO.6332890},
+	author = {Marcelino, Miguel and Leitão, António Menezes},
+	date = {2022},
+	keywords = {Automatic Transpilation, Julia, Library Translation, Python, Source-to-Source Compiler},
+	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\MD8RTI6D\\Marcelino and Leitão - Transpiling Python to Julia using PyJL.pdf:application/pdf},
+}
+
+@online{intel_mcs86_1978,
+	title = {{MCS}·86 Assembly Language Converter Operating Instructions For {ISIS}·{II} Users},
+	url = {http://www.bitsavers.org/pdf/intel/ISIS_II/9800642A_MCS-86_Assembly_Language_Converter_Operating_Instructions_for_ISIS-II_Users_Mar79.pdf},
+	author = {{Intel}},
+	urldate = {2025-03-21},
+	date = {1978},
+	note = {Technical Report},
+	file = {http\://www.bitsavers.org/pdf/intel/ISIS_II/9800642A_MCS-86_Assembly_Language_Converter_Operating_Instructions_for_ISIS-II_Users_Mar79.pdf:C\:\\Users\\danwi\\Zotero\\storage\\N63NW3B5\\9800642A_MCS-86_Assembly_Language_Converter_Operating_Instructions_for_ISIS-II_Users_Mar79.pdf:application/pdf},
+}
+
+@article{wang_automatic_2015,
+	title = {Automatic scoping of task clauses for the {OpenMP} tasking model},
+	volume = {71},
+	issn = {1573-0484},
+	url = {https://doi.org/10.1007/s11227-014-1326-3},
+	doi = {10.1007/s11227-014-1326-3},
+	abstract = {{OpenMP} provides an easy-to-learn and powerful programming environment for the development of parallel programs. We propose here an algorithm for the automatic correction of the {OpenMP} tasking model. Assuming a compiler or programmer has identified task regions in the source programs, the proposed algorithm will automatically generate correct task clauses and synchronization. The proposed algorithm is implemented here based on the {ROSE} compiler infrastructure; 14 benchmark programs are tested, each of which has had all clauses in the task directives removed for the evaluation. The results of this experimental evaluation show that the proposed technique can successfully generate correct clauses for the tested benchmark programs. The proposed technique can simplify the parallelizing of programs using the {OpenMP} tasking model, making parallel programming more effective and productive.},
+	pages = {808--823},
+	number = {3},
+	journaltitle = {The Journal of Supercomputing},
+	shortjournal = {J Supercomput},
+	author = {Wang, Chun-Kun and Chen, Peng-Sheng},
+	urldate = {2025-03-21},
+	date = {2015-03-01},
+	langid = {english},
+	keywords = {{OpenMP}, Parallelization, Tasking model, Validation},
+	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\65ARV27L\\Wang and Chen - 2015 - Automatic scoping of task clauses for the OpenMP tasking model.pdf:application/pdf},
+}
+
+@inproceedings{chaber_effectiveness_2016,
+	title = {Effectiveness of {PID} and {DMC} control algorithms automatic code generation for microcontrollers: Application to a thermal process},
+	url = {https://ieeexplore.ieee.org/document/7739817},
+	doi = {10.1109/SYSTOL.2016.7739817},
+	shorttitle = {Effectiveness of {PID} and {DMC} control algorithms automatic code generation for microcontrollers},
+	abstract = {An effective approach to implement control algorithms using code auto-generation is presented. Using {MATLAB} and C languages as input, an optimised pure C code is generated using a custom transcompiler. The considered solution is focused on microcontrollers from the {STM}32 family but any other can be used due to flexibility of the presented system. Controller development for a laboratory thermal process is thoroughly described, {PID} and {DMC} algorithms are used. Electronic connection between microcontroller and the process is discussed. Results of the experiments are reported.},
+	eventtitle = {2016 3rd Conference on Control and Fault-Tolerant Systems ({SysTol})},
+	pages = {618--623},
+	booktitle = {2016 3rd Conference on Control and Fault-Tolerant Systems ({SysTol})},
+	author = {Chaber, Patryk and Ławryńczuk, Maciej},
+	urldate = {2025-03-21},
+	date = {2016-09},
+	keywords = {Fans, Hardware, Heating, {MATLAB}, Microcontrollers, Process control, Standards},
+}
+
+@inproceedings{moses_high-performance_2023,
+	location = {New York, {NY}, {USA}},
+	title = {High-Performance {GPU}-to-{CPU} Transpilation and Optimization via High-Level Parallel Constructs},
+	isbn = {979-8-4007-0015-6},
+	url = {https://dl.acm.org/doi/10.1145/3572848.3577475},
+	doi = {10.1145/3572848.3577475},
+	series = {{PPoPP} '23},
+	abstract = {While parallelism remains the main source of performance, architectural implementations and programming models change with each new hardware generation, often leading to costly application re-engineering. Most tools for performance portability require manual and costly application porting to yet another programming model. We propose an alternative approach that automatically translates programs written in one programming model ({CUDA}), into another ({CPU} threads) based on Polygeist/{MLIR}. Our approach includes a representation of parallel constructs that allows conventional compiler transformations to apply transparently and without modification and enables parallelism-specific optimizations. We evaluate our framework by transpiling and optimizing the {CUDA} Rodinia benchmark suite for a multi-core {CPU} and achieve a 58\% geomean speedup over handwritten {OpenMP} code. Further, we show how {CUDA} kernels from {PyTorch} can efficiently run and scale on the {CPU}-only Supercomputer Fugaku without user intervention. Our {PyTorch} compatibility layer making use of transpiled {CUDA} {PyTorch} kernels outperforms the {PyTorch} {CPU} native backend by 2.7×.},
+	pages = {119--134},
+	booktitle = {Proceedings of the 28th {ACM} {SIGPLAN} Annual Symposium on Principles and Practice of Parallel Programming},
+	publisher = {Association for Computing Machinery},
+	author = {Moses, William S. and Ivanov, Ivan R. and Domke, Jens and Endo, Toshio and Doerfert, Johannes and Zinenko, Oleksandr},
+	date = {2023-02-21},
+	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\5I8STUQP\\Moses et al. - 2023 - High-Performance GPU-to-CPU Transpilation and Optimization via High-Level Parallel Constructs.pdf:application/pdf},
+}
+
+@inproceedings{lin_rtl_2023,
+	location = {New York, {NY}, {USA}},
+	title = {From {RTL} to {CUDA}: A {GPU} Acceleration Flow for {RTL} Simulation with Batch Stimulus},
+	isbn = {978-1-4503-9733-9},
+	url = {https://dl.acm.org/doi/10.1145/3545008.3545091},
+	doi = {10.1145/3545008.3545091},
+	series = {{ICPP} '22},
+	shorttitle = {From {RTL} to {CUDA}},
+	abstract = {High-throughput {RTL} simulation is critical for verifying today’s highly complex {SoCs}. Recent research has explored accelerating {RTL} simulation by leveraging event-driven approaches or partitioning heuristics to speed up simulation on a single stimulus. To further accelerate throughput performance, industry-quality functional verification signoff must explore running multiple stimulus (i.e., batch stimulus) simultaneously, either with directed tests or random inputs. In this paper, we propose {RTLFlow}, a {GPU}-accelerated {RTL} simulation flow with batch stimulus. {RTLflow} first transpiles {RTL} into {CUDA} kernels that each simulates a partition of the {RTL} simultaneously across multiple stimulus. It also leverages {CUDA} Graph and pipeline scheduling for efficient runtime execution. Measuring experimental results on a large industrial design ({NVDLA}) with 65536 stimulus, we show that {RTLflow} running on a single A6000 {GPU} can achieve a 40 × runtime speed-up when compared to an 80-thread multi-core {CPU} baseline.},
+	pages = {1--12},
+	booktitle = {Proceedings of the 51st International Conference on Parallel Processing},
+	publisher = {Association for Computing Machinery},
+	author = {Lin, Dian-Lun and Ren, Haoxing and Zhang, Yanqing and Khailany, Brucek and Huang, Tsung-Wei},
+	date = {2023-01-13},
+	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\U36JRJA8\\Lin et al. - 2023 - From RTL to CUDA A GPU Acceleration Flow for RTL Simulation with Batch Stimulus.pdf:application/pdf},
+}
+
+@book{wang_electronic_2009,
+	title = {Electronic Design Automation: Synthesis, Verification, and Test},
+	isbn = {978-0-08-092200-3},
+	shorttitle = {Electronic Design Automation},
+	abstract = {This book provides broad and comprehensive coverage of the entire {EDA} flow. {EDA}/{VLSI} practitioners and researchers in need of fluency in an "adjacent" field will find this an invaluable reference to the basic {EDA} concepts, principles, data structures, algorithms, and architectures for the design, verification, and test of {VLSI} circuits. Anyone who needs to learn the concepts, principles, data structures, algorithms, and architectures of the {EDA} flow will benefit from this book. - Covers complete spectrum of the {EDA} flow, from {ESL} design modeling to logic/test synthesis, verification, physical design, and test - helps {EDA} newcomers to get "up-and-running" quickly - Includes comprehensive coverage of {EDA} concepts, principles, data structures, algorithms, and architectures - helps all readers improve their {VLSI} design competence - Contains latest advancements not yet available in other books, including Test compression, {ESL} design modeling, large-scale floorplanning, placement, routing, synthesis of clock and power/ground networks - helps readers to design/develop testable chips or products - Includes industry best-practices wherever appropriate in most chapters - helps readers avoid costly mistakes},
+	pagetotal = {971},
+	publisher = {Morgan Kaufmann},
+	author = {Wang, Laung-Terng and Chang, Yao-Wen and Cheng, Kwang-Ting},
+	date = {2009-03-11},
+	langid = {english},
+	keywords = {Computers / Computer Science, Technology \& Engineering / Industrial Design / Product},
+}
+
+@inproceedings{zhang_opportunities_2020,
+	location = {New York, {NY}, {USA}},
+	title = {Opportunities for {RTL} and gate level simulation using {GPUs}},
+	isbn = {978-1-4503-8026-3},
+	url = {https://dl.acm.org/doi/10.1145/3400302.3415773},
+	doi = {10.1145/3400302.3415773},
+	series = {{ICCAD} '20},
+	abstract = {This paper summarizes the opportunities in accelerating simulation on parallel processing hardware platforms such as {GPUs}. First, we give a summary of prior art. Then, we propose the idea that coding frameworks usually used for popular machine learning ({ML}) topics, such as {PyTorch}/{DGL}.ai, can also be used for exploring simulation purposes. We demo a crude oblivious two-value cycle gate-level simulator using the higher level {ML} framework {APIs} that exhibits $>$20X speedup, despite its simplistic construction. Next, we summarize recent advances in {GPU} features that may provide additional opportunities to further state-of-the-art results. Finally, we conclude and touch upon some potential areas for furthering research into the topic of {GPU} accelerated simulation.},
+	pages = {1--5},
+	booktitle = {Proceedings of the 39th International Conference on Computer-Aided Design},
+	publisher = {Association for Computing Machinery},
+	author = {Zhang, Yanqing and Ren, Haoxing and Khailany, Brucek},
+	urldate = {2025-03-21},
+	date = {2020-12-17},
+	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\6JZSGT83\\Zhang et al. - 2020 - Opportunities for RTL and gate level simulation using GPUs.pdf:application/pdf},
+}
+
+@article{romer_structure_1996,
+	title = {The structure and performance of interpreters},
+	volume = {31},
+	issn = {0362-1340},
+	url = {https://dl.acm.org/doi/10.1145/248209.237175},
+	doi = {10.1145/248209.237175},
+	abstract = {Interpreted languages have become increasingly popular due to demands for rapid program development, ease of use, portability, and safety. Beyond the general impression that they are "slow," however, little has been documented about the performance of interpreters as a class of applications. This paper examines interpreter performance by measuring and analyzing interpreters from both software and hardware perspectives. As examples, we measure the {MIPSI}, Java, Perl, and Tcl interpreters running an array of micro and macro benchmarks on a {DEC} Alpha platform. Our measurements of these interpreters relate performance to the complexity of the interpreter's virtual machine and demonstrate that native runtime libraries can play a key role in providing good performance. From an architectural perspective, we show that interpreter performance is primarily a function of the interpreter itself and is relatively independent of the application being interpreted. We also demonstrate that high-level interpreters' demands on processor resources are comparable to those of other complex compiled programs, such as gcc. We conclude that interpreters, as a class of applications, do not currently motivate special hardware support for increased performance.},
+	pages = {150--159},
+	number = {9},
+	journaltitle = {{ACM} {SIGPLAN} Notices},
+	author = {Romer, Theodore H. and Lee, Dennis and Voelker, Geoffrey M. and Wolman, Alec and Wong, Wayne A. and Baer, Jean-Loup and Bershad, Brian N. and Levy, Henry M.},
+	urldate = {2025-03-21},
+	date = {1996-09-01},
+	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\76EU5U2P\\Romer et al. - 1996 - The structure and performance of interpreters.pdf:application/pdf},
+}
+
+@misc{fua_comparing_2020,
+	title = {Comparing {Python}, {Go}, and {C++} on the {N-Queens} Problem},
+	url = {http://arxiv.org/abs/2001.02491},
+	doi = {10.48550/arXiv.2001.02491},
+	abstract = {Python currently is the dominant language in the field of Machine Learning but is often criticized for being slow to perform certain tasks. In this report, we use the well-known $N$-queens puzzle as a benchmark to show that once compiled using the Numba compiler it becomes competitive with C++ and Go in terms of execution speed while still allowing for very fast prototyping. This is true of both sequential and parallel programs. In most cases that arise in an academic environment, it therefore makes sense to develop in ordinary Python, identify computational bottlenecks, and use Numba to remove them.},
+	eprint = {2001.02491},
+	eprinttype = {arXiv},
+	author = {Fua, Pascal and Lis, Krzysztof},
+	date = {2020-01-08},
+	keywords = {Computer Science - Mathematical Software},
+	file = {Preprint PDF:C\:\\Users\\danwi\\Zotero\\storage\\WZRCTXMG\\Fua and Lis - 2020 - Comparing Python, Go, and C++ on the N-Queens Problem.pdf:application/pdf},
+}
+
+@inproceedings{gherardi_java_2012,
+	location = {Berlin, Heidelberg},
+	title = {A {Java} vs. {C++} Performance Evaluation: A {3D} Modeling Benchmark},
+	isbn = {978-3-642-34327-8},
+	doi = {10.1007/978-3-642-34327-8_17},
+	shorttitle = {A {Java} vs. {C++} Performance Evaluation},
+	abstract = {Along the years robotics software and applications have been typically implemented in compiled languages, such as C and C++, rather than interpreted languages, like Java. This choice has been due to their well-known faster behaviors, which meet the high performance requirements of robotics. Nevertheless, several projects that implement robotics functionality in Java can be found in literature and different experiments conduced by computer scientists have proved that the difference between Java and C++ is not so evident.},
+	pages = {161--172},
+	booktitle = {Simulation, Modeling, and Programming for Autonomous Robots},
+	publisher = {Springer},
+	author = {Gherardi, Luca and Brugali, Davide and Comotti, Daniele},
+	editor = {Noda, Itsuki and Ando, Noriaki and Brugali, Davide and Kuffner, James J.},
+	date = {2012},
+	langid = {english},
+}
+
+@inproceedings{eltantawy_mimd_2016,
+	title = {{MIMD} synchronization on {SIMT} architectures},
+	url = {https://ieeexplore.ieee.org/abstract/document/7783714},
+	doi = {10.1109/MICRO.2016.7783714},
+	abstract = {In the single-instruction multiple-threads ({SIMT}) execution model, small groups of scalar threads operate in lockstep. Within each group, current {SIMT} hardware implementations serialize the execution of threads that follow different paths, and to ensure efficiency, revert to lockstep execution as soon as possible. These constraints must be considered when adapting algorithms that employ synchronization. A deadlock-free program on a multiple-instruction multiple-data ({MIMD}) architecture may deadlock on a {SIMT} machine. To avoid this, programmers need to restructure control flow with {SIMT} scheduling constraints in mind. This requires programmers to be familiar with the underlying {SIMT} hardware. In this paper, we propose a static analysis technique that detects {SIMT} deadlocks by inspecting the application control flow graph ({CFG}). We further propose a {CFG} transformation that avoids {SIMT} deadlocks when synchronization is local to a function. Both the analysis and the transformation algorithms are implemented as {LLVM} compiler passes. Finally, we propose an adaptive hardware reconvergence mechanism that supports {MIMD} synchronization without changing the application {CFG}, but which can leverage our compiler analysis to gain efficiency. The static detection has a false detection rate of only 4\%-5\%. The automated transformation has an average performance overhead of 8.2\%-10.9\% compared to manual transformation. Our hardware approach performs on par with the compiler transformation, however, it avoids synchronization scope limitations, static instruction and register overheads, and debuggability challenges that are present in the compiler only solution.},
+	eventtitle = {2016 49th Annual {IEEE}/{ACM} International Symposium on Microarchitecture ({MICRO})},
+	pages = {1--14},
+	booktitle = {2016 49th Annual {IEEE}/{ACM} International Symposium on Microarchitecture ({MICRO})},
+	author = {{ElTantawy}, Ahmed and Aamodt, Tor M.},
+	date = {2016-10},
+	keywords = {Graphics processing units, Hardware, Instruction sets, Manuals, Programming, Synchronization, System recovery},
+	file = {Full Text PDF:C\:\\Users\\danwi\\Zotero\\storage\\EKKWUQQM\\ElTantawy and Aamodt - 2016 - MIMD synchronization on SIMT architectures.pdf:application/pdf},
+}