concept & design: improved existing sections
This commit is contained in:
parent
8afc3a5e3b
commit
2a8de064a6
|
@ -1,36 +1,36 @@
|
|||
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0" version="26.2.4">
|
||||
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0" version="26.2.6">
|
||||
<diagram name="Page-1" id="R-oAYELteez0U9UgfQ2t">
|
||||
<mxGraphModel dx="1723" dy="956" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1169" pageHeight="827" math="0" shadow="0">
|
||||
<mxGraphModel dx="2068" dy="1147" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1169" pageHeight="827" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0" />
|
||||
<mxCell id="1" parent="0" />
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="GDUa8-GdCzSgoxu7vCdt-4" target="GDUa8-GdCzSgoxu7vCdt-12">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="GDUa8-GdCzSgoxu7vCdt-4" target="GDUa8-GdCzSgoxu7vCdt-12" edge="1">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-4" value="Pre-Processing" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-4" value="Pre-Processing" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="500" y="280" width="120" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-8" value="Interpreter" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;width=90;height=40;" vertex="1" parent="1">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-8" value="Interpreter" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;width=90;height=40;" parent="1" vertex="1">
|
||||
<mxGeometry x="440" y="160" width="440" height="480" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-9" value="" style="ellipse;html=1;shape=endState;fillColor=#000000;strokeColor=#000000;" vertex="1" parent="1">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-9" value="" style="ellipse;html=1;shape=endState;fillColor=#000000;strokeColor=#000000;" parent="1" vertex="1">
|
||||
<mxGeometry x="270" y="520" width="40" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-13" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="GDUa8-GdCzSgoxu7vCdt-10" target="GDUa8-GdCzSgoxu7vCdt-4">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-13" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="GDUa8-GdCzSgoxu7vCdt-10" target="GDUa8-GdCzSgoxu7vCdt-4" edge="1">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-15" value="<div align="left"><font style="font-size: 12px;"><b>Input:</b></font></div><div align="left"><font style="font-size: 12px;">Expressions</font></div><div align="left"><font style="font-size: 12px;">Variable-Sets</font></div><div align="left"><font style="font-size: 12px;">Parameters</font></div>" style="edgeLabel;html=1;align=left;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="GDUa8-GdCzSgoxu7vCdt-13">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-15" value="<div align="left"><font style="font-size: 12px;"><b>Input:</b></font></div><div align="left"><font style="font-size: 12px;">Expressions</font></div><div align="left"><font style="font-size: 12px;">Variable-Sets</font></div><div align="left"><font style="font-size: 12px;">Parameters</font></div>" style="edgeLabel;html=1;align=left;verticalAlign=middle;resizable=0;points=[];" parent="GDUa8-GdCzSgoxu7vCdt-13" vertex="1" connectable="0">
|
||||
<mxGeometry x="-0.4633" relative="1" as="geometry">
|
||||
<mxPoint x="-33" as="offset" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-10" value="" style="ellipse;html=1;shape=endState;fillColor=#000000;strokeColor=none;" vertex="1" parent="1">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-10" value="" style="ellipse;html=1;shape=endState;fillColor=#000000;strokeColor=none;" parent="1" vertex="1">
|
||||
<mxGeometry x="270" y="280" width="40" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-11" value="Host" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" vertex="1" parent="1">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-11" value="CPU" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" parent="1" vertex="1">
|
||||
<mxGeometry x="460" y="220" width="400" height="140" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-18" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="1" source="GDUa8-GdCzSgoxu7vCdt-12" target="GDUa8-GdCzSgoxu7vCdt-17">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-18" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="GDUa8-GdCzSgoxu7vCdt-12" target="GDUa8-GdCzSgoxu7vCdt-17" edge="1">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<Array as="points">
|
||||
<mxPoint x="770" y="356" />
|
||||
|
@ -38,35 +38,35 @@
|
|||
</Array>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-19" value="<div align="left"><font style="font-size: 12px;"><b>Input:<br></b></font></div><div align="left"><font style="font-size: 12px;">Processed Expressions</font></div><div align="left"><font style="font-size: 12px;">Variable-Sets</font></div><div align="left"><font style="font-size: 12px;">Parameters</font></div>" style="edgeLabel;html=1;align=left;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="GDUa8-GdCzSgoxu7vCdt-18">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-19" value="<div align="left"><font style="font-size: 12px;"><b>Input:<br></b></font></div><div align="left"><font style="font-size: 12px;">Processed Expressions</font></div><div align="left"><font style="font-size: 12px;">Variable-Sets</font></div><div align="left"><font style="font-size: 12px;">Parameters</font></div>" style="edgeLabel;html=1;align=left;verticalAlign=middle;resizable=0;points=[];" parent="GDUa8-GdCzSgoxu7vCdt-18" vertex="1" connectable="0">
|
||||
<mxGeometry x="0.1565" y="-2" relative="1" as="geometry">
|
||||
<mxPoint x="-48" y="-26" as="offset" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-12" value="Dispatch Kernel" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-12" value="Dispatch Kernel" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="710" y="280" width="120" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-16" value="Device" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" vertex="1" parent="1">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-16" value="GPU" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" parent="1" vertex="1">
|
||||
<mxGeometry x="680" y="456" width="180" height="139" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-25" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="GDUa8-GdCzSgoxu7vCdt-17" target="GDUa8-GdCzSgoxu7vCdt-21">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-25" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="GDUa8-GdCzSgoxu7vCdt-17" target="GDUa8-GdCzSgoxu7vCdt-21" edge="1">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-17" value="Evaluation" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-17" value="Evaluation" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="710" y="520" width="120" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-20" value="Host" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" vertex="1" parent="1">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-20" value="CPU" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" parent="1" vertex="1">
|
||||
<mxGeometry x="460" y="456" width="170" height="139" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-22" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="GDUa8-GdCzSgoxu7vCdt-21" target="GDUa8-GdCzSgoxu7vCdt-9">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-22" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="GDUa8-GdCzSgoxu7vCdt-21" target="GDUa8-GdCzSgoxu7vCdt-9" edge="1">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-26" value="<div><font style="font-size: 12px;"><b>Output:</b></font></div><div><font style="font-size: 12px;">Evaluation-Results</font></div>" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="GDUa8-GdCzSgoxu7vCdt-22">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-26" value="<div><font style="font-size: 12px;"><b>Output:</b></font></div><div><font style="font-size: 12px;">Evaluation-Results</font></div>" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="GDUa8-GdCzSgoxu7vCdt-22" vertex="1" connectable="0">
|
||||
<mxGeometry x="0.4108" y="-1" relative="1" as="geometry">
|
||||
<mxPoint x="13" y="1" as="offset" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-21" value="Retrieve Results" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxCell id="GDUa8-GdCzSgoxu7vCdt-21" value="Retrieve Results" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="485" y="520" width="120" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
</root>
|
||||
|
|
|
@ -1,36 +1,36 @@
|
|||
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0" version="26.2.5">
|
||||
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0" version="26.2.6">
|
||||
<diagram name="Page-1" id="KFoKKVRmhU8qG_-FEeqA">
|
||||
<mxGraphModel dx="985" dy="546" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1169" pageHeight="827" math="0" shadow="0">
|
||||
<mxGraphModel dx="2068" dy="1147" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1169" pageHeight="827" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0" />
|
||||
<mxCell id="1" parent="0" />
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-1" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-21" target="tQMPqDGkYp4bv8unJ6VJ-11">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-1" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-21" target="tQMPqDGkYp4bv8unJ6VJ-11" edge="1">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-2" value="Pre-Processing" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-2" value="Pre-Processing" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="480" y="280" width="120" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-3" value="Transpiler" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;width=90;height=40;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-3" value="Transpiler" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;width=90;height=40;" parent="1" vertex="1">
|
||||
<mxGeometry x="440" y="160" width="480" height="480" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-4" value="" style="ellipse;html=1;shape=endState;fillColor=#000000;strokeColor=#000000;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-4" value="" style="ellipse;html=1;shape=endState;fillColor=#000000;strokeColor=#000000;" parent="1" vertex="1">
|
||||
<mxGeometry x="270" y="520" width="40" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-7" target="tQMPqDGkYp4bv8unJ6VJ-2">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-5" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-7" target="tQMPqDGkYp4bv8unJ6VJ-2" edge="1">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-6" value="<div align="left"><font style="font-size: 12px;"><b>Input:</b></font></div><div align="left"><font style="font-size: 12px;">Expressions</font></div><div align="left"><font style="font-size: 12px;">Variable-Sets</font></div><div align="left"><font style="font-size: 12px;">Parameters</font></div>" style="edgeLabel;html=1;align=left;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="tQMPqDGkYp4bv8unJ6VJ-5">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-6" value="<div align="left"><font style="font-size: 12px;"><b>Input:</b></font></div><div align="left"><font style="font-size: 12px;">Expressions</font></div><div align="left"><font style="font-size: 12px;">Variable-Sets</font></div><div align="left"><font style="font-size: 12px;">Parameters</font></div>" style="edgeLabel;html=1;align=left;verticalAlign=middle;resizable=0;points=[];" parent="tQMPqDGkYp4bv8unJ6VJ-5" vertex="1" connectable="0">
|
||||
<mxGeometry x="-0.4633" relative="1" as="geometry">
|
||||
<mxPoint x="-16" as="offset" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-7" value="" style="ellipse;html=1;shape=endState;fillColor=#000000;strokeColor=none;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-7" value="" style="ellipse;html=1;shape=endState;fillColor=#000000;strokeColor=none;" parent="1" vertex="1">
|
||||
<mxGeometry x="270" y="280" width="40" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-8" value="Host" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-8" value="CPU" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" parent="1" vertex="1">
|
||||
<mxGeometry x="460" y="220" width="440" height="140" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" edge="1" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-11" target="tQMPqDGkYp4bv8unJ6VJ-14">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-9" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0.5;exitY=1;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-11" target="tQMPqDGkYp4bv8unJ6VJ-14" edge="1">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<Array as="points">
|
||||
<mxPoint x="820" y="420" />
|
||||
|
@ -38,44 +38,44 @@
|
|||
</Array>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-10" value="<div align="left"><font style="font-size: 12px;"><b>Input:<br></b></font></div><div align="left"><font style="font-size: 12px;">Processed Expressions</font></div><div align="left"><font style="font-size: 12px;">Variable-Sets</font></div><div align="left"><font style="font-size: 12px;">Parameters</font></div>" style="edgeLabel;html=1;align=left;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="tQMPqDGkYp4bv8unJ6VJ-9">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-10" value="<div align="left"><font style="font-size: 12px;"><b>Input:<br></b></font></div><div align="left"><font style="font-size: 12px;">Processed Expressions</font></div><div align="left"><font style="font-size: 12px;">Variable-Sets</font></div><div align="left"><font style="font-size: 12px;">Parameters</font></div>" style="edgeLabel;html=1;align=left;verticalAlign=middle;resizable=0;points=[];" parent="tQMPqDGkYp4bv8unJ6VJ-9" vertex="1" connectable="0">
|
||||
<mxGeometry x="0.1565" y="-2" relative="1" as="geometry">
|
||||
<mxPoint x="-48" y="-25" as="offset" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-11" value="Dispatch Kernel" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-11" value="Dispatch Kernel" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="760" y="280" width="120" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-12" value="Device" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-12" value="GPU" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" parent="1" vertex="1">
|
||||
<mxGeometry x="720" y="456" width="180" height="134" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-13" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-14" target="tQMPqDGkYp4bv8unJ6VJ-18">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-13" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-14" target="tQMPqDGkYp4bv8unJ6VJ-18" edge="1">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-14" value="Evaluation" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-14" value="Evaluation" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="760" y="520" width="120" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-15" value="Host" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-15" value="CPU" style="shape=umlFrame;whiteSpace=wrap;html=1;pointerEvents=0;" parent="1" vertex="1">
|
||||
<mxGeometry x="460" y="456" width="180" height="134" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-18" target="tQMPqDGkYp4bv8unJ6VJ-4">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-16" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0.5;exitDx=0;exitDy=0;entryX=1;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-18" target="tQMPqDGkYp4bv8unJ6VJ-4" edge="1">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-17" value="<div><font style="font-size: 12px;"><b>Output:</b></font></div><div><font style="font-size: 12px;">Evaluation-Results</font></div>" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" vertex="1" connectable="0" parent="tQMPqDGkYp4bv8unJ6VJ-16">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-17" value="<div><font style="font-size: 12px;"><b>Output:</b></font></div><div><font style="font-size: 12px;">Evaluation-Results</font></div>" style="edgeLabel;html=1;align=center;verticalAlign=middle;resizable=0;points=[];" parent="tQMPqDGkYp4bv8unJ6VJ-16" vertex="1" connectable="0">
|
||||
<mxGeometry x="0.4108" y="-1" relative="1" as="geometry">
|
||||
<mxPoint x="13" y="1" as="offset" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-18" value="Retrieve Results" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-18" value="Retrieve Results" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="485" y="520" width="120" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-22" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-2" target="tQMPqDGkYp4bv8unJ6VJ-21">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-22" value="" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" parent="1" source="tQMPqDGkYp4bv8unJ6VJ-2" target="tQMPqDGkYp4bv8unJ6VJ-21" edge="1">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<mxPoint x="600" y="300" as="sourcePoint" />
|
||||
<mxPoint x="760" y="300" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-21" value="Code-Generation" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxCell id="tQMPqDGkYp4bv8unJ6VJ-21" value="Code-Generation" style="rounded=0;whiteSpace=wrap;html=1;" parent="1" vertex="1">
|
||||
<mxGeometry x="620" y="280" width="120" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
</root>
|
||||
|
|
40
other/pre-processing_result.drawio
Normal file
40
other/pre-processing_result.drawio
Normal file
|
@ -0,0 +1,40 @@
|
|||
<mxfile host="app.diagrams.net" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:137.0) Gecko/20100101 Firefox/137.0" version="26.2.5">
|
||||
<diagram name="Page-1" id="93wPJxm0qDUx-9UJ1EZK">
|
||||
<mxGraphModel dx="1182" dy="655" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="1169" pageHeight="827" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0" />
|
||||
<mxCell id="1" parent="0" />
|
||||
<mxCell id="399UxkHvPDb8lwnND9dC-1" value="X&lt;sub&gt;1&lt;/sub&gt;" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="265" y="240" width="40" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="399UxkHvPDb8lwnND9dC-2" value="2" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="355" y="240" width="40" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="399UxkHvPDb8lwnND9dC-3" value="+" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="445" y="240" width="40" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="399UxkHvPDb8lwnND9dC-5" value="&lt;div&gt;Type: Variable&lt;/div&gt;&lt;div&gt;Value: 1&lt;/div&gt;" style="rounded=0;whiteSpace=wrap;html=1;align=left;" vertex="1" parent="1">
|
||||
<mxGeometry x="240" y="280" width="90" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="399UxkHvPDb8lwnND9dC-6" value="&lt;div&gt;Type: Constant&lt;/div&gt;&lt;div&gt;Value: 2&lt;/div&gt;" style="rounded=0;whiteSpace=wrap;html=1;align=left;" vertex="1" parent="1">
|
||||
<mxGeometry x="330" y="280" width="90" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="399UxkHvPDb8lwnND9dC-9" value="&lt;div&gt;Type: Operator&lt;/div&gt;&lt;div&gt;Value: Addition&lt;/div&gt;" style="rounded=0;whiteSpace=wrap;html=1;align=left;" vertex="1" parent="1">
|
||||
<mxGeometry x="420" y="280" width="90" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="399UxkHvPDb8lwnND9dC-10" value="X&lt;sub&gt;1&lt;/sub&gt;" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="80" y="280" width="40" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="399UxkHvPDb8lwnND9dC-14" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" edge="1" parent="1" source="399UxkHvPDb8lwnND9dC-11" target="399UxkHvPDb8lwnND9dC-5">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="399UxkHvPDb8lwnND9dC-11" value="2" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="160" y="280" width="40" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="399UxkHvPDb8lwnND9dC-12" value="+" style="rounded=0;whiteSpace=wrap;html=1;" vertex="1" parent="1">
|
||||
<mxGeometry x="120" y="280" width="40" height="40" as="geometry" />
|
||||
</mxCell>
|
||||
</root>
|
||||
</mxGraphModel>
|
||||
</diagram>
|
||||
</mxfile>
|
|
@ -24,7 +24,7 @@ function interpret(expressions::Vector{Expr}, variables::Matrix{Float32}, parame
|
|||
cudaParams = Utils.create_cuda_array(parameters, NaN32) # column corresponds to data for one expression
|
||||
cudaExprs = Utils.create_cuda_array(exprs, ExpressionElement(EMPTY, 0)) # column corresponds to data for one expression
|
||||
# put into a separate CuArray, as this is static and would be inefficient to send separately to every kernel
|
||||
cudaStepsize = CuArray([Utils.get_max_inner_length(exprs), Utils.get_max_inner_length(parameters), size(variables, 1)]) # max num of values per expression; max nam of parameters per expression; number of variables per expression
|
||||
cudaStepsize = CuArray([Utils.get_max_inner_length(parameters), size(variables, 1)]) # max num of parameters per expression; number of variables per expression
|
||||
|
||||
# each expression has nr. of variable sets (nr. of columns of the variables) results and there are n expressions
|
||||
cudaResults = CuArray{Float32}(undef, variableCols, length(exprs))
|
||||
|
@ -46,20 +46,20 @@ end
|
|||
const MAX_STACK_SIZE = 25 # The depth of the stack to store the values and intermediate results
|
||||
function interpret_expression(expressions::CuDeviceArray{ExpressionElement}, variables::CuDeviceArray{Float32}, parameters::CuDeviceArray{Float32}, results::CuDeviceArray{Float32}, stepsize::CuDeviceArray{Int}, exprIndex::Int)
|
||||
varSetIndex = (blockIdx().x - 1) * blockDim().x + threadIdx().x # ctaid.x * ntid.x + tid.x (1-based)
|
||||
variableCols = length(variables) / stepsize[3]
|
||||
@inbounds variableCols = length(variables) / stepsize[2]
|
||||
|
||||
if varSetIndex > variableCols
|
||||
return
|
||||
end
|
||||
|
||||
firstExprIndex = ((exprIndex - 1) * stepsize[1]) + 1 # Inclusive
|
||||
lastExprIndex = firstExprIndex + stepsize[1] - 1 # Inclusive
|
||||
firstParamIndex = ((exprIndex - 1) * stepsize[2]) # Exclusive
|
||||
# firstExprIndex = ((exprIndex - 1) * stepsize[1]) + 1 # Inclusive
|
||||
# lastExprIndex = firstExprIndex + stepsize[1] - 1 # Inclusive
|
||||
@inbounds firstParamIndex = ((exprIndex - 1) * stepsize[1]) # Exclusive
|
||||
|
||||
operationStack = MVector{MAX_STACK_SIZE, Float32}(undef) # Try to get this to function with variable size too, to allow better memory usage
|
||||
operationStackTop = 0 # stores index of the last defined/valid value
|
||||
|
||||
firstVariableIndex = ((varSetIndex-1) * stepsize[3]) # Exclusive
|
||||
@inbounds firstVariableIndex = ((varSetIndex-1) * stepsize[2]) # Exclusive
|
||||
|
||||
@inbounds for expr in expressions
|
||||
if expr.Type == EMPTY
|
||||
|
|
|
@ -7,8 +7,7 @@ To be able to determine whether evaluating mathematical expressions on the GPU i
|
|||
|
||||
\section[Requirements]{Requirements and Data}
|
||||
% short section.
|
||||
% Multiple expressions; vars for all expressions; params unique to expression; operators that need to be supported
|
||||
The main goal of both prototypes or evaluators is to provide a speed-up to the CPU interpreter already in use. However, it is also important to determine which evaluator provides the most speed-up. This also means that if one of the evaluators is faster, it is meant to replace the CPU interpreter. Therefore, they must have similar capabilities, and therefore meet the following requirements:
|
||||
The main goal of both prototypes or evaluators is to provide a speed-up compared to the CPU interpreter already in use. However, it is also important to determine which evaluator provides the most speed-up. This also means that if one of the evaluators is faster, it is intended to replace the CPU interpreter. Therefore, they must have similar capabilities and meet the following requirements:
|
||||
|
||||
\begin{itemize}
|
||||
\item Multiple expressions as input.
|
||||
|
@ -19,7 +18,7 @@ The main goal of both prototypes or evaluators is to provide a speed-up to the C
|
|||
\item The results of the evaluations are returned in a matrix of the form $k \times N$. In this case, $k$ is equal to the $N$ of the variable matrix and $N$ is equal to the number of input expressions.
|
||||
\end{itemize}
|
||||
|
||||
With these requirements, one possible expression that must be able to be evaluated is the following: $\log(e^{p_1}) - |x_1| * \sqrt{x_2} / 10 + 2^{x_3}$
|
||||
These requirements mean that one possible expression that must be able to be evaluated is the following: $\log(e^{p_1}) - |x_1| * \sqrt{x_2} / 10 + 2^{x_3}$
|
||||
|
||||
\begin{figure}
|
||||
\centering
|
||||
|
@ -29,26 +28,33 @@ With these requirements, one possible expression that must be able to be evaluat
|
|||
\end{figure}
|
||||
|
||||
|
||||
With this, the capabilities are outlined, however, the input and output data need to further be explained for a better understanding. The first input are the expressions that need to be evaluated. These can have any length and can contain constant values, variables and parameters and all of these are linked together with the supported operations. In the example shown in Figure \ref{fig:input_output_explanation}, there are six expressions $e_1$ through $e_6$. Next is the variable matrix. One entry in this matrix, corresponds to one variable in every expression, with the row indicating which variable it holds the value for. Each column holds a different set of variables. In the provided example, there are three variable sets, each holding the values for four variables $x_1$ through $x_4$. All expressions are evaluated using all variable sets and the results of these evaluations are stored in the results matrix. Each entry in this matrix holds the resulting value of the evaluation of one expression with one variable set. The row indicates the variable set while the column indicates the expression.
|
||||
With this, the capabilities are outlined. However, the input and output data need to be explained further for a better understanding. The first input is the set of expressions that need to be evaluated. These can have any length and can contain constant values, variables and parameters, and all of these are linked together with the supported operations. In the example shown in Figure \ref{fig:input_output_explanation}, there are six expressions $e_1$ through $e_6$. Next is the variable matrix. One entry in this matrix corresponds to one variable in every expression, with the row indicating which variable it holds the value for. For example, the values in row three are used to parametrise the variable $x_3$. Each column holds a different set of variables. In the provided example, there are three variable sets, each holding the values for four variables $x_1$ through $x_4$. All expressions are evaluated using all variable sets and the results of these evaluations are stored in the results matrix. Each entry in this matrix holds the resulting value of the evaluation of one expression with one variable set. The row indicates the variable set while the column indicates the expression.
|
||||
|
||||
This is the minimal functionality needed to evaluate expressions with variables generated by a symbolic regression algorithm. In the case of parameter optimisation, it is useful to have a different type of variable, called parameter. For parameter optimisation it is important that for the given variable sets, the best fitting parameters need to be found. To achieve this, the evaluator is called multiple times with different parameters, but the same variables, and the results are evaluated for their fitness by the caller. In this case, the parameters do not change within one call. Parameters could therefore be treated as constant values of the expressions and no separate input for them would be needed. However, providing the possibility to have the parameters as an input, makes the process of parameter optimisation easier. This is the reason the prototype evaluators need to support parameters as inputs. Not all expressions need to have the same number of parameters. Therefore, they are structured as a vector of vectors and not a matrix. The example in Figure \ref{fig:input_output_explanation} shows how the parameters are structured. For example one expression has zero parameters, while another has six parameters $p_1$ through $p_6$. It needs to be mentioned that just like the number of variables, the number of parameters per expression is not limited. It is also possible to completely omit the parameters if they are not needed.
|
||||
This is the minimal functionality needed to evaluate expressions with variables generated by a symbolic regression algorithm. In the case of parameter optimisation, it is useful to have a different type of variable, called parameter. For parameter optimisation it is important that for the given variable sets, the best fitting parameters need to be found. To achieve this, the evaluator is called multiple times with different parameters, but the same variables. The results are then evaluated for their fitness by the caller. In this case, the parameters do not change within one call. Parameters could therefore be treated as constant values of the expressions by the caller, and no separate input for them would be needed. However, providing the possibility to have the parameters as an input makes the process of parameter optimisation easier. This is the reason the prototype evaluators need to support parameters as inputs. Unlike variables, not all expressions need to have the same number of parameters. Therefore, they are structured as a vector of vectors and not a matrix. The example in Figure \ref{fig:input_output_explanation} shows how the parameters are structured. For example, one expression has zero parameters, while another has six parameters $p_1$ through $p_6$. It needs to be mentioned that just like the number of variables, the number of parameters per expression is not limited. It is also possible to completely omit the parameters if they are not needed.
|
||||
|
||||
% \subsection{Non-Goals}
|
||||
% Probably a good idea. Probably move this to "introduction"
|
||||
\section{Architecture}
|
||||
|
||||
Based on the requirements above, the architecture of both prototypes can be designed. While the requirements only specify the input and output, the components and workflow also need to be specified. This section aims at giving an architectural overview of both prototypes, alongside their design decisions.
|
||||
Based on the requirements above, the architecture of both prototypes can be designed. While the requirements only specify the input and output, the components and workflow also need to be specified. This section aims at giving an architectural overview of both prototypes, alongside their design decisions.
|
||||
|
||||
A design decision that has been made for both prototypes is to split the evaluation of each expression into a separate kernel dispatch. As explained in Section \ref{sec:thread_hierarchy}, it is desirable to reduce the occurrence of thread divergence as much as possible. Although the SIMT programming model tries to mitigate the negative effects of thread divergence, it is still a good idea to avoid it when possible. In this case, thread divergence can easily be avoided by not evaluating all expressions in a single kernel dispatch. GPUs are able to have multiple resident grids, with modern GPUs being able to have 128 grids concurrently \parencite{nvidia_cuda_2025}. One grid corresponds to one kernel dispatch, and therefore allow 128 kernels to be run concurrently. Therefore, dispatching a kernel for each expression, has the possibility to improve the performance.
|
||||
A design decision that has been made for both prototypes is to split the evaluation of each expression into a separate kernel or kernel dispatch. As explained in Section \ref{sec:thread_hierarchy}, it is desirable to reduce the occurrence of thread divergence as much as possible. Although the SIMT programming model tries to mitigate the negative effects of thread divergence, it is still a good idea to avoid it when possible. For this use-case, thread divergence can easily be avoided by not evaluating all expressions in a single kernel or kernel dispatch. GPUs are able to have multiple resident grids, with modern GPUs being able to accommodate 128 grids concurrently \parencite{nvidia_cuda_2025}. One grid corresponds to one kernel dispatch, and therefore allows up to 128 kernels to be run concurrently. Therefore, dispatching a kernel for each expression has the potential to improve the performance. In the case of the interpreter, having only one kernel that can be dispatched for each expression also simplifies the kernel itself. This is because the kernel can focus on evaluating one expression and does not require additional code to handle multiple expressions at once. Similarly, the transpiler can also be simplified, as it can generate many smaller kernels rather than one big kernel. Additionally, the smaller kernels do not need any branching, because the generated code only needs to perform the operations as they occur in the expression itself.
|
||||
|
||||
%% Maybe add overview Diagram
|
||||
%% Shows -> Caller calls Evaluator -> Evaluator dispatches kernel -> kernel evaluates -> Evaluator returns evaluation result
|
||||
%% Probably the same as the interpreter and transpiler diagram. If so, dont add it
|
||||
|
||||
\subsection{Pre-Processing}
|
||||
The first step in both prototypes is the pre-processing step. It is needed, as it simplifies working with the expressions in the later steps. Similar to the front-end in compilers as described in Section \ref{sec:compilers}, it takes an expression and transforms it into an intermediate representation. One of the responsibilities of the pre-processor is to verify that only allowed operators are present in the given expressions. Furthermore, it transforms the expressions into postfix-notation. This further allows the later parts to more easily evaluate the expressions. One of the major benefits of this notation is the implicit operator precedence. It allows the evaluators to evaluate the expressions token by token from left to right, without needing to worry about the correct order of operations. With one token being either an operator, a constant value, a variable or a parameter.
|
||||
The first step in both prototypes is the pre-processing step. It is needed, as it simplifies working with the expressions in the later steps. One of the responsibilities of the pre-processor is to verify that only allowed operators and symbols are present in the given expressions. This is comparable to the work a scanner like Flex\footnote{\url{https://github.com/westes/flex}} performs. Additionally, this step also converts the expression into an intermediate representation. In essence, the pre-processing step can be compared to the front-end of a compiler as described in Section \ref{sec:compilers}. The conversion into the intermediate representation transforms the expressions from infix-notation into postfix-notation. This further allows the later parts to more easily evaluate the expressions. One of the major benefits of this notation is the implicit operator precedence. It allows the evaluators to evaluate the expressions token by token from left to right, without needing to worry about the correct order of operations. One token represents either an operator, a constant value, a variable or a parameter. Apart from the intermediate representation containing the expression in postfix-notation, it also contains the information about the types of the tokens themselves. This is all that is needed for the interpretation and transpilation steps. After the pre-processing step, a simple expression like $x + 2$ would look as depicted in Figure \ref{fig:pre-processing_results}.
|
||||
|
||||
It would have also been possible to perform the pre-processing step on the GPU. However, pre-processing only one expression can not easily be split into multiple threads, which means one GPU thread would need to process one expression. As described in Section \ref{sec:gpgpu} a single GPU thread is slower than a single CPU thread. Furthermore, it wouldn't make sense to process all expressions in a single kernel. This would lead to a lot of thread divergence, essentially processing one expression after the other. The SIMT programming model might help with parallelising at least some parts of the processing work. The generated expressions can differ a lot from each other and restricting them to be similar and therefore SIMT friendly, would reduce the overall quality of the symbolic regression algorithm. Therefore, it does not make sense to perform the processing step on the GPU. This is a typical example of code that is better run on the CPU, also because the parallelisation possibilities of one thread per expression can be applied to the CPU as well. Concepts like caching processed expressions, or caching parts of the processed expressions can also be employed on the CPU. This would not be possible on the GPU, because they can not save state between two kernel dispatches.
|
||||
\begin{figure}
|
||||
\centering
|
||||
\includegraphics[width=.9\textwidth]{pre-processing_result.png}
|
||||
\caption{This diagram shows how an expression will be transformed in the pre-processing step.}
|
||||
\label{fig:pre-processing_results}
|
||||
\end{figure}
|
||||
|
||||
It would have also been possible to perform the pre-processing step on the GPU. However, pre-processing only one expression can not easily be split into multiple threads, which means one GPU thread would need to process one expression. As described in Section \ref{sec:gpgpu}, a single GPU thread is slower than a single CPU thread and, as a result, the processing will also be slower. Furthermore, it would not make sense to process all expressions in a single kernel. This would lead to a lot of thread divergence, which essentially means processing one expression after the other. The SIMT programming model might help with parallelising at least some parts of the processing work. However, the generated expressions can differ a lot from each other and restricting them to be similar and therefore SIMT friendly, would likely reduce the overall quality of the symbolic regression algorithm. Therefore, it does not make sense to perform the processing step on the GPU. This is a typical example of code that is better run on the CPU, also because the parallelisation possibilities of one thread per expression can be applied to the CPU as well. Concepts like caching processed expressions, or caching parts of the processed expressions can also be employed on the CPU. This would not be possible on the GPU, because a GPU can not save state between two kernel dispatches.
|
||||
|
||||
\subsection{Interpreter}
|
||||
|
||||
|
@ -59,9 +65,10 @@ It would have also been possible to perform the pre-processing step on the GPU.
|
|||
\label{fig:component_diagram_interpreter}
|
||||
\end{figure}
|
||||
|
||||
The interpreter consists of two parts. The CPU side or host side is the part of the program, that interacts with both the GPU or device and the caller. An overview on the components and the workflow of the interpreter can be seen in Figure \ref{fig:component_diagram_interpreter}. Before the GPU can start evaluating the expressions, a pre-processing step is necessary. This step is crucial, as it transforms the expressions in a format, that greatly simplifies the evaluation part. Before the expression can be evaluated however, all data needs to be sent to the GPU, this includes the processed expressions, as well as the data for the variables and parameters. After the kernel has finished evaluating all expressions, the CPU reads the results from the GPU and returns them to the caller.
|
||||
The interpreter consists of two parts. The CPU side is the part of the program that interacts with both the GPU and the caller. An overview of the components and the workflow of the interpreter can be seen in Figure \ref{fig:component_diagram_interpreter}. Once the interpreter receives the expressions, they are pre-processed. This ensures the expressions are valid, and that they are transformed into the intermediate representation needed for evaluating them. The results of this pre-processing are then sent to the GPU, which performs the actual interpretation of the expressions. Alongside the expressions, the data for the variables and parameters also needs to be sent to the GPU. Once all the data resides on the GPU, the interpreter kernel can be dispatched. It needs to be noted that for each of the expressions, a separate kernel will be dispatched. As already described, this decision has been made to reduce thread divergence and therefore increase performance. In fact, dispatching the same kernel multiple times with different expressions means that no thread divergence will occur, as explained later. Once the GPU has finished evaluating all expressions with all variable sets, the result will be stored in a matrix on the GPU. The CPU then retrieves the results and returns them to the caller in the format specified by the requirements.
|
||||
|
||||
Because of the already mentioned pre-processing step, the evaluation process is relatively straight-forward. The Algorithm \ref{alg:eval_interpreter} demonstrates how an expression in postfix-notation can be evaluated. It shows a simplified version that only works with addition, multiplication and constant values. This is the part of the interpreter prototype, that actually interprets the expressions and runs on the GPU.
|
||||
% somewhere here explain why thread divergence doesn't occur
|
||||
Evaluating the expressions is relatively straightforward. Due to the expressions being in postfix-notation, the actual interpreter must only iterate over all tokens once and perform the appropriate tasks. If the interpreter encounters a binary operator, it must simply read the previous two values and perform the operation specified by the operator. For unary operators, only the previous value must be read. As already mentioned, expressions in postfix-notation implicitly contain the operator precedence, therefore no look-ahead or other strategies need to be used to ensure correct evaluation. Algorithm \ref{alg:eval_interpreter} shows how the interpreter works. Note that this is a simplified version that only works with additions, multiplications and constant values.
|
||||
|
||||
\begin{algorithm}
|
||||
\caption{Interpreting an equation in postfix-notation}\label{alg:eval_interpreter}
|
||||
|
@ -71,17 +78,17 @@ Because of the already mentioned pre-processing step, the evaluation process is
|
|||
|
||||
\While{HasTokenLeft(\textit{expr})}
|
||||
\State $\textit{token} \gets \text{GetNextToken}(\textit{expr})$
|
||||
\If{$\textit{token.Kind} = \text{Constant}$}
|
||||
Push($\textit{stack}$, $\textit{token.Value}$)
|
||||
\ElsIf{$\textit{token.Kind} = \text{Operator}$}
|
||||
\If{$\textit{token.Type} = \text{Constant}$}
|
||||
\State Push($\textit{stack}$, $\textit{token.Value}$)
|
||||
\ElsIf{$\textit{token.Type} = \text{Operator}$}
|
||||
\If{$\textit{token.Value} = \text{Addition}$}
|
||||
\State $\textit{right} \gets \text{Pop}(\textit{stack})$
|
||||
\State $\textit{left} \gets \text{Pop}(\textit{stack})$
|
||||
Push($\textit{left} + \textit{right}$)
|
||||
\State Push($\textit{stack}$, $\textit{left} + \textit{right}$)
|
||||
\ElsIf{$\textit{token.Value} = \text{Multiplication}$}
|
||||
\State $\textit{right} \gets \text{Pop}(\textit{stack})$
|
||||
\State $\textit{left} \gets \text{Pop}(\textit{stack})$
|
||||
Push($\textit{left} * \textit{right}$)
|
||||
\State Push($\textit{stack}$, $\textit{left} * \textit{right}$)
|
||||
\EndIf
|
||||
\EndIf
|
||||
\EndWhile
|
||||
|
@ -91,8 +98,11 @@ Because of the already mentioned pre-processing step, the evaluation process is
|
|||
\end{algorithmic}
|
||||
\end{algorithm}
|
||||
|
||||
If a new operator is needed, it must simply be added as another else-if block inside the operator branch. New token kinds like variables or parameters, can also be added by adding a new outer else-if block that checks for these token kinds. However, the pre-processing step also needs to be extended with these new operators and token kinds, otherwise the expression will never reach the evaluation step. It is also possible to add unary operators like logarithm. In this case only one value would be read from the stack, the operation would be applied, and the result would be written back to the stack.
|
||||
If a new operator is needed, it must simply be added as another else-if block inside the operator branch. New token types like variables or parameters can also be added by adding a new outer else-if block that checks for these token types. However, the pre-processing step also needs to be extended with these new operators and token types. Otherwise, the expression will never reach the evaluation step, as it would be seen as invalid. It is also possible to add unary operators like $\log()$. In this case only one value would be read from the stack, the operation would be applied, and the result would be written back to the stack.
|
||||
|
||||
Algorithm \ref{alg:eval_interpreter} in this case resembles the kernel. This kernel will be dispatched for every expression that needs to be evaluated, to eliminate thread divergence. Thread divergence can only happen on data-dependent branches. In this case, the while loop and every if and else-if statement contains a data-dependent branch. Depending on the expression passed to the kernel, the while loop may run longer than for another expression. Similarly, not all expressions have the same constants, operators and variables in the same order, which would therefore lead to each thread taking different paths. However, one expression always has the same constants, operators and variables in the same locations, meaning all threads will take the same paths. This also means that despite the interpreter containing many data-dependent branches, these branches only depend on the expression itself. Because of this, all threads will take the same paths and therefore will never diverge from one another.
|
||||
|
||||
% explain why thread divergence does not happen here
|
||||
|
||||
\subsection{Transpiler}
|
||||
|
||||
|
@ -103,6 +113,16 @@ If a new operator is needed, it must simply be added as another else-if block in
|
|||
\label{fig:component_diagram_transpiler}
|
||||
\end{figure}
|
||||
|
||||
Similar to the interpreter, the transpiler also consists of a part that is running on the CPU side and one that is running on the GPU side. When looking at the components and workflow of the transpiler as seen in Figure \ref{fig:component_diagram_transpiler}, it is almost identical to the interpreter. However, the key difference between these two is the additional code generation, or transpilation step.
|
||||
|
||||
% explain the differences of interpreter and transpiler
|
||||
% explain how the transpilation process works
|
||||
% also add algorithm to further show the process
|
||||
|
||||
% at the end probably, talk why each expression has its own kernel -> because GPU now only evaluates expression and no branches are needed -> results in less overhead instructions (no branch instructions) -> however, this overhead is now on CPU. This is the reason both need to be explored to find out if there are performance differences between them
|
||||
|
||||
|
||||
|
||||
|
||||
% \section{Interpreter}
|
||||
% % as introduction to this section talk about what "interpreter" means in this context. so "gpu parses expr and calculates"
|
||||
|
|
Binary file not shown.
Before ![]() (image error) Size: 88 KiB After ![]() (image error) Size: 88 KiB ![]() ![]() |
Binary file not shown.
Before ![]() (image error) Size: 91 KiB After ![]() (image error) Size: 91 KiB ![]() ![]() |
BIN
thesis/images/pre-processing_result.png
Normal file
BIN
thesis/images/pre-processing_result.png
Normal file
Binary file not shown.
After ![]() (image error) Size: 19 KiB |
BIN
thesis/main.pdf
BIN
thesis/main.pdf
Binary file not shown.
Loading…
Reference in New Issue
Block a user