started implementing transpilation of expression
Some checks failed
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.10) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, 1.6) (push) Has been cancelled
CI / Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} (x64, ubuntu-latest, pre) (push) Has been cancelled

This commit is contained in:
2024-10-27 11:48:11 +01:00
parent 0e24d74e54
commit 9fc55c4c15
4 changed files with 78 additions and 26 deletions

View File

@ -26,9 +26,9 @@ All Instructions: https://docs.nvidia.com/cuda/parallel-thread-execution/index.h
ld.param.u64 %rd3, [VecAdd_kernel_param_2]; -> rd3 = Result
ld.param.u32 %r2, [VecAdd_kernel_param_3]; -> r2 = N
mov.u32 %r3, %ntid.x;
mov.u32 %r4, %ctaid.x;
mov.u32 %r5, %tid.x;
mov.u32 %r3, %ntid.x; -> initialise r3 with ntid.x
mov.u32 %r4, %ctaid.x; -> same as above
mov.u32 %r5, %tid.x; -> same as above
mad.lo.s32 %r1, %r3, %r4, %r5; -> r3 * r4 -> extract lowest 32/2 bits -> add r5 -> r1 = lowest16Bits(r3*r4) + r5