transpiler: trying to fix problem with writing to global memory; not yet fixed

2025-03-27 18:00:31 +01:00
parent eaee21ca75
commit 561b37160b
2 changed files with 108 additions and 43 deletions
--- a/package/test/TranspilerTests.jl
+++ b/package/test/TranspilerTests.jl
@ -27,18 +27,18 @@ parameters[2][2] = 0.0
 	push!(postfixExprs, expr_to_postfix(:(5^3 + x1 - p1)))

 	# generatedCode = Transpiler.transpile(postfixExpr)
-	generatedCode = Transpiler.transpile(postfixExprs[3], 2, 3, 2, 3) # TEMP
-	println(generatedCode)
+	# generatedCode = Transpiler.transpile(postfixExprs[3], 2, 3, 2, 3) # TEMP
+	# println(generatedCode)
 	# CUDA.@sync interpret(postfixExprs, variables, parameters)

 	# This is just here for testing. This will be called inside the execute method in the Transpiler module
-	linker = CuLink()
-	add_data!(linker, "ExpressionProcessing", generatedCode)
+	# linker = CuLink()
+	# add_data!(linker, "ExpressionProcessing", generatedCode)

-	image = complete(linker)
+	# image = complete(linker)

-	mod = CuModule(image)
-	func = CuFunction(mod, "ExpressionProcessing")
+	# mod = CuModule(image)
+	# func = CuFunction(mod, "ExpressionProcessing")
 end

@testset "Test transpiler evaluation" begin
@ -46,7 +46,55 @@ end
 	# push!(postfixExprs, expressions[1])
 	# push!(postfixExprs, expressions[2])

-	@time Transpiler.evaluate(expressions, variables, parameters)
+	expr = Vector{Expr}()
+	push!(expr, expressions[1])
+	# @time Transpiler.evaluate(expr, variables, parameters)
 end

 #TODO: test performance of transpiler PTX generation when doing "return String(take!(buffer))" vs "return take!(buffer)"
+
+@testset "TEMP" begin
+	# Minimal hand-written PTX kernel used to debug stores to global memory.
+	# The kernel takes one pointer argument and writes the constant 10.0 (f32) to
+	# the address it points to.
+	#
+	# The module declares `.address_size 64` and the host passes a `CuPtr{Float32}`,
+	# so the parameter, the registers holding the address and the `ld.param` /
+	# `cvta.to.global` instructions must all be 64 bit wide. Using `.u32` here
+	# truncates the address and the store does not hit the result buffer.
+	ptx = "
+	.version 7.1
+	.target sm_61
+	.address_size 64
+
+	.visible .entry ExpressionProcessing(
+	.param .u64 param_1)
+	{
+		.reg .u64   %parameter<1>;
+		.reg .u64   %i<1>;
+
+		ld.param.u64   %i0, [param_1];
+		cvta.to.global.u64   %parameter0, %i0;
+
+		st.global.f32  [%parameter0], 10.0;
+		ret;
+	}"
+
+	# Link the PTX into a loadable image and look up the kernel entry point.
+	linker = CuLink()
+	add_data!(linker, "ExpressionProcessing", ptx)
+
+	image = complete(linker)
+
+	mod = CuModule(image)
+	func = CuFunction(mod, "ExpressionProcessing")
+
+	# Zero-initialise the buffer so that a kernel which does not write anything is
+	# detected by the assertion below instead of reading back uninitialised memory.
+	cudaResults = CUDA.zeros(Float32, 1)
+
+	# The kernel writes exactly one element, therefore a single thread is sufficient.
+	cudacall(func, Tuple{CuPtr{Float32}}, cudaResults; threads=1, blocks=1)
+
+	# Copying the buffer back to the host waits for the kernel to finish.
+	results = Array(cudaResults)
+	println(results)
+
+	@test results[1] ≈ 10.0f0
+end
+
+
+# TODO: University setup at 10.20.1.7