From 73d4808e876abdcaa27691cf6c6bb6fde2b7ee37 Mon Sep 17 00:00:00 2001
From: Neil Gershenfeld <gersh@cba.mit.edu>
Date: Sat, 29 Feb 2020 17:12:37 -0500
Subject: [PATCH] wip: add cuda.synchronize() after kernel launches in numbapig timing code
---
Python/numbapig.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/Python/numbapig.py b/Python/numbapig.py
index f8a562e..71c8eb8 100644
--- a/Python/numbapig.py
+++ b/Python/numbapig.py
@@ -36,6 +36,7 @@ def CUDA_reduce(arr,NPTS):
len = NPTS >> 1
while (1):
CUDA_sum[grid_size,block_size](arr,len)
+ cuda.synchronize()
len = len >> 1
if (len == 0):
return
@@ -64,6 +65,7 @@ CUDA_result(arr,result)
#
start_time = time.time()
init[grid_size,block_size](arr)
+cuda.synchronize()
end_time = time.time()
mflops = NPTS*4.0/(1.0e6*(end_time-start_time))
print("CUDA kernel array calculation:")
@@ -83,6 +85,7 @@ print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
start_time = time.time()
init[grid_size,block_size](arr)
+cuda.synchronize()
pi = Numba_reduce(arr)
end_time = time.time()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
@@ -104,8 +107,10 @@ print(" time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
#
start_time = time.time()
init[grid_size,block_size](arr)
+cuda.synchronize()
CUDA_reduce(arr,NPTS)
CUDA_result(arr,result)
+cuda.synchronize()
end_time = time.time()
pi = result.copy_to_host()
mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
--
GitLab