Select Git revision
HIDReportViewer.txt
-
Dean Camera authored
Minor documentation adjustments to the demos, projects and bootloaders to ensure correct formatting of application device support.
Dean Camera authoredMinor documentation adjustments to the demos, projects and bootloaders to ensure correct formatting of application device support.
numbapig.py NaN GiB
#
# numbapig.py
# Neil Gershenfeld 3/1/20
# calculation of pi by a Numba CUDA sum
# pi = 3.14159265358979323846
#
from numba import cuda
import numpy as np
import time
block_size = 1024
grid_size = 1024
nloop = 1000000
npts = grid_size*block_size
@cuda.jit
def init(arr,nloop):
i = cuda.blockIdx.x*cuda.blockDim.x+cuda.threadIdx.x;
start = nloop*i+1;
end = nloop*(i+1)+1;
arr[i] = 0;
for j in range(start,end):
arr[i] += 0.5/((j-0.75)*(j-0.25));
@cuda.jit
def reduce_sum(arr,len):
i = cuda.blockIdx.x*cuda.blockDim.x+cuda.threadIdx.x;
if (i < len):
arr[i] += arr[i+len]
def reduce(arr,npts):
len = npts >> 1
while (1):
reduce_sum[grid_size,block_size](arr,len)
len = len >> 1
if (len == 0):
return
darr = cuda.device_array(npts,np.float64)
init[grid_size,block_size](darr,nloop) # compile kernel
reduce(darr,npts) # compile kernel
start_time = time.time()
init[grid_size,block_size](darr,nloop)
reduce(darr,npts)
cuda.synchronize()
end_time = time.time()
mflops = npts*nloop*5.0/(1.0e6*(end_time-start_time))
harr = darr.copy_to_host()
print("npts = %d, nloop = %d, pi = %f"%(npts,nloop,harr[0]))
print("time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))