Skip to content
Snippets Groups Projects
Select Git revision
  • d935c24d2d76363cff56c8cea7c0094339adf1b1
  • master default protected
2 results

numbapig.py

Blame
  • numbapig.py NaN GiB
    #
    # numbapig.py
    # Neil Gershenfeld 3/1/20
    # calculation of pi by a Numba CUDA sum
    # pi = 3.14159265358979323846 
    #
    from numba import cuda
    import numpy as np
    import time
    block_size = 1024
    grid_size = 1024
    nloop = 1000000
    npts = grid_size*block_size
    @cuda.jit
    def init(arr,nloop):
       i = cuda.blockIdx.x*cuda.blockDim.x+cuda.threadIdx.x;
       start = nloop*i+1;
       end = nloop*(i+1)+1;
       arr[i] = 0;
       for j in range(start,end):
          arr[i] += 0.5/((j-0.75)*(j-0.25));
    @cuda.jit
    def reduce_sum(arr,len):
       i = cuda.blockIdx.x*cuda.blockDim.x+cuda.threadIdx.x;
       if (i < len):
          arr[i] += arr[i+len]
    def reduce(arr,npts):
       len = npts >> 1
       while (1):
          reduce_sum[grid_size,block_size](arr,len)
          len = len >> 1
          if (len == 0):
             return
    darr = cuda.device_array(npts,np.float64)
    init[grid_size,block_size](darr,nloop) # compile kernel
    reduce(darr,npts) # compile kernel
    start_time = time.time()
    init[grid_size,block_size](darr,nloop)
    reduce(darr,npts)
    cuda.synchronize()
    end_time = time.time()
    mflops = npts*nloop*5.0/(1.0e6*(end_time-start_time))
    harr = darr.copy_to_host()
    print("npts = %d, nloop = %d, pi = %f"%(npts,nloop,harr[0]))
    print("time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))