#
# numbapig.py
# Neil Gershenfeld 3/1/20
# calculation of pi by a Numba CUDA sum
# pi = 3.14159265358979323846
#
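#
# each term is a partial fraction of the Leibniz series:
#    0.5/((j-0.75)*(j-0.25)) = 1/(j-0.75) - 1/(j-0.25)
# so summing over j = 1,2,3,... gives 4*(1 - 1/3 + 1/5 - 1/7 + ...) = pi
#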
from numba import cuda
import numpy as np
import time
    
block_size = 1024 # threads per block
grid_size = 1024 # blocks per grid
nloop = 1000000 # series terms summed by each thread
npts = grid_size*block_size # total number of device threads (one partial sum each)
    
@cuda.jit
def init(arr,nloop):
   # thread i accumulates its own block of nloop consecutive series terms
   i = cuda.blockIdx.x*cuda.blockDim.x+cuda.threadIdx.x
   start = nloop*i+1
   end = nloop*(i+1)+1
   arr[i] = 0
   for j in range(start,end):
      arr[i] += 0.5/((j-0.75)*(j-0.25))
    
@cuda.jit
def reduce_sum(arr,half):
   # add the upper half of the active range onto the lower half
   i = cuda.blockIdx.x*cuda.blockDim.x+cuda.threadIdx.x
   if i < half:
      arr[i] += arr[i+half]

def reduce(arr,npts):
   # sum the array on the device by repeated halving; assumes npts is a power of two
   half = npts >> 1
   while True:
      reduce_sum[grid_size,block_size](arr,half)
      half = half >> 1
      if half == 0:
         return
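# note: reduce() launches the full grid on every pass and lets threads with
# i >= half do nothing; each pass folds the upper half of the array onto the
# lower half, e.g. [a0 a1 a2 a3] -> [a0+a2 a1+a3 - -] -> [a0+a1+a2+a3 - - -],
# leaving the total in arr[0]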
    
darr = cuda.device_array(npts,np.float64) # one partial sum per thread
init[grid_size,block_size](darr,nloop) # warm-up call to compile the kernel
reduce(darr,npts) # warm-up call to compile the kernel
start_time = time.time() # start of the timed run
init[grid_size,block_size](darr,nloop)
reduce(darr,npts)
cuda.synchronize() # kernel launches are asynchronous; wait before stopping the clock
end_time = time.time()
mflops = npts*nloop*5.0/(1.0e6*(end_time-start_time)) # ~5 flops per series term
harr = darr.copy_to_host()
print("npts = %d, nloop = %d, pi = %f"%(npts,nloop,harr[0]))
print("time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
    