Skip to content
Snippets Groups Projects
numbapig.py 2.33 KiB
Newer Older
  • Learn to ignore specific revisions
  • Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    #
    # numbapig.py
    # Neil Gershenfeld 2/9/20
    # calculation of pi by a Numba CUDA sum
    # pi = 3.14159265358979323846 
    #
    from numba import cuda
    import numpy as np
    import time
    #
    # problem size
    #
    block_size = 2**10
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    grid_size = 2**21
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    NPTS = grid_size*block_size
    #
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    # kernels and functions
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    #
    @cuda.jit
    def init(arr):
        i = 1+cuda.grid(1)
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
        arr[i-1] = 0.5/((i-0.75)*(i-0.25))
    #
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    @cuda.reduce
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    def Numba_reduce(a,b):
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
        return a+b
    #
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    @cuda.jit
    def CUDA_sum(arr,len):
        i = cuda.grid(1)
        if (i < len):
           arr[i] += arr[i+len]
    #
    def CUDA_reduce(arr,NPTS):
       len = NPTS >> 1
       while (1):
          CUDA_sum[grid_size,block_size](arr,len)
          len = len >> 1
          if (len == 0):
             return
    #
    # device array
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    #
    arr = cuda.device_array(NPTS,np.float32)
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    #
    # compile kernels
    #
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    init[grid_size,block_size](arr)
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    pi = Numba_reduce(arr)
    CUDA_reduce(arr,NPTS)
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    #
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    # CUDA kernel array calculation
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    #
    start_time = time.time()
    init[grid_size,block_size](arr)
    end_time = time.time()
    mflops = NPTS*4.0/(1.0e6*(end_time-start_time))
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    print("CUDA kernel array calculation:")
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
    #
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    # Numba reduce
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    #
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    init[grid_size,block_size](arr)
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    start_time = time.time()
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    pi = Numba_reduce(arr)
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    end_time = time.time()
    mflops = NPTS*1.0/(1.0e6*(end_time-start_time))
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    print("Numba reduce:")
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
    #
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    # both with Numba reduce
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    #
    start_time = time.time()
    init[grid_size,block_size](arr)
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    pi = Numba_reduce(arr)
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    end_time = time.time()
    mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    print("both with Numba reduce:")
    
    Neil Gershenfeld's avatar
    wip
    Neil Gershenfeld committed
    print("   NPTS = %d, pi = %f"%(NPTS,pi))
    print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
    
    Neil Gershenfeld's avatar
    Neil Gershenfeld committed
    #
    # CUDA kernel reduction
    #
    init[grid_size,block_size](arr)
    start_time = time.time()
    CUDA_reduce(arr,NPTS)
    end_time = time.time()
    mflops = NPTS*1.0/(1.0e6*(end_time-start_time))
    print("CUDA kernel reduction:")
    print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
    #
    # both with CUDA kernel reduction
    #
    start_time = time.time()
    init[grid_size,block_size](arr)
    CUDA_reduce(arr,NPTS)
    end_time = time.time()
    darr = arr.copy_to_host()
    mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
    print("both with CUDA kernel reduction:")
    print("   NPTS = %d, pi = %f"%(NPTS,darr[0]))
    print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))