diff --git a/Python/numbapig.py b/Python/numbapig.py
new file mode 100644
index 0000000000000000000000000000000000000000..1dc38920a50de6d1cc8f5f8ce883ad4e399806cf
--- /dev/null
+++ b/Python/numbapig.py
@@ -0,0 +1,61 @@
+#
+# numbapig.py
+# Neil Gershenfeld 2/9/20
+# calculation of pi by a Numba CUDA sum
+# pi = 3.14159265358979323846 
+#
+from numba import cuda
+import numpy as np
+import time
+#
+# problem size
+#
+block_size = 2**10
+grid_size = 2**20
+NPTS = grid_size*block_size
+#
+# CUDA kernels
+#
+@cuda.jit
+def init(arr):
+    i = 1+cuda.grid(1)
+    arr[i] = 0.5/((i-0.75)*(i-0.25))
+
+@cuda.reduce
+def sum_reduce(a,b):
+    return a+b
+#
+# compile kernels
+#
+arr = cuda.device_array(NPTS,np.float32)
+init[grid_size,block_size](arr)
+pi = sum_reduce(arr)
+#
+# array calc 
+#
+start_time = time.time()
+init[grid_size,block_size](arr)
+end_time = time.time()
+mflops = NPTS*4.0/(1.0e6*(end_time-start_time))
+print("Numba CUDA array calculation:")
+print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
+#
+# reduction
+#
+start_time = time.time()
+pi = sum_reduce(arr)
+end_time = time.time()
+mflops = NPTS*1.0/(1.0e6*(end_time-start_time))
+print("Numba CUDA reduction:")
+print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))
+#
+# both
+#
+start_time = time.time()
+init[grid_size,block_size](arr)
+pi = sum_reduce(arr)
+end_time = time.time()
+mflops = NPTS*5.0/(1.0e6*(end_time-start_time))
+print("Numba CUDA both:")
+print("   NPTS = %d, pi = %f"%(NPTS,pi))
+print("   time = %f, estimated MFlops = %f"%(end_time-start_time,mflops))