From d913aa21b33212443c3be360ad99c5d2d0484f1d Mon Sep 17 00:00:00 2001
From: Neil Gershenfeld <gersh@cba.mit.edu>
Date: Sun, 24 Oct 2021 16:28:15 -0400
Subject: [PATCH] wip: check errors after cudaDeviceSynchronize and cudaMemcpy; label kernel check "reduce_sum"

---
 CUDA/cudapi.cu | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/CUDA/cudapi.cu b/CUDA/cudapi.cu
index b755b84..9870945 100644
--- a/CUDA/cudapi.cu
+++ b/CUDA/cudapi.cu
@@ -35,7 +35,7 @@ void reduce(double *arr) {
    uint64_t len = npts >> 1;
    while (1) {
       reduce_sum<<<blocks,threads>>>(arr,len);
-      cudaCheck("reduce");
+      cudaCheck("reduce_sum");
       len = len >> 1;
       if (len == 0)
          return;
@@ -50,10 +50,12 @@ int main(void) {
    cudaCheck("init");
    reduce(darr);
    cudaDeviceSynchronize();
+   cudaCheck("cudaDeviceSynchronize");
    auto tend = std::chrono::high_resolution_clock::now();        
 	auto dt = std::chrono::duration_cast<std::chrono::microseconds>(tend-tstart).count();
    auto mflops = npts*nloop*5.0/dt;
    cudaMemcpy(harr,darr,8,cudaMemcpyDeviceToHost);
+   cudaCheck("cudaMemcpy");
    printf("npts = %ld, nloop = %ld, pi = %lf\n",npts,nloop,harr[0]);
    printf("time = %f, estimated MFlops = %f\n",1e-6*dt,mflops);
    cudaFree(darr);
-- 
GitLab