Skip to content
Snippets Groups Projects
Commit d913aa21 authored by Neil Gershenfeld
Browse files

wip

parent d6068c8f
Branches
No related tags found
No related merge requests found
Pipeline #16409 passed
...@@ -35,7 +35,7 @@ void reduce(double *arr) { ...@@ -35,7 +35,7 @@ void reduce(double *arr) {
uint64_t len = npts >> 1; uint64_t len = npts >> 1;
while (1) { while (1) {
reduce_sum<<<blocks,threads>>>(arr,len); reduce_sum<<<blocks,threads>>>(arr,len);
cudaCheck("reduce"); cudaCheck("reduce_sum");
len = len >> 1; len = len >> 1;
if (len == 0) if (len == 0)
return; return;
...@@ -50,10 +50,12 @@ int main(void) { ...@@ -50,10 +50,12 @@ int main(void) {
cudaCheck("init"); cudaCheck("init");
reduce(darr); reduce(darr);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
cudaCheck("cudaDeviceSynchronize");
auto tend = std::chrono::high_resolution_clock::now(); auto tend = std::chrono::high_resolution_clock::now();
auto dt = std::chrono::duration_cast<std::chrono::microseconds>(tend-tstart).count(); auto dt = std::chrono::duration_cast<std::chrono::microseconds>(tend-tstart).count();
auto mflops = npts*nloop*5.0/dt; auto mflops = npts*nloop*5.0/dt;
cudaMemcpy(harr,darr,8,cudaMemcpyDeviceToHost); cudaMemcpy(harr,darr,8,cudaMemcpyDeviceToHost);
cudaCheck("cudaMemcpy");
printf("npts = %ld, nloop = %ld, pi = %lf\n",npts,nloop,harr[0]); printf("npts = %ld, nloop = %ld, pi = %lf\n",npts,nloop,harr[0]);
printf("time = %f, estimated MFlops = %f\n",1e-6*dt,mflops); printf("time = %f, estimated MFlops = %f\n",1e-6*dt,mflops);
cudaFree(darr); cudaFree(darr);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment