Skip to content
Snippets Groups Projects
Commit d913aa21 authored by Neil Gershenfeld
Browse files

wip

parent d6068c8f
Branches
No related tags found
No related merge requests found
Pipeline #16409 passed
......@@ -35,7 +35,7 @@ void reduce(double *arr) {
uint64_t len = npts >> 1;
while (1) {
reduce_sum<<<blocks,threads>>>(arr,len);
-cudaCheck("reduce");
+cudaCheck("reduce_sum");
len = len >> 1;
if (len == 0)
return;
......@@ -50,10 +50,12 @@ int main(void) {
cudaCheck("init");
reduce(darr);
cudaDeviceSynchronize();
+cudaCheck("cudaDeviceSynchronize");
auto tend = std::chrono::high_resolution_clock::now();
auto dt = std::chrono::duration_cast<std::chrono::microseconds>(tend-tstart).count();
auto mflops = npts*nloop*5.0/dt;
cudaMemcpy(harr,darr,8,cudaMemcpyDeviceToHost);
+cudaCheck("cudaMemcpy");
printf("npts = %ld, nloop = %ld, pi = %lf\n",npts,nloop,harr[0]);
printf("time = %f, estimated MFlops = %f\n",1e-6*dt,mflops);
cudaFree(darr);
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment