diff --git a/CUDA/cudapip.cu b/CUDA/cudapip.cu new file mode 100755 index 0000000000000000000000000000000000000000..c851d0eb26c3af2a636cbf621168304ffbf7167f --- /dev/null +++ b/CUDA/cudapip.cu @@ -0,0 +1,61 @@ +// +// cudapip.cu +// Neil Gershenfeld 7/14/20 +// calculation of pi by a CUDA multi-GPU peer sum +// pi = 3.14159265358979323846 +// +#include <iostream> +#include <chrono> +#include <cstdint> +uint64_t blocks = 1024; +uint64_t threads = 1024; +uint64_t nloop = 1000000; +uint64_t npts = blocks*threads; +using namespace std; +__global__ void init(double *arr,uint64_t nloop,uint64_t npts,int ngpus,int index) { + uint64_t i = blockIdx.x*blockDim.x+threadIdx.x; + uint64_t start = nloop*i+npts*nloop*index+1; + uint64_t end = nloop*(i+1)+npts*nloop*index+1; + arr[i+index*npts] = 0; + for (uint64_t j = start; j < end; ++j) + arr[i+index*npts] += 0.5/((j-0.75)*(j-0.25)); + } +void cudaCheck(string msg) { + cudaError err; + err = cudaGetLastError(); + if (cudaSuccess != err) + cerr << msg << ": " << cudaGetErrorString(err) << endl; + } +int main(void) { + double *arr,*darr; + int ngpus; + cudaGetDeviceCount(&ngpus); + arr = new double[ngpus*npts]; + cudaSetDevice(0); + cudaMalloc(&darr,ngpus*npts*sizeof(double)); + for (int i = 1; i < ngpus; ++i) { + cudaSetDevice(i); + cudaDeviceEnablePeerAccess(0,0); + cudaCheck("peer access"); + } + auto tstart = chrono::high_resolution_clock::now(); + for (int i = 0; i < ngpus; ++i) { + cudaSetDevice(i); + init<<<blocks,threads>>>(darr,nloop,npts,ngpus,i); + } + for (int i = 1; i < ngpus; ++i) { + cudaSetDevice(i); + cudaDeviceSynchronize(); + } + cudaSetDevice(0); + cudaMemcpy(arr,darr,ngpus*npts*sizeof(double),cudaMemcpyDeviceToHost); + double pi = 0; + for (int i = 0; i < ngpus*npts; ++i) + pi += arr[i]; + auto tend = chrono::high_resolution_clock::now(); + auto dt = chrono::duration_cast<std::chrono::microseconds>(tend-tstart).count(); + auto gflops = npts*nloop*ngpus*5.0/dt/1e3; + std::cout << "npts: " << npts << " nloop: " << nloop << " ngpus: " << ngpus << " pi: " << pi << '\n'; + std::cout << "time: " << 1e-6*dt << " estimated GFlops: " << gflops << '\n'; + return 0; + }