// advection.cu
//
// NOTE(review): this file arrived with all of its text on one line and with
// everything between angle brackets stripped. The three header names below
// were lost; they are restored as the headers required by the calls that are
// visible in the file (printf, malloc, assert) -- confirm against the original.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * advect -- one explicit time step of 1D linear advection.
 *
 * For every cell i in [0, N):
 *
 *     unew[i] = uold[i] - dt * cs * (uold[i] - uold[left]) / dx
 *
 * where left = i - 1, except that cell 0 uses cell N-1 as its left
 * neighbour (periodic wrap). This is a backward-difference update, i.e.
 * upwind when cs > 0.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per cell. The launch must
 * supply at least N threads (gridDim.x * blockDim.x >= N); surplus threads
 * exit without touching memory. No shared memory is used.
 *
 * Parameters:
 *   uold  device array of N cell values at the current step (read only here)
 *   unew  device array of N cell values receiving the next step
 *   cs    advection velocity
 *   dx    cell width
 *   dt    time step
 *   N     number of cells; nothing is done when N <= 0
 *
 * uold and unew are expected to be distinct buffers: each thread reads its
 * neighbour's old value, so an in-place call would race with the thread that
 * overwrites that neighbour.
 *
 * Side effect: every in-range thread prints "(idx,old,new)" through device
 * printf. Lines from different threads may appear in any order.
 */
__global__ void advect(float *uold, float *unew, float cs, float dx, float dt, int N)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard the grid tail: a launch rounded up past N (or a call with
    // N <= 0) must not read or write outside the arrays.
    if (idx >= N) {
        return;
    }

    // Left neighbour, wrapping cell 0 around to the last cell.
    int left = (idx == 0) ? (N - 1) : (idx - 1);

    unew[idx] = uold[idx] - dt * cs * ( uold[idx] - uold[left] ) / dx;

    printf("(%i,%.3f,%.3f)\n", idx, uold[idx], unew[idx]);
}

// NOTE(review): main() is kept below exactly as it survived extraction and is
// disabled so that this file still compiles. Two spans are missing and cannot
// be recovered from the text at hand:
//   1. from the condition of the "fill initial array" loop up to the kernel
//      launch configuration (initial values, any host-to-device copy, the
//      time-step loop header and the launch of the kernel), and
//   2. everything after the start of the "dump output" loop.
// Restore both spans from the original source before building an executable.
// When doing so, check every CUDA runtime return code, call
// cudaGetLastError() after the launch, and free u_h, uold_d and unew_d.
#if 0
int main(void) {
  float *u_h; // pointer to host memory
  float *uold_d, *unew_d; // pointers to device memory
  float cs = 1.0; // bulk velocity
  float dt = 0.1; // timestep
  float dx = 0.2; // space step
  int N = 32; // cells
  int i; // counters
  // check CFL condition
  assert(cs*dt < dx);
  // allocate host memory
  u_h = (float *)malloc(sizeof(float)*N);
  // allocate device memory
  cudaMalloc((void **) &uold_d, sizeof(float)*N);
  cudaMalloc((void **) &unew_d, sizeof(float)*N);
  // fill initial array on host
  for (i=0; i>>> (uold_d, unew_d, cs, dx, dt, N);
  cudaDeviceSynchronize();
  printf("\n");
  cudaMemcpy(uold_d, unew_d, sizeof(float)*N, cudaMemcpyDeviceToDevice);
  }
  printf("\n");
  // copy back
  cudaMemcpy(u_h, unew_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
  // dump output
  for (i=0; i
#endif