summaryrefslogtreecommitdiffstats
path: root/advection.cu
blob: 7851ecd5f77750f6bf8c0af9afb4f8be6aa75333 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// advection.cu
#include <stdio.h>
#include <assert.h>
#include <cuda.h>

__global__ void advect(float *uold, float *unew, float cs, float dx, float dt, int N)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx==0) {
        unew[idx] = uold[idx] - dt * cs * (  uold[idx] - uold[N-1] ) / dx;
    } else {
        unew[idx] = uold[idx] - dt * cs * (  uold[idx] - uold[idx-1] ) / dx;
    }
    printf("(%i,%.3f,%.3f)\n", idx, uold[idx], unew[idx]);
}

int main(void)
{
    float *u_h;                 // pointer to host memory
    float *uold_d, *unew_d;     // pointers to device memory
    
    float cs = 1.0;     // bulk velocity
    float dt = 0.1;     // timestep
    float dx = 0.2;     // space step
    
    int N = 32;         // cells

    int i;              // counters

    // check CFL condition
    assert(cs*dt < dx);

    // allocate host memory
    u_h = (float *)malloc(sizeof(float)*N);

    // allocate device memory
    cudaMalloc((void **) &uold_d, sizeof(float)*N);
    cudaMalloc((void **) &unew_d, sizeof(float)*N);

    // fill initial array on host
    for (i=0; i<N; i++) {
        u_h[i] = 0.f;
    }

    // put a square in
    u_h[N/2-2] = 1.f;
    u_h[N/2-1] = 1.f;
    u_h[N/2] = 1.f;
    u_h[N/2+1] = 1.f;
    u_h[N/2+2] = 1.f;

    // copy to device
    cudaMemcpy(uold_d, u_h, sizeof(float)*N, cudaMemcpyHostToDevice);

    // launch configuration
    int blockSize = N;
    int nBlocks = 1;

    // run simulation
    for(i=0; i<10; i++) {
        printf("*** %i\n", i);
        advect <<< nBlocks, blockSize >>> (uold_d, unew_d, cs, dx, dt, N);
        cudaDeviceSynchronize();
        printf("\n");
        cudaMemcpy(uold_d, unew_d, sizeof(float)*N, cudaMemcpyDeviceToDevice);
    }
    printf("\n");
    
    // copy back
    cudaMemcpy(u_h, unew_d, sizeof(float)*N, cudaMemcpyDeviceToHost);

    // dump output
    for (i=0; i<N; i++) {
        printf("%.3f\n", u_h[i]);
    }

    // cleanup    
    free(u_h); cudaFree(unew_d); cudaFree(uold_d);
}