1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
#include <stdio.h>
#include <assert.h>
#include <cuda.h>
int main(void)
{
float *a_h, *b_h; // pointers to host memory
float *a_d, *b_d; // pointers to device memory
int N = 14;
int i;
// allocate arrays on host
a_h = (float *)malloc(sizeof(float)*N);
b_h = (float *)malloc(sizeof(float)*N);
// allocate arrays on device
cudaMalloc((void **) &a_d, sizeof(float)*N);
cudaMalloc((void **) &b_d, sizeof(float)*N);
// initialize host data
for (i=0; i<N; i++) {
a_h[i] = 10.f+i;
b_h[i] = 0.f;
}
// send data from host to device: a_h to a_d
cudaMemcpy(a_d, a_h, sizeof(float)*N, cudaMemcpyHostToDevice);
// copy data within device: a_d to b_d
cudaMemcpy(b_d, a_d, sizeof(float)*N, cudaMemcpyDeviceToDevice);
// retrieve data from device: b_d to b_h
cudaMemcpy(b_h, b_d, sizeof(float)*N, cudaMemcpyDeviceToHost);
// check result
for (i=0; i<N; i++) {
printf("%.2f\n", a_h[i]);
printf("%.2f\n\n", b_h[i]);
assert(a_h[i] == b_h[i]);
}
// cleanup
free(a_h); free(b_h);
cudaFree(a_d); cudaFree(b_d);
}
|