summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVolker Hoffmann <volker@cheleb.net>2014-06-18 15:23:30 +0200
committerVolker Hoffmann <volker@cheleb.net>2014-06-18 15:23:30 +0200
commite0e74374c61a60c24001a7593ffe2a68feb2da80 (patch)
tree6b9a9168a966c8e2a8c979dda41aeefbb8f734ec
parent55d4c30fb62b440a37d9973813f4220483011138 (diff)
increment array test
-rw-r--r--incrementArrays.cu69
1 files changed, 69 insertions, 0 deletions
diff --git a/incrementArrays.cu b/incrementArrays.cu
new file mode 100644
index 0000000..3f727b6
--- /dev/null
+++ b/incrementArrays.cu
@@ -0,0 +1,69 @@
+// incrementArrays.cu
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <cuda.h>
+
+// Host-side reference implementation: adds 1.0f, in place, to each of the
+// first N elements of a. Nothing is touched when N <= 0.
+void incrementArrayOnHost(float *a, int N)
+{
+    float *cursor = a;
+    // Count down the remaining elements while walking the array front to back.
+    for (int remaining = N; remaining > 0; --remaining) {
+        *cursor += 1.0f;
+        ++cursor;
+    }
+}
+
+// Device kernel: adds 1.0f, in place, to each of the first N elements of a.
+//
+// Launch layout: 1D grid of 1D blocks (only the .x components are read).
+// Each thread starts at its global index and then advances by the total number
+// of launched threads, so a grid with fewer than N threads still covers every
+// element, and threads whose start index is >= N do nothing.
+// Uses no shared memory. a must be dereferenceable from device code and hold
+// at least N floats; N <= 0 is a no-op.
+__global__ void incrementArrayOnDevice(float *a, int N)
+{
+    // 64-bit indices: blockIdx.x * blockDim.x cannot wrap in 32 bits, and
+    // idx += stride cannot overflow when N is close to INT_MAX.
+    const long long stride = (long long)gridDim.x * blockDim.x;
+    for (long long idx = (long long)blockIdx.x * blockDim.x + threadIdx.x;
+         idx < N; idx += stride) {
+        a[idx] = a[idx] + 1.f;
+    }
+}
+
+// Reports a failed CUDA runtime call on stderr.
+// Returns 1 when err is not cudaSuccess, 0 otherwise.
+static int cudaFailed(cudaError_t err, const char *what)
+{
+    if (err == cudaSuccess) {
+        return 0;
+    }
+    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
+    return 1;
+}
+
+// Runs the host-vs-device increment comparison on already allocated buffers.
+// a_h, b_h: host arrays of N floats; a_d: device array of N floats.
+// Prints the initial values, a blank line, then the values read back from the
+// device. Returns 0 when every device result equals the host result, and 1 on
+// any CUDA error or mismatch.
+static int runIncrementTest(float *a_h, float *b_h, float *a_d, int N)
+{
+    const size_t size = N * sizeof(float);
+    int i;
+    int mismatches = 0;
+
+    // initialization of host data
+    for (i = 0; i < N; i++) {
+        a_h[i] = (float) i;
+        // Print the stored float: handing the int i to "%.2f" is undefined behavior.
+        printf("%.2f\n", a_h[i]);
+    }
+    printf("\n");
+
+    // copy data from host to device
+    if (cudaFailed(cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice),
+                   "cudaMemcpy (host to device)")) {
+        return 1;
+    }
+
+    // do calculation on host
+    incrementArrayOnHost(a_h, N);
+
+    // do calculation on device
+    // 01 - compute execution configuration: ceil(N / blockSize) blocks, so a
+    //      partially filled last block covers the tail of the array
+    const int blockSize = 4;
+    const int nBlocks = (N + blockSize - 1) / blockSize;
+    // 02 - call incrementArrayOnDevice kernel
+    incrementArrayOnDevice <<< nBlocks, blockSize >>> (a_d, N);
+    // A launch returns no status; configuration errors are reported through
+    // cudaGetLastError().
+    if (cudaFailed(cudaGetLastError(), "incrementArrayOnDevice launch")) {
+        return 1;
+    }
+
+    // Retrieve result from device and store in b_h
+    // (dst, src, count, kind)
+    if (cudaFailed(cudaMemcpy(b_h, a_d, size, cudaMemcpyDeviceToHost),
+                   "cudaMemcpy (device to host)")) {
+        return 1;
+    }
+
+    // check result with an explicit comparison, so the check is still performed
+    // when assertions are compiled out (NDEBUG)
+    for (i = 0; i < N; i++) {
+        printf("%.2f\n", b_h[i]);
+        if (a_h[i] != b_h[i]) {
+            fprintf(stderr, "mismatch at index %d: host %.2f, device %.2f\n",
+                    i, a_h[i], b_h[i]);
+            mismatches++;
+        }
+    }
+
+    return mismatches == 0 ? 0 : 1;
+}
+
+// Entry point: increments a 10-element array on the host and on the device
+// and compares the two results.
+// Returns EXIT_SUCCESS when they agree, EXIT_FAILURE on allocation failure,
+// CUDA error, or mismatch.
+int main(void)
+{
+    const int N = 10;
+    const size_t size = N * sizeof(float);
+    float *a_d = NULL; // pointer to device memory
+    int status = EXIT_FAILURE;
+
+    // allocate arrays on host
+    float *a_h = (float *)malloc(size);
+    float *b_h = (float *)malloc(size);
+
+    if (a_h == NULL || b_h == NULL) {
+        fprintf(stderr, "host allocation of %lu bytes failed\n",
+                (unsigned long)size);
+    } else if (!cudaFailed(cudaMalloc((void **) &a_d, size), "cudaMalloc")) {
+        // allocate array on device succeeded: run the comparison
+        if (runIncrementTest(a_h, b_h, a_d, N) == 0) {
+            status = EXIT_SUCCESS;
+        }
+        // release the device buffer only when its allocation succeeded
+        if (cudaFailed(cudaFree(a_d), "cudaFree")) {
+            status = EXIT_FAILURE;
+        }
+    }
+
+    // cleanup; free(NULL) is a no-op, so this is safe after a failed malloc
+    free(a_h);
+    free(b_h);
+    return status;
+}
+