diff options
Diffstat (limited to 'functionTest.cu')
-rw-r--r-- | functionTest.cu | 47 |
1 files changed, 47 insertions, 0 deletions
// functionTest.cu
//
// Minimal test of calling a __device__ function from a kernel while handing
// it a block-local __shared__ array. The host fills a small array, copies it
// to the device, and each device thread prints its own element (and its left
// neighbour's element, when it has one) from shared memory.

#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>

// Number of floats in the shared tile; main() uses the same value for the
// element count and the block size so the three can never disagree.
#define TILE_SIZE 4

// Evaluate a CUDA runtime call and abort with file/line and the runtime's
// error string if it did not return cudaSuccess.
#define CUDA_CHECK(call)                                                  \
    do {                                                                  \
        cudaError_t err_ = (call);                                        \
        if (err_ != cudaSuccess) {                                        \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
                    cudaGetErrorString(err_));                            \
            exit(EXIT_FAILURE);                                           \
        }                                                                 \
    } while (0)

// Print element `idx` of the shared tile and, for idx > 0, the element to
// its left as well.
//
//   idx  index into x_s; the caller must guarantee 0 <= idx < TILE_SIZE
//   x_s  the calling block's shared tile (read-only here)
__device__ void f2(int idx, const float x_s[TILE_SIZE])
{
    if (idx > 0) {
        printf("%i %.2e %.2e\n", idx, x_s[idx], x_s[idx-1]);
    } else {
        printf("%i %.2e\n", idx, x_s[idx]);
    }
}

// Stage x_d into a shared tile, then let every participating thread report
// its element through f2().
//
// Launch layout: intended for a single 1-D block of at most TILE_SIZE
// threads. Threads whose global index is >= n or >= TILE_SIZE neither touch
// memory nor print, so an oversized launch is harmless instead of reading
// and writing out of bounds.
//
//   x_d  device pointer to at least n floats
//   n    number of valid elements in x_d
__global__ void f1(const float *x_d, int n)
{
    __shared__ float x_s[TILE_SIZE];

    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    const bool in_range = (idx < n) && (idx < TILE_SIZE);

    if (in_range) {
        x_s[idx] = x_d[idx];
    }

    // The barrier sits outside the guard so that every thread of the block
    // reaches it; f2() reads a neighbour's slot, so the writes above must be
    // visible block-wide first.
    __syncthreads();

    if (in_range) {
        f2(idx, x_s);
    }
}

// Host driver: fill and print the host array, copy it to the device, run the
// kernel, wait for it, and release both buffers.
int main()
{
    const int nx = TILE_SIZE;
    const size_t nbytes = sizeof(float) * nx;
    float *x_h = NULL;
    float *x_d = NULL;

    // allocate, copy
    CUDA_CHECK(cudaSetDevice(0));
    x_h = (float *) malloc(nbytes);
    if (x_h == NULL) {
        fprintf(stderr, "host allocation of %lu bytes failed\n",
                (unsigned long) nbytes);
        return EXIT_FAILURE;
    }
    for (int ii = 0; ii < nx; ii++) {
        x_h[ii] = (float) ii;
        printf("%i %.2e\n", ii, x_h[ii]);
    }
    printf("\n");
    CUDA_CHECK(cudaMalloc((void**) &x_d, nbytes));
    CUDA_CHECK(cudaMemcpy(x_d, x_h, nbytes, cudaMemcpyHostToDevice));

    // run kernel
    f1 <<< 1, nx >>> (x_d, nx);
    // A launch returns no status of its own: fetch configuration errors here.
    CUDA_CHECK(cudaGetLastError());
    // Wait for the kernel. This both surfaces execution errors and flushes
    // the device-side printf buffer, which is not flushed automatically when
    // the program exits.
    CUDA_CHECK(cudaDeviceSynchronize());

    // free
    CUDA_CHECK(cudaFree(x_d));
    free(x_h);
    return 0;
}